feat: Recipe import progress (#7252)
@@ -1,3 +1,5 @@
+from collections.abc import Awaitable, Callable
+
 from mealie.core.root_logger import get_logger
 from mealie.lang.providers import Translator
 from mealie.schema.recipe.recipe import Recipe
@@ -37,25 +39,34 @@ class RecipeScraper:
         self.translator = translator
         self.logger = get_logger()
 
-    async def scrape(self, url: str, html: str | None = None) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
+    async def scrape(
+        self,
+        url: str,
+        html: str | None = None,
+        on_progress: Callable[[str], Awaitable[None]] | None = None,
+    ) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
         """
         Scrapes a recipe from the web.
         Skips the network request if `html` is provided.
+        Optionally reports progress back via `on_progress`.
         """
 
-        raw_html = html or await safe_scrape_html(url)
-        if not raw_html:
-            return None, None
+        if not html:
+            if on_progress:
+                await on_progress(self.translator.t("recipe.create-progress.fetching-webpage"))
+            html = await safe_scrape_html(url)
+            if not html:
+                return None, None
 
         for ScraperClass in self.scrapers:
-            scraper = ScraperClass(url, self.translator, raw_html=raw_html)
+            scraper = ScraperClass(url, self.translator, raw_html=html)
             if not scraper.can_scrape():
                 self.logger.debug(f"Skipping {scraper.__class__.__name__}")
                 continue
 
             try:
-                result = await scraper.parse()
+                result = await scraper.parse(on_progress=on_progress)
             except Exception:
                 self.logger.exception(f"Failed to scrape HTML with {scraper.__class__.__name__}")
                 result = None
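A minimal caller-side sketch of the new hook on RecipeScraper.scrape follows; the logging callback and the wrapper function are illustrative assumptions, not part of this commit.

async def log_progress(message: str) -> None:
    # Any async callable that accepts the translated status string works here.
    print(f"[recipe import] {message}")

async def import_recipe(scraper, url: str):
    # `scraper` is an already-constructed RecipeScraper; construction is omitted.
    recipe, extras = await scraper.scrape(url, on_progress=log_progress)
    return recipe, extras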
@@ -1,3 +1,4 @@
+from collections.abc import Awaitable, Callable
 from enum import StrEnum
 from re import search as regex_search
 from uuid import uuid4
@@ -22,7 +23,10 @@ class ParserErrors(StrEnum):
 
 
 async def create_from_html(
-    url: str, translator: Translator, html: str | None = None
+    url: str,
+    translator: Translator,
+    html: str | None = None,
+    on_progress: Callable[[str], Awaitable[None]] | None = None,
 ) -> tuple[Recipe, ScrapedExtras | None]:
     """Main entry point for generating a recipe from a URL. Pass in a URL and
     a Recipe object will be returned if successful. Optionally pass in the HTML to skip fetching it.
@@ -30,6 +34,7 @@ async def create_from_html(
     Args:
         url (str): a valid string representing a URL
         html (str | None): optional HTML string to skip network request. Defaults to None.
+        on_progress: optional async callable invoked with a status message at each stage.
 
     Returns:
         Recipe: Recipe Object
@@ -42,7 +47,7 @@ async def create_from_html(
         raise HTTPException(status.HTTP_400_BAD_REQUEST, {"details": ParserErrors.BAD_RECIPE_DATA.value})
     url = extracted_url.group(0)
 
-    new_recipe, extras = await scraper.scrape(url, html)
+    new_recipe, extras = await scraper.scrape(url, html, on_progress=on_progress)
 
     if not new_recipe:
         raise HTTPException(status.HTTP_400_BAD_REQUEST, {"details": ParserErrors.BAD_RECIPE_DATA.value})
@@ -54,9 +59,13 @@ async def create_from_html(
     recipe_data_service = RecipeDataService(new_recipe.id)
 
     try:
-        if new_recipe.image and isinstance(new_recipe.image, list):
-            new_recipe.image = new_recipe.image[0]
-        await recipe_data_service.scrape_image(new_recipe.image)  # type: ignore
+        if new_recipe.image:
+            if isinstance(new_recipe.image, list):
+                new_recipe.image = new_recipe.image[0]
+
+            if on_progress:
+                await on_progress(translator.t("recipe.create-progress.downloading-image"))
+            await recipe_data_service.scrape_image(new_recipe.image)  # type: ignore
 
         if new_recipe.name is None:
             new_recipe.name = "Untitled"
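The hunks above thread the same callback through create_from_html. Below is a hedged sketch of how a caller could bridge those messages to a consumer; the queue-based wiring is an assumption, and the commit does not show how Mealie's API layer actually surfaces the messages.

import asyncio

from mealie.services.scraper.scraper import create_from_html  # module path assumed

async def import_with_progress(url: str, translator):
    # Hypothetical bridge: each status message is pushed onto a queue that some
    # other task (an SSE endpoint, a websocket, a task-status record) drains.
    progress: asyncio.Queue[str] = asyncio.Queue()

    async def on_progress(message: str) -> None:
        await progress.put(message)

    recipe, _extras = await create_from_html(url, translator, on_progress=on_progress)
    return recipe, progress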
@@ -3,7 +3,7 @@ import functools
 import re
 import time
 from abc import ABC, abstractmethod
-from collections.abc import Callable
+from collections.abc import Awaitable, Callable
 from pathlib import Path
 from typing import Any, TypedDict
 
@@ -139,7 +139,9 @@ class ABCScraperStrategy(ABC):
     async def get_html(self, url: str) -> str: ...
 
     @abstractmethod
-    async def parse(self) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
+    async def parse(
+        self, on_progress: Callable[[str], Awaitable[None]] | None = None
+    ) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
         """Parse a recipe from a web URL.
 
         Args:
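With the abstract signature changed, every strategy must accept the optional callback even if it ignores it. A bare-bones conforming strategy is sketched below under assumptions: the class name and import path are made up, and the synchronous can_scrape hook mirrors how RecipeScraper uses strategies above rather than anything shown in this commit.

from collections.abc import Awaitable, Callable

from mealie.schema.recipe.recipe import Recipe
from mealie.services.scraper.scraper_strategies import ABCScraperStrategy, ScrapedExtras  # path assumed

class NoOpStrategy(ABCScraperStrategy):
    # Illustrative strategy that satisfies the new parse() signature.

    def can_scrape(self) -> bool:
        return True

    async def get_html(self, url: str) -> str:
        return self.raw_html or ""

    async def parse(
        self, on_progress: Callable[[str], Awaitable[None]] | None = None
    ) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
        if on_progress:
            # Real strategies send translated keys via self.translator.t(...).
            await on_progress("parsing")
        return None, None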
@@ -276,10 +278,14 @@ class RecipeScraperPackage(ABCScraperStrategy):
         self.logger.debug(f"Recipe Scraper [Package] was unable to extract a recipe from {self.url}")
         return None
 
-    async def parse(self):
+    async def parse(self, on_progress: Callable[[str], Awaitable[None]] | None = None):
         """
         Parse a recipe from a given url.
         """
+
+        if on_progress:
+            await on_progress(self.translator.t("recipe.create-progress.extracting-recipe-data"))
+
         scraped_data = await self.scrape_url()
 
         if scraped_data is None:
@@ -376,6 +382,12 @@ class RecipeScraperOpenAI(RecipeScraperPackage):
             self.logger.exception(f"OpenAI was unable to extract a recipe from {url}")
             return ""
 
+    async def parse(self, on_progress: Callable[[str], Awaitable[None]] | None = None):
+        if on_progress:
+            await on_progress(self.translator.t("recipe.create-progress.creating-recipe-with-ai"))
+
+        return await super().parse()
+
 
 class TranscribedAudio(TypedDict):
     audio: Path
@@ -468,10 +480,16 @@ class RecipeScraperOpenAITranscription(ABCScraperStrategy):
     async def get_html(self, url: str) -> str:
         return self.raw_html or ""  # we don't use HTML with this scraper since we use ytdlp
 
-    async def parse(self) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
+    async def parse(
+        self,
+        on_progress: Callable[[str], Awaitable[None]] | None = None,
+    ) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
         openai_service = OpenAIService()
 
         with get_temporary_path() as temp_path:
+            if on_progress:
+                await on_progress(self.translator.t("recipe.create-progress.downloading-video"))
+
             video_data = await asyncio.to_thread(self._download_audio, temp_path)
 
             if video_data["subtitle"]:
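Note that the downloading-video message is sent before the blocking yt-dlp download is pushed to a worker thread with asyncio.to_thread, so the event loop stays free for later awaits, including further progress callbacks. A self-contained illustration of that pattern (the function and path below are made up):

import asyncio
import time

def blocking_download(dest: str) -> str:
    # Stand-in for the yt-dlp download; it blocks the calling thread.
    time.sleep(2)
    return dest

async def demo() -> None:
    # Offload the blocking call so the event loop keeps running while it works.
    result = await asyncio.to_thread(blocking_download, "/tmp/audio.m4a")
    print(result)

asyncio.run(demo())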
@@ -485,6 +503,9 @@ class RecipeScraperOpenAITranscription(ABCScraperStrategy):
                 video_data["transcription"] = ""
 
             if not video_data["transcription"]:
+                if on_progress:
+                    await on_progress(self.translator.t("recipe.create-progress.transcribing-audio-with-ai"))
+
                 try:
                     transcription = await openai_service.transcribe_audio(video_data["audio"])
                 except exceptions.RateLimitError:
@@ -508,6 +529,9 @@ class RecipeScraperOpenAITranscription(ABCScraperStrategy):
                 f"Transcription: {video_data['transcription']}",
             ]
 
+            if on_progress:
+                await on_progress(self.translator.t("recipe.create-progress.creating-recipe-from-transcript-with-ai"))
+
             try:
                 response = await openai_service.get_response(prompt, "\n".join(message_parts), response_schema=OpenAIRecipe)
             except exceptions.RateLimitError:
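The `if on_progress: await on_progress(...)` guard recurs in every strategy. A small helper like the hypothetical one below could centralize it; the commit itself keeps the guard inline.

from collections.abc import Awaitable, Callable

async def notify(on_progress: Callable[[str], Awaitable[None]] | None, message: str) -> None:
    # No-op when no callback was supplied.
    if on_progress:
        await on_progress(message)

# A strategy would then call, e.g.:
#     await notify(on_progress, self.translator.t("recipe.create-progress.downloading-video"))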
@@ -586,10 +610,17 @@ class RecipeScraperOpenGraph(ABCScraperStrategy):
             "extras": [],
         }
 
-    async def parse(self):
+    async def parse(
+        self,
+        on_progress: Callable[[str], Awaitable[None]] | None = None,
+    ):
         """
         Parse a recipe from a given url.
         """
+
+        if on_progress:
+            await on_progress(self.translator.t("recipe.create-progress.creating-recipe-from-webpage-data"))
+
         html = await self.get_html(self.url)
 
         og_data = self.get_recipe_fields(html)