feat: Recipe import progress (#7252)

Michael Genson
2026-03-15 16:44:19 -05:00
committed by GitHub
parent 04dd514e6a
commit c4fdab4e05
20 changed files with 491 additions and 60 deletions

View File

@@ -19,7 +19,17 @@
"yields": "Yields"
},
"and-amount": "and {amount}",
"or-ingredient": "or {ingredient}"
"or-ingredient": "or {ingredient}",
"create-progress": {
"creating-recipe-with-ai": "Creating recipe with AI...",
"creating-recipe-from-transcript-with-ai": "Creating recipe from transcript with AI...",
"creating-recipe-from-webpage-data": "Creating recipe from webpage data...",
"downloading-image": "Downloading image...",
"downloading-video": "Downloading video...",
"extracting-recipe-data": "Extracting recipe data...",
"fetching-webpage": "Fetching webpage...",
"transcribing-audio-with-ai": "Transcribing audio with AI..."
}
},
"mealplan": {
"no-recipes-match-your-rules": "No recipes match your rules"

View File

@@ -33,7 +33,14 @@ class MealieCrudRoute(APIRoute):
async def custom_route_handler(request: Request) -> Response:
with contextlib.suppress(JSONDecodeError):
response = await original_route_handler(request)
response_body = json.loads(response.body)
# StreamingResponse from starlette doesn't have a body attribute, even though it inherits from Response,
# so we may get an AttributeError here even though the type hints suggest otherwise.
try:
response_body = json.loads(response.body)
except AttributeError:
return response
if isinstance(response_body, dict):
if last_modified := response_body.get("updatedAt"):
response.headers["last-modified"] = last_modified

View File

@@ -1,4 +1,6 @@
import asyncio
from collections import defaultdict
from collections.abc import AsyncIterable
from shutil import copyfileobj
from uuid import UUID
@@ -17,6 +19,7 @@ from fastapi import (
status,
)
from fastapi.datastructures import UploadFile
from fastapi.sse import EventSourceResponse, ServerSentEvent
from pydantic import UUID4
from slugify import slugify
@@ -46,7 +49,13 @@ from mealie.schema.recipe.request_helpers import (
)
from mealie.schema.response import PaginationBase, PaginationQuery
from mealie.schema.response.pagination import RecipeSearchQuery
from mealie.schema.response.responses import ErrorResponse, SuccessResponse
from mealie.schema.response.responses import (
ErrorResponse,
SSEDataEventDone,
SSEDataEventMessage,
SSEDataEventStatus,
SuccessResponse,
)
from mealie.services import urls
from mealie.services.event_bus_service.event_types import (
EventOperation,
@@ -130,22 +139,70 @@ class RecipeController(BaseRecipeController):
return "recipe_scrapers was unable to scrape this URL"
@router.post("/create/html-or-json", status_code=201)
async def create_recipe_from_html_or_json(self, req: ScrapeRecipeData):
@router.post("/create/html-or-json", status_code=201, response_model=str)
async def create_recipe_from_html_or_json(self, req: ScrapeRecipeData) -> str:
"""Takes in raw HTML or a https://schema.org/Recipe object as a JSON string and parses it like a URL"""
if req.data.startswith("{"):
req.data = RecipeScraperPackage.ld_json_to_html(req.data)
return await self._create_recipe_from_web(req)
async for event in self._create_recipe_from_web(req):
if isinstance(event.data, SSEDataEventDone):
return event.data.slug
if isinstance(event.data, SSEDataEventMessage) and event.event == SSEDataEventStatus.ERROR:
raise HTTPException(status_code=400, detail=ErrorResponse.respond(message=event.data.message))
# This should never be reached, since the generator always ends with a DONE event or an ERROR event
raise HTTPException(status_code=500, detail=ErrorResponse.respond(message="Unknown Error"))
@router.post("/create/html-or-json/stream", response_class=EventSourceResponse)
async def create_recipe_from_html_or_json_stream(self, req: ScrapeRecipeData) -> AsyncIterable[ServerSentEvent]:
"""
Takes in raw HTML or a https://schema.org/Recipe object as a JSON string and parses it like a URL,
streaming progress via SSE
"""
if req.data.startswith("{"):
req.data = RecipeScraperPackage.ld_json_to_html(req.data)
async for event in self._create_recipe_from_web(req):
yield event
@router.post("/create/url", status_code=201, response_model=str)
async def parse_recipe_url(self, req: ScrapeRecipe):
async def parse_recipe_url(self, req: ScrapeRecipe) -> str:
"""Takes in a URL and attempts to scrape data and load it into the database"""
return await self._create_recipe_from_web(req)
async for event in self._create_recipe_from_web(req):
if isinstance(event.data, SSEDataEventDone):
return event.data.slug
if isinstance(event.data, SSEDataEventMessage) and event.event == SSEDataEventStatus.ERROR:
raise HTTPException(status_code=400, detail=ErrorResponse.respond(message=event.data.message))
# This should never be reached, since the generator always ends with a DONE event or an ERROR event
raise HTTPException(status_code=500, detail=ErrorResponse.respond(message="Unknown Error"))
@router.post("/create/url/stream", response_class=EventSourceResponse)
async def parse_recipe_url_stream(self, req: ScrapeRecipe) -> AsyncIterable[ServerSentEvent]:
"""
Takes in a URL and attempts to scrape data and load it into the database,
streaming progress via SSE
"""
async for event in self._create_recipe_from_web(req):
yield event
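
The non-streaming endpoints still return just the slug, while the new /stream variants emit progress/done/error events. A rough client-side sketch using httpx (an assumption; Mealie's own frontend uses its own API client). The /api/recipes path prefix, the auth header, and the includeTags payload key are assumptions; the event names mirror SSEDataEventStatus:

# Rough client sketch: consume the SSE stream and return the new recipe's slug.
# Assumes event payloads are rendered as JSON objects; adjust parsing if the
# SSE layer serializes them differently.
import json

import httpx


async def import_recipe(base_url: str, token: str, recipe_url: str) -> str | None:
    headers = {"Authorization": f"Bearer {token}"}
    async with httpx.AsyncClient(base_url=base_url, headers=headers, timeout=None) as client:
        async with client.stream(
            "POST", "/api/recipes/create/url/stream", json={"url": recipe_url, "includeTags": True}
        ) as resp:
            event = None
            async for line in resp.aiter_lines():
                if line.startswith("event:"):
                    event = line.split(":", 1)[1].strip()
                elif line.startswith("data:"):
                    data = json.loads(line.split(":", 1)[1].strip())
                    if event == "progress":
                        print(data["message"])
                    elif event == "done":
                        return data["slug"]  # slug of the newly created recipe
                    elif event == "error":
                        raise RuntimeError(data["message"])
    return None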
async def _create_recipe_from_web(self, req: ScrapeRecipe | ScrapeRecipeData) -> AsyncIterable[ServerSentEvent]:
"""
Create a recipe from the web, returning progress via SSE.
Events will continue to be yielded until:
- The recipe is created, emitting:
- event=SSEDataEventStatus.DONE
- data=SSEDataEventDone(...)
- An exception is raised, emitting:
- event=SSEDataEventStatus.ERROR
- data=SSEDataEventMessage(...)
"""
async def _create_recipe_from_web(self, req: ScrapeRecipe | ScrapeRecipeData):
if isinstance(req, ScrapeRecipeData):
html = req.data
url = req.url or ""
@@ -153,21 +210,48 @@ class RecipeController(BaseRecipeController):
html = None
url = req.url
try:
recipe, extras = await create_from_html(url, self.translator, html)
except ForceTimeoutException as e:
raise HTTPException(
status_code=408, detail=ErrorResponse.respond(message="Recipe Scraping Timed Out")
) from e
queue: asyncio.Queue[ServerSentEvent | None] = asyncio.Queue()
async def on_progress(message: str) -> None:
await queue.put(
ServerSentEvent(
data=SSEDataEventMessage(message=message),
event=SSEDataEventStatus.PROGRESS,
)
)
async def run() -> None:
try:
recipe, extras = await create_from_html(url, self.translator, html, on_progress=on_progress)
slug = self._finish_recipe_from_web(req, recipe, extras)
await queue.put(
ServerSentEvent(
data=SSEDataEventDone(slug=slug),
event=SSEDataEventStatus.DONE,
)
)
except Exception as e:
self.logger.exception("Error in streaming recipe creation")
await queue.put(
ServerSentEvent(
data=SSEDataEventMessage(message=e.__class__.__name__),
event=SSEDataEventStatus.ERROR,
)
)
finally:
await queue.put(None)
asyncio.create_task(run())
while (event := await queue.get()) is not None:
yield event
def _finish_recipe_from_web(self, req: ScrapeRecipe | ScrapeRecipeData, recipe: Recipe, extras: object) -> str:
if req.include_tags:
ctx = ScraperContext(self.repos)
recipe.tags = extras.use_tags(ctx) # type: ignore
if req.include_categories:
ctx = ScraperContext(self.repos)
recipe.recipe_category = extras.use_categories(ctx) # type: ignore
new_recipe = self.service.create_one(recipe)
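
The worker-plus-queue shape above is a general way to turn a progress callback into an async generator: the worker pushes progress messages onto an asyncio.Queue, pushes a sentinel when it finishes, and the generator drains the queue. A stripped-down, self-contained sketch of the same pattern; all names here are illustrative, not Mealie APIs:

# Self-contained sketch of the callback -> async-generator bridge used above.
import asyncio
from collections.abc import AsyncIterable


async def slow_job(on_progress) -> str:
    for step in ("fetching", "parsing", "saving"):
        await on_progress(step)
        await asyncio.sleep(0.1)
    return "finished"


async def stream_job() -> AsyncIterable[str]:
    queue: asyncio.Queue[str | None] = asyncio.Queue()

    async def on_progress(message: str) -> None:
        await queue.put(message)

    async def run() -> None:
        try:
            await queue.put(await slow_job(on_progress))
        finally:
            await queue.put(None)  # sentinel: tells the consumer loop to stop

    task = asyncio.create_task(run())  # keep a reference so the task isn't garbage collected
    while (event := await queue.get()) is not None:
        yield event
    await task  # surface any unexpected error from the worker


if __name__ == "__main__":
    async def main() -> None:
        async for event in stream_job():
            print(event)

    asyncio.run(main())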

View File

@@ -8,12 +8,24 @@ from .pagination import (
RequestQuery,
)
from .query_search import SearchFilter
from .responses import ErrorResponse, FileTokenResponse, SuccessResponse
from .responses import (
ErrorResponse,
FileTokenResponse,
SSEDataEventBase,
SSEDataEventDone,
SSEDataEventMessage,
SSEDataEventStatus,
SuccessResponse,
)
from .validation import ValidationResponse
__all__ = [
"ErrorResponse",
"FileTokenResponse",
"SSEDataEventBase",
"SSEDataEventDone",
"SSEDataEventMessage",
"SSEDataEventStatus",
"SuccessResponse",
"SearchFilter",
"OrderByNullPosition",

View File

@@ -1,3 +1,5 @@
from enum import StrEnum
from pydantic import BaseModel
from mealie.schema._mealie import MealieModel
@@ -40,3 +42,20 @@ class FileTokenResponse(MealieModel):
in the same call, for use while providing details to a HTTPException
"""
return cls(file_token=token).model_dump()
class SSEDataEventStatus(StrEnum):
PROGRESS = "progress"
DONE = "done"
ERROR = "error"
class SSEDataEventBase(BaseModel): ...
class SSEDataEventMessage(SSEDataEventBase):
message: str
class SSEDataEventDone(SSEDataEventBase):
slug: str
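
Putting the enum and the models together, the controller wraps each payload in a ServerSentEvent as shown in the diff above. A minimal sketch of that construction; the exact wire format of each frame depends on the SSE response class doing the serialization:

# Minimal sketch mirroring how the controller builds SSE frames.
from fastapi.sse import ServerSentEvent

from mealie.schema.response import SSEDataEventDone, SSEDataEventMessage, SSEDataEventStatus

progress = ServerSentEvent(
    data=SSEDataEventMessage(message="Downloading image..."),
    event=SSEDataEventStatus.PROGRESS,
)
done = ServerSentEvent(
    data=SSEDataEventDone(slug="my-new-recipe"),
    event=SSEDataEventStatus.DONE,
)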

View File

@@ -1,3 +1,5 @@
from collections.abc import Awaitable, Callable
from mealie.core.root_logger import get_logger
from mealie.lang.providers import Translator
from mealie.schema.recipe.recipe import Recipe
@@ -37,25 +39,34 @@ class RecipeScraper:
self.translator = translator
self.logger = get_logger()
async def scrape(self, url: str, html: str | None = None) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
async def scrape(
self,
url: str,
html: str | None = None,
on_progress: Callable[[str], Awaitable[None]] | None = None,
) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
"""
Scrapes a recipe from the web.
Skips the network request if `html` is provided.
Optionally reports progress back via `on_progress`.
"""
raw_html = html or await safe_scrape_html(url)
if not html:
if on_progress:
await on_progress(self.translator.t("recipe.create-progress.fetching-webpage"))
if not raw_html:
return None, None
html = await safe_scrape_html(url)
if not html:
return None, None
for ScraperClass in self.scrapers:
scraper = ScraperClass(url, self.translator, raw_html=raw_html)
scraper = ScraperClass(url, self.translator, raw_html=html)
if not scraper.can_scrape():
self.logger.debug(f"Skipping {scraper.__class__.__name__}")
continue
try:
result = await scraper.parse()
result = await scraper.parse(on_progress=on_progress)
except Exception:
self.logger.exception(f"Failed to scrape HTML with {scraper.__class__.__name__}")
result = None
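
Any async callable with the Callable[[str], Awaitable[None]] shape can be plugged in as on_progress. A small sketch that just logs the progress messages; constructing the RecipeScraper and its Translator is assumed and elided here:

# Small sketch: any async callable taking a str can act as on_progress.
import logging

logger = logging.getLogger(__name__)


async def log_progress(message: str) -> None:
    logger.info("scrape progress: %s", message)


# recipe, extras = await scraper.scrape("https://example.com/recipe", on_progress=log_progress)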

View File

@@ -1,3 +1,4 @@
from collections.abc import Awaitable, Callable
from enum import StrEnum
from re import search as regex_search
from uuid import uuid4
@@ -22,7 +23,10 @@ class ParserErrors(StrEnum):
async def create_from_html(
url: str, translator: Translator, html: str | None = None
url: str,
translator: Translator,
html: str | None = None,
on_progress: Callable[[str], Awaitable[None]] | None = None,
) -> tuple[Recipe, ScrapedExtras | None]:
"""Main entry point for generating a recipe from a URL. Pass in a URL and
a Recipe object will be returned if successful. Optionally pass in the HTML to skip fetching it.
@@ -30,6 +34,7 @@ async def create_from_html(
Args:
url (str): a valid string representing a URL
html (str | None): optional HTML string to skip network request. Defaults to None.
on_progress: optional async callable invoked with a status message at each stage.
Returns:
Recipe: Recipe Object
@@ -42,7 +47,7 @@ async def create_from_html(
raise HTTPException(status.HTTP_400_BAD_REQUEST, {"details": ParserErrors.BAD_RECIPE_DATA.value})
url = extracted_url.group(0)
new_recipe, extras = await scraper.scrape(url, html)
new_recipe, extras = await scraper.scrape(url, html, on_progress=on_progress)
if not new_recipe:
raise HTTPException(status.HTTP_400_BAD_REQUEST, {"details": ParserErrors.BAD_RECIPE_DATA.value})
@@ -54,9 +59,13 @@ async def create_from_html(
recipe_data_service = RecipeDataService(new_recipe.id)
try:
if new_recipe.image and isinstance(new_recipe.image, list):
new_recipe.image = new_recipe.image[0]
await recipe_data_service.scrape_image(new_recipe.image) # type: ignore
if new_recipe.image:
if isinstance(new_recipe.image, list):
new_recipe.image = new_recipe.image[0]
if on_progress:
await on_progress(translator.t("recipe.create-progress.downloading-image"))
await recipe_data_service.scrape_image(new_recipe.image) # type: ignore
if new_recipe.name is None:
new_recipe.name = "Untitled"
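
Because on_progress is just a parameter on create_from_html, a caller (for example a test) can collect the emitted messages instead of streaming them. A sketch under those assumptions; the import path and translator fixture are assumptions, the signature comes from the diff:

# Sketch: collect progress messages from create_from_html, e.g. in a test.
from mealie.services.scraper import create_from_html  # import path assumed


async def create_with_progress(url: str, translator):
    seen: list[str] = []

    async def record(message: str) -> None:
        seen.append(message)

    recipe, extras = await create_from_html(url, translator, on_progress=record)
    return recipe, seen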

View File

@@ -3,7 +3,7 @@ import functools
import re
import time
from abc import ABC, abstractmethod
from collections.abc import Callable
from collections.abc import Awaitable, Callable
from pathlib import Path
from typing import Any, TypedDict
@@ -139,7 +139,9 @@ class ABCScraperStrategy(ABC):
async def get_html(self, url: str) -> str: ...
@abstractmethod
async def parse(self) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
async def parse(
self, on_progress: Callable[[str], Awaitable[None]] | None = None
) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
"""Parse a recipe from a web URL.
Args:
@@ -276,10 +278,14 @@ class RecipeScraperPackage(ABCScraperStrategy):
self.logger.debug(f"Recipe Scraper [Package] was unable to extract a recipe from {self.url}")
return None
async def parse(self):
async def parse(self, on_progress: Callable[[str], Awaitable[None]] | None = None):
"""
Parse a recipe from a given url.
"""
if on_progress:
await on_progress(self.translator.t("recipe.create-progress.extracting-recipe-data"))
scraped_data = await self.scrape_url()
if scraped_data is None:
@@ -376,6 +382,12 @@ class RecipeScraperOpenAI(RecipeScraperPackage):
self.logger.exception(f"OpenAI was unable to extract a recipe from {url}")
return ""
async def parse(self, on_progress: Callable[[str], Awaitable[None]] | None = None):
if on_progress:
await on_progress(self.translator.t("recipe.create-progress.creating-recipe-with-ai"))
return await super().parse()
class TranscribedAudio(TypedDict):
audio: Path
@@ -468,10 +480,16 @@ class RecipeScraperOpenAITranscription(ABCScraperStrategy):
async def get_html(self, url: str) -> str:
return self.raw_html or "" # we don't use HTML with this scraper since we use ytdlp
async def parse(self) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
async def parse(
self,
on_progress: Callable[[str], Awaitable[None]] | None = None,
) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
openai_service = OpenAIService()
with get_temporary_path() as temp_path:
if on_progress:
await on_progress(self.translator.t("recipe.create-progress.downloading-video"))
video_data = await asyncio.to_thread(self._download_audio, temp_path)
if video_data["subtitle"]:
@@ -485,6 +503,9 @@ class RecipeScraperOpenAITranscription(ABCScraperStrategy):
video_data["transcription"] = ""
if not video_data["transcription"]:
if on_progress:
await on_progress(self.translator.t("recipe.create-progress.transcribing-audio-with-ai"))
try:
transcription = await openai_service.transcribe_audio(video_data["audio"])
except exceptions.RateLimitError:
@@ -508,6 +529,9 @@ class RecipeScraperOpenAITranscription(ABCScraperStrategy):
f"Transcription: {video_data['transcription']}",
]
if on_progress:
await on_progress(self.translator.t("recipe.create-progress.creating-recipe-from-transcript-with-ai"))
try:
response = await openai_service.get_response(prompt, "\n".join(message_parts), response_schema=OpenAIRecipe)
except exceptions.RateLimitError:
@@ -586,10 +610,17 @@ class RecipeScraperOpenGraph(ABCScraperStrategy):
"extras": [],
}
async def parse(self):
async def parse(
self,
on_progress: Callable[[str], Awaitable[None]] | None = None,
):
"""
Parse a recipe from a given url.
"""
if on_progress:
await on_progress(self.translator.t("recipe.create-progress.creating-recipe-from-webpage-data"))
html = await self.get_html(self.url)
og_data = self.get_recipe_fields(html)
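
Any additional strategy only needs to honor the widened parse() signature to participate in progress reporting. A skeletal sketch assumed to live alongside the strategies in this module; the class, its behavior, and the chosen progress key are invented for illustration:

# Skeletal sketch of a strategy honoring the widened parse() signature.
# The class and its behavior are illustrative only.
class RecipeScraperExample(ABCScraperStrategy):
    async def get_html(self, url: str) -> str:
        return self.raw_html or ""

    async def parse(
        self, on_progress: Callable[[str], Awaitable[None]] | None = None
    ) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
        if on_progress:
            await on_progress(self.translator.t("recipe.create-progress.extracting-recipe-data"))
        html = await self.get_html(self.url)
        if not html:
            return None, None
        # ... turn `html` into a Recipe and ScrapedExtras here ...
        return None, None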