feat: Recipe import progress (#7252)

Michael Genson
2026-03-15 16:44:19 -05:00
committed by GitHub
parent 04dd514e6a
commit c4fdab4e05
20 changed files with 491 additions and 60 deletions

View File

@@ -19,7 +19,17 @@
"yields": "Yields"
},
"and-amount": "and {amount}",
"or-ingredient": "or {ingredient}"
"or-ingredient": "or {ingredient}",
"create-progress": {
"creating-recipe-with-ai": "Creating recipe with AI...",
"creating-recipe-from-transcript-with-ai": "Creating recipe from transcript with AI...",
"creating-recipe-from-webpage-data": "Creating recipe from webpage data...",
"downloading-image": "Downloading image...",
"downloading-video": "Downloading video...",
"extracting-recipe-data": "Extracting recipe data...",
"fetching-webpage": "Fetching webpage...",
"transcribing-audio-with-ai": "Transcribing audio with AI..."
}
},
"mealplan": {
"no-recipes-match-your-rules": "No recipes match your rules"

View File

@@ -33,7 +33,14 @@ class MealieCrudRoute(APIRoute):
async def custom_route_handler(request: Request) -> Response:
with contextlib.suppress(JSONDecodeError):
response = await original_route_handler(request)
response_body = json.loads(response.body)
# StreamingResponse from starlette doesn't have a body attribute, even though it inherits from Response,
# so we may get an AttributeError here even though the type hints suggest otherwise.
try:
response_body = json.loads(response.body)
except AttributeError:
return response
if isinstance(response_body, dict):
if last_modified := response_body.get("updatedAt"):
response.headers["last-modified"] = last_modified

View File

@@ -1,4 +1,6 @@
import asyncio
from collections import defaultdict
from collections.abc import AsyncIterable
from shutil import copyfileobj
from uuid import UUID
@@ -17,6 +19,7 @@ from fastapi import (
status,
)
from fastapi.datastructures import UploadFile
from fastapi.sse import EventSourceResponse, ServerSentEvent
from pydantic import UUID4
from slugify import slugify
@@ -46,7 +49,13 @@ from mealie.schema.recipe.request_helpers import (
)
from mealie.schema.response import PaginationBase, PaginationQuery
from mealie.schema.response.pagination import RecipeSearchQuery
from mealie.schema.response.responses import ErrorResponse, SuccessResponse
from mealie.schema.response.responses import (
ErrorResponse,
SSEDataEventDone,
SSEDataEventMessage,
SSEDataEventStatus,
SuccessResponse,
)
from mealie.services import urls
from mealie.services.event_bus_service.event_types import (
EventOperation,
@@ -130,22 +139,70 @@ class RecipeController(BaseRecipeController):
return "recipe_scrapers was unable to scrape this URL"
@router.post("/create/html-or-json", status_code=201)
async def create_recipe_from_html_or_json(self, req: ScrapeRecipeData):
@router.post("/create/html-or-json", status_code=201, response_model=str)
async def create_recipe_from_html_or_json(self, req: ScrapeRecipeData) -> str:
"""Takes in raw HTML or a https://schema.org/Recipe object as a JSON string and parses it like a URL"""
if req.data.startswith("{"):
req.data = RecipeScraperPackage.ld_json_to_html(req.data)
return await self._create_recipe_from_web(req)
async for event in self._create_recipe_from_web(req):
if isinstance(event.data, SSEDataEventDone):
return event.data.slug
if isinstance(event.data, SSEDataEventMessage) and event.event == SSEDataEventStatus.ERROR:
raise HTTPException(status_code=400, detail=ErrorResponse.respond(message=event.data.message))
# This should never be reached, since the generator always ends with a DONE event or an ERROR event
raise HTTPException(status_code=500, detail=ErrorResponse.respond(message="Unknown Error"))
@router.post("/create/html-or-json/stream", response_class=EventSourceResponse)
async def create_recipe_from_html_or_json_stream(self, req: ScrapeRecipeData) -> AsyncIterable[ServerSentEvent]:
"""
Takes in raw HTML or a https://schema.org/Recipe object as a JSON string and parses it like a URL,
streaming progress via SSE
"""
if req.data.startswith("{"):
req.data = RecipeScraperPackage.ld_json_to_html(req.data)
async for event in self._create_recipe_from_web(req):
yield event
@router.post("/create/url", status_code=201, response_model=str)
async def parse_recipe_url(self, req: ScrapeRecipe):
async def parse_recipe_url(self, req: ScrapeRecipe) -> str:
"""Takes in a URL and attempts to scrape data and load it into the database"""
return await self._create_recipe_from_web(req)
async for event in self._create_recipe_from_web(req):
if isinstance(event.data, SSEDataEventDone):
return event.data.slug
if isinstance(event.data, SSEDataEventMessage) and event.event == SSEDataEventStatus.ERROR:
raise HTTPException(status_code=400, detail=ErrorResponse.respond(message=event.data.message))
# This should never be reached, since the generator always ends with a DONE event or an ERROR event
raise HTTPException(status_code=500, detail=ErrorResponse.respond(message="Unknown Error"))
@router.post("/create/url/stream", response_class=EventSourceResponse)
async def parse_recipe_url_stream(self, req: ScrapeRecipe) -> AsyncIterable[ServerSentEvent]:
"""
Takes in a URL and attempts to scrape data and load it into the database,
streaming progress via SSE
"""
async for event in self._create_recipe_from_web(req):
yield event
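
The non-streaming endpoints still return just the slug, while the new /stream variants emit progress/done/error events. A rough client-side sketch using httpx (an assumption; Mealie's own frontend uses its own API client). The /api/recipes path prefix, the auth header, and the includeTags payload key are assumptions; the event names mirror SSEDataEventStatus:

# Rough client sketch: consume the SSE stream and return the new recipe's slug.
# Assumes event payloads are rendered as JSON objects; adjust parsing if the
# SSE layer serializes them differently.
import json

import httpx


async def import_recipe(base_url: str, token: str, recipe_url: str) -> str | None:
    headers = {"Authorization": f"Bearer {token}"}
    async with httpx.AsyncClient(base_url=base_url, headers=headers, timeout=None) as client:
        async with client.stream(
            "POST", "/api/recipes/create/url/stream", json={"url": recipe_url, "includeTags": True}
        ) as resp:
            event = None
            async for line in resp.aiter_lines():
                if line.startswith("event:"):
                    event = line.split(":", 1)[1].strip()
                elif line.startswith("data:"):
                    data = json.loads(line.split(":", 1)[1].strip())
                    if event == "progress":
                        print(data["message"])
                    elif event == "done":
                        return data["slug"]  # slug of the newly created recipe
                    elif event == "error":
                        raise RuntimeError(data["message"])
    return None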
async def _create_recipe_from_web(self, req: ScrapeRecipe | ScrapeRecipeData) -> AsyncIterable[ServerSentEvent]:
"""
Create a recipe from the web, returning progress via SSE.
Events will continue to be yielded until:
- The recipe is created, emitting:
- event=SSEDataEventStatus.DONE
- data=SSEDataEventDone(...)
- An exception is raised, emitting:
- event=SSEDataEventStatus.ERROR
- data=SSEDataEventMessage(...)
"""
async def _create_recipe_from_web(self, req: ScrapeRecipe | ScrapeRecipeData):
if isinstance(req, ScrapeRecipeData):
html = req.data
url = req.url or ""
@@ -153,21 +210,48 @@ class RecipeController(BaseRecipeController):
html = None
url = req.url
try:
recipe, extras = await create_from_html(url, self.translator, html)
except ForceTimeoutException as e:
raise HTTPException(
status_code=408, detail=ErrorResponse.respond(message="Recipe Scraping Timed Out")
) from e
queue: asyncio.Queue[ServerSentEvent | None] = asyncio.Queue()
async def on_progress(message: str) -> None:
await queue.put(
ServerSentEvent(
data=SSEDataEventMessage(message=message),
event=SSEDataEventStatus.PROGRESS,
)
)
async def run() -> None:
try:
recipe, extras = await create_from_html(url, self.translator, html, on_progress=on_progress)
slug = self._finish_recipe_from_web(req, recipe, extras)
await queue.put(
ServerSentEvent(
data=SSEDataEventDone(slug=slug),
event=SSEDataEventStatus.DONE,
)
)
except Exception as e:
self.logger.exception("Error in streaming recipe creation")
await queue.put(
ServerSentEvent(
data=SSEDataEventMessage(message=e.__class__.__name__),
event=SSEDataEventStatus.ERROR,
)
)
finally:
await queue.put(None)
asyncio.create_task(run())
while (event := await queue.get()) is not None:
yield event
def _finish_recipe_from_web(self, req: ScrapeRecipe | ScrapeRecipeData, recipe: Recipe, extras: object) -> str:
if req.include_tags:
ctx = ScraperContext(self.repos)
recipe.tags = extras.use_tags(ctx) # type: ignore
if req.include_categories:
ctx = ScraperContext(self.repos)
recipe.recipe_category = extras.use_categories(ctx) # type: ignore
new_recipe = self.service.create_one(recipe)
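
The worker-plus-queue shape above is a general way to turn a progress callback into an async generator: the worker pushes progress messages onto an asyncio.Queue, pushes a sentinel when it finishes, and the generator drains the queue. A stripped-down, self-contained sketch of the same pattern; all names here are illustrative, not Mealie APIs:

# Self-contained sketch of the callback -> async-generator bridge used above.
import asyncio
from collections.abc import AsyncIterable


async def slow_job(on_progress) -> str:
    for step in ("fetching", "parsing", "saving"):
        await on_progress(step)
        await asyncio.sleep(0.1)
    return "finished"


async def stream_job() -> AsyncIterable[str]:
    queue: asyncio.Queue[str | None] = asyncio.Queue()

    async def on_progress(message: str) -> None:
        await queue.put(message)

    async def run() -> None:
        try:
            await queue.put(await slow_job(on_progress))
        finally:
            await queue.put(None)  # sentinel: tells the consumer loop to stop

    task = asyncio.create_task(run())  # keep a reference so the task isn't garbage collected
    while (event := await queue.get()) is not None:
        yield event
    await task  # surface any unexpected error from the worker


if __name__ == "__main__":
    async def main() -> None:
        async for event in stream_job():
            print(event)

    asyncio.run(main())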

View File

@@ -8,12 +8,24 @@ from .pagination import (
RequestQuery,
)
from .query_search import SearchFilter
from .responses import ErrorResponse, FileTokenResponse, SuccessResponse
from .responses import (
ErrorResponse,
FileTokenResponse,
SSEDataEventBase,
SSEDataEventDone,
SSEDataEventMessage,
SSEDataEventStatus,
SuccessResponse,
)
from .validation import ValidationResponse
__all__ = [
"ErrorResponse",
"FileTokenResponse",
"SSEDataEventBase",
"SSEDataEventDone",
"SSEDataEventMessage",
"SSEDataEventStatus",
"SuccessResponse",
"SearchFilter",
"OrderByNullPosition",

View File

@@ -1,3 +1,5 @@
from enum import StrEnum
from pydantic import BaseModel
from mealie.schema._mealie import MealieModel
@@ -40,3 +42,20 @@ class FileTokenResponse(MealieModel):
in the same call, for use while providing details to a HTTPException
"""
return cls(file_token=token).model_dump()
class SSEDataEventStatus(StrEnum):
PROGRESS = "progress"
DONE = "done"
ERROR = "error"
class SSEDataEventBase(BaseModel): ...
class SSEDataEventMessage(SSEDataEventBase):
message: str
class SSEDataEventDone(SSEDataEventBase):
slug: str
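
Putting the enum and the models together, the controller wraps each payload in a ServerSentEvent as shown in the diff above. A minimal sketch of that construction; the exact wire format of each frame depends on the SSE response class doing the serialization:

# Minimal sketch mirroring how the controller builds SSE frames.
from fastapi.sse import ServerSentEvent

from mealie.schema.response import SSEDataEventDone, SSEDataEventMessage, SSEDataEventStatus

progress = ServerSentEvent(
    data=SSEDataEventMessage(message="Downloading image..."),
    event=SSEDataEventStatus.PROGRESS,
)
done = ServerSentEvent(
    data=SSEDataEventDone(slug="my-new-recipe"),
    event=SSEDataEventStatus.DONE,
)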

View File

@@ -1,3 +1,5 @@
from collections.abc import Awaitable, Callable
from mealie.core.root_logger import get_logger
from mealie.lang.providers import Translator
from mealie.schema.recipe.recipe import Recipe
@@ -37,25 +39,34 @@ class RecipeScraper:
self.translator = translator
self.logger = get_logger()
async def scrape(self, url: str, html: str | None = None) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
async def scrape(
self,
url: str,
html: str | None = None,
on_progress: Callable[[str], Awaitable[None]] | None = None,
) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
"""
Scrapes a recipe from the web.
Skips the network request if `html` is provided.
Optionally reports progress back via `on_progress`.
"""
raw_html = html or await safe_scrape_html(url)
if not html:
if on_progress:
await on_progress(self.translator.t("recipe.create-progress.fetching-webpage"))
if not raw_html:
return None, None
html = await safe_scrape_html(url)
if not html:
return None, None
for ScraperClass in self.scrapers:
scraper = ScraperClass(url, self.translator, raw_html=raw_html)
scraper = ScraperClass(url, self.translator, raw_html=html)
if not scraper.can_scrape():
self.logger.debug(f"Skipping {scraper.__class__.__name__}")
continue
try:
result = await scraper.parse()
result = await scraper.parse(on_progress=on_progress)
except Exception:
self.logger.exception(f"Failed to scrape HTML with {scraper.__class__.__name__}")
result = None
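
Any async callable with the Callable[[str], Awaitable[None]] shape can be plugged in as on_progress. A small sketch that just logs the progress messages; constructing the RecipeScraper and its Translator is assumed and elided here:

# Small sketch: any async callable taking a str can act as on_progress.
import logging

logger = logging.getLogger(__name__)


async def log_progress(message: str) -> None:
    logger.info("scrape progress: %s", message)


# recipe, extras = await scraper.scrape("https://example.com/recipe", on_progress=log_progress)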

View File

@@ -1,3 +1,4 @@
from collections.abc import Awaitable, Callable
from enum import StrEnum
from re import search as regex_search
from uuid import uuid4
@@ -22,7 +23,10 @@ class ParserErrors(StrEnum):
async def create_from_html(
url: str, translator: Translator, html: str | None = None
url: str,
translator: Translator,
html: str | None = None,
on_progress: Callable[[str], Awaitable[None]] | None = None,
) -> tuple[Recipe, ScrapedExtras | None]:
"""Main entry point for generating a recipe from a URL. Pass in a URL and
a Recipe object will be returned if successful. Optionally pass in the HTML to skip fetching it.
@@ -30,6 +34,7 @@ async def create_from_html(
Args:
url (str): a valid string representing a URL
html (str | None): optional HTML string to skip network request. Defaults to None.
on_progress: optional async callable invoked with a status message at each stage.
Returns:
Recipe: Recipe Object
@@ -42,7 +47,7 @@ async def create_from_html(
raise HTTPException(status.HTTP_400_BAD_REQUEST, {"details": ParserErrors.BAD_RECIPE_DATA.value})
url = extracted_url.group(0)
new_recipe, extras = await scraper.scrape(url, html)
new_recipe, extras = await scraper.scrape(url, html, on_progress=on_progress)
if not new_recipe:
raise HTTPException(status.HTTP_400_BAD_REQUEST, {"details": ParserErrors.BAD_RECIPE_DATA.value})
@@ -54,9 +59,13 @@ async def create_from_html(
recipe_data_service = RecipeDataService(new_recipe.id)
try:
if new_recipe.image and isinstance(new_recipe.image, list):
new_recipe.image = new_recipe.image[0]
await recipe_data_service.scrape_image(new_recipe.image) # type: ignore
if new_recipe.image:
if isinstance(new_recipe.image, list):
new_recipe.image = new_recipe.image[0]
if on_progress:
await on_progress(translator.t("recipe.create-progress.downloading-image"))
await recipe_data_service.scrape_image(new_recipe.image) # type: ignore
if new_recipe.name is None:
new_recipe.name = "Untitled"
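
Because on_progress is just a parameter on create_from_html, a caller (for example a test) can collect the emitted messages instead of streaming them. A sketch under those assumptions; the import path and translator fixture are assumptions, the signature comes from the diff:

# Sketch: collect progress messages from create_from_html, e.g. in a test.
from mealie.services.scraper import create_from_html  # import path assumed


async def create_with_progress(url: str, translator):
    seen: list[str] = []

    async def record(message: str) -> None:
        seen.append(message)

    recipe, extras = await create_from_html(url, translator, on_progress=record)
    return recipe, seen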

View File

@@ -3,7 +3,7 @@ import functools
import re
import time
from abc import ABC, abstractmethod
from collections.abc import Callable
from collections.abc import Awaitable, Callable
from pathlib import Path
from typing import Any, TypedDict
@@ -139,7 +139,9 @@ class ABCScraperStrategy(ABC):
async def get_html(self, url: str) -> str: ...
@abstractmethod
async def parse(self) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
async def parse(
self, on_progress: Callable[[str], Awaitable[None]] | None = None
) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
"""Parse a recipe from a web URL.
Args:
@@ -276,10 +278,14 @@ class RecipeScraperPackage(ABCScraperStrategy):
self.logger.debug(f"Recipe Scraper [Package] was unable to extract a recipe from {self.url}")
return None
async def parse(self):
async def parse(self, on_progress: Callable[[str], Awaitable[None]] | None = None):
"""
Parse a recipe from a given url.
"""
if on_progress:
await on_progress(self.translator.t("recipe.create-progress.extracting-recipe-data"))
scraped_data = await self.scrape_url()
if scraped_data is None:
@@ -376,6 +382,12 @@ class RecipeScraperOpenAI(RecipeScraperPackage):
self.logger.exception(f"OpenAI was unable to extract a recipe from {url}")
return ""
async def parse(self, on_progress: Callable[[str], Awaitable[None]] | None = None):
if on_progress:
await on_progress(self.translator.t("recipe.create-progress.creating-recipe-with-ai"))
return await super().parse()
class TranscribedAudio(TypedDict):
audio: Path
@@ -468,10 +480,16 @@ class RecipeScraperOpenAITranscription(ABCScraperStrategy):
async def get_html(self, url: str) -> str:
return self.raw_html or "" # we don't use HTML with this scraper since we use ytdlp
async def parse(self) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
async def parse(
self,
on_progress: Callable[[str], Awaitable[None]] | None = None,
) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
openai_service = OpenAIService()
with get_temporary_path() as temp_path:
if on_progress:
await on_progress(self.translator.t("recipe.create-progress.downloading-video"))
video_data = await asyncio.to_thread(self._download_audio, temp_path)
if video_data["subtitle"]:
@@ -485,6 +503,9 @@ class RecipeScraperOpenAITranscription(ABCScraperStrategy):
video_data["transcription"] = ""
if not video_data["transcription"]:
if on_progress:
await on_progress(self.translator.t("recipe.create-progress.transcribing-audio-with-ai"))
try:
transcription = await openai_service.transcribe_audio(video_data["audio"])
except exceptions.RateLimitError:
@@ -508,6 +529,9 @@ class RecipeScraperOpenAITranscription(ABCScraperStrategy):
f"Transcription: {video_data['transcription']}",
]
if on_progress:
await on_progress(self.translator.t("recipe.create-progress.creating-recipe-from-transcript-with-ai"))
try:
response = await openai_service.get_response(prompt, "\n".join(message_parts), response_schema=OpenAIRecipe)
except exceptions.RateLimitError:
@@ -586,10 +610,17 @@ class RecipeScraperOpenGraph(ABCScraperStrategy):
"extras": [],
}
async def parse(self):
async def parse(
self,
on_progress: Callable[[str], Awaitable[None]] | None = None,
):
"""
Parse a recipe from a given url.
"""
if on_progress:
await on_progress(self.translator.t("recipe.create-progress.creating-recipe-from-webpage-data"))
html = await self.get_html(self.url)
og_data = self.get_recipe_fields(html)
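
Any additional strategy only needs to honor the widened parse() signature to participate in progress reporting. A skeletal sketch assumed to live alongside the strategies in this module; the class, its behavior, and the chosen progress key are invented for illustration:

# Skeletal sketch of a strategy honoring the widened parse() signature.
# The class and its behavior are illustrative only.
class RecipeScraperExample(ABCScraperStrategy):
    async def get_html(self, url: str) -> str:
        return self.raw_html or ""

    async def parse(
        self, on_progress: Callable[[str], Awaitable[None]] | None = None
    ) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
        if on_progress:
            await on_progress(self.translator.t("recipe.create-progress.extracting-recipe-data"))
        html = await self.get_html(self.url)
        if not html:
            return None, None
        # ... turn `html` into a Recipe and ScrapedExtras here ...
        return None, None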