mirror of
https://github.com/mealie-recipes/mealie.git
synced 2026-04-13 00:15:34 -04:00
feat: Recipe import progress (#7252)
This commit is contained in:
@@ -19,7 +19,17 @@
|
||||
"yields": "Yields"
|
||||
},
|
||||
"and-amount": "and {amount}",
|
||||
"or-ingredient": "or {ingredient}"
|
||||
"or-ingredient": "or {ingredient}",
|
||||
"create-progress": {
|
||||
"creating-recipe-with-ai": "Creating recipe with AI...",
|
||||
"creating-recipe-from-transcript-with-ai": "Creating recipe from transcript with AI...",
|
||||
"creating-recipe-from-webpage-data": "Creating recipe from webpage data...",
|
||||
"downloading-image": "Downloading image...",
|
||||
"downloading-video": "Downloading video...",
|
||||
"extracting-recipe-data": "Extracting recipe data...",
|
||||
"fetching-webpage": "Fetching webpage...",
|
||||
"transcribing-audio-with-ai": "Transcribing audio with AI..."
|
||||
}
|
||||
},
|
||||
"mealplan": {
|
||||
"no-recipes-match-your-rules": "No recipes match your rules"
|
||||
|
||||
@@ -33,7 +33,14 @@ class MealieCrudRoute(APIRoute):
|
||||
async def custom_route_handler(request: Request) -> Response:
|
||||
with contextlib.suppress(JSONDecodeError):
|
||||
response = await original_route_handler(request)
|
||||
response_body = json.loads(response.body)
|
||||
|
||||
# StreamingResponse from starlette doesn't have a body attribute, even though it inherits from Response,
|
||||
# so we may get an attribute error here even though our type hinting suggests otherwise.
|
||||
try:
|
||||
response_body = json.loads(response.body)
|
||||
except AttributeError:
|
||||
return response
|
||||
|
||||
if isinstance(response_body, dict):
|
||||
if last_modified := response_body.get("updatedAt"):
|
||||
response.headers["last-modified"] = last_modified
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
import asyncio
|
||||
from collections import defaultdict
|
||||
from collections.abc import AsyncIterable
|
||||
from shutil import copyfileobj
|
||||
from uuid import UUID
|
||||
|
||||
@@ -17,6 +19,7 @@ from fastapi import (
|
||||
status,
|
||||
)
|
||||
from fastapi.datastructures import UploadFile
|
||||
from fastapi.sse import EventSourceResponse, ServerSentEvent
|
||||
from pydantic import UUID4
|
||||
from slugify import slugify
|
||||
|
||||
@@ -46,7 +49,13 @@ from mealie.schema.recipe.request_helpers import (
|
||||
)
|
||||
from mealie.schema.response import PaginationBase, PaginationQuery
|
||||
from mealie.schema.response.pagination import RecipeSearchQuery
|
||||
from mealie.schema.response.responses import ErrorResponse, SuccessResponse
|
||||
from mealie.schema.response.responses import (
|
||||
ErrorResponse,
|
||||
SSEDataEventDone,
|
||||
SSEDataEventMessage,
|
||||
SSEDataEventStatus,
|
||||
SuccessResponse,
|
||||
)
|
||||
from mealie.services import urls
|
||||
from mealie.services.event_bus_service.event_types import (
|
||||
EventOperation,
|
||||
@@ -130,22 +139,70 @@ class RecipeController(BaseRecipeController):
|
||||
|
||||
return "recipe_scrapers was unable to scrape this URL"
|
||||
|
||||
@router.post("/create/html-or-json", status_code=201)
|
||||
async def create_recipe_from_html_or_json(self, req: ScrapeRecipeData):
|
||||
@router.post("/create/html-or-json", status_code=201, response_model=str)
|
||||
async def create_recipe_from_html_or_json(self, req: ScrapeRecipeData) -> str:
|
||||
"""Takes in raw HTML or a https://schema.org/Recipe object as a JSON string and parses it like a URL"""
|
||||
|
||||
if req.data.startswith("{"):
|
||||
req.data = RecipeScraperPackage.ld_json_to_html(req.data)
|
||||
|
||||
return await self._create_recipe_from_web(req)
|
||||
async for event in self._create_recipe_from_web(req):
|
||||
if isinstance(event.data, SSEDataEventDone):
|
||||
return event.data.slug
|
||||
if isinstance(event.data, SSEDataEventMessage) and event.event == SSEDataEventStatus.ERROR:
|
||||
raise HTTPException(status_code=400, detail=ErrorResponse.respond(message=event.data.message))
|
||||
|
||||
# This should never be reachable, since we should always hit DONE or hit an exception/ERROR
|
||||
raise HTTPException(status_code=500, detail=ErrorResponse.respond(message="Unknown Error"))
|
||||
|
||||
@router.post("/create/html-or-json/stream", response_class=EventSourceResponse)
async def create_recipe_from_html_or_json_stream(self, req: ScrapeRecipeData) -> AsyncIterable[ServerSentEvent]:
    """
    Takes in raw HTML or a https://schema.org/Recipe object as a JSON string and parses it like a URL,
    streaming progress via SSE
    """

    # A payload that opens with "{" is taken to be the JSON form and is run
    # through `ld_json_to_html` before scraping; anything else is passed on as-is.
    payload_is_json = req.data.startswith("{")
    if payload_is_json:
        req.data = RecipeScraperPackage.ld_json_to_html(req.data)

    # Relay every server-sent event produced by the shared creation pipeline
    # to the client without modification.
    sse_events = self._create_recipe_from_web(req)
    async for sse_event in sse_events:
        yield sse_event
|
||||
|
||||
@router.post("/create/url", status_code=201, response_model=str)
|
||||
async def parse_recipe_url(self, req: ScrapeRecipe):
|
||||
async def parse_recipe_url(self, req: ScrapeRecipe) -> str:
|
||||
"""Takes in a URL and attempts to scrape data and load it into the database"""
|
||||
|
||||
return await self._create_recipe_from_web(req)
|
||||
async for event in self._create_recipe_from_web(req):
|
||||
if isinstance(event.data, SSEDataEventDone):
|
||||
return event.data.slug
|
||||
if isinstance(event.data, SSEDataEventMessage) and event.event == SSEDataEventStatus.ERROR:
|
||||
raise HTTPException(status_code=400, detail=ErrorResponse.respond(message=event.data.message))
|
||||
|
||||
# This should never be reachable, since we should always hit DONE or hit an exception/ERROR
|
||||
raise HTTPException(status_code=500, detail=ErrorResponse.respond(message="Unknown Error"))
|
||||
|
||||
@router.post("/create/url/stream", response_class=EventSourceResponse)
async def parse_recipe_url_stream(self, req: ScrapeRecipe) -> AsyncIterable[ServerSentEvent]:
    """
    Takes in a URL and attempts to scrape data and load it into the database,
    streaming progress via SSE
    """

    # Pure pass-through: each event emitted by the shared creation pipeline
    # (progress, done, or error) is forwarded to the client untouched.
    sse_events = self._create_recipe_from_web(req)
    async for sse_event in sse_events:
        yield sse_event
|
||||
|
||||
async def _create_recipe_from_web(self, req: ScrapeRecipe | ScrapeRecipeData) -> AsyncIterable[ServerSentEvent]:
|
||||
"""
|
||||
Create a recipe from the web, returning progress via SSE.
|
||||
Events will continue to be yielded until:
|
||||
- The recipe is created, emitting:
|
||||
- event=SSEDataEventStatus.DONE
|
||||
- data=SSEDataEventDone(...)
|
||||
- An exception is raised, emitting:
|
||||
- event=SSEDataEventStatus.ERROR
|
||||
- data=SSEDataEventMessage(...)
|
||||
"""
|
||||
|
||||
async def _create_recipe_from_web(self, req: ScrapeRecipe | ScrapeRecipeData):
|
||||
if isinstance(req, ScrapeRecipeData):
|
||||
html = req.data
|
||||
url = req.url or ""
|
||||
@@ -153,21 +210,48 @@ class RecipeController(BaseRecipeController):
|
||||
html = None
|
||||
url = req.url
|
||||
|
||||
try:
|
||||
recipe, extras = await create_from_html(url, self.translator, html)
|
||||
except ForceTimeoutException as e:
|
||||
raise HTTPException(
|
||||
status_code=408, detail=ErrorResponse.respond(message="Recipe Scraping Timed Out")
|
||||
) from e
|
||||
queue: asyncio.Queue[ServerSentEvent | None] = asyncio.Queue()
|
||||
|
||||
async def on_progress(message: str) -> None:
|
||||
await queue.put(
|
||||
ServerSentEvent(
|
||||
data=SSEDataEventMessage(message=message),
|
||||
event=SSEDataEventStatus.PROGRESS,
|
||||
)
|
||||
)
|
||||
|
||||
async def run() -> None:
|
||||
try:
|
||||
recipe, extras = await create_from_html(url, self.translator, html, on_progress=on_progress)
|
||||
slug = self._finish_recipe_from_web(req, recipe, extras)
|
||||
await queue.put(
|
||||
ServerSentEvent(
|
||||
data=SSEDataEventDone(slug=slug),
|
||||
event=SSEDataEventStatus.DONE,
|
||||
)
|
||||
)
|
||||
except Exception as e:
|
||||
self.logger.exception("Error in streaming recipe creation")
|
||||
await queue.put(
|
||||
ServerSentEvent(
|
||||
data=SSEDataEventMessage(message=e.__class__.__name__),
|
||||
event=SSEDataEventStatus.ERROR,
|
||||
)
|
||||
)
|
||||
finally:
|
||||
await queue.put(None)
|
||||
|
||||
asyncio.create_task(run())
|
||||
while (event := await queue.get()) is not None:
|
||||
yield event
|
||||
|
||||
def _finish_recipe_from_web(self, req: ScrapeRecipe | ScrapeRecipeData, recipe: Recipe, extras: object) -> str:
|
||||
if req.include_tags:
|
||||
ctx = ScraperContext(self.repos)
|
||||
|
||||
recipe.tags = extras.use_tags(ctx) # type: ignore
|
||||
|
||||
if req.include_categories:
|
||||
ctx = ScraperContext(self.repos)
|
||||
|
||||
recipe.recipe_category = extras.use_categories(ctx) # type: ignore
|
||||
|
||||
new_recipe = self.service.create_one(recipe)
|
||||
|
||||
@@ -8,12 +8,24 @@ from .pagination import (
|
||||
RequestQuery,
|
||||
)
|
||||
from .query_search import SearchFilter
|
||||
from .responses import ErrorResponse, FileTokenResponse, SuccessResponse
|
||||
from .responses import (
|
||||
ErrorResponse,
|
||||
FileTokenResponse,
|
||||
SSEDataEventBase,
|
||||
SSEDataEventDone,
|
||||
SSEDataEventMessage,
|
||||
SSEDataEventStatus,
|
||||
SuccessResponse,
|
||||
)
|
||||
from .validation import ValidationResponse
|
||||
|
||||
__all__ = [
|
||||
"ErrorResponse",
|
||||
"FileTokenResponse",
|
||||
"SSEDataEventBase",
|
||||
"SSEDataEventDone",
|
||||
"SSEDataEventMessage",
|
||||
"SSEDataEventStatus",
|
||||
"SuccessResponse",
|
||||
"SearchFilter",
|
||||
"OrderByNullPosition",
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
from enum import StrEnum
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from mealie.schema._mealie import MealieModel
|
||||
@@ -40,3 +42,20 @@ class FileTokenResponse(MealieModel):
|
||||
in the same call, for use while providing details to a HTTPException
|
||||
"""
|
||||
return cls(file_token=token).model_dump()
|
||||
|
||||
|
||||
class SSEDataEventStatus(StrEnum):
|
||||
PROGRESS = "progress"
|
||||
DONE = "done"
|
||||
ERROR = "error"
|
||||
|
||||
|
||||
# Field-less common parent for every server-sent-event data payload model.
class SSEDataEventBase(BaseModel):
    pass
|
||||
|
||||
|
||||
# Payload carrying a single text message.
class SSEDataEventMessage(SSEDataEventBase):
    # The text to deliver to the client.
    message: str
|
||||
|
||||
|
||||
# Payload carrying a recipe slug.
class SSEDataEventDone(SSEDataEventBase):
    # Slug identifying the recipe.
    slug: str
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
from collections.abc import Awaitable, Callable
|
||||
|
||||
from mealie.core.root_logger import get_logger
|
||||
from mealie.lang.providers import Translator
|
||||
from mealie.schema.recipe.recipe import Recipe
|
||||
@@ -37,25 +39,34 @@ class RecipeScraper:
|
||||
self.translator = translator
|
||||
self.logger = get_logger()
|
||||
|
||||
async def scrape(self, url: str, html: str | None = None) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
|
||||
async def scrape(
|
||||
self,
|
||||
url: str,
|
||||
html: str | None = None,
|
||||
on_progress: Callable[[str], Awaitable[None]] | None = None,
|
||||
) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
|
||||
"""
|
||||
Scrapes a recipe from the web.
|
||||
Skips the network request if `html` is provided.
|
||||
Optionally reports progress back via `on_progress`.
|
||||
"""
|
||||
|
||||
raw_html = html or await safe_scrape_html(url)
|
||||
if not html:
|
||||
if on_progress:
|
||||
await on_progress(self.translator.t("recipe.create-progress.fetching-webpage"))
|
||||
|
||||
if not raw_html:
|
||||
return None, None
|
||||
html = await safe_scrape_html(url)
|
||||
if not html:
|
||||
return None, None
|
||||
|
||||
for ScraperClass in self.scrapers:
|
||||
scraper = ScraperClass(url, self.translator, raw_html=raw_html)
|
||||
scraper = ScraperClass(url, self.translator, raw_html=html)
|
||||
if not scraper.can_scrape():
|
||||
self.logger.debug(f"Skipping {scraper.__class__.__name__}")
|
||||
continue
|
||||
|
||||
try:
|
||||
result = await scraper.parse()
|
||||
result = await scraper.parse(on_progress=on_progress)
|
||||
except Exception:
|
||||
self.logger.exception(f"Failed to scrape HTML with {scraper.__class__.__name__}")
|
||||
result = None
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
from collections.abc import Awaitable, Callable
|
||||
from enum import StrEnum
|
||||
from re import search as regex_search
|
||||
from uuid import uuid4
|
||||
@@ -22,7 +23,10 @@ class ParserErrors(StrEnum):
|
||||
|
||||
|
||||
async def create_from_html(
|
||||
url: str, translator: Translator, html: str | None = None
|
||||
url: str,
|
||||
translator: Translator,
|
||||
html: str | None = None,
|
||||
on_progress: Callable[[str], Awaitable[None]] | None = None,
|
||||
) -> tuple[Recipe, ScrapedExtras | None]:
|
||||
"""Main entry point for generating a recipe from a URL. Pass in a URL and
|
||||
a Recipe object will be returned if successful. Optionally pass in the HTML to skip fetching it.
|
||||
@@ -30,6 +34,7 @@ async def create_from_html(
|
||||
Args:
|
||||
url (str): a valid string representing a URL
|
||||
html (str | None): optional HTML string to skip network request. Defaults to None.
|
||||
on_progress: optional async callable invoked with a status message at each stage.
|
||||
|
||||
Returns:
|
||||
Recipe: Recipe Object
|
||||
@@ -42,7 +47,7 @@ async def create_from_html(
|
||||
raise HTTPException(status.HTTP_400_BAD_REQUEST, {"details": ParserErrors.BAD_RECIPE_DATA.value})
|
||||
url = extracted_url.group(0)
|
||||
|
||||
new_recipe, extras = await scraper.scrape(url, html)
|
||||
new_recipe, extras = await scraper.scrape(url, html, on_progress=on_progress)
|
||||
|
||||
if not new_recipe:
|
||||
raise HTTPException(status.HTTP_400_BAD_REQUEST, {"details": ParserErrors.BAD_RECIPE_DATA.value})
|
||||
@@ -54,9 +59,13 @@ async def create_from_html(
|
||||
recipe_data_service = RecipeDataService(new_recipe.id)
|
||||
|
||||
try:
|
||||
if new_recipe.image and isinstance(new_recipe.image, list):
|
||||
new_recipe.image = new_recipe.image[0]
|
||||
await recipe_data_service.scrape_image(new_recipe.image) # type: ignore
|
||||
if new_recipe.image:
|
||||
if isinstance(new_recipe.image, list):
|
||||
new_recipe.image = new_recipe.image[0]
|
||||
|
||||
if on_progress:
|
||||
await on_progress(translator.t("recipe.create-progress.downloading-image"))
|
||||
await recipe_data_service.scrape_image(new_recipe.image) # type: ignore
|
||||
|
||||
if new_recipe.name is None:
|
||||
new_recipe.name = "Untitled"
|
||||
|
||||
@@ -3,7 +3,7 @@ import functools
|
||||
import re
|
||||
import time
|
||||
from abc import ABC, abstractmethod
|
||||
from collections.abc import Callable
|
||||
from collections.abc import Awaitable, Callable
|
||||
from pathlib import Path
|
||||
from typing import Any, TypedDict
|
||||
|
||||
@@ -139,7 +139,9 @@ class ABCScraperStrategy(ABC):
|
||||
async def get_html(self, url: str) -> str: ...
|
||||
|
||||
@abstractmethod
|
||||
async def parse(self) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
|
||||
async def parse(
|
||||
self, on_progress: Callable[[str], Awaitable[None]] | None = None
|
||||
) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
|
||||
"""Parse a recipe from a web URL.
|
||||
|
||||
Args:
|
||||
@@ -276,10 +278,14 @@ class RecipeScraperPackage(ABCScraperStrategy):
|
||||
self.logger.debug(f"Recipe Scraper [Package] was unable to extract a recipe from {self.url}")
|
||||
return None
|
||||
|
||||
async def parse(self):
|
||||
async def parse(self, on_progress: Callable[[str], Awaitable[None]] | None = None):
|
||||
"""
|
||||
Parse a recipe from a given url.
|
||||
"""
|
||||
|
||||
if on_progress:
|
||||
await on_progress(self.translator.t("recipe.create-progress.extracting-recipe-data"))
|
||||
|
||||
scraped_data = await self.scrape_url()
|
||||
|
||||
if scraped_data is None:
|
||||
@@ -376,6 +382,12 @@ class RecipeScraperOpenAI(RecipeScraperPackage):
|
||||
self.logger.exception(f"OpenAI was unable to extract a recipe from {url}")
|
||||
return ""
|
||||
|
||||
async def parse(self, on_progress: Callable[[str], Awaitable[None]] | None = None):
    """
    Parse a recipe, first reporting that the recipe is being created with AI.

    Args:
        on_progress: optional async callable invoked with a translated status
            message before parsing starts.

    Returns:
        Whatever the parent class's `parse` resolves to.
    """

    if on_progress:
        await on_progress(self.translator.t("recipe.create-progress.creating-recipe-with-ai"))

    # The parent's `parse` is a coroutine function, so it must be awaited here.
    # Returning the bare call would hand the caller an un-awaited coroutine
    # object instead of the parsed result.
    # NOTE(review): `on_progress` is not forwarded to the parent, matching the
    # original call; presumably so only the AI-specific message is reported
    # for this scraper -- confirm.
    return await super().parse()
|
||||
|
||||
|
||||
class TranscribedAudio(TypedDict):
|
||||
audio: Path
|
||||
@@ -468,10 +480,16 @@ class RecipeScraperOpenAITranscription(ABCScraperStrategy):
|
||||
async def get_html(self, url: str) -> str:
|
||||
return self.raw_html or "" # we don't use HTML with this scraper since we use ytdlp
|
||||
|
||||
async def parse(self) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
|
||||
async def parse(
|
||||
self,
|
||||
on_progress: Callable[[str], Awaitable[None]] | None = None,
|
||||
) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
|
||||
openai_service = OpenAIService()
|
||||
|
||||
with get_temporary_path() as temp_path:
|
||||
if on_progress:
|
||||
await on_progress(self.translator.t("recipe.create-progress.downloading-video"))
|
||||
|
||||
video_data = await asyncio.to_thread(self._download_audio, temp_path)
|
||||
|
||||
if video_data["subtitle"]:
|
||||
@@ -485,6 +503,9 @@ class RecipeScraperOpenAITranscription(ABCScraperStrategy):
|
||||
video_data["transcription"] = ""
|
||||
|
||||
if not video_data["transcription"]:
|
||||
if on_progress:
|
||||
await on_progress(self.translator.t("recipe.create-progress.transcribing-audio-with-ai"))
|
||||
|
||||
try:
|
||||
transcription = await openai_service.transcribe_audio(video_data["audio"])
|
||||
except exceptions.RateLimitError:
|
||||
@@ -508,6 +529,9 @@ class RecipeScraperOpenAITranscription(ABCScraperStrategy):
|
||||
f"Transcription: {video_data['transcription']}",
|
||||
]
|
||||
|
||||
if on_progress:
|
||||
await on_progress(self.translator.t("recipe.create-progress.creating-recipe-from-transcript-with-ai"))
|
||||
|
||||
try:
|
||||
response = await openai_service.get_response(prompt, "\n".join(message_parts), response_schema=OpenAIRecipe)
|
||||
except exceptions.RateLimitError:
|
||||
@@ -586,10 +610,17 @@ class RecipeScraperOpenGraph(ABCScraperStrategy):
|
||||
"extras": [],
|
||||
}
|
||||
|
||||
async def parse(self):
|
||||
async def parse(
|
||||
self,
|
||||
on_progress: Callable[[str], Awaitable[None]] | None = None,
|
||||
):
|
||||
"""
|
||||
Parse a recipe from a given url.
|
||||
"""
|
||||
|
||||
if on_progress:
|
||||
await on_progress(self.translator.t("recipe.create-progress.creating-recipe-from-webpage-data"))
|
||||
|
||||
html = await self.get_html(self.url)
|
||||
|
||||
og_data = self.get_recipe_fields(html)
|
||||
|
||||
Reference in New Issue
Block a user