From c4fdab4e05089e92779153cffc33951e58de6e0b Mon Sep 17 00:00:00 2001 From: Michael Genson <71845777+michael-genson@users.noreply.github.com> Date: Sun, 15 Mar 2026 16:44:19 -0500 Subject: [PATCH] feat: Recipe import progress (#7252) --- frontend/composables/use-auth-backend.ts | 2 + frontend/composables/use-mealie-auth.ts | 1 + frontend/lib/api/types/non-generated.ts | 6 + frontend/lib/api/types/response.ts | 9 +- frontend/lib/api/user/recipes/recipe.ts | 71 +++++++++- frontend/package.json | 1 + .../pages/g/[groupSlug]/r/create/html.vue | 33 +++-- frontend/pages/g/[groupSlug]/r/create/url.vue | 32 +++-- frontend/yarn.lock | 5 + mealie/lang/messages/en-US.json | 12 +- mealie/routes/_base/routers.py | 9 +- mealie/routes/recipe/recipe_crud_routes.py | 114 +++++++++++++-- mealie/schema/response/__init__.py | 14 +- mealie/schema/response/responses.py | 19 +++ mealie/services/scraper/recipe_scraper.py | 23 ++- mealie/services/scraper/scraper.py | 19 ++- mealie/services/scraper/scraper_strategies.py | 41 +++++- pyproject.toml | 3 + .../user_recipe_tests/test_recipe_crud.py | 133 ++++++++++++++++++ tests/utils/api_routes/__init__.py | 4 + 20 files changed, 491 insertions(+), 60 deletions(-) diff --git a/frontend/composables/use-auth-backend.ts b/frontend/composables/use-auth-backend.ts index 227d13494..5814702b2 100644 --- a/frontend/composables/use-auth-backend.ts +++ b/frontend/composables/use-auth-backend.ts @@ -13,6 +13,7 @@ interface AuthStatus { interface AuthState { data: AuthData; status: AuthStatus; + token: { readonly value: string | null | undefined }; signIn: (credentials: FormData, options?: { redirect?: boolean }) => Promise; signOut: (callbackUrl?: string) => Promise; refresh: () => Promise; @@ -131,6 +132,7 @@ export const useAuthBackend = function (): AuthState { return { data: computed(() => authUser.value), status: computed(() => authStatus.value), + token: computed(() => tokenCookie.value), signIn, signOut, refresh, diff --git 
a/frontend/composables/use-mealie-auth.ts b/frontend/composables/use-mealie-auth.ts index ae3a03590..91f77dfd8 100644 --- a/frontend/composables/use-mealie-auth.ts +++ b/frontend/composables/use-mealie-auth.ts @@ -47,6 +47,7 @@ export const useMealieAuth = function () { return { user, loggedIn, + token: auth.token, signIn: auth.signIn, signOut: auth.signOut, refresh: auth.refresh, diff --git a/frontend/lib/api/types/non-generated.ts b/frontend/lib/api/types/non-generated.ts index 752236b1f..39a04348c 100644 --- a/frontend/lib/api/types/non-generated.ts +++ b/frontend/lib/api/types/non-generated.ts @@ -41,6 +41,12 @@ export enum Organizer { User = "users", } +export enum SSEDataEventStatus { + Progress = "progress", + Done = "done", + Error = "error", +} + export type PlaceholderKeyword = "$NOW"; export type RelationalKeyword = "IS" | "IS NOT" | "IN" | "NOT IN" | "CONTAINS ALL" | "LIKE" | "NOT LIKE"; export type LogicalOperator = "AND" | "OR"; diff --git a/frontend/lib/api/types/response.ts b/frontend/lib/api/types/response.ts index 68f7ba9c6..3ff39df69 100644 --- a/frontend/lib/api/types/response.ts +++ b/frontend/lib/api/types/response.ts @@ -1,5 +1,5 @@ /* tslint:disable */ - +/* eslint-disable */ /** /* This file was automatically generated from pydantic models by running pydantic2ts. 
/* Do not modify it by hand - just update the pydantic models and then re-run the script @@ -40,6 +40,13 @@ export interface RequestQuery { queryFilter?: string | null; paginationSeed?: string | null; } +export interface SSEDataEventBase {} +export interface SSEDataEventDone { + slug: string; +} +export interface SSEDataEventMessage { + message: string; +} export interface SuccessResponse { message: string; error?: boolean; diff --git a/frontend/lib/api/user/recipes/recipe.ts b/frontend/lib/api/user/recipes/recipe.ts index abf398631..70ae3aae7 100644 --- a/frontend/lib/api/user/recipes/recipe.ts +++ b/frontend/lib/api/user/recipes/recipe.ts @@ -1,3 +1,5 @@ +import { SSE } from "sse.js"; +import type { SSEvent } from "sse.js"; import { BaseCRUDAPI } from "../../base/base-clients"; import { route } from "../../base"; import { CommentsApi } from "./recipe-comments"; @@ -16,7 +18,9 @@ import type { RecipeTimelineEventOut, RecipeTimelineEventUpdate, } from "~/lib/api/types/recipe"; -import type { ApiRequestInstance, PaginationData } from "~/lib/api/types/non-generated"; +import type { SSEDataEventDone, SSEDataEventMessage } from "~/lib/api/types/response"; +import type { ApiRequestInstance, PaginationData, RequestResponse } from "~/lib/api/types/non-generated"; +import { SSEDataEventStatus } from "~/lib/api/types/non-generated"; export type Parser = "nlp" | "brute" | "openai"; @@ -34,11 +38,11 @@ const routes = { recipesBase: `${prefix}/recipes`, recipesSuggestions: `${prefix}/recipes/suggestions`, recipesTestScrapeUrl: `${prefix}/recipes/test-scrape-url`, - recipesCreateUrl: `${prefix}/recipes/create/url`, + recipesCreateUrl: `${prefix}/recipes/create/url/stream`, recipesCreateUrlBulk: `${prefix}/recipes/create/url/bulk`, recipesCreateFromZip: `${prefix}/recipes/create/zip`, recipesCreateFromImage: `${prefix}/recipes/create/image`, - recipesCreateFromHtmlOrJson: `${prefix}/recipes/create/html-or-json`, + recipesCreateFromHtmlOrJson: 
`${prefix}/recipes/create/html-or-json/stream`, recipesCategory: `${prefix}/recipes/category`, recipesParseIngredient: `${prefix}/parser/ingredient`, recipesParseIngredients: `${prefix}/parser/ingredients`, @@ -146,12 +150,65 @@ export class RecipeAPI extends BaseCRUDAPI { return await this.requests.post(routes.recipesTestScrapeUrl, { url, useOpenAI }); } - async createOneByHtmlOrJson(data: string, includeTags: boolean, includeCategories: boolean, url: string | null = null) { - return await this.requests.post(routes.recipesCreateFromHtmlOrJson, { data, includeTags, includeCategories, url }); + private streamRecipeCreate(streamRoute: string, payload: object, onProgress?: (message: string) => void): Promise> { + return new Promise((resolve) => { + const { token } = useMealieAuth(); + + const sse = new SSE(streamRoute, { + headers: { + "Content-Type": "application/json", + ...(token.value ? { Authorization: `Bearer ${token.value}` } : {}), + }, + payload: JSON.stringify(payload), + withCredentials: true, + autoReconnect: false, + }); + + if (onProgress) { + sse.addEventListener(SSEDataEventStatus.Progress, (e: SSEvent) => { + const { message } = JSON.parse(e.data) as SSEDataEventMessage; + onProgress(message); + }); + } + + sse.addEventListener(SSEDataEventStatus.Done, (e: SSEvent) => { + const { slug } = JSON.parse(e.data) as SSEDataEventDone; + sse.close(); + resolve({ response: { status: 201, data: slug } as any, data: slug, error: null }); + }); + + sse.addEventListener(SSEDataEventStatus.Error, (e: SSEvent) => { + try { + const { message } = JSON.parse(e.data) as SSEDataEventMessage; + sse.close(); + resolve({ response: null, data: null, error: new Error(message) }); + } + catch { + // Not a backend error payload (e.g. 
XHR connection-close event); ignore + } + }); + + sse.stream(); + }); } - async createOneByUrl(url: string, includeTags: boolean, includeCategories: boolean) { - return await this.requests.post(routes.recipesCreateUrl, { url, includeTags, includeCategories }); + async createOneByHtmlOrJson( + data: string, + includeTags: boolean, + includeCategories: boolean, + url: string | null = null, + onProgress?: (message: string) => void, + ): Promise> { + return this.streamRecipeCreate(routes.recipesCreateFromHtmlOrJson, { data, includeTags, includeCategories, url }, onProgress); + } + + async createOneByUrl( + url: string, + includeTags: boolean, + includeCategories: boolean, + onProgress?: (message: string) => void, + ): Promise> { + return this.streamRecipeCreate(routes.recipesCreateUrl, { url, includeTags, includeCategories }, onProgress); } async createManyByUrl(payload: CreateRecipeByUrlBulk) { diff --git a/frontend/package.json b/frontend/package.json index d8c60325b..35dd1f15d 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -30,6 +30,7 @@ "json-editor-vue": "^0.18.1", "marked": "^15.0.12", "nuxt": "^3.19.2", + "sse.js": "^2.8.0", "vite": "^7.0.0", "vue-advanced-cropper": "^2.8.9", "vue-draggable-plus": "^0.6.0", diff --git a/frontend/pages/g/[groupSlug]/r/create/html.vue b/frontend/pages/g/[groupSlug]/r/create/html.vue index e161de795..0b01298e0 100644 --- a/frontend/pages/g/[groupSlug]/r/create/html.vue +++ b/frontend/pages/g/[groupSlug]/r/create/html.vue @@ -83,14 +83,20 @@ /> -
- +
+
+ +
+ + + {{ createStatus }}  +
@@ -167,6 +173,7 @@ export default defineNuxtComponent({ } handleIsEditJson(); + const createStatus = ref(null); async function createFromHtmlOrJson(htmlOrJsonData: string | object | null, importKeywordsAsTags: boolean, importCategories: boolean, url: string | null = null) { if (!htmlOrJsonData) { return; @@ -186,7 +193,14 @@ export default defineNuxtComponent({ } state.loading = true; - const { response } = await api.recipes.createOneByHtmlOrJson(dataString, importKeywordsAsTags, importCategories, url); + const { response } = await api.recipes.createOneByHtmlOrJson( + dataString, + importKeywordsAsTags, + importCategories, + url, + (message: string) => createStatus.value = message, + ); + createStatus.value = null; handleResponse(response, importKeywordsAsTags); } @@ -199,6 +213,7 @@ export default defineNuxtComponent({ newRecipeData, newRecipeUrl, handleIsEditJson, + createStatus, createFromHtmlOrJson, ...toRefs(state), validators, diff --git a/frontend/pages/g/[groupSlug]/r/create/url.vue b/frontend/pages/g/[groupSlug]/r/create/url.vue index 6e51ce4a7..92858fe4c 100644 --- a/frontend/pages/g/[groupSlug]/r/create/url.vue +++ b/frontend/pages/g/[groupSlug]/r/create/url.vue @@ -65,14 +65,20 @@ :label="$t('recipe.parse-recipe-ingredients-after-import')" /> -
- +
+
+ +
+ + + {{ createStatus }}  +
@@ -234,6 +240,7 @@ export default defineNuxtComponent({ router.replace({ query: undefined }).then(() => router.push(to)); }); + const createStatus = ref(null); async function createByUrl(url: string | null, importKeywordsAsTags: boolean, importCategories: boolean) { if (url === null) { return; @@ -244,7 +251,13 @@ export default defineNuxtComponent({ return; } state.loading = true; - const { response } = await api.recipes.createOneByUrl(url, importKeywordsAsTags, importCategories); + const { response } = await api.recipes.createOneByUrl( + url, + importKeywordsAsTags, + importCategories, + (message: string) => createStatus.value = message, + ); + createStatus.value = null; handleResponse(response, importKeywordsAsTags); } @@ -257,6 +270,7 @@ export default defineNuxtComponent({ stayInEditMode, parseRecipe, domUrlForm, + createStatus, createByUrl, ...toRefs(state), validators, diff --git a/frontend/yarn.lock b/frontend/yarn.lock index 8ec6da933..cd554b298 100644 --- a/frontend/yarn.lock +++ b/frontend/yarn.lock @@ -9940,6 +9940,11 @@ srvx@^0.8.9: resolved "https://registry.yarnpkg.com/srvx/-/srvx-0.8.16.tgz#f2582bd747351b5b0a1c65bce8179bae83e8b2a6" integrity sha512-hmcGW4CgroeSmzgF1Ihwgl+Ths0JqAJ7HwjP2X7e3JzY7u4IydLMcdnlqGQiQGUswz+PO9oh/KtCpOISIvs9QQ== +sse.js@^2.8.0: + version "2.8.0" + resolved "https://registry.yarnpkg.com/sse.js/-/sse.js-2.8.0.tgz#28e922720ef41f0de3312e33d23183682bec4b1e" + integrity sha512-35RyyFYpzzHZgMw9D5GxwADbL6gnntSwW/rKXcuIy1KkYCPjW6oia0moNdNRhs34oVHU1Sjgovj3l7uIEZjrKA== + stable-hash-x@^0.2.0: version "0.2.0" resolved "https://registry.yarnpkg.com/stable-hash-x/-/stable-hash-x-0.2.0.tgz#dfd76bfa5d839a7470125c6a6b3c8b22061793e9" diff --git a/mealie/lang/messages/en-US.json b/mealie/lang/messages/en-US.json index e6d1a46eb..29e1ee8f7 100644 --- a/mealie/lang/messages/en-US.json +++ b/mealie/lang/messages/en-US.json @@ -19,7 +19,17 @@ "yields": "Yields" }, "and-amount": "and {amount}", - "or-ingredient": "or {ingredient}" + 
"or-ingredient": "or {ingredient}", + "create-progress": { + "creating-recipe-with-ai": "Creating recipe with AI...", + "creating-recipe-from-transcript-with-ai": "Creating recipe from transcript with AI...", + "creating-recipe-from-webpage-data": "Creating recipe from webpage data...", + "downloading-image": "Downloading image...", + "downloading-video": "Downloading video...", + "extracting-recipe-data": "Extracting recipe data...", + "fetching-webpage": "Fetching webpage...", + "transcribing-audio-with-ai": "Transcribing audio with AI..." + } }, "mealplan": { "no-recipes-match-your-rules": "No recipes match your rules" diff --git a/mealie/routes/_base/routers.py b/mealie/routes/_base/routers.py index 7488a172f..4424f9a26 100644 --- a/mealie/routes/_base/routers.py +++ b/mealie/routes/_base/routers.py @@ -33,7 +33,14 @@ class MealieCrudRoute(APIRoute): async def custom_route_handler(request: Request) -> Response: with contextlib.suppress(JSONDecodeError): response = await original_route_handler(request) - response_body = json.loads(response.body) + + # StreamingResponse from starlette doesn't have a body attribute, even though it inherits from Response, + # so we may get an attribute error here even though our type hinting suggests otherwise. 
+ try: + response_body = json.loads(response.body) + except AttributeError: + return response + if isinstance(response_body, dict): if last_modified := response_body.get("updatedAt"): response.headers["last-modified"] = last_modified diff --git a/mealie/routes/recipe/recipe_crud_routes.py b/mealie/routes/recipe/recipe_crud_routes.py index becae3e0a..927352efb 100644 --- a/mealie/routes/recipe/recipe_crud_routes.py +++ b/mealie/routes/recipe/recipe_crud_routes.py @@ -1,4 +1,6 @@ +import asyncio from collections import defaultdict +from collections.abc import AsyncIterable from shutil import copyfileobj from uuid import UUID @@ -17,6 +19,7 @@ from fastapi import ( status, ) from fastapi.datastructures import UploadFile +from fastapi.sse import EventSourceResponse, ServerSentEvent from pydantic import UUID4 from slugify import slugify @@ -46,7 +49,13 @@ from mealie.schema.recipe.request_helpers import ( ) from mealie.schema.response import PaginationBase, PaginationQuery from mealie.schema.response.pagination import RecipeSearchQuery -from mealie.schema.response.responses import ErrorResponse, SuccessResponse +from mealie.schema.response.responses import ( + ErrorResponse, + SSEDataEventDone, + SSEDataEventMessage, + SSEDataEventStatus, + SuccessResponse, +) from mealie.services import urls from mealie.services.event_bus_service.event_types import ( EventOperation, @@ -130,22 +139,70 @@ class RecipeController(BaseRecipeController): return "recipe_scrapers was unable to scrape this URL" - @router.post("/create/html-or-json", status_code=201) - async def create_recipe_from_html_or_json(self, req: ScrapeRecipeData): + @router.post("/create/html-or-json", status_code=201, response_model=str) + async def create_recipe_from_html_or_json(self, req: ScrapeRecipeData) -> str: """Takes in raw HTML or a https://schema.org/Recipe object as a JSON string and parses it like a URL""" if req.data.startswith("{"): req.data = RecipeScraperPackage.ld_json_to_html(req.data) - return 
await self._create_recipe_from_web(req) + async for event in self._create_recipe_from_web(req): + if isinstance(event.data, SSEDataEventDone): + return event.data.slug + if isinstance(event.data, SSEDataEventMessage) and event.event == SSEDataEventStatus.ERROR: + raise HTTPException(status_code=400, detail=ErrorResponse.respond(message=event.data.message)) + + # This should never be reachable, since we should always hit DONE or hit an exception/ERROR + raise HTTPException(status_code=500, detail=ErrorResponse.respond(message="Unknown Error")) + + @router.post("/create/html-or-json/stream", response_class=EventSourceResponse) + async def create_recipe_from_html_or_json_stream(self, req: ScrapeRecipeData) -> AsyncIterable[ServerSentEvent]: + """ + Takes in raw HTML or a https://schema.org/Recipe object as a JSON string and parses it like a URL, + streaming progress via SSE + """ + + if req.data.startswith("{"): + req.data = RecipeScraperPackage.ld_json_to_html(req.data) + + async for event in self._create_recipe_from_web(req): + yield event @router.post("/create/url", status_code=201, response_model=str) - async def parse_recipe_url(self, req: ScrapeRecipe): + async def parse_recipe_url(self, req: ScrapeRecipe) -> str: """Takes in a URL and attempts to scrape data and load it into the database""" - return await self._create_recipe_from_web(req) + async for event in self._create_recipe_from_web(req): + if isinstance(event.data, SSEDataEventDone): + return event.data.slug + if isinstance(event.data, SSEDataEventMessage) and event.event == SSEDataEventStatus.ERROR: + raise HTTPException(status_code=400, detail=ErrorResponse.respond(message=event.data.message)) + + # This should never be reachable, since we should always hit DONE or hit an exception/ERROR + raise HTTPException(status_code=500, detail=ErrorResponse.respond(message="Unknown Error")) + + @router.post("/create/url/stream", response_class=EventSourceResponse) + async def parse_recipe_url_stream(self, req: 
ScrapeRecipe) -> AsyncIterable[ServerSentEvent]: + """ + Takes in a URL and attempts to scrape data and load it into the database, + streaming progress via SSE + """ + + async for event in self._create_recipe_from_web(req): + yield event + + async def _create_recipe_from_web(self, req: ScrapeRecipe | ScrapeRecipeData) -> AsyncIterable[ServerSentEvent]: + """ + Create a recipe from the web, returning progress via SSE. + Events will continue to be yielded until: + - The recipe is created, emitting: + - event=SSEDataEventStatus.DONE + - data=SSEDataEventDone(...) + - An exception is raised, emitting: + - event=SSEDataEventStatus.ERROR + - data=SSEDataEventMessage(...) + """ - async def _create_recipe_from_web(self, req: ScrapeRecipe | ScrapeRecipeData): if isinstance(req, ScrapeRecipeData): html = req.data url = req.url or "" @@ -153,21 +210,48 @@ class RecipeController(BaseRecipeController): html = None url = req.url - try: - recipe, extras = await create_from_html(url, self.translator, html) - except ForceTimeoutException as e: - raise HTTPException( - status_code=408, detail=ErrorResponse.respond(message="Recipe Scraping Timed Out") - ) from e + queue: asyncio.Queue[ServerSentEvent | None] = asyncio.Queue() + async def on_progress(message: str) -> None: + await queue.put( + ServerSentEvent( + data=SSEDataEventMessage(message=message), + event=SSEDataEventStatus.PROGRESS, + ) + ) + + async def run() -> None: + try: + recipe, extras = await create_from_html(url, self.translator, html, on_progress=on_progress) + slug = self._finish_recipe_from_web(req, recipe, extras) + await queue.put( + ServerSentEvent( + data=SSEDataEventDone(slug=slug), + event=SSEDataEventStatus.DONE, + ) + ) + except Exception as e: + self.logger.exception("Error in streaming recipe creation") + await queue.put( + ServerSentEvent( + data=SSEDataEventMessage(message=e.__class__.__name__), + event=SSEDataEventStatus.ERROR, + ) + ) + finally: + await queue.put(None) + + _task = asyncio.create_task(run())  # keep a reference so the task is not garbage-collected mid-run +
while (event := await queue.get()) is not None: + yield event + + def _finish_recipe_from_web(self, req: ScrapeRecipe | ScrapeRecipeData, recipe: Recipe, extras: object) -> str: if req.include_tags: ctx = ScraperContext(self.repos) - recipe.tags = extras.use_tags(ctx) # type: ignore if req.include_categories: ctx = ScraperContext(self.repos) - recipe.recipe_category = extras.use_categories(ctx) # type: ignore new_recipe = self.service.create_one(recipe) diff --git a/mealie/schema/response/__init__.py b/mealie/schema/response/__init__.py index a323b6e34..5422e2113 100644 --- a/mealie/schema/response/__init__.py +++ b/mealie/schema/response/__init__.py @@ -8,12 +8,24 @@ from .pagination import ( RequestQuery, ) from .query_search import SearchFilter -from .responses import ErrorResponse, FileTokenResponse, SuccessResponse +from .responses import ( + ErrorResponse, + FileTokenResponse, + SSEDataEventBase, + SSEDataEventDone, + SSEDataEventMessage, + SSEDataEventStatus, + SuccessResponse, +) from .validation import ValidationResponse __all__ = [ "ErrorResponse", "FileTokenResponse", + "SSEDataEventBase", + "SSEDataEventDone", + "SSEDataEventMessage", + "SSEDataEventStatus", "SuccessResponse", "SearchFilter", "OrderByNullPosition", diff --git a/mealie/schema/response/responses.py b/mealie/schema/response/responses.py index 9c7a35523..28c5bcc66 100644 --- a/mealie/schema/response/responses.py +++ b/mealie/schema/response/responses.py @@ -1,3 +1,5 @@ +from enum import StrEnum + from pydantic import BaseModel from mealie.schema._mealie import MealieModel @@ -40,3 +42,20 @@ class FileTokenResponse(MealieModel): in the same call, for use while providing details to a HTTPException """ return cls(file_token=token).model_dump() + + +class SSEDataEventStatus(StrEnum): + PROGRESS = "progress" + DONE = "done" + ERROR = "error" + + +class SSEDataEventBase(BaseModel): ... 
+ + +class SSEDataEventMessage(SSEDataEventBase): + message: str + + +class SSEDataEventDone(SSEDataEventBase): + slug: str diff --git a/mealie/services/scraper/recipe_scraper.py b/mealie/services/scraper/recipe_scraper.py index f4bca66df..aeb37465a 100644 --- a/mealie/services/scraper/recipe_scraper.py +++ b/mealie/services/scraper/recipe_scraper.py @@ -1,3 +1,5 @@ +from collections.abc import Awaitable, Callable + from mealie.core.root_logger import get_logger from mealie.lang.providers import Translator from mealie.schema.recipe.recipe import Recipe @@ -37,25 +39,34 @@ class RecipeScraper: self.translator = translator self.logger = get_logger() - async def scrape(self, url: str, html: str | None = None) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]: + async def scrape( + self, + url: str, + html: str | None = None, + on_progress: Callable[[str], Awaitable[None]] | None = None, + ) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]: """ Scrapes a recipe from the web. Skips the network request if `html` is provided. + Optionally reports progress back via `on_progress`. 
""" - raw_html = html or await safe_scrape_html(url) + if not html: + if on_progress: + await on_progress(self.translator.t("recipe.create-progress.fetching-webpage")) - if not raw_html: - return None, None + html = await safe_scrape_html(url) + if not html: + return None, None for ScraperClass in self.scrapers: - scraper = ScraperClass(url, self.translator, raw_html=raw_html) + scraper = ScraperClass(url, self.translator, raw_html=html) if not scraper.can_scrape(): self.logger.debug(f"Skipping {scraper.__class__.__name__}") continue try: - result = await scraper.parse() + result = await scraper.parse(on_progress=on_progress) except Exception: self.logger.exception(f"Failed to scrape HTML with {scraper.__class__.__name__}") result = None diff --git a/mealie/services/scraper/scraper.py b/mealie/services/scraper/scraper.py index 76a34382b..f72cb46fb 100644 --- a/mealie/services/scraper/scraper.py +++ b/mealie/services/scraper/scraper.py @@ -1,3 +1,4 @@ +from collections.abc import Awaitable, Callable from enum import StrEnum from re import search as regex_search from uuid import uuid4 @@ -22,7 +23,10 @@ class ParserErrors(StrEnum): async def create_from_html( - url: str, translator: Translator, html: str | None = None + url: str, + translator: Translator, + html: str | None = None, + on_progress: Callable[[str], Awaitable[None]] | None = None, ) -> tuple[Recipe, ScrapedExtras | None]: """Main entry point for generating a recipe from a URL. Pass in a URL and a Recipe object will be returned if successful. Optionally pass in the HTML to skip fetching it. @@ -30,6 +34,7 @@ async def create_from_html( Args: url (str): a valid string representing a URL html (str | None): optional HTML string to skip network request. Defaults to None. + on_progress: optional async callable invoked with a status message at each stage. 
Returns: Recipe: Recipe Object @@ -42,7 +47,7 @@ async def create_from_html( raise HTTPException(status.HTTP_400_BAD_REQUEST, {"details": ParserErrors.BAD_RECIPE_DATA.value}) url = extracted_url.group(0) - new_recipe, extras = await scraper.scrape(url, html) + new_recipe, extras = await scraper.scrape(url, html, on_progress=on_progress) if not new_recipe: raise HTTPException(status.HTTP_400_BAD_REQUEST, {"details": ParserErrors.BAD_RECIPE_DATA.value}) @@ -54,9 +59,13 @@ async def create_from_html( recipe_data_service = RecipeDataService(new_recipe.id) try: - if new_recipe.image and isinstance(new_recipe.image, list): - new_recipe.image = new_recipe.image[0] - await recipe_data_service.scrape_image(new_recipe.image) # type: ignore + if new_recipe.image: + if isinstance(new_recipe.image, list): + new_recipe.image = new_recipe.image[0] + + if on_progress: + await on_progress(translator.t("recipe.create-progress.downloading-image")) + await recipe_data_service.scrape_image(new_recipe.image) # type: ignore if new_recipe.name is None: new_recipe.name = "Untitled" diff --git a/mealie/services/scraper/scraper_strategies.py b/mealie/services/scraper/scraper_strategies.py index 03057723b..d6b7e489e 100644 --- a/mealie/services/scraper/scraper_strategies.py +++ b/mealie/services/scraper/scraper_strategies.py @@ -3,7 +3,7 @@ import functools import re import time from abc import ABC, abstractmethod -from collections.abc import Callable +from collections.abc import Awaitable, Callable from pathlib import Path from typing import Any, TypedDict @@ -139,7 +139,9 @@ class ABCScraperStrategy(ABC): async def get_html(self, url: str) -> str: ... @abstractmethod - async def parse(self) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]: + async def parse( + self, on_progress: Callable[[str], Awaitable[None]] | None = None + ) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]: """Parse a recipe from a web URL. 
Args: @@ -276,10 +278,14 @@ class RecipeScraperPackage(ABCScraperStrategy): self.logger.debug(f"Recipe Scraper [Package] was unable to extract a recipe from {self.url}") return None - async def parse(self): + async def parse(self, on_progress: Callable[[str], Awaitable[None]] | None = None): """ Parse a recipe from a given url. """ + + if on_progress: + await on_progress(self.translator.t("recipe.create-progress.extracting-recipe-data")) + scraped_data = await self.scrape_url() if scraped_data is None: @@ -376,6 +382,12 @@ class RecipeScraperOpenAI(RecipeScraperPackage): self.logger.exception(f"OpenAI was unable to extract a recipe from {url}") return "" + async def parse(self, on_progress: Callable[[str], Awaitable[None]] | None = None): + if on_progress: + await on_progress(self.translator.t("recipe.create-progress.creating-recipe-with-ai")) + + return await super().parse() + class TranscribedAudio(TypedDict): audio: Path @@ -468,10 +480,16 @@ class RecipeScraperOpenAITranscription(ABCScraperStrategy): async def get_html(self, url: str) -> str: return self.raw_html or "" # we don't use HTML with this scraper since we use ytdlp - async def parse(self) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]: + async def parse( + self, + on_progress: Callable[[str], Awaitable[None]] | None = None, + ) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]: openai_service = OpenAIService() with get_temporary_path() as temp_path: + if on_progress: + await on_progress(self.translator.t("recipe.create-progress.downloading-video")) + video_data = await asyncio.to_thread(self._download_audio, temp_path) if video_data["subtitle"]: @@ -485,6 +503,9 @@ class RecipeScraperOpenAITranscription(ABCScraperStrategy): video_data["transcription"] = "" if not video_data["transcription"]: + if on_progress: + await on_progress(self.translator.t("recipe.create-progress.transcribing-audio-with-ai")) + try: transcription = await openai_service.transcribe_audio(video_data["audio"]) except
exceptions.RateLimitError: @@ -508,6 +529,9 @@ class RecipeScraperOpenAITranscription(ABCScraperStrategy): f"Transcription: {video_data['transcription']}", ] + if on_progress: + await on_progress(self.translator.t("recipe.create-progress.creating-recipe-from-transcript-with-ai")) + try: response = await openai_service.get_response(prompt, "\n".join(message_parts), response_schema=OpenAIRecipe) except exceptions.RateLimitError: @@ -586,10 +610,17 @@ class RecipeScraperOpenGraph(ABCScraperStrategy): "extras": [], } - async def parse(self): + async def parse( + self, + on_progress: Callable[[str], Awaitable[None]] | None = None, + ): """ Parse a recipe from a given url. """ + + if on_progress: + await on_progress(self.translator.t("recipe.create-progress.creating-recipe-from-webpage-data")) + html = await self.get_html(self.url) og_data = self.get_recipe_fields(html) diff --git a/pyproject.toml b/pyproject.toml index 7dc9c1dba..8d1e272e0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -174,3 +174,6 @@ select = [ [tool.ruff.lint.mccabe] # Unlike Flake8, default to a complexity level of 10. max-complexity = 24 # Default is 10. 
+ +[tool.uv] +add-bounds = "exact" diff --git a/tests/integration_tests/user_recipe_tests/test_recipe_crud.py b/tests/integration_tests/user_recipe_tests/test_recipe_crud.py index ba1974649..fc5f636a1 100644 --- a/tests/integration_tests/user_recipe_tests/test_recipe_crud.py +++ b/tests/integration_tests/user_recipe_tests/test_recipe_crud.py @@ -96,6 +96,23 @@ def open_graph_override(html: str): return get_html +def parse_sse_events(text: str) -> list[dict]: + """Parse SSE response text into a list of events with 'event' and 'data' keys.""" + events = [] + current: dict = {} + for line in text.splitlines(): + if line.startswith("event:"): + current["event"] = line[len("event:") :].strip() + elif line.startswith("data:"): + current["data"] = json.loads(line[len("data:") :].strip()) + elif line == "" and current: + events.append(current) + current = {} + if current: + events.append(current) + return events + + def test_create_by_url( api_client: TestClient, unique_user: TestUser, @@ -218,6 +235,122 @@ def test_create_by_html_or_json( assert tag["name"] in expected_tags +def test_create_by_url_stream_done( + api_client: TestClient, + unique_user: TestUser, + monkeypatch: MonkeyPatch, +): + async def mock_safe_scrape_html(url: str) -> str: + return "" + + monkeypatch.setattr(recipe_scraper_module, "safe_scrape_html", mock_safe_scrape_html) + + recipe_data = recipe_test_data[0] + for scraper_cls in DEFAULT_SCRAPER_STRATEGIES: + monkeypatch.setattr( + scraper_cls, + "get_html", + open_graph_override(recipe_data.html_file.read_text()), + ) + + async def return_empty_response(*args, **kwargs): + return Response(200, content=b"") + + monkeypatch.setattr(AsyncSafeTransport, "handle_async_request", return_empty_response) + monkeypatch.setattr(RecipeDataService, "scrape_image", lambda *_: "TEST_IMAGE") + + api_client.delete(api_routes.recipes_slug(recipe_data.expected_slug), headers=unique_user.token) + + response = api_client.post( + api_routes.recipes_create_url_stream, + 
json={"url": recipe_data.url, "include_tags": False}, + headers=unique_user.token, + ) + + assert response.status_code == 200 + events = parse_sse_events(response.text) + event_types = [e["event"] for e in events] + + assert "done" in event_types + done_event = next(e for e in events if e["event"] == "done") + assert done_event["data"]["slug"] == recipe_data.expected_slug + + assert any(e["event"] == "progress" for e in events) + + +def test_create_by_url_stream_error( + api_client: TestClient, + unique_user: TestUser, + monkeypatch: MonkeyPatch, +): + async def raise_error(*args, **kwargs): + raise Exception("Test scrape error") + + monkeypatch.setattr("mealie.routes.recipe.recipe_crud_routes.create_from_html", raise_error) + + response = api_client.post( + api_routes.recipes_create_url_stream, + json={"url": "https://example.com/recipe"}, + headers=unique_user.token, + ) + + assert response.status_code == 200 + events = parse_sse_events(response.text) + event_types = [e["event"] for e in events] + + assert "error" in event_types + + +def test_create_by_html_or_json_stream_done( + api_client: TestClient, + unique_user: TestUser, + monkeypatch: MonkeyPatch, +): + monkeypatch.setattr(RecipeDataService, "scrape_image", lambda *_: "TEST_IMAGE") + + recipe_data = recipe_test_data[0] + api_client.delete(api_routes.recipes_slug(recipe_data.expected_slug), headers=unique_user.token) + + response = api_client.post( + api_routes.recipes_create_html_or_json_stream, + json={"data": recipe_data.html_file.read_text(), "include_tags": False}, + headers=unique_user.token, + ) + + assert response.status_code == 200 + events = parse_sse_events(response.text) + event_types = [e["event"] for e in events] + + assert "done" in event_types + done_event = next(e for e in events if e["event"] == "done") + assert done_event["data"]["slug"] == recipe_data.expected_slug + + assert any(e["event"] == "progress" for e in events) + + +def test_create_by_html_or_json_stream_error( + api_client: 
TestClient, + unique_user: TestUser, + monkeypatch: MonkeyPatch, +): + async def raise_error(*args, **kwargs): + raise Exception("Test scrape error") + + monkeypatch.setattr("mealie.routes.recipe.recipe_crud_routes.create_from_html", raise_error) + + response = api_client.post( + api_routes.recipes_create_html_or_json_stream, + json={"data": "test"}, + headers=unique_user.token, + ) + + assert response.status_code == 200 + events = parse_sse_events(response.text) + event_types = [e["event"] for e in events] + + assert "error" in event_types + + def test_create_recipe_from_zip(api_client: TestClient, unique_user: TestUser, tempdir: str): database = unique_user.repos recipe_name = random_string() diff --git a/tests/utils/api_routes/__init__.py b/tests/utils/api_routes/__init__.py index 4da4ba601..6fa3e6384 100644 --- a/tests/utils/api_routes/__init__.py +++ b/tests/utils/api_routes/__init__.py @@ -149,12 +149,16 @@ recipes_bulk_actions_tag = "/api/recipes/bulk-actions/tag" """`/api/recipes/bulk-actions/tag`""" recipes_create_html_or_json = "/api/recipes/create/html-or-json" """`/api/recipes/create/html-or-json`""" +recipes_create_html_or_json_stream = "/api/recipes/create/html-or-json/stream" +"""`/api/recipes/create/html-or-json/stream`""" recipes_create_image = "/api/recipes/create/image" """`/api/recipes/create/image`""" recipes_create_url = "/api/recipes/create/url" """`/api/recipes/create/url`""" recipes_create_url_bulk = "/api/recipes/create/url/bulk" """`/api/recipes/create/url/bulk`""" +recipes_create_url_stream = "/api/recipes/create/url/stream" +"""`/api/recipes/create/url/stream`""" recipes_create_zip = "/api/recipes/create/zip" """`/api/recipes/create/zip`""" recipes_exports = "/api/recipes/exports"