feat: Recipe import progress (#7252)

This commit is contained in:
Michael Genson
2026-03-15 16:44:19 -05:00
committed by GitHub
parent 04dd514e6a
commit c4fdab4e05
20 changed files with 491 additions and 60 deletions

View File

@@ -13,6 +13,7 @@ interface AuthStatus {
interface AuthState {
data: AuthData;
status: AuthStatus;
token: { readonly value: string | null | undefined };
signIn: (credentials: FormData, options?: { redirect?: boolean }) => Promise<void>;
signOut: (callbackUrl?: string) => Promise<void>;
refresh: () => Promise<void>;
@@ -131,6 +132,7 @@ export const useAuthBackend = function (): AuthState {
return {
data: computed(() => authUser.value),
status: computed(() => authStatus.value),
token: computed(() => tokenCookie.value),
signIn,
signOut,
refresh,

View File

@@ -47,6 +47,7 @@ export const useMealieAuth = function () {
return {
user,
loggedIn,
token: auth.token,
signIn: auth.signIn,
signOut: auth.signOut,
refresh: auth.refresh,

View File

@@ -41,6 +41,12 @@ export enum Organizer {
User = "users",
}
/**
 * Event names used on a server-sent-event (SSE) data stream.
 *
 * Each value is the literal SSE event name that a client subscribes to.
 */
export enum SSEDataEventStatus {
  /** Intermediate status update; more events follow. */
  Progress = "progress",
  /** The operation completed successfully. */
  Done = "done",
  /** The operation failed. */
  Error = "error",
}
export type PlaceholderKeyword = "$NOW";
export type RelationalKeyword = "IS" | "IS NOT" | "IN" | "NOT IN" | "CONTAINS ALL" | "LIKE" | "NOT LIKE";
export type LogicalOperator = "AND" | "OR";

View File

@@ -1,5 +1,5 @@
/* tslint:disable */
/* eslint-disable */
/**
/* This file was automatically generated from pydantic models by running pydantic2ts.
/* Do not modify it by hand - just update the pydantic models and then re-run the script
@@ -40,6 +40,13 @@ export interface RequestQuery {
queryFilter?: string | null;
paginationSeed?: string | null;
}
export interface SSEDataEventBase {}
export interface SSEDataEventDone {
slug: string;
}
export interface SSEDataEventMessage {
message: string;
}
export interface SuccessResponse {
message: string;
error?: boolean;

View File

@@ -1,3 +1,5 @@
import { SSE } from "sse.js";
import type { SSEvent } from "sse.js";
import { BaseCRUDAPI } from "../../base/base-clients";
import { route } from "../../base";
import { CommentsApi } from "./recipe-comments";
@@ -16,7 +18,9 @@ import type {
RecipeTimelineEventOut,
RecipeTimelineEventUpdate,
} from "~/lib/api/types/recipe";
import type { ApiRequestInstance, PaginationData } from "~/lib/api/types/non-generated";
import type { SSEDataEventDone, SSEDataEventMessage } from "~/lib/api/types/response";
import type { ApiRequestInstance, PaginationData, RequestResponse } from "~/lib/api/types/non-generated";
import { SSEDataEventStatus } from "~/lib/api/types/non-generated";
export type Parser = "nlp" | "brute" | "openai";
@@ -34,11 +38,11 @@ const routes = {
recipesBase: `${prefix}/recipes`,
recipesSuggestions: `${prefix}/recipes/suggestions`,
recipesTestScrapeUrl: `${prefix}/recipes/test-scrape-url`,
recipesCreateUrl: `${prefix}/recipes/create/url`,
recipesCreateUrl: `${prefix}/recipes/create/url/stream`,
recipesCreateUrlBulk: `${prefix}/recipes/create/url/bulk`,
recipesCreateFromZip: `${prefix}/recipes/create/zip`,
recipesCreateFromImage: `${prefix}/recipes/create/image`,
recipesCreateFromHtmlOrJson: `${prefix}/recipes/create/html-or-json`,
recipesCreateFromHtmlOrJson: `${prefix}/recipes/create/html-or-json/stream`,
recipesCategory: `${prefix}/recipes/category`,
recipesParseIngredient: `${prefix}/parser/ingredient`,
recipesParseIngredients: `${prefix}/parser/ingredients`,
@@ -146,12 +150,65 @@ export class RecipeAPI extends BaseCRUDAPI<CreateRecipe, Recipe, Recipe> {
return await this.requests.post<Recipe | null>(routes.recipesTestScrapeUrl, { url, useOpenAI });
}
async createOneByHtmlOrJson(data: string, includeTags: boolean, includeCategories: boolean, url: string | null = null) {
return await this.requests.post<string>(routes.recipesCreateFromHtmlOrJson, { data, includeTags, includeCategories, url });
/**
 * POST `payload` to an SSE recipe-creation endpoint and resolve once the stream reaches a terminal state.
 *
 * The returned promise never rejects and always settles exactly once:
 * - on a `done` event it resolves with the new recipe slug as `data` (and a synthetic 201 `response`);
 * - on an `error` event carrying a JSON `message` it resolves with `error` set to that message;
 * - if the connection closes before either of those arrives (HTTP failure, network drop,
 *   malformed terminal payload) it resolves with a generic `error` instead of staying pending.
 *
 * @param streamRoute - URL of the streaming endpoint.
 * @param payload - Request body; sent JSON-encoded.
 * @param onProgress - Optional callback invoked with the message of every `progress` event.
 * @returns The slug of the created recipe in `data`, or the failure in `error`.
 */
private streamRecipeCreate(streamRoute: string, payload: object, onProgress?: (message: string) => void): Promise<RequestResponse<string>> {
  return new Promise((resolve) => {
    const { token } = useMealieAuth();
    const sse = new SSE(streamRoute, {
      headers: {
        "Content-Type": "application/json",
        ...(token.value ? { Authorization: `Bearer ${token.value}` } : {}),
      },
      payload: JSON.stringify(payload),
      withCredentials: true,
      autoReconnect: false,
    });

    // sse.js mirrors the EventSource ready states, where 2 means "closed"
    const READY_STATE_CLOSED = 2;
    let settled = false;

    // Resolve the promise at most once and release the connection
    const settle = (result: RequestResponse<string>) => {
      if (settled) {
        return;
      }
      // Set the flag before closing: close() emits "readystatechange" synchronously,
      // and that listener must not settle a second time
      settled = true;
      sse.close();
      resolve(result);
    };
    const fail = (message: string) => settle({ response: null, data: null, error: new Error(message) });

    // Read a string field from an event's JSON payload; null when the payload is not JSON or lacks the field
    const readField = (e: SSEvent, field: keyof SSEDataEventMessage | keyof SSEDataEventDone): string | null => {
      try {
        const parsed: unknown = JSON.parse(e.data);
        if (typeof parsed === "object" && parsed !== null) {
          const value = (parsed as Record<string, unknown>)[field];
          return typeof value === "string" ? value : null;
        }
      }
      catch {
        // Payload is not JSON; report it as missing
      }
      return null;
    };

    if (onProgress) {
      sse.addEventListener(SSEDataEventStatus.Progress, (e: SSEvent) => {
        const message = readField(e, "message");
        // A malformed progress payload is cosmetic only; keep waiting for the terminal event
        if (message !== null) {
          onProgress(message);
        }
      });
    }

    sse.addEventListener(SSEDataEventStatus.Done, (e: SSEvent) => {
      const slug = readField(e, "slug");
      if (slug === null) {
        fail("Malformed response from server");
        return;
      }
      settle({ response: { status: 201, data: slug } as any, data: slug, error: null });
    });

    sse.addEventListener(SSEDataEventStatus.Error, (e: SSEvent) => {
      const message = readField(e, "message");
      if (message !== null) {
        fail(message);
      }
      // Otherwise this is not a backend error payload (e.g. an XHR-level failure);
      // the "readystatechange" listener below settles once the stream is actually closed
    });

    sse.addEventListener("readystatechange", () => {
      // Reaching the closed state without a terminal event means the import cannot complete
      if (sse.readyState === READY_STATE_CLOSED) {
        fail("Connection closed before the recipe was created");
      }
    });

    sse.stream();
  });
}
async createOneByUrl(url: string, includeTags: boolean, includeCategories: boolean) {
return await this.requests.post<string>(routes.recipesCreateUrl, { url, includeTags, includeCategories });
/**
 * Create a recipe from raw HTML or JSON using the streaming html-or-json endpoint.
 *
 * Delegates to `streamRecipeCreate` with `routes.recipesCreateFromHtmlOrJson`.
 *
 * @param data - The HTML or JSON source, as a string.
 * @param includeTags - Sent to the backend as `includeTags`.
 * @param includeCategories - Sent to the backend as `includeCategories`.
 * @param url - Optional source URL sent alongside the data; defaults to `null`.
 * @param onProgress - Optional callback invoked with each progress message from the stream.
 * @returns The result of `streamRecipeCreate` for this request.
 */
async createOneByHtmlOrJson(
  data: string,
  includeTags: boolean,
  includeCategories: boolean,
  url: string | null = null,
  onProgress?: (message: string) => void,
): Promise<RequestResponse<string>> {
  return this.streamRecipeCreate(routes.recipesCreateFromHtmlOrJson, { data, includeTags, includeCategories, url }, onProgress);
}
/**
 * Create a recipe from a URL using the streaming create-by-URL endpoint.
 *
 * Delegates to `streamRecipeCreate` with `routes.recipesCreateUrl`.
 *
 * @param url - The URL to import the recipe from.
 * @param includeTags - Sent to the backend as `includeTags`.
 * @param includeCategories - Sent to the backend as `includeCategories`.
 * @param onProgress - Optional callback invoked with each progress message from the stream.
 * @returns The result of `streamRecipeCreate` for this request.
 */
async createOneByUrl(
  url: string,
  includeTags: boolean,
  includeCategories: boolean,
  onProgress?: (message: string) => void,
): Promise<RequestResponse<string>> {
  return this.streamRecipeCreate(routes.recipesCreateUrl, { url, includeTags, includeCategories }, onProgress);
}
async createManyByUrl(payload: CreateRecipeByUrlBulk) {

View File

@@ -30,6 +30,7 @@
"json-editor-vue": "^0.18.1",
"marked": "^15.0.12",
"nuxt": "^3.19.2",
"sse.js": "^2.8.0",
"vite": "^7.0.0",
"vue-advanced-cropper": "^2.8.9",
"vue-draggable-plus": "^0.6.0",

View File

@@ -83,14 +83,20 @@
/>
</v-card-text>
<v-card-actions class="justify-center">
<div style="width: 250px">
<BaseButton
:disabled="!newRecipeData"
rounded
block
type="submit"
:loading="loading"
/>
<div style="width: 100%" class="text-center">
<div style="width: 250px; margin: 0 auto">
<BaseButton
:disabled="!newRecipeData"
rounded
block
type="submit"
:loading="loading"
/>
</div>
<v-card-text class="py-2">
<!-- render &nbsp; to maintain layout -->
{{ createStatus }}&nbsp;
</v-card-text>
</div>
</v-card-actions>
</div>
@@ -167,6 +173,7 @@ export default defineNuxtComponent({
}
handleIsEditJson();
const createStatus = ref<string | null>(null);
async function createFromHtmlOrJson(htmlOrJsonData: string | object | null, importKeywordsAsTags: boolean, importCategories: boolean, url: string | null = null) {
if (!htmlOrJsonData) {
return;
@@ -186,7 +193,14 @@ export default defineNuxtComponent({
}
state.loading = true;
const { response } = await api.recipes.createOneByHtmlOrJson(dataString, importKeywordsAsTags, importCategories, url);
const { response } = await api.recipes.createOneByHtmlOrJson(
dataString,
importKeywordsAsTags,
importCategories,
url,
(message: string) => createStatus.value = message,
);
createStatus.value = null;
handleResponse(response, importKeywordsAsTags);
}
@@ -199,6 +213,7 @@ export default defineNuxtComponent({
newRecipeData,
newRecipeUrl,
handleIsEditJson,
createStatus,
createFromHtmlOrJson,
...toRefs(state),
validators,

View File

@@ -65,14 +65,20 @@
:label="$t('recipe.parse-recipe-ingredients-after-import')"
/>
<v-card-actions class="justify-center">
<div style="width: 250px">
<BaseButton
:disabled="recipeUrl === null"
rounded
block
type="submit"
:loading="loading"
/>
<div style="width: 100%" class="text-center">
<div style="width: 250px; margin: 0 auto">
<BaseButton
:disabled="recipeUrl === null"
rounded
block
type="submit"
:loading="loading"
/>
</div>
<v-card-text class="py-2">
<!-- render &nbsp; to maintain layout -->
{{ createStatus }}&nbsp;
</v-card-text>
</div>
</v-card-actions>
</div>
@@ -234,6 +240,7 @@ export default defineNuxtComponent({
router.replace({ query: undefined }).then(() => router.push(to));
});
const createStatus = ref<string | null>(null);
async function createByUrl(url: string | null, importKeywordsAsTags: boolean, importCategories: boolean) {
if (url === null) {
return;
@@ -244,7 +251,13 @@ export default defineNuxtComponent({
return;
}
state.loading = true;
const { response } = await api.recipes.createOneByUrl(url, importKeywordsAsTags, importCategories);
const { response } = await api.recipes.createOneByUrl(
url,
importKeywordsAsTags,
importCategories,
(message: string) => createStatus.value = message,
);
createStatus.value = null;
handleResponse(response, importKeywordsAsTags);
}
@@ -257,6 +270,7 @@ export default defineNuxtComponent({
stayInEditMode,
parseRecipe,
domUrlForm,
createStatus,
createByUrl,
...toRefs(state),
validators,

View File

@@ -9940,6 +9940,11 @@ srvx@^0.8.9:
resolved "https://registry.yarnpkg.com/srvx/-/srvx-0.8.16.tgz#f2582bd747351b5b0a1c65bce8179bae83e8b2a6"
integrity sha512-hmcGW4CgroeSmzgF1Ihwgl+Ths0JqAJ7HwjP2X7e3JzY7u4IydLMcdnlqGQiQGUswz+PO9oh/KtCpOISIvs9QQ==
sse.js@^2.8.0:
version "2.8.0"
resolved "https://registry.yarnpkg.com/sse.js/-/sse.js-2.8.0.tgz#28e922720ef41f0de3312e33d23183682bec4b1e"
integrity sha512-35RyyFYpzzHZgMw9D5GxwADbL6gnntSwW/rKXcuIy1KkYCPjW6oia0moNdNRhs34oVHU1Sjgovj3l7uIEZjrKA==
stable-hash-x@^0.2.0:
version "0.2.0"
resolved "https://registry.yarnpkg.com/stable-hash-x/-/stable-hash-x-0.2.0.tgz#dfd76bfa5d839a7470125c6a6b3c8b22061793e9"

View File

@@ -19,7 +19,17 @@
"yields": "Yields"
},
"and-amount": "and {amount}",
"or-ingredient": "or {ingredient}"
"or-ingredient": "or {ingredient}",
"create-progress": {
"creating-recipe-with-ai": "Creating recipe with AI...",
"creating-recipe-from-transcript-with-ai": "Creating recipe from transcript with AI...",
"creating-recipe-from-webpage-data": "Creating recipe from webpage data...",
"downloading-image": "Downloading image...",
"downloading-video": "Downloading video...",
"extracting-recipe-data": "Extracting recipe data...",
"fetching-webpage": "Fetching webpage...",
"transcribing-audio-with-ai": "Transcribing audio with AI..."
}
},
"mealplan": {
"no-recipes-match-your-rules": "No recipes match your rules"

View File

@@ -33,7 +33,14 @@ class MealieCrudRoute(APIRoute):
async def custom_route_handler(request: Request) -> Response:
with contextlib.suppress(JSONDecodeError):
response = await original_route_handler(request)
response_body = json.loads(response.body)
# StreamingResponse from starlette doesn't have a body attribute, even though it inherits from Response,
# so we may get an attribute error here even though our type hinting suggests otherwise.
try:
response_body = json.loads(response.body)
except AttributeError:
return response
if isinstance(response_body, dict):
if last_modified := response_body.get("updatedAt"):
response.headers["last-modified"] = last_modified

View File

@@ -1,4 +1,6 @@
import asyncio
from collections import defaultdict
from collections.abc import AsyncIterable
from shutil import copyfileobj
from uuid import UUID
@@ -17,6 +19,7 @@ from fastapi import (
status,
)
from fastapi.datastructures import UploadFile
from fastapi.sse import EventSourceResponse, ServerSentEvent
from pydantic import UUID4
from slugify import slugify
@@ -46,7 +49,13 @@ from mealie.schema.recipe.request_helpers import (
)
from mealie.schema.response import PaginationBase, PaginationQuery
from mealie.schema.response.pagination import RecipeSearchQuery
from mealie.schema.response.responses import ErrorResponse, SuccessResponse
from mealie.schema.response.responses import (
ErrorResponse,
SSEDataEventDone,
SSEDataEventMessage,
SSEDataEventStatus,
SuccessResponse,
)
from mealie.services import urls
from mealie.services.event_bus_service.event_types import (
EventOperation,
@@ -130,22 +139,70 @@ class RecipeController(BaseRecipeController):
return "recipe_scrapers was unable to scrape this URL"
@router.post("/create/html-or-json", status_code=201)
async def create_recipe_from_html_or_json(self, req: ScrapeRecipeData):
@router.post("/create/html-or-json", status_code=201, response_model=str)
async def create_recipe_from_html_or_json(self, req: ScrapeRecipeData) -> str:
"""Takes in raw HTML or a https://schema.org/Recipe object as a JSON string and parses it like a URL"""
if req.data.startswith("{"):
req.data = RecipeScraperPackage.ld_json_to_html(req.data)
return await self._create_recipe_from_web(req)
async for event in self._create_recipe_from_web(req):
if isinstance(event.data, SSEDataEventDone):
return event.data.slug
if isinstance(event.data, SSEDataEventMessage) and event.event == SSEDataEventStatus.ERROR:
raise HTTPException(status_code=400, detail=ErrorResponse.respond(message=event.data.message))
# This should never be reachable, since we should always hit DONE or hit an exception/ERROR
raise HTTPException(status_code=500, detail=ErrorResponse.respond(message="Unknown Error"))
@router.post("/create/html-or-json/stream", response_class=EventSourceResponse)
async def create_recipe_from_html_or_json_stream(self, req: ScrapeRecipeData) -> AsyncIterable[ServerSentEvent]:
    """
    Takes in raw HTML or a https://schema.org/Recipe object as a JSON string and parses it like a URL,
    streaming progress via SSE
    """
    # A payload that starts with "{" is treated as JSON and converted to HTML before scraping;
    # note that this rewrites `req.data` in place
    if req.data.startswith("{"):
        req.data = RecipeScraperPackage.ld_json_to_html(req.data)
    # Relay every event produced by the shared creation generator unchanged
    async for event in self._create_recipe_from_web(req):
        yield event
@router.post("/create/url", status_code=201, response_model=str)
async def parse_recipe_url(self, req: ScrapeRecipe):
async def parse_recipe_url(self, req: ScrapeRecipe) -> str:
"""Takes in a URL and attempts to scrape data and load it into the database"""
return await self._create_recipe_from_web(req)
async for event in self._create_recipe_from_web(req):
if isinstance(event.data, SSEDataEventDone):
return event.data.slug
if isinstance(event.data, SSEDataEventMessage) and event.event == SSEDataEventStatus.ERROR:
raise HTTPException(status_code=400, detail=ErrorResponse.respond(message=event.data.message))
# This should never be reachable, since we should always hit DONE or hit an exception/ERROR
raise HTTPException(status_code=500, detail=ErrorResponse.respond(message="Unknown Error"))
@router.post("/create/url/stream", response_class=EventSourceResponse)
async def parse_recipe_url_stream(self, req: ScrapeRecipe) -> AsyncIterable[ServerSentEvent]:
    """
    Takes in a URL and attempts to scrape data and load it into the database,
    streaming progress via SSE
    """
    # Relay every event produced by the shared creation generator unchanged
    async for event in self._create_recipe_from_web(req):
        yield event
async def _create_recipe_from_web(self, req: ScrapeRecipe | ScrapeRecipeData) -> AsyncIterable[ServerSentEvent]:
"""
Create a recipe from the web, returning progress via SSE.
Events will continue to be yielded until:
- The recipe is created, emitting:
- event=SSEDataEventStatus.DONE
- data=SSEDataEventDone(...)
- An exception is raised, emitting:
- event=SSEDataEventStatus.ERROR
- data=SSEDataEventMessage(...)
"""
async def _create_recipe_from_web(self, req: ScrapeRecipe | ScrapeRecipeData):
if isinstance(req, ScrapeRecipeData):
html = req.data
url = req.url or ""
@@ -153,21 +210,48 @@ class RecipeController(BaseRecipeController):
html = None
url = req.url
try:
recipe, extras = await create_from_html(url, self.translator, html)
except ForceTimeoutException as e:
raise HTTPException(
status_code=408, detail=ErrorResponse.respond(message="Recipe Scraping Timed Out")
) from e
queue: asyncio.Queue[ServerSentEvent | None] = asyncio.Queue()
async def on_progress(message: str) -> None:
await queue.put(
ServerSentEvent(
data=SSEDataEventMessage(message=message),
event=SSEDataEventStatus.PROGRESS,
)
)
async def run() -> None:
try:
recipe, extras = await create_from_html(url, self.translator, html, on_progress=on_progress)
slug = self._finish_recipe_from_web(req, recipe, extras)
await queue.put(
ServerSentEvent(
data=SSEDataEventDone(slug=slug),
event=SSEDataEventStatus.DONE,
)
)
except Exception as e:
self.logger.exception("Error in streaming recipe creation")
await queue.put(
ServerSentEvent(
data=SSEDataEventMessage(message=e.__class__.__name__),
event=SSEDataEventStatus.ERROR,
)
)
finally:
await queue.put(None)
asyncio.create_task(run())
while (event := await queue.get()) is not None:
yield event
def _finish_recipe_from_web(self, req: ScrapeRecipe | ScrapeRecipeData, recipe: Recipe, extras: object) -> str:
if req.include_tags:
ctx = ScraperContext(self.repos)
recipe.tags = extras.use_tags(ctx) # type: ignore
if req.include_categories:
ctx = ScraperContext(self.repos)
recipe.recipe_category = extras.use_categories(ctx) # type: ignore
new_recipe = self.service.create_one(recipe)

View File

@@ -8,12 +8,24 @@ from .pagination import (
RequestQuery,
)
from .query_search import SearchFilter
from .responses import ErrorResponse, FileTokenResponse, SuccessResponse
from .responses import (
ErrorResponse,
FileTokenResponse,
SSEDataEventBase,
SSEDataEventDone,
SSEDataEventMessage,
SSEDataEventStatus,
SuccessResponse,
)
from .validation import ValidationResponse
__all__ = [
"ErrorResponse",
"FileTokenResponse",
"SSEDataEventBase",
"SSEDataEventDone",
"SSEDataEventMessage",
"SSEDataEventStatus",
"SuccessResponse",
"SearchFilter",
"OrderByNullPosition",

View File

@@ -1,3 +1,5 @@
from enum import StrEnum
from pydantic import BaseModel
from mealie.schema._mealie import MealieModel
@@ -40,3 +42,20 @@ class FileTokenResponse(MealieModel):
in the same call, for use while providing details to a HTTPException
"""
return cls(file_token=token).model_dump()
# Event names attached to server-sent events. Being a StrEnum, each member is also
# the plain string that appears on the wire as the SSE event name.
class SSEDataEventStatus(StrEnum):
    PROGRESS = "progress"  # intermediate status update
    DONE = "done"  # operation finished successfully
    ERROR = "error"  # operation failed
# Common base for SSE `data` payload models; declares no fields of its own.
class SSEDataEventBase(BaseModel): ...
# SSE payload carrying a single text message.
class SSEDataEventMessage(SSEDataEventBase):
    message: str
# SSE payload carrying a slug.
class SSEDataEventDone(SSEDataEventBase):
    slug: str

View File

@@ -1,3 +1,5 @@
from collections.abc import Awaitable, Callable
from mealie.core.root_logger import get_logger
from mealie.lang.providers import Translator
from mealie.schema.recipe.recipe import Recipe
@@ -37,25 +39,34 @@ class RecipeScraper:
self.translator = translator
self.logger = get_logger()
async def scrape(self, url: str, html: str | None = None) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
async def scrape(
self,
url: str,
html: str | None = None,
on_progress: Callable[[str], Awaitable[None]] | None = None,
) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
"""
Scrapes a recipe from the web.
Skips the network request if `html` is provided.
Optionally reports progress back via `on_progress`.
"""
raw_html = html or await safe_scrape_html(url)
if not html:
if on_progress:
await on_progress(self.translator.t("recipe.create-progress.fetching-webpage"))
if not raw_html:
return None, None
html = await safe_scrape_html(url)
if not html:
return None, None
for ScraperClass in self.scrapers:
scraper = ScraperClass(url, self.translator, raw_html=raw_html)
scraper = ScraperClass(url, self.translator, raw_html=html)
if not scraper.can_scrape():
self.logger.debug(f"Skipping {scraper.__class__.__name__}")
continue
try:
result = await scraper.parse()
result = await scraper.parse(on_progress=on_progress)
except Exception:
self.logger.exception(f"Failed to scrape HTML with {scraper.__class__.__name__}")
result = None

View File

@@ -1,3 +1,4 @@
from collections.abc import Awaitable, Callable
from enum import StrEnum
from re import search as regex_search
from uuid import uuid4
@@ -22,7 +23,10 @@ class ParserErrors(StrEnum):
async def create_from_html(
url: str, translator: Translator, html: str | None = None
url: str,
translator: Translator,
html: str | None = None,
on_progress: Callable[[str], Awaitable[None]] | None = None,
) -> tuple[Recipe, ScrapedExtras | None]:
"""Main entry point for generating a recipe from a URL. Pass in a URL and
a Recipe object will be returned if successful. Optionally pass in the HTML to skip fetching it.
@@ -30,6 +34,7 @@ async def create_from_html(
Args:
url (str): a valid string representing a URL
html (str | None): optional HTML string to skip network request. Defaults to None.
on_progress: optional async callable invoked with a status message at each stage.
Returns:
Recipe: Recipe Object
@@ -42,7 +47,7 @@ async def create_from_html(
raise HTTPException(status.HTTP_400_BAD_REQUEST, {"details": ParserErrors.BAD_RECIPE_DATA.value})
url = extracted_url.group(0)
new_recipe, extras = await scraper.scrape(url, html)
new_recipe, extras = await scraper.scrape(url, html, on_progress=on_progress)
if not new_recipe:
raise HTTPException(status.HTTP_400_BAD_REQUEST, {"details": ParserErrors.BAD_RECIPE_DATA.value})
@@ -54,9 +59,13 @@ async def create_from_html(
recipe_data_service = RecipeDataService(new_recipe.id)
try:
if new_recipe.image and isinstance(new_recipe.image, list):
new_recipe.image = new_recipe.image[0]
await recipe_data_service.scrape_image(new_recipe.image) # type: ignore
if new_recipe.image:
if isinstance(new_recipe.image, list):
new_recipe.image = new_recipe.image[0]
if on_progress:
await on_progress(translator.t("recipe.create-progress.downloading-image"))
await recipe_data_service.scrape_image(new_recipe.image) # type: ignore
if new_recipe.name is None:
new_recipe.name = "Untitled"

View File

@@ -3,7 +3,7 @@ import functools
import re
import time
from abc import ABC, abstractmethod
from collections.abc import Callable
from collections.abc import Awaitable, Callable
from pathlib import Path
from typing import Any, TypedDict
@@ -139,7 +139,9 @@ class ABCScraperStrategy(ABC):
async def get_html(self, url: str) -> str: ...
@abstractmethod
async def parse(self) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
async def parse(
self, on_progress: Callable[[str], Awaitable[None]] | None = None
) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
"""Parse a recipe from a web URL.
Args:
@@ -276,10 +278,14 @@ class RecipeScraperPackage(ABCScraperStrategy):
self.logger.debug(f"Recipe Scraper [Package] was unable to extract a recipe from {self.url}")
return None
async def parse(self):
async def parse(self, on_progress: Callable[[str], Awaitable[None]] | None = None):
"""
Parse a recipe from a given url.
"""
if on_progress:
await on_progress(self.translator.t("recipe.create-progress.extracting-recipe-data"))
scraped_data = await self.scrape_url()
if scraped_data is None:
@@ -376,6 +382,12 @@ class RecipeScraperOpenAI(RecipeScraperPackage):
self.logger.exception(f"OpenAI was unable to extract a recipe from {url}")
return ""
async def parse(self, on_progress: Callable[[str], Awaitable[None]] | None = None):
    """
    Parse a recipe with the OpenAI-backed scraper.

    Reports the "creating recipe with AI" status through `on_progress` (when given) and then
    runs the inherited parse, returning its awaited result.

    Args:
        on_progress: optional async callable invoked with a translated status message.

    Returns:
        Whatever the parent `parse` resolves to.
    """
    if on_progress:
        await on_progress(self.translator.t("recipe.create-progress.creating-recipe-with-ai"))

    # The parent `parse` is a coroutine function, so it must be awaited; returning it bare would
    # hand the caller a coroutine object instead of the parsed result.
    # NOTE(review): `on_progress` is not forwarded, so the parent's own progress message is
    # not emitted for this scraper — confirm that is intended.
    return await super().parse()
class TranscribedAudio(TypedDict):
audio: Path
@@ -468,10 +480,16 @@ class RecipeScraperOpenAITranscription(ABCScraperStrategy):
async def get_html(self, url: str) -> str:
return self.raw_html or "" # we don't use HTML with this scraper since we use ytdlp
async def parse(self) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
async def parse(
self,
on_progress: Callable[[str], Awaitable[None]] | None = None,
) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
openai_service = OpenAIService()
with get_temporary_path() as temp_path:
if on_progress:
await on_progress(self.translator.t("recipe.create-progress.downloading-video"))
video_data = await asyncio.to_thread(self._download_audio, temp_path)
if video_data["subtitle"]:
@@ -485,6 +503,9 @@ class RecipeScraperOpenAITranscription(ABCScraperStrategy):
video_data["transcription"] = ""
if not video_data["transcription"]:
if on_progress:
await on_progress(self.translator.t("recipe.create-progress.transcribing-audio-with-ai"))
try:
transcription = await openai_service.transcribe_audio(video_data["audio"])
except exceptions.RateLimitError:
@@ -508,6 +529,9 @@ class RecipeScraperOpenAITranscription(ABCScraperStrategy):
f"Transcription: {video_data['transcription']}",
]
if on_progress:
await on_progress(self.translator.t("recipe.create-progress.creating-recipe-from-transcript-with-ai"))
try:
response = await openai_service.get_response(prompt, "\n".join(message_parts), response_schema=OpenAIRecipe)
except exceptions.RateLimitError:
@@ -586,10 +610,17 @@ class RecipeScraperOpenGraph(ABCScraperStrategy):
"extras": [],
}
async def parse(self):
async def parse(
self,
on_progress: Callable[[str], Awaitable[None]] | None = None,
):
"""
Parse a recipe from a given url.
"""
if on_progress:
await on_progress(self.translator.t("recipe.create-progress.creating-recipe-from-webpage-data"))
html = await self.get_html(self.url)
og_data = self.get_recipe_fields(html)

View File

@@ -174,3 +174,6 @@ select = [
[tool.ruff.lint.mccabe]
# Unlike Flake8, default to a complexity level of 10.
max-complexity = 24 # Default is 10.
[tool.uv]
add-bounds = "exact"

View File

@@ -96,6 +96,23 @@ def open_graph_override(html: str):
return get_html
def parse_sse_events(text: str) -> list[dict]:
    """
    Parse SSE response text into a list of events with 'event' and 'data' keys.

    An event ends at a blank line (or at the end of the text). Consecutive `data:` lines
    belonging to the same event are joined with a newline before being JSON-decoded, as the
    SSE format prescribes, so a payload split over several `data:` lines is handled.
    Lines that are neither `event:` nor `data:` (e.g. `:` comments) are ignored.

    Args:
        text: the raw body of an SSE response.

    Returns:
        One dict per event, holding 'event' and/or 'data' depending on which fields were present.

    Raises:
        json.JSONDecodeError: if an event's data payload is not valid JSON.
    """
    events: list[dict] = []
    current: dict = {}
    data_lines: list[str] = []

    def flush() -> None:
        """Finish the event being built, if any, and reset the accumulators."""
        nonlocal current, data_lines
        if data_lines:
            current["data"] = json.loads("\n".join(data_lines))
        if current:
            events.append(current)
        current = {}
        data_lines = []

    for line in text.splitlines():
        if line.startswith("event:"):
            current["event"] = line[len("event:") :].strip()
        elif line.startswith("data:"):
            data_lines.append(line[len("data:") :].strip())
        elif line == "":
            flush()

    # The final event may not be followed by a blank line
    flush()
    return events
def test_create_by_url(
api_client: TestClient,
unique_user: TestUser,
@@ -218,6 +235,122 @@ def test_create_by_html_or_json(
assert tag["name"] in expected_tags
def test_create_by_url_stream_done(
    api_client: TestClient,
    unique_user: TestUser,
    monkeypatch: MonkeyPatch,
):
    """
    The create-by-URL stream endpoint responds 200 and its SSE body contains at least one
    'progress' event and a 'done' event whose slug matches the expected recipe slug.
    """

    # Replace the HTML fetch with a stub that returns an empty page
    async def mock_safe_scrape_html(url: str) -> str:
        return "<html></html>"

    monkeypatch.setattr(recipe_scraper_module, "safe_scrape_html", mock_safe_scrape_html)
    recipe_data = recipe_test_data[0]
    # Every scraper strategy gets its HTML from the test fixture file
    for scraper_cls in DEFAULT_SCRAPER_STRATEGIES:
        monkeypatch.setattr(
            scraper_cls,
            "get_html",
            open_graph_override(recipe_data.html_file.read_text()),
        )

    # Outbound requests through the safe transport get an empty 200 response
    async def return_empty_response(*args, **kwargs):
        return Response(200, content=b"")

    monkeypatch.setattr(AsyncSafeTransport, "handle_async_request", return_empty_response)
    monkeypatch.setattr(RecipeDataService, "scrape_image", lambda *_: "TEST_IMAGE")
    # Delete any recipe already stored under the expected slug before creating it again
    api_client.delete(api_routes.recipes_slug(recipe_data.expected_slug), headers=unique_user.token)
    response = api_client.post(
        api_routes.recipes_create_url_stream,
        json={"url": recipe_data.url, "include_tags": False},
        headers=unique_user.token,
    )
    assert response.status_code == 200
    events = parse_sse_events(response.text)
    event_types = [e["event"] for e in events]
    assert "done" in event_types
    done_event = next(e for e in events if e["event"] == "done")
    assert done_event["data"]["slug"] == recipe_data.expected_slug
    assert any(e["event"] == "progress" for e in events)
def test_create_by_url_stream_error(
    api_client: TestClient,
    unique_user: TestUser,
    monkeypatch: MonkeyPatch,
):
    """
    When recipe creation raises, the create-by-URL stream endpoint still responds 200 and
    reports the failure in-band as an SSE 'error' event.
    """

    async def raise_error(*args, **kwargs):
        raise Exception("Test scrape error")

    # Patch the name as imported by the routes module so the endpoint sees the failing stub
    monkeypatch.setattr("mealie.routes.recipe.recipe_crud_routes.create_from_html", raise_error)
    response = api_client.post(
        api_routes.recipes_create_url_stream,
        json={"url": "https://example.com/recipe"},
        headers=unique_user.token,
    )
    assert response.status_code == 200
    events = parse_sse_events(response.text)
    event_types = [e["event"] for e in events]
    assert "error" in event_types
def test_create_by_html_or_json_stream_done(
    api_client: TestClient,
    unique_user: TestUser,
    monkeypatch: MonkeyPatch,
):
    """
    The create-by-HTML-or-JSON stream endpoint responds 200 and its SSE body contains at least
    one 'progress' event and a 'done' event whose slug matches the expected recipe slug.
    """
    monkeypatch.setattr(RecipeDataService, "scrape_image", lambda *_: "TEST_IMAGE")
    recipe_data = recipe_test_data[0]
    # Delete any recipe already stored under the expected slug before creating it again
    api_client.delete(api_routes.recipes_slug(recipe_data.expected_slug), headers=unique_user.token)
    # The fixture HTML is posted directly, so no URL is supplied
    response = api_client.post(
        api_routes.recipes_create_html_or_json_stream,
        json={"data": recipe_data.html_file.read_text(), "include_tags": False},
        headers=unique_user.token,
    )
    assert response.status_code == 200
    events = parse_sse_events(response.text)
    event_types = [e["event"] for e in events]
    assert "done" in event_types
    done_event = next(e for e in events if e["event"] == "done")
    assert done_event["data"]["slug"] == recipe_data.expected_slug
    assert any(e["event"] == "progress" for e in events)
def test_create_by_html_or_json_stream_error(
    api_client: TestClient,
    unique_user: TestUser,
    monkeypatch: MonkeyPatch,
):
    """
    When recipe creation raises, the create-by-HTML-or-JSON stream endpoint still responds 200
    and reports the failure in-band as an SSE 'error' event.
    """

    async def raise_error(*args, **kwargs):
        raise Exception("Test scrape error")

    # Patch the name as imported by the routes module so the endpoint sees the failing stub
    monkeypatch.setattr("mealie.routes.recipe.recipe_crud_routes.create_from_html", raise_error)
    response = api_client.post(
        api_routes.recipes_create_html_or_json_stream,
        json={"data": "<html><body>test</body></html>"},
        headers=unique_user.token,
    )
    assert response.status_code == 200
    events = parse_sse_events(response.text)
    event_types = [e["event"] for e in events]
    assert "error" in event_types
def test_create_recipe_from_zip(api_client: TestClient, unique_user: TestUser, tempdir: str):
database = unique_user.repos
recipe_name = random_string()

View File

@@ -149,12 +149,16 @@ recipes_bulk_actions_tag = "/api/recipes/bulk-actions/tag"
"""`/api/recipes/bulk-actions/tag`"""
recipes_create_html_or_json = "/api/recipes/create/html-or-json"
"""`/api/recipes/create/html-or-json`"""
recipes_create_html_or_json_stream = "/api/recipes/create/html-or-json/stream"
"""`/api/recipes/create/html-or-json/stream`"""
recipes_create_image = "/api/recipes/create/image"
"""`/api/recipes/create/image`"""
recipes_create_url = "/api/recipes/create/url"
"""`/api/recipes/create/url`"""
recipes_create_url_bulk = "/api/recipes/create/url/bulk"
"""`/api/recipes/create/url/bulk`"""
recipes_create_url_stream = "/api/recipes/create/url/stream"
"""`/api/recipes/create/url/stream`"""
recipes_create_zip = "/api/recipes/create/zip"
"""`/api/recipes/create/zip`"""
recipes_exports = "/api/recipes/exports"