feat: Recipe import progress (#7252)
@@ -1,3 +1,5 @@
+from collections.abc import Awaitable, Callable
+
 from mealie.core.root_logger import get_logger
 from mealie.lang.providers import Translator
 from mealie.schema.recipe.recipe import Recipe
@@ -37,25 +39,34 @@ class RecipeScraper:
         self.translator = translator
         self.logger = get_logger()
 
-    async def scrape(self, url: str, html: str | None = None) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
+    async def scrape(
+        self,
+        url: str,
+        html: str | None = None,
+        on_progress: Callable[[str], Awaitable[None]] | None = None,
+    ) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
         """
         Scrapes a recipe from the web.
         Skips the network request if `html` is provided.
+        Optionally reports progress back via `on_progress`.
         """
 
-        raw_html = html or await safe_scrape_html(url)
-        if not raw_html:
-            return None, None
+        if not html:
+            if on_progress:
+                await on_progress(self.translator.t("recipe.create-progress.fetching-webpage"))
+            html = await safe_scrape_html(url)
+            if not html:
+                return None, None
 
         for ScraperClass in self.scrapers:
-            scraper = ScraperClass(url, self.translator, raw_html=raw_html)
+            scraper = ScraperClass(url, self.translator, raw_html=html)
             if not scraper.can_scrape():
                 self.logger.debug(f"Skipping {scraper.__class__.__name__}")
                 continue
 
             try:
-                result = await scraper.parse()
+                result = await scraper.parse(on_progress=on_progress)
             except Exception:
                 self.logger.exception(f"Failed to scrape HTML with {scraper.__class__.__name__}")
                 result = None
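A minimal caller-side sketch of the new hook on RecipeScraper.scrape follows; the logging callback and the wrapper function are illustrative assumptions, not part of this commit.

async def log_progress(message: str) -> None:
    # Any async callable that accepts the translated status string works here.
    print(f"[recipe import] {message}")

async def import_recipe(scraper, url: str):
    # `scraper` is an already-constructed RecipeScraper; construction is omitted.
    recipe, extras = await scraper.scrape(url, on_progress=log_progress)
    return recipe, extras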
@@ -1,3 +1,4 @@
+from collections.abc import Awaitable, Callable
 from enum import StrEnum
 from re import search as regex_search
 from uuid import uuid4
@@ -22,7 +23,10 @@ class ParserErrors(StrEnum):
 
 
 async def create_from_html(
-    url: str, translator: Translator, html: str | None = None
+    url: str,
+    translator: Translator,
+    html: str | None = None,
+    on_progress: Callable[[str], Awaitable[None]] | None = None,
 ) -> tuple[Recipe, ScrapedExtras | None]:
     """Main entry point for generating a recipe from a URL. Pass in a URL and
     a Recipe object will be returned if successful. Optionally pass in the HTML to skip fetching it.
@@ -30,6 +34,7 @@ async def create_from_html(
     Args:
         url (str): a valid string representing a URL
         html (str | None): optional HTML string to skip network request. Defaults to None.
+        on_progress: optional async callable invoked with a status message at each stage.
 
     Returns:
         Recipe: Recipe Object
@@ -42,7 +47,7 @@ async def create_from_html(
         raise HTTPException(status.HTTP_400_BAD_REQUEST, {"details": ParserErrors.BAD_RECIPE_DATA.value})
     url = extracted_url.group(0)
 
-    new_recipe, extras = await scraper.scrape(url, html)
+    new_recipe, extras = await scraper.scrape(url, html, on_progress=on_progress)
 
     if not new_recipe:
         raise HTTPException(status.HTTP_400_BAD_REQUEST, {"details": ParserErrors.BAD_RECIPE_DATA.value})
@@ -54,9 +59,13 @@ async def create_from_html(
     recipe_data_service = RecipeDataService(new_recipe.id)
 
     try:
-        if new_recipe.image and isinstance(new_recipe.image, list):
-            new_recipe.image = new_recipe.image[0]
-        await recipe_data_service.scrape_image(new_recipe.image)  # type: ignore
+        if new_recipe.image:
+            if isinstance(new_recipe.image, list):
+                new_recipe.image = new_recipe.image[0]
+
+            if on_progress:
+                await on_progress(translator.t("recipe.create-progress.downloading-image"))
+            await recipe_data_service.scrape_image(new_recipe.image)  # type: ignore
 
         if new_recipe.name is None:
             new_recipe.name = "Untitled"
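The hunks above thread the same callback through create_from_html. Below is a hedged sketch of how a caller could bridge those messages to a consumer; the queue-based wiring is an assumption, and the commit does not show how Mealie's API layer actually surfaces the messages.

import asyncio

from mealie.services.scraper.scraper import create_from_html  # module path assumed

async def import_with_progress(url: str, translator):
    # Hypothetical bridge: each status message is pushed onto a queue that some
    # other task (an SSE endpoint, a websocket, a task-status record) drains.
    progress: asyncio.Queue[str] = asyncio.Queue()

    async def on_progress(message: str) -> None:
        await progress.put(message)

    recipe, _extras = await create_from_html(url, translator, on_progress=on_progress)
    return recipe, progress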
@@ -3,7 +3,7 @@ import functools
 import re
 import time
 from abc import ABC, abstractmethod
-from collections.abc import Callable
+from collections.abc import Awaitable, Callable
 from pathlib import Path
 from typing import Any, TypedDict
 
@@ -139,7 +139,9 @@ class ABCScraperStrategy(ABC):
     async def get_html(self, url: str) -> str: ...
 
     @abstractmethod
-    async def parse(self) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
+    async def parse(
+        self, on_progress: Callable[[str], Awaitable[None]] | None = None
+    ) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
         """Parse a recipe from a web URL.
 
         Args:
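With the abstract signature changed, every strategy must accept the optional callback even if it ignores it. A bare-bones conforming strategy is sketched below under assumptions: the class name and import path are made up, and the synchronous can_scrape hook mirrors how RecipeScraper uses strategies above rather than anything shown in this commit.

from collections.abc import Awaitable, Callable

from mealie.schema.recipe.recipe import Recipe
from mealie.services.scraper.scraper_strategies import ABCScraperStrategy, ScrapedExtras  # path assumed

class NoOpStrategy(ABCScraperStrategy):
    # Illustrative strategy that satisfies the new parse() signature.

    def can_scrape(self) -> bool:
        return True

    async def get_html(self, url: str) -> str:
        return self.raw_html or ""

    async def parse(
        self, on_progress: Callable[[str], Awaitable[None]] | None = None
    ) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
        if on_progress:
            # Real strategies send translated keys via self.translator.t(...).
            await on_progress("parsing")
        return None, None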
@@ -276,10 +278,14 @@ class RecipeScraperPackage(ABCScraperStrategy):
         self.logger.debug(f"Recipe Scraper [Package] was unable to extract a recipe from {self.url}")
         return None
 
-    async def parse(self):
+    async def parse(self, on_progress: Callable[[str], Awaitable[None]] | None = None):
         """
         Parse a recipe from a given url.
         """
+
+        if on_progress:
+            await on_progress(self.translator.t("recipe.create-progress.extracting-recipe-data"))
+
         scraped_data = await self.scrape_url()
 
         if scraped_data is None:
@@ -376,6 +382,12 @@ class RecipeScraperOpenAI(RecipeScraperPackage):
             self.logger.exception(f"OpenAI was unable to extract a recipe from {url}")
             return ""
 
+    async def parse(self, on_progress: Callable[[str], Awaitable[None]] | None = None):
+        if on_progress:
+            await on_progress(self.translator.t("recipe.create-progress.creating-recipe-with-ai"))
+
+        return await super().parse()
+
 
 class TranscribedAudio(TypedDict):
     audio: Path
@@ -468,10 +480,16 @@ class RecipeScraperOpenAITranscription(ABCScraperStrategy):
     async def get_html(self, url: str) -> str:
         return self.raw_html or ""  # we don't use HTML with this scraper since we use ytdlp
 
-    async def parse(self) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
+    async def parse(
+        self,
+        on_progress: Callable[[str], Awaitable[None]] | None = None,
+    ) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
         openai_service = OpenAIService()
 
         with get_temporary_path() as temp_path:
+            if on_progress:
+                await on_progress(self.translator.t("recipe.create-progress.downloading-video"))
+
             video_data = await asyncio.to_thread(self._download_audio, temp_path)
 
             if video_data["subtitle"]:
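Note that the downloading-video message is sent before the blocking yt-dlp download is pushed to a worker thread with asyncio.to_thread, so the event loop stays free for later awaits, including further progress callbacks. A self-contained illustration of that pattern (the function and path below are made up):

import asyncio
import time

def blocking_download(dest: str) -> str:
    # Stand-in for the yt-dlp download; it blocks the calling thread.
    time.sleep(2)
    return dest

async def demo() -> None:
    # Offload the blocking call so the event loop keeps running while it works.
    result = await asyncio.to_thread(blocking_download, "/tmp/audio.m4a")
    print(result)

asyncio.run(demo())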
@@ -485,6 +503,9 @@ class RecipeScraperOpenAITranscription(ABCScraperStrategy):
                 video_data["transcription"] = ""
 
             if not video_data["transcription"]:
+                if on_progress:
+                    await on_progress(self.translator.t("recipe.create-progress.transcribing-audio-with-ai"))
+
                 try:
                     transcription = await openai_service.transcribe_audio(video_data["audio"])
                 except exceptions.RateLimitError:
@@ -508,6 +529,9 @@ class RecipeScraperOpenAITranscription(ABCScraperStrategy):
                 f"Transcription: {video_data['transcription']}",
             ]
 
+            if on_progress:
+                await on_progress(self.translator.t("recipe.create-progress.creating-recipe-from-transcript-with-ai"))
+
             try:
                 response = await openai_service.get_response(prompt, "\n".join(message_parts), response_schema=OpenAIRecipe)
             except exceptions.RateLimitError:
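The `if on_progress: await on_progress(...)` guard recurs in every strategy. A small helper like the hypothetical one below could centralize it; the commit itself keeps the guard inline.

from collections.abc import Awaitable, Callable

async def notify(on_progress: Callable[[str], Awaitable[None]] | None, message: str) -> None:
    # No-op when no callback was supplied.
    if on_progress:
        await on_progress(message)

# A strategy would then call, e.g.:
#     await notify(on_progress, self.translator.t("recipe.create-progress.downloading-video"))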
@@ -586,10 +610,17 @@ class RecipeScraperOpenGraph(ABCScraperStrategy):
             "extras": [],
         }
 
-    async def parse(self):
+    async def parse(
+        self,
+        on_progress: Callable[[str], Awaitable[None]] | None = None,
+    ):
         """
         Parse a recipe from a given url.
         """
+
+        if on_progress:
+            await on_progress(self.translator.t("recipe.create-progress.creating-recipe-from-webpage-data"))
+
         html = await self.get_html(self.url)
 
         og_data = self.get_recipe_fields(html)