mirror of
https://github.com/mealie-recipes/mealie.git
synced 2026-04-14 08:55:34 -04:00
feat: Add social media video import (YouTube, TikTok, Instagram) (#6764)
Co-authored-by: Maxime Louward <61564950+mlouward@users.noreply.github.com> Co-authored-by: Michael Genson <genson.michael@gmail.com> Co-authored-by: Michael Genson <71845777+michael-genson@users.noreply.github.com>
This commit is contained in:
@@ -46,6 +46,30 @@ class NoEntryFound(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class OpenAIServiceError(Exception):
    """
    This exception is raised when there is an error communicating with OpenAI services.
    """
|
||||
|
||||
|
||||
class RateLimitError(Exception):
    """
    This exception is raised when an external API returns a rate limit (429) error.
    """
|
||||
|
||||
|
||||
class VideoDownloadError(Exception):
    """
    This exception is raised when there is an error downloading or processing a video.
    """
|
||||
|
||||
|
||||
def mealie_registered_exceptions(t: Translator) -> dict:
|
||||
"""
|
||||
This function returns a dictionary of all the globally registered exceptions in the Mealie application.
|
||||
|
||||
@@ -393,12 +393,16 @@ class AppSettings(AppLoggingSettings):
|
||||
"""Your OpenAI API key. Required to enable OpenAI features"""
|
||||
OPENAI_MODEL: str = "gpt-4o"
|
||||
"""Which OpenAI model to send requests to. Leave this unset for most usecases"""
|
||||
OPENAI_AUDIO_MODEL: str = "whisper-1"
|
||||
"""Which OpenAI model to use for audio transcription. Leave this unset for most usecases"""
|
||||
OPENAI_CUSTOM_HEADERS: dict[str, str] = {}
|
||||
"""Custom HTTP headers to send with each OpenAI request"""
|
||||
OPENAI_CUSTOM_PARAMS: dict[str, Any] = {}
|
||||
"""Custom HTTP parameters to send with each OpenAI request"""
|
||||
OPENAI_ENABLE_IMAGE_SERVICES: bool = True
|
||||
"""Whether to enable image-related features in OpenAI"""
|
||||
OPENAI_ENABLE_TRANSCRIPTION_SERVICES: bool = True
|
||||
"""Whether to enable audio transcription features in OpenAI"""
|
||||
OPENAI_WORKERS: int = 2
|
||||
"""
|
||||
Number of OpenAI workers per request. Higher values may increase
|
||||
|
||||
@@ -38,6 +38,8 @@ class AdminAboutController(BaseAdminController):
|
||||
oidc_provider_name=settings.OIDC_PROVIDER_NAME,
|
||||
enable_openai=settings.OPENAI_ENABLED,
|
||||
enable_openai_image_services=settings.OPENAI_ENABLED and settings.OPENAI_ENABLE_IMAGE_SERVICES,
|
||||
enable_openai_transcription_services=settings.OPENAI_ENABLED
|
||||
and settings.OPENAI_ENABLE_TRANSCRIPTION_SERVICES,
|
||||
)
|
||||
|
||||
@router.get("/statistics", response_model=AppStatistics)
|
||||
|
||||
@@ -34,14 +34,14 @@ class AdminDebugController(BaseAdminController):
|
||||
|
||||
try:
|
||||
openai_service = OpenAIService()
|
||||
prompt = openai_service.get_prompt("debug")
|
||||
prompt = openai_service.get_prompt("general.debug")
|
||||
|
||||
message = "Hello, checking to see if I can reach you."
|
||||
if local_images:
|
||||
message = f"{message} Here is an image to test with:"
|
||||
|
||||
response = await openai_service.get_response(
|
||||
prompt, message, response_schema=OpenAIText, images=local_images
|
||||
prompt, message, response_schema=OpenAIText, attachments=local_images
|
||||
)
|
||||
|
||||
if not response:
|
||||
|
||||
@@ -43,6 +43,7 @@ def get_app_info(session: Session = Depends(generate_session)):
|
||||
oidc_provider_name=settings.OIDC_PROVIDER_NAME,
|
||||
enable_openai=settings.OPENAI_ENABLED,
|
||||
enable_openai_image_services=settings.OPENAI_ENABLED and settings.OPENAI_ENABLE_IMAGE_SERVICES,
|
||||
enable_openai_transcription_services=settings.OPENAI_ENABLED and settings.OPENAI_ENABLE_TRANSCRIPTION_SERVICES,
|
||||
allow_password_login=settings.ALLOW_PASSWORD_LOGIN,
|
||||
token_time=settings.TOKEN_TIME,
|
||||
)
|
||||
|
||||
@@ -23,6 +23,7 @@ class AppInfo(MealieModel):
|
||||
oidc_provider_name: str
|
||||
enable_openai: bool
|
||||
enable_openai_image_services: bool
|
||||
enable_openai_transcription_services: bool
|
||||
token_time: int
|
||||
|
||||
|
||||
|
||||
@@ -7,14 +7,16 @@ from pathlib import Path
|
||||
from textwrap import dedent
|
||||
from typing import TypeVar
|
||||
|
||||
import openai
|
||||
from openai import AsyncOpenAI
|
||||
from openai.types.chat import ChatCompletion
|
||||
from pydantic import BaseModel, field_validator
|
||||
|
||||
from mealie.core import root_logger
|
||||
from mealie.core import exceptions, root_logger
|
||||
from mealie.core.config import get_app_settings
|
||||
from mealie.pkgs import img
|
||||
from mealie.schema.openai._base import OpenAIBase
|
||||
from mealie.schema.openai.general import OpenAIText
|
||||
|
||||
from .._base_service import BaseService
|
||||
|
||||
@@ -48,7 +50,12 @@ class OpenAIDataInjection(BaseModel):
|
||||
return value
|
||||
|
||||
|
||||
class OpenAIImageBase(BaseModel, ABC):
|
||||
class OpenAIAttachment(BaseModel, ABC):
    """Abstract base for content attached to an OpenAI chat message.

    Subclasses implement build_message() to return the message-part dict that
    is appended to the user message list sent to the chat completion API.
    """

    @abstractmethod
    def build_message(self) -> dict: ...
|
||||
|
||||
|
||||
class OpenAIImageBase(OpenAIAttachment):
    """Attachment base class for images; subclasses supply the image URL."""

    @abstractmethod
    def get_image_url(self) -> str: ...
|
||||
|
||||
@@ -79,6 +86,17 @@ class OpenAILocalImage(OpenAIImageBase):
|
||||
return f"data:image/jpeg;base64,{b64content}"
|
||||
|
||||
|
||||
class OpenAILocalAudio(OpenAIAttachment):
    """An audio clip attached inline to a chat message as base64-encoded data."""

    # base64-encoded audio bytes
    data: str
    # audio container format (e.g. "mp3"), passed through to the API unchanged
    format: str

    def build_message(self) -> dict:
        """Return the "input_audio" message-part dict for this clip."""
        return {
            "type": "input_audio",
            "input_audio": {"data": self.data, "format": self.format},
        }
|
||||
|
||||
|
||||
class OpenAIService(BaseService):
|
||||
PROMPTS_DIR = Path(os.path.dirname(os.path.abspath(__file__))) / "prompts"
|
||||
|
||||
@@ -88,9 +106,9 @@ class OpenAIService(BaseService):
|
||||
raise ValueError("OpenAI is not enabled")
|
||||
|
||||
self.model = settings.OPENAI_MODEL
|
||||
self.audio_model = settings.OPENAI_AUDIO_MODEL
|
||||
self.workers = settings.OPENAI_WORKERS
|
||||
self.send_db_data = settings.OPENAI_SEND_DATABASE_DATA
|
||||
self.enable_image_services = settings.OPENAI_ENABLE_IMAGE_SERVICES
|
||||
self.custom_prompt_dir = settings.OPENAI_CUSTOM_PROMPT_DIR
|
||||
|
||||
self.get_client = lambda: AsyncOpenAI(
|
||||
@@ -215,17 +233,14 @@ class OpenAIService(BaseService):
|
||||
message: str,
|
||||
*,
|
||||
response_schema: type[T],
|
||||
images: list[OpenAIImageBase] | None = None,
|
||||
attachments: list[OpenAIAttachment] | None = None,
|
||||
) -> T | None:
|
||||
"""Send data to OpenAI and return the response message content"""
|
||||
if images and not self.enable_image_services:
|
||||
self.logger.warning("OpenAI image services are disabled, ignoring images")
|
||||
images = None
|
||||
|
||||
try:
|
||||
user_messages = [{"type": "text", "text": message}]
|
||||
for image in images or []:
|
||||
user_messages.append(image.build_message())
|
||||
for attachment in attachments or []:
|
||||
user_messages.append(attachment.build_message())
|
||||
|
||||
response = await self._get_raw_response(prompt, user_messages, response_schema)
|
||||
if not response.choices:
|
||||
@@ -233,5 +248,41 @@ class OpenAIService(BaseService):
|
||||
|
||||
response_text = response.choices[0].message.content
|
||||
return response_schema.parse_openai_response(response_text)
|
||||
except openai.RateLimitError as e:
|
||||
raise exceptions.RateLimitError(str(e)) from e
|
||||
except Exception as e:
|
||||
raise Exception(f"OpenAI Request Failed. {e.__class__.__name__}: {e}") from e
|
||||
|
||||
async def transcribe_audio(self, audio_file_path: Path) -> str | None:
    """Transcribe the audio file at *audio_file_path* using OpenAI.

    Tries the dedicated transcription endpoint (``self.audio_model``) first.
    On any failure other than a rate limit, falls back to a chat completion
    with the audio attached inline as base64.

    Returns:
        The transcript text, or ``None`` if the fallback produced no response.

    Raises:
        exceptions.RateLimitError: if OpenAI returns a 429 on either path.
    """
    client = self.get_client()

    # Create a transcription from the audio
    try:
        with open(audio_file_path, "rb") as audio_file:
            transcript = await client.audio.transcriptions.create(
                model=self.audio_model,
                file=audio_file,
            )
        return transcript.text
    except openai.RateLimitError as e:
        # Rate limits are surfaced to the caller rather than retried here.
        raise exceptions.RateLimitError(str(e)) from e
    except Exception as e:
        # Any other failure triggers the chat-completion fallback below.
        self.logger.warning(
            f"Failed to create audio transcription, falling back to chat completion ({e.__class__.__name__}: {e})"
        )

    # Fallback to chat completion
    path_obj = Path(audio_file_path)
    with open(path_obj, "rb") as audio_file:
        audio_data = base64.b64encode(audio_file.read()).decode("utf-8")

    # The API needs the container format (file suffix, e.g. "mp3") alongside the data.
    file_ext = path_obj.suffix.lstrip(".").lower()
    audio_attachment = OpenAILocalAudio(data=audio_data, format=file_ext)
    response = await self.get_response(
        self.get_prompt("general.transcribe-audio"),
        "Attached is the audio data.",
        response_schema=OpenAIText,
        attachments=[audio_attachment],
    )

    return response.text if response else None
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
Transcribe any audio data provided to you. You should respond only with the audio transcription and nothing else.
|
||||
@@ -0,0 +1,7 @@
|
||||
You will receive a video transcript and the video's original caption text. Analyze BOTH inputs to generate a single, accurate recipe in schema.org Recipe format. Reference: https://schema.org/Recipe
|
||||
|
||||
Do not create or make up any information. If insufficient data is found, return an empty object.
|
||||
|
||||
- The video transcript is the primary source for instructions.
|
||||
- The caption text is the primary source for the ingredient list and title.
|
||||
- If there is a conflict (e.g., caption says "1 cup" but transcript says "1.5 cups"), trust the video transcript.
|
||||
@@ -611,7 +611,7 @@ class OpenAIRecipeService(RecipeServiceBase):
|
||||
prompt,
|
||||
message,
|
||||
response_schema=OpenAIRecipe,
|
||||
images=openai_images,
|
||||
attachments=openai_images,
|
||||
)
|
||||
if not response:
|
||||
raise ValueError("Received empty response from OpenAI")
|
||||
|
||||
@@ -7,6 +7,7 @@ from mealie.services.scraper.scraped_extras import ScrapedExtras
|
||||
from .scraper_strategies import (
|
||||
ABCScraperStrategy,
|
||||
RecipeScraperOpenAI,
|
||||
RecipeScraperOpenAITranscription,
|
||||
RecipeScraperOpenGraph,
|
||||
RecipeScraperPackage,
|
||||
safe_scrape_html,
|
||||
@@ -14,6 +15,7 @@ from .scraper_strategies import (
|
||||
|
||||
# Strategies are attempted in list order by RecipeScraper.scrape; earlier
# entries are tried first and later ones serve as fallbacks.
DEFAULT_SCRAPER_STRATEGIES: list[type[ABCScraperStrategy]] = [
    RecipeScraperPackage,
    RecipeScraperOpenAITranscription,
    RecipeScraperOpenAI,
    RecipeScraperOpenGraph,
]
|
||||
@@ -42,8 +44,11 @@ class RecipeScraper:
|
||||
"""
|
||||
|
||||
raw_html = html or await safe_scrape_html(url)
|
||||
for scraper_type in self.scrapers:
|
||||
scraper = scraper_type(url, self.translator, raw_html=raw_html)
|
||||
for ScraperClass in self.scrapers:
|
||||
scraper = ScraperClass(url, self.translator, raw_html=raw_html)
|
||||
if not scraper.can_scrape():
|
||||
self.logger.debug(f"Skipping {scraper.__class__.__name__}")
|
||||
continue
|
||||
|
||||
try:
|
||||
result = await scraper.parse()
|
||||
|
||||
@@ -1,22 +1,33 @@
|
||||
import asyncio
|
||||
import functools
|
||||
import re
|
||||
import time
|
||||
from abc import ABC, abstractmethod
|
||||
from collections.abc import Callable
|
||||
from typing import Any
|
||||
from pathlib import Path
|
||||
from typing import Any, TypedDict
|
||||
|
||||
import bs4
|
||||
import extruct
|
||||
import yt_dlp
|
||||
from fastapi import HTTPException, status
|
||||
from httpx import AsyncClient, Response
|
||||
from recipe_scrapers import NoSchemaFoundInWildMode, SchemaScraperFactory, scrape_html
|
||||
from slugify import slugify
|
||||
from w3lib.html import get_base_url
|
||||
from yt_dlp.extractor.generic import GenericIE
|
||||
|
||||
from mealie.core import exceptions
|
||||
from mealie.core.config import get_app_settings
|
||||
from mealie.core.dependencies.dependencies import get_temporary_path
|
||||
from mealie.core.root_logger import get_logger
|
||||
from mealie.lang.providers import Translator
|
||||
from mealie.pkgs import safehttp
|
||||
from mealie.schema.openai.general import OpenAIText
|
||||
from mealie.schema.openai.recipe import OpenAIRecipe
|
||||
from mealie.schema.recipe.recipe import Recipe, RecipeStep
|
||||
from mealie.schema.recipe.recipe_ingredient import RecipeIngredient
|
||||
from mealie.schema.recipe.recipe_notes import RecipeNote
|
||||
from mealie.services.openai import OpenAIService
|
||||
from mealie.services.scraper.scraped_extras import ScrapedExtras
|
||||
|
||||
@@ -27,6 +38,12 @@ SCRAPER_TIMEOUT = 15
|
||||
logger = get_logger()
|
||||
|
||||
|
||||
@functools.cache
def _get_yt_dlp_extractors() -> list:
    """Build and cache the yt-dlp extractor list once per process lifetime.

    Excludes broken extractors and the catch-all GenericIE so that only
    site-specific, working extractors remain.
    """
    usable = []
    for extractor in yt_dlp.extractor.gen_extractors():
        if not extractor.working():
            continue
        if isinstance(extractor, GenericIE):
            continue
        usable.append(extractor)
    return usable
|
||||
|
||||
|
||||
class ForceTimeoutException(Exception):
    """Exception used to signal a forced timeout.

    NOTE(review): raised/handled outside this view; the name and the module's
    SCRAPER_TIMEOUT constant suggest it aborts scrapers that run too long --
    confirm against the timeout wrapper.
    """
|
||||
|
||||
@@ -115,6 +132,9 @@ class ABCScraperStrategy(ABC):
|
||||
self.raw_html = raw_html
|
||||
self.translator = translator
|
||||
|
||||
@abstractmethod
def can_scrape(self) -> bool:
    """Return True when this strategy has enough input to attempt a scrape."""
    ...
|
||||
|
||||
@abstractmethod
async def get_html(self, url: str) -> str:
    """Return the HTML this strategy should parse for *url* (cached, fetched, or empty)."""
    ...
|
||||
|
||||
@@ -132,6 +152,9 @@ class ABCScraperStrategy(ABC):
|
||||
|
||||
|
||||
class RecipeScraperPackage(ABCScraperStrategy):
|
||||
def can_scrape(self) -> bool:
    """Scraping is possible whenever a URL or pre-fetched HTML is available."""
    return bool(self.url or self.raw_html)
|
||||
|
||||
@staticmethod
|
||||
def ld_json_to_html(ld_json: str) -> str:
|
||||
return (
|
||||
@@ -271,6 +294,10 @@ class RecipeScraperOpenAI(RecipeScraperPackage):
|
||||
rather than trying to scrape it directly.
|
||||
"""
|
||||
|
||||
def can_scrape(self) -> bool:
    """Require OpenAI to be enabled, on top of the base URL/HTML check."""
    settings = get_app_settings()
    return settings.OPENAI_ENABLED and super().can_scrape()
|
||||
|
||||
def extract_json_ld_data_from_html(self, soup: bs4.BeautifulSoup) -> str:
|
||||
data_parts: list[str] = []
|
||||
for script in soup.find_all("script", type="application/ld+json"):
|
||||
@@ -350,7 +377,178 @@ class RecipeScraperOpenAI(RecipeScraperPackage):
|
||||
return ""
|
||||
|
||||
|
||||
class TranscribedAudio(TypedDict):
    """Artifacts of a downloaded video, passed from download to recipe parsing."""

    # Path to the extracted audio file written by yt-dlp (mealie.mp3 in the temp dir)
    audio: Path
    # Path to a matching .vtt subtitle file, or None when none was downloaded
    subtitle: Path | None
    # Video title reported by yt-dlp ("" when missing)
    title: str
    # Video description/caption reported by yt-dlp ("" when missing)
    description: str
    # Thumbnail URL reported by yt-dlp, or None
    thumbnail_url: str | None
    # Plain-text transcript; starts empty and is filled in later
    transcription: str
|
||||
|
||||
|
||||
class RecipeScraperOpenAITranscription(ABCScraperStrategy):
    """Scrape a recipe from a video URL by transcribing its audio with OpenAI.

    Downloads the audio (and any subtitles) with yt-dlp, obtains a transcript
    (preferring downloaded subtitles over a transcription API call), then asks
    OpenAI to combine the transcript with the video's title and description
    into a recipe.
    """

    # Subtitle languages to request; also the priority order when picking
    # which downloaded subtitle file to use.
    SUBTITLE_LANGS = ["en", "fr", "es", "de", "it"]

    def can_scrape(self) -> bool:
        """Return True when a URL is present, OpenAI transcription is enabled,
        and a working non-generic yt-dlp extractor claims the URL."""
        if not self.url:
            return False

        settings = get_app_settings()
        if not (settings.OPENAI_ENABLED and settings.OPENAI_ENABLE_TRANSCRIPTION_SERVICES):
            return False

        # Check if we can actually download something to transcribe
        return any(ie.suitable(self.url) for ie in _get_yt_dlp_extractors())

    @staticmethod
    def _parse_subtitle_content(subtitle_content: str) -> str:
        """Flatten WebVTT subtitle text into a single plain-text string.

        Drops the WEBVTT header, cue timing lines ("-->"), bare cue numbers,
        and inline markup tags, then joins the remaining caption lines.
        """
        # TODO: is there a better way to parse subtitles that's more efficient?

        lines = []
        for line in subtitle_content.split("\n"):
            if line.strip() and not line.startswith("WEBVTT") and "-->" not in line and not line.isdigit():
                lines.append(line.strip())

        raw_content = " ".join(lines)
        # Strip inline tags (e.g. <c>, inline timestamps) left in the cue text
        content = re.sub(r"<[^>]+>", "", raw_content)
        return content

    def _download_audio(self, temp_path: Path) -> TranscribedAudio:
        """Downloads audio and subtitles from the video URL.

        Writes "mealie.mp3" (mono, low bitrate) plus any "mealie.<lang>.vtt"
        subtitle files into *temp_path*, and returns a TranscribedAudio dict
        with an empty "transcription" to be filled in by the caller.

        Raises:
            exceptions.VideoDownloadError: if yt-dlp fails or returns no info.
        """
        output_template = temp_path / "mealie"  # No extension here

        ydl_opts = {
            "format": "bestaudio/best",
            "outtmpl": str(output_template) + ".%(ext)s",
            "quiet": True,
            "writesubtitles": True,
            "writeautomaticsub": True,
            "subtitleslangs": self.SUBTITLE_LANGS,
            "skip_download": False,
            "ignoreerrors": True,
            "postprocessors": [
                {
                    # Re-encode to a small mono mp3 ("-ac 1" below) to keep
                    # the transcription payload cheap
                    "key": "FFmpegExtractAudio",
                    "preferredcodec": "mp3",
                    "preferredquality": "32",
                }
            ],
            "postprocessor_args": ["-ac", "1"],
        }

        try:
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                info = ydl.extract_info(self.url, download=True)

                if info is None:
                    raise exceptions.VideoDownloadError(
                        "Failed to extract video information. The video may be unavailable or the URL is invalid."
                    )

                # Pick the first subtitle file actually written, in language priority order
                sub_path = None
                for lang in self.SUBTITLE_LANGS:
                    potential_path = output_template.with_suffix(f".{lang}.vtt")
                    if potential_path.exists():
                        sub_path = potential_path
                        break

                return {
                    "audio": output_template.with_suffix(".mp3"),
                    "subtitle": sub_path,
                    "title": info.get("title", ""),
                    "description": info.get("description", ""),
                    "thumbnail_url": info.get("thumbnail") or None,
                    "transcription": "",
                }
        except exceptions.VideoDownloadError:
            # Already our domain error; don't re-wrap it below
            raise
        except Exception as e:
            raise exceptions.VideoDownloadError(f"Failed to download video: {e}") from e

    async def get_html(self, url: str) -> str:
        return self.raw_html or ""  # we don't use HTML with this scraper since we use ytdlp

    async def parse(self) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
        """Download, transcribe, and convert the video at ``self.url`` into a Recipe.

        Returns:
            ``(None, None)`` when no transcript could be produced at all,
            otherwise the parsed Recipe plus empty ScrapedExtras.

        Raises:
            exceptions.VideoDownloadError: download failure (from _download_audio).
            exceptions.RateLimitError: OpenAI 429 while transcribing or parsing.
            exceptions.OpenAIServiceError: any other OpenAI failure or empty response.
        """
        openai_service = OpenAIService()

        with get_temporary_path() as temp_path:
            # yt-dlp is blocking, so run the download in a worker thread
            video_data = await asyncio.to_thread(self._download_audio, temp_path)

            # Prefer subtitles shipped with the video over a transcription call
            if video_data["subtitle"]:
                try:
                    with open(video_data["subtitle"], encoding="utf-8") as f:
                        subtitle_content = f.read()
                    video_data["transcription"] = self._parse_subtitle_content(subtitle_content)
                    self.logger.info("Using subtitles from video instead of transcription")
                except Exception:
                    self.logger.exception("Failed to read subtitles, falling back to transcription")
                    video_data["transcription"] = ""

            if not video_data["transcription"]:
                try:
                    transcription = await openai_service.transcribe_audio(video_data["audio"])
                except exceptions.RateLimitError:
                    # Let rate limits propagate unchanged so callers can react to 429s
                    raise
                except Exception as e:
                    raise exceptions.OpenAIServiceError(f"Failed to transcribe audio: {e}") from e
                if not transcription:
                    raise exceptions.OpenAIServiceError("No transcription returned from OpenAI")
                video_data["transcription"] = transcription

            if not video_data["transcription"]:
                self.logger.error("Could not extract a transcript (no data)")
                return None, None

            self.logger.debug(f"Transcription: {video_data['transcription'][:200]}...")
            prompt = openai_service.get_prompt("recipes.parse-recipe-video")

            # Give the model both the caption/description and the transcript;
            # the prompt tells it how to reconcile the two sources
            message_parts = [
                f"Title: {video_data['title']}",
                f"Description: {video_data['description']}",
                f"Transcription: {video_data['transcription']}",
            ]

            try:
                response = await openai_service.get_response(prompt, "\n".join(message_parts), response_schema=OpenAIRecipe)
            except exceptions.RateLimitError:
                raise
            except Exception as e:
                raise exceptions.OpenAIServiceError(f"Failed to extract recipe from video: {e}") from e

            if not response:
                raise exceptions.OpenAIServiceError("OpenAI returned an empty response when extracting recipe")

            # Map the OpenAI recipe schema onto Mealie's Recipe model, skipping
            # ingredients/instructions/notes with no text
            recipe = Recipe(
                name=response.name,
                slug="",
                description=response.description,
                recipe_yield=response.recipe_yield,
                total_time=response.total_time,
                prep_time=response.prep_time,
                perform_time=response.perform_time,
                recipe_ingredient=[
                    RecipeIngredient(title=ingredient.title, note=ingredient.text)
                    for ingredient in response.ingredients
                    if ingredient.text
                ],
                recipe_instructions=[
                    RecipeStep(title=instruction.title, text=instruction.text)
                    for instruction in response.instructions
                    if instruction.text
                ],
                notes=[RecipeNote(title=note.title or "", text=note.text) for note in response.notes if note.text],
                image=video_data["thumbnail_url"] or None,
                org_url=self.url,
            )

            self.logger.info(f"Successfully extracted recipe from video: {video_data['title']}")
            return recipe, ScrapedExtras()
|
||||
|
||||
|
||||
class RecipeScraperOpenGraph(ABCScraperStrategy):
|
||||
def can_scrape(self) -> bool:
    """Scraping is possible whenever a URL or pre-fetched HTML is available."""
    return bool(self.url or self.raw_html)
|
||||
|
||||
async def get_html(self, url: str) -> str:
    """Return the cached raw HTML if present, otherwise fetch *url* via safe_scrape_html."""
    return self.raw_html or await safe_scrape_html(url)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user