feat: Add social media video import (YouTube, TikTok, Instagram) (#6764)

Co-authored-by: Maxime Louward <61564950+mlouward@users.noreply.github.com>
Co-authored-by: Michael Genson <genson.michael@gmail.com>
Co-authored-by: Michael Genson <71845777+michael-genson@users.noreply.github.com>
This commit is contained in:
Aurelien
2026-03-09 21:44:27 +01:00
committed by GitHub
parent 5a223aa92d
commit 1344f1674d
25 changed files with 563 additions and 45 deletions

View File

@@ -46,6 +46,30 @@ class NoEntryFound(Exception):
pass
class OpenAIServiceError(Exception):
    """Raised when a request to an OpenAI service fails."""
class RateLimitError(Exception):
    """Raised when an external API answers with a rate limit (429) error."""
class VideoDownloadError(Exception):
    """Raised when a video cannot be downloaded or processed."""
def mealie_registered_exceptions(t: Translator) -> dict:
"""
This function returns a dictionary of all the globally registered exceptions in the Mealie application.

View File

@@ -393,12 +393,16 @@ class AppSettings(AppLoggingSettings):
"""Your OpenAI API key. Required to enable OpenAI features"""
OPENAI_MODEL: str = "gpt-4o"
"""Which OpenAI model to send requests to. Leave this unset for most usecases"""
OPENAI_AUDIO_MODEL: str = "whisper-1"
"""Which OpenAI model to use for audio transcription. Leave this unset for most usecases"""
OPENAI_CUSTOM_HEADERS: dict[str, str] = {}
"""Custom HTTP headers to send with each OpenAI request"""
OPENAI_CUSTOM_PARAMS: dict[str, Any] = {}
"""Custom HTTP parameters to send with each OpenAI request"""
OPENAI_ENABLE_IMAGE_SERVICES: bool = True
"""Whether to enable image-related features in OpenAI"""
OPENAI_ENABLE_TRANSCRIPTION_SERVICES: bool = True
"""Whether to enable audio transcription features in OpenAI"""
OPENAI_WORKERS: int = 2
"""
Number of OpenAI workers per request. Higher values may increase

View File

@@ -38,6 +38,8 @@ class AdminAboutController(BaseAdminController):
oidc_provider_name=settings.OIDC_PROVIDER_NAME,
enable_openai=settings.OPENAI_ENABLED,
enable_openai_image_services=settings.OPENAI_ENABLED and settings.OPENAI_ENABLE_IMAGE_SERVICES,
enable_openai_transcription_services=settings.OPENAI_ENABLED
and settings.OPENAI_ENABLE_TRANSCRIPTION_SERVICES,
)
@router.get("/statistics", response_model=AppStatistics)

View File

@@ -34,14 +34,14 @@ class AdminDebugController(BaseAdminController):
try:
openai_service = OpenAIService()
prompt = openai_service.get_prompt("debug")
prompt = openai_service.get_prompt("general.debug")
message = "Hello, checking to see if I can reach you."
if local_images:
message = f"{message} Here is an image to test with:"
response = await openai_service.get_response(
prompt, message, response_schema=OpenAIText, images=local_images
prompt, message, response_schema=OpenAIText, attachments=local_images
)
if not response:

View File

@@ -43,6 +43,7 @@ def get_app_info(session: Session = Depends(generate_session)):
oidc_provider_name=settings.OIDC_PROVIDER_NAME,
enable_openai=settings.OPENAI_ENABLED,
enable_openai_image_services=settings.OPENAI_ENABLED and settings.OPENAI_ENABLE_IMAGE_SERVICES,
enable_openai_transcription_services=settings.OPENAI_ENABLED and settings.OPENAI_ENABLE_TRANSCRIPTION_SERVICES,
allow_password_login=settings.ALLOW_PASSWORD_LOGIN,
token_time=settings.TOKEN_TIME,
)

View File

@@ -23,6 +23,7 @@ class AppInfo(MealieModel):
oidc_provider_name: str
enable_openai: bool
enable_openai_image_services: bool
enable_openai_transcription_services: bool
token_time: int

View File

@@ -7,14 +7,16 @@ from pathlib import Path
from textwrap import dedent
from typing import TypeVar
import openai
from openai import AsyncOpenAI
from openai.types.chat import ChatCompletion
from pydantic import BaseModel, field_validator
from mealie.core import root_logger
from mealie.core import exceptions, root_logger
from mealie.core.config import get_app_settings
from mealie.pkgs import img
from mealie.schema.openai._base import OpenAIBase
from mealie.schema.openai.general import OpenAIText
from .._base_service import BaseService
@@ -48,7 +50,12 @@ class OpenAIDataInjection(BaseModel):
return value
class OpenAIImageBase(BaseModel, ABC):
class OpenAIAttachment(BaseModel, ABC):
    """Base class for content that can be attached to the user message of an OpenAI request."""

    @abstractmethod
    def build_message(self) -> dict:
        """Return this attachment as one content part of the user message.

        The service appends the returned dict, unchanged, to the list of user
        message parts it sends with the request.
        """
        ...
class OpenAIImageBase(OpenAIAttachment):
@abstractmethod
def get_image_url(self) -> str: ...
@@ -79,6 +86,17 @@ class OpenAILocalImage(OpenAIImageBase):
return f"data:image/jpeg;base64,{b64content}"
class OpenAILocalAudio(OpenAIAttachment):
    """Audio attachment whose content travels inline with the request.

    The visible caller fills ``data`` with the base64-encoded file bytes and
    ``format`` with the lowercased file extension (for example ``mp3``).
    """

    data: str
    format: str

    def build_message(self) -> dict:
        """Wrap the audio payload in an ``input_audio`` content part."""
        payload = {"data": self.data, "format": self.format}
        return {"type": "input_audio", "input_audio": payload}
class OpenAIService(BaseService):
PROMPTS_DIR = Path(os.path.dirname(os.path.abspath(__file__))) / "prompts"
@@ -88,9 +106,9 @@ class OpenAIService(BaseService):
raise ValueError("OpenAI is not enabled")
self.model = settings.OPENAI_MODEL
self.audio_model = settings.OPENAI_AUDIO_MODEL
self.workers = settings.OPENAI_WORKERS
self.send_db_data = settings.OPENAI_SEND_DATABASE_DATA
self.enable_image_services = settings.OPENAI_ENABLE_IMAGE_SERVICES
self.custom_prompt_dir = settings.OPENAI_CUSTOM_PROMPT_DIR
self.get_client = lambda: AsyncOpenAI(
@@ -215,17 +233,14 @@ class OpenAIService(BaseService):
message: str,
*,
response_schema: type[T],
images: list[OpenAIImageBase] | None = None,
attachments: list[OpenAIAttachment] | None = None,
) -> T | None:
"""Send data to OpenAI and return the response message content"""
if images and not self.enable_image_services:
self.logger.warning("OpenAI image services are disabled, ignoring images")
images = None
try:
user_messages = [{"type": "text", "text": message}]
for image in images or []:
user_messages.append(image.build_message())
for attachment in attachments or []:
user_messages.append(attachment.build_message())
response = await self._get_raw_response(prompt, user_messages, response_schema)
if not response.choices:
@@ -233,5 +248,41 @@ class OpenAIService(BaseService):
response_text = response.choices[0].message.content
return response_schema.parse_openai_response(response_text)
except openai.RateLimitError as e:
raise exceptions.RateLimitError(str(e)) from e
except Exception as e:
raise Exception(f"OpenAI Request Failed. {e.__class__.__name__}: {e}") from e
async def transcribe_audio(self, audio_file_path: Path) -> str | None:
    """Turn an audio file into text.

    The dedicated transcription endpoint is tried first, using the configured
    audio model. If that attempt fails for any reason other than rate
    limiting, the file is base64-encoded and sent as an attachment to a
    regular chat request with the ``general.transcribe-audio`` prompt.

    Args:
        audio_file_path: Location of the audio file to transcribe.

    Returns:
        The transcript, or ``None`` when the chat fallback yields no response.

    Raises:
        exceptions.RateLimitError: The transcription endpoint reported a rate limit.
    """
    api = self.get_client()

    # First choice: the transcription endpoint
    try:
        with open(audio_file_path, "rb") as stream:
            result = await api.audio.transcriptions.create(model=self.audio_model, file=stream)
        return result.text
    except openai.RateLimitError as e:
        raise exceptions.RateLimitError(str(e)) from e
    except Exception as e:
        self.logger.warning(
            f"Failed to create audio transcription, falling back to chat completion ({e.__class__.__name__}: {e})"
        )

    # Second choice: hand the raw audio to a chat completion
    source = Path(audio_file_path)
    encoded = base64.b64encode(source.read_bytes()).decode("utf-8")
    extension = source.suffix.lstrip(".").lower()
    attachment = OpenAILocalAudio(data=encoded, format=extension)

    reply = await self.get_response(
        self.get_prompt("general.transcribe-audio"),
        "Attached is the audio data.",
        response_schema=OpenAIText,
        attachments=[attachment],
    )
    if not reply:
        return None
    return reply.text

View File

@@ -0,0 +1 @@
Transcribe any audio data provided to you. You should respond only with the audio transcription and nothing else.

View File

@@ -0,0 +1,7 @@
You will receive a video transcript and the video's original caption text. Analyze BOTH inputs to generate a single, accurate recipe in schema.org Recipe format. Reference: https://schema.org/Recipe
Do not create or make up any information. If insufficient data is found, return an empty object.
- The video transcript is the primary source for instructions.
- The caption text is the primary source for the ingredient list and title.
- If there is a conflict (e.g., caption says "1 cup" but transcript says "1.5 cups"), trust the video transcript.

View File

@@ -611,7 +611,7 @@ class OpenAIRecipeService(RecipeServiceBase):
prompt,
message,
response_schema=OpenAIRecipe,
images=openai_images,
attachments=openai_images,
)
if not response:
raise ValueError("Received empty response from OpenAI")

View File

@@ -7,6 +7,7 @@ from mealie.services.scraper.scraped_extras import ScrapedExtras
from .scraper_strategies import (
ABCScraperStrategy,
RecipeScraperOpenAI,
RecipeScraperOpenAITranscription,
RecipeScraperOpenGraph,
RecipeScraperPackage,
safe_scrape_html,
@@ -14,6 +15,7 @@ from .scraper_strategies import (
# Scraper strategy classes, listed in the order they are declared here.
# NOTE(review): RecipeScraper appears to walk its scrapers in sequence and skip
# those whose can_scrape() is false, so this order presumably sets priority —
# confirm that RecipeScraper defaults to this list.
DEFAULT_SCRAPER_STRATEGIES: list[type[ABCScraperStrategy]] = [
    RecipeScraperPackage,
    RecipeScraperOpenAITranscription,
    RecipeScraperOpenAI,
    RecipeScraperOpenGraph,
]
@@ -42,8 +44,11 @@ class RecipeScraper:
"""
raw_html = html or await safe_scrape_html(url)
for scraper_type in self.scrapers:
scraper = scraper_type(url, self.translator, raw_html=raw_html)
for ScraperClass in self.scrapers:
scraper = ScraperClass(url, self.translator, raw_html=raw_html)
if not scraper.can_scrape():
self.logger.debug(f"Skipping {scraper.__class__.__name__}")
continue
try:
result = await scraper.parse()

View File

@@ -1,22 +1,33 @@
import asyncio
import functools
import re
import time
from abc import ABC, abstractmethod
from collections.abc import Callable
from typing import Any
from pathlib import Path
from typing import Any, TypedDict
import bs4
import extruct
import yt_dlp
from fastapi import HTTPException, status
from httpx import AsyncClient, Response
from recipe_scrapers import NoSchemaFoundInWildMode, SchemaScraperFactory, scrape_html
from slugify import slugify
from w3lib.html import get_base_url
from yt_dlp.extractor.generic import GenericIE
from mealie.core import exceptions
from mealie.core.config import get_app_settings
from mealie.core.dependencies.dependencies import get_temporary_path
from mealie.core.root_logger import get_logger
from mealie.lang.providers import Translator
from mealie.pkgs import safehttp
from mealie.schema.openai.general import OpenAIText
from mealie.schema.openai.recipe import OpenAIRecipe
from mealie.schema.recipe.recipe import Recipe, RecipeStep
from mealie.schema.recipe.recipe_ingredient import RecipeIngredient
from mealie.schema.recipe.recipe_notes import RecipeNote
from mealie.services.openai import OpenAIService
from mealie.services.scraper.scraped_extras import ScrapedExtras
@@ -27,6 +38,12 @@ SCRAPER_TIMEOUT = 15
logger = get_logger()
@functools.cache
def _get_yt_dlp_extractors() -> list:
    """Return yt-dlp's working, site-specific extractors.

    The list is computed on the first call and cached for the rest of the
    process. The generic extractor is left out (presumably because it would
    accept nearly any URL — confirm against yt-dlp).
    """
    usable = []
    for extractor in yt_dlp.extractor.gen_extractors():
        if not extractor.working():
            continue
        if isinstance(extractor, GenericIE):
            continue
        usable.append(extractor)
    return usable
class ForceTimeoutException(Exception):
    # NOTE(review): raised and handled outside this chunk; judging by the name it
    # signals a forced timeout — confirm at the use site.
    pass
@@ -115,6 +132,9 @@ class ABCScraperStrategy(ABC):
self.raw_html = raw_html
self.translator = translator
@abstractmethod
def can_scrape(self) -> bool:
    """Report whether this strategy is applicable to the current URL/HTML.

    The scrape loop checks this before calling ``parse()`` and skips the
    strategy when it returns False.
    """
    ...
@abstractmethod
async def get_html(self, url: str) -> str: ...
@@ -132,6 +152,9 @@ class ABCScraperStrategy(ABC):
class RecipeScraperPackage(ABCScraperStrategy):
def can_scrape(self) -> bool:
    """Usable as soon as there is either a URL or raw HTML to work with."""
    if self.url:
        return True
    return bool(self.raw_html)
@staticmethod
def ld_json_to_html(ld_json: str) -> str:
return (
@@ -271,6 +294,10 @@ class RecipeScraperOpenAI(RecipeScraperPackage):
rather than trying to scrape it directly.
"""
def can_scrape(self) -> bool:
    """Usable only when OpenAI is enabled and the parent strategy can scrape."""
    openai_enabled = get_app_settings().OPENAI_ENABLED
    if not openai_enabled:
        # Hand back the setting's own (falsy) value, as an `and` expression would
        return openai_enabled
    return super().can_scrape()
def extract_json_ld_data_from_html(self, soup: bs4.BeautifulSoup) -> str:
data_parts: list[str] = []
for script in soup.find_all("script", type="application/ld+json"):
@@ -350,7 +377,178 @@ class RecipeScraperOpenAI(RecipeScraperPackage):
return ""
class TranscribedAudio(TypedDict):
    """Data gathered for one video by ``RecipeScraperOpenAITranscription``."""

    # Expected location of the extracted mp3 audio file
    audio: Path
    # First subtitle (.vtt) file found for one of the supported languages, if any
    subtitle: Path | None
    # Title and description as reported by yt-dlp
    title: str
    description: str
    # Thumbnail URL reported by yt-dlp, or None when there is none
    thumbnail_url: str | None
    # Transcript text; created empty by the downloader and filled in by parse()
    transcription: str
class RecipeScraperOpenAITranscription(ABCScraperStrategy):
SUBTITLE_LANGS = ["en", "fr", "es", "de", "it"]
def can_scrape(self) -> bool:
    """Decide whether the URL is a video this strategy can transcribe.

    Requires a URL, OpenAI with transcription services enabled, and at least
    one site-specific yt-dlp extractor that accepts the URL.
    """
    if not self.url:
        return False

    settings = get_app_settings()
    if not settings.OPENAI_ENABLED or not settings.OPENAI_ENABLE_TRANSCRIPTION_SERVICES:
        return False

    # Only worth trying when yt-dlp has something to download for this URL
    for extractor in _get_yt_dlp_extractors():
        if extractor.suitable(self.url):
            return True
    return False
@staticmethod
def _parse_subtitle_content(subtitle_content: str) -> str:
# TODO: is there a better way to parse subtitles that's more efficient?
lines = []
for line in subtitle_content.split("\n"):
if line.strip() and not line.startswith("WEBVTT") and "-->" not in line and not line.isdigit():
lines.append(line.strip())
raw_content = " ".join(lines)
content = re.sub(r"<[^>]+>", "", raw_content)
return content
def _download_audio(self, temp_path: Path) -> TranscribedAudio:
    """Download the video's audio track and any available subtitles with yt-dlp.

    Files are written into ``temp_path`` under the base name ``mealie``. The
    audio is converted to a mono, low-bitrate mp3, and subtitles (manual or
    automatic) are requested for every language in ``SUBTITLE_LANGS``.

    Args:
        temp_path: Existing directory that receives the downloaded files.

    Returns:
        A ``TranscribedAudio`` whose ``transcription`` is still empty.

    Raises:
        exceptions.VideoDownloadError: yt-dlp returned no video information,
            produced neither audio nor subtitles, or failed in any other way.
    """
    output_template = temp_path / "mealie"  # No extension here; yt-dlp appends it
    ydl_opts = {
        "format": "bestaudio/best",
        "outtmpl": str(output_template) + ".%(ext)s",
        "quiet": True,
        # A video URL that also carries a playlist parameter should only fetch the video
        "noplaylist": True,
        "writesubtitles": True,
        "writeautomaticsub": True,
        "subtitleslangs": self.SUBTITLE_LANGS,
        "skip_download": False,
        "ignoreerrors": True,
        "postprocessors": [
            {
                "key": "FFmpegExtractAudio",
                "preferredcodec": "mp3",
                "preferredquality": "32",
            }
        ],
        "postprocessor_args": ["-ac", "1"],
    }

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(self.url, download=True)

        if info is None:
            raise exceptions.VideoDownloadError(
                "Failed to extract video information. The video may be unavailable or the URL is invalid."
            )

        # First language (in preference order) for which a subtitle file was written
        subtitle_candidates = (output_template.with_suffix(f".{lang}.vtt") for lang in self.SUBTITLE_LANGS)
        sub_path = next((path for path in subtitle_candidates if path.exists()), None)

        audio_path = output_template.with_suffix(".mp3")
        # With "ignoreerrors" yt-dlp can return without having written anything usable
        if sub_path is None and not audio_path.exists():
            raise exceptions.VideoDownloadError("Failed to download video: no audio or subtitles were produced.")

        return {
            "audio": audio_path,
            "subtitle": sub_path,
            # The keys may be present with a None value, so `.get(key, "")` is not enough
            "title": info.get("title") or "",
            "description": info.get("description") or "",
            "thumbnail_url": info.get("thumbnail") or None,
            "transcription": "",
        }
    except exceptions.VideoDownloadError:
        raise
    except Exception as e:
        raise exceptions.VideoDownloadError(f"Failed to download video: {e}") from e
async def get_html(self, url: str) -> str:
    """Return the HTML supplied to the strategy, or an empty string.

    Nothing is fetched: this strategy gets its data through yt-dlp, so
    ``url`` goes unused here.
    """
    if self.raw_html:
        return self.raw_html
    return ""
async def parse(self) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
    """Build a recipe from the video behind ``self.url``.

    Steps: download audio and subtitles, take the transcript from the
    subtitles when they can be read (otherwise ask OpenAI to transcribe the
    audio), then send title, description and transcript to OpenAI with the
    ``recipes.parse-recipe-video`` prompt and map the answer onto a ``Recipe``.

    Returns:
        ``(recipe, ScrapedExtras())`` on success, ``(None, None)`` when no
        transcript text is available.

    Raises:
        exceptions.RateLimitError: Re-raised unchanged from the OpenAI service.
        exceptions.OpenAIServiceError: Transcription or recipe extraction
            failed or came back empty.
    """
    ai = OpenAIService()

    with get_temporary_path() as workdir:
        # The download helper is synchronous, so it runs in a worker thread
        media = await asyncio.to_thread(self._download_audio, workdir)
        transcript = media["transcription"]

        subtitle_file = media["subtitle"]
        if subtitle_file:
            try:
                with open(subtitle_file, encoding="utf-8") as fh:
                    raw_subtitles = fh.read()
                transcript = self._parse_subtitle_content(raw_subtitles)
                self.logger.info("Using subtitles from video instead of transcription")
            except Exception:
                self.logger.exception("Failed to read subtitles, falling back to transcription")
                transcript = ""

        if not transcript:
            try:
                transcript = await ai.transcribe_audio(media["audio"])
            except exceptions.RateLimitError:
                raise
            except Exception as e:
                raise exceptions.OpenAIServiceError(f"Failed to transcribe audio: {e}") from e

            if not transcript:
                raise exceptions.OpenAIServiceError("No transcription returned from OpenAI")

        media["transcription"] = transcript

    if not transcript:
        self.logger.error("Could not extract a transcript (no data)")
        return None, None

    self.logger.debug(f"Transcription: {transcript[:200]}...")

    prompt = ai.get_prompt("recipes.parse-recipe-video")
    sections = [
        f"Title: {media['title']}",
        f"Description: {media['description']}",
        f"Transcription: {transcript}",
    ]
    message = "\n".join(sections)

    try:
        response = await ai.get_response(prompt, message, response_schema=OpenAIRecipe)
    except exceptions.RateLimitError:
        raise
    except Exception as e:
        raise exceptions.OpenAIServiceError(f"Failed to extract recipe from video: {e}") from e

    if not response:
        raise exceptions.OpenAIServiceError("OpenAI returned an empty response when extracting recipe")

    # Entries without text carry no usable content and are left out
    ingredients = [
        RecipeIngredient(title=item.title, note=item.text) for item in response.ingredients if item.text
    ]
    steps = [RecipeStep(title=step.title, text=step.text) for step in response.instructions if step.text]
    notes = [RecipeNote(title=note.title or "", text=note.text) for note in response.notes if note.text]

    recipe = Recipe(
        name=response.name,
        slug="",
        description=response.description,
        recipe_yield=response.recipe_yield,
        total_time=response.total_time,
        prep_time=response.prep_time,
        perform_time=response.perform_time,
        recipe_ingredient=ingredients,
        recipe_instructions=steps,
        notes=notes,
        image=media["thumbnail_url"] or None,
        org_url=self.url,
    )

    self.logger.info(f"Successfully extracted recipe from video: {media['title']}")
    return recipe, ScrapedExtras()
class RecipeScraperOpenGraph(ABCScraperStrategy):
def can_scrape(self) -> bool:
    """Usable whenever a URL or raw HTML is available."""
    return any((self.url, self.raw_html))
async def get_html(self, url: str) -> str:
    """Prefer the HTML handed to the strategy; otherwise fetch ``url`` via ``safe_scrape_html``."""
    if self.raw_html:
        return self.raw_html
    return await safe_scrape_html(url)