feat: Open AI Recipe Scraper (#3690)

This commit is contained in:
Michael Genson
2024-06-07 06:45:50 -05:00
committed by GitHub
parent a49c32e663
commit 4afb767375
14 changed files with 264 additions and 55 deletions

View File

@@ -5,7 +5,17 @@ from zipfile import ZipFile
import orjson
import sqlalchemy
from fastapi import BackgroundTasks, Depends, File, Form, HTTPException, Path, Query, Request, status
from fastapi import (
BackgroundTasks,
Depends,
File,
Form,
HTTPException,
Path,
Query,
Request,
status,
)
from fastapi.datastructures import UploadFile
from fastapi.responses import JSONResponse
from pydantic import UUID4, BaseModel, Field
@@ -14,7 +24,11 @@ from starlette.background import BackgroundTask
from starlette.responses import FileResponse
from mealie.core import exceptions
from mealie.core.dependencies import get_temporary_path, get_temporary_zip_path, validate_recipe_token
from mealie.core.dependencies import (
get_temporary_path,
get_temporary_zip_path,
validate_recipe_token,
)
from mealie.core.security import create_recipe_slug_token
from mealie.db.models.group.cookbook import CookBook
from mealie.pkgs import cache
@@ -26,10 +40,19 @@ from mealie.routes._base.routers import MealieCrudRoute, UserAPIRouter
from mealie.schema.cookbook.cookbook import ReadCookBook
from mealie.schema.make_dependable import make_dependable
from mealie.schema.recipe import Recipe, RecipeImageTypes, ScrapeRecipe
from mealie.schema.recipe.recipe import CreateRecipe, CreateRecipeByUrlBulk, RecipeLastMade, RecipeSummary
from mealie.schema.recipe.recipe import (
CreateRecipe,
CreateRecipeByUrlBulk,
RecipeLastMade,
RecipeSummary,
)
from mealie.schema.recipe.recipe_asset import RecipeAsset
from mealie.schema.recipe.recipe_scraper import ScrapeRecipeTest
from mealie.schema.recipe.request_helpers import RecipeDuplicate, RecipeZipTokenResponse, UpdateImageResponse
from mealie.schema.recipe.request_helpers import (
RecipeDuplicate,
RecipeZipTokenResponse,
UpdateImageResponse,
)
from mealie.schema.response import PaginationBase, PaginationQuery
from mealie.schema.response.pagination import RecipeSearchQuery
from mealie.schema.response.responses import ErrorResponse
@@ -40,13 +63,21 @@ from mealie.services.event_bus_service.event_types import (
EventRecipeData,
EventTypes,
)
from mealie.services.recipe.recipe_data_service import InvalidDomainError, NotAnImageError, RecipeDataService
from mealie.services.recipe.recipe_data_service import (
InvalidDomainError,
NotAnImageError,
RecipeDataService,
)
from mealie.services.recipe.recipe_service import RecipeService
from mealie.services.recipe.template_service import TemplateService
from mealie.services.scraper.recipe_bulk_scraper import RecipeBulkScraperService
from mealie.services.scraper.scraped_extras import ScraperContext
from mealie.services.scraper.scraper import create_from_url
from mealie.services.scraper.scraper_strategies import ForceTimeoutException, RecipeScraperPackage
from mealie.services.scraper.scraper_strategies import (
ForceTimeoutException,
RecipeScraperOpenAI,
RecipeScraperPackage,
)
class JSONBytes(JSONResponse):
@@ -210,10 +241,11 @@ class RecipeController(BaseRecipeController):
return {"reportId": report_id}
@router.post("/test-scrape-url")
async def test_parse_recipe_url(self, url: ScrapeRecipeTest):
async def test_parse_recipe_url(self, data: ScrapeRecipeTest):
# Debugger should produce the same result as the scraper sees before cleaning
ScraperClass = RecipeScraperOpenAI if data.use_openai else RecipeScraperPackage
try:
if scraped_data := await RecipeScraperPackage(url.url, self.translator).scrape_url():
if scraped_data := await ScraperClass(data.url, self.translator).scrape_url():
return scraped_data.schema.data
except ForceTimeoutException as e:
raise HTTPException(