Mirror of https://github.com/mealie-recipes/mealie.git (synced 2025-12-31 06:10:31 -05:00)
Feature/import keywords as tags (#1170)
* feat: import original keywords as tags
* remove cached env
* Update frontend api types
* fix: Issues with previous tag scraping implementation
* Update category handling in backend
* Update backend tests to include group_id
* Correct type check
* Update create-url interface
* Improve tag cleaner list support
* remove builtin name shadowing
* update type annotations
* test include tags scraper
* implement scraper context for optional data
* re-add cache venv
* use continue instead of break
* remove test callback

Co-authored-by: Miroito <alban.vachette@gmail.com>
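For orientation, a minimal sketch of how a client could call the updated endpoint once this change lands. The /api/recipes prefix, base URL, and token handling are assumptions for illustration; only the /create-url route and the ScrapeRecipe payload come from the diff below.

import requests

BASE_URL = "http://localhost:9000"  # hypothetical Mealie instance
TOKEN = "your-api-token"            # hypothetical bearer token

# ScrapeRecipe now carries include_tags (camelCase "includeTags" over the wire);
# when true, the scraped page's keywords are imported as tags on the new recipe.
response = requests.post(
    f"{BASE_URL}/api/recipes/create-url",
    headers={"Authorization": f"Bearer {TOKEN}"},
    json={"url": "https://myfavoriterecipes.com/recipes", "includeTags": True},
)
print(response.json())  # the endpoint returns the slug of the created recipe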
@@ -57,8 +57,6 @@ async def start_scheduler():
         tasks.purge_group_data_exports,
     )
 
-    SchedulerRegistry.register_minutely(lambda: logger.info("Scheduler tick"))
-
     SchedulerRegistry.print_jobs()
 
     await SchedulerService.start()
@@ -34,10 +34,10 @@ class RecipeCategoryController(BaseUserController):
     def mixins(self):
         return HttpRepo(self.repo, self.deps.logger)
 
-    @router.get("", response_model=list[CategorySummary])
+    @router.get("", response_model=list[RecipeCategory])
     def get_all(self):
         """Returns a list of available categories in the database"""
-        return self.repo.get_all(override_schema=CategorySummary)
+        return self.repo.get_all(override_schema=RecipeCategory)
 
     @router.post("", status_code=201)
     def create_one(self, category: CategoryIn):
@@ -23,14 +23,16 @@ from mealie.routes._base import BaseUserController, controller
 from mealie.routes._base.mixins import HttpRepo
 from mealie.routes._base.routers import UserAPIRouter
 from mealie.schema.query import GetAll
-from mealie.schema.recipe import CreateRecipeByUrl, Recipe, RecipeImageTypes
+from mealie.schema.recipe import Recipe, RecipeImageTypes, ScrapeRecipe
 from mealie.schema.recipe.recipe import CreateRecipe, CreateRecipeByUrlBulk, RecipeSummary
 from mealie.schema.recipe.recipe_asset import RecipeAsset
+from mealie.schema.recipe.recipe_scraper import ScrapeRecipeTest
 from mealie.schema.response.responses import ErrorResponse
 from mealie.schema.server.tasks import ServerTaskNames
 from mealie.services.recipe.recipe_data_service import RecipeDataService
 from mealie.services.recipe.recipe_service import RecipeService
 from mealie.services.recipe.template_service import TemplateService
+from mealie.services.scraper.scraped_extras import ScraperContext
 from mealie.services.scraper.scraper import create_from_url
 from mealie.services.scraper.scraper_strategies import RecipeScraperPackage
 from mealie.services.server_tasks.background_executory import BackgroundExecutor
@@ -141,9 +143,15 @@ class RecipeController(BaseRecipeController):
     # URL Scraping Operations
 
     @router.post("/create-url", status_code=201, response_model=str)
-    def parse_recipe_url(self, url: CreateRecipeByUrl):
+    def parse_recipe_url(self, req: ScrapeRecipe):
         """Takes in a URL and attempts to scrape data and load it into the database"""
-        recipe = create_from_url(url.url)
+        recipe, extras = create_from_url(req.url)
+
+        if req.include_tags:
+            ctx = ScraperContext(self.user.id, self.group_id, self.repos)
+
+            recipe.tags = extras.use_tags(ctx)  # type: ignore
+
         return self.service.create_one(recipe).slug
 
     @router.post("/create-url/bulk", status_code=202)
@@ -159,7 +167,7 @@ class RecipeController(BaseRecipeController):
 
         for b in bulk.imports:
             try:
-                recipe = create_from_url(b.url)
+                recipe, _ = create_from_url(b.url)
 
                 if b.tags:
                     recipe.tags = b.tags
@@ -184,7 +192,7 @@ class RecipeController(BaseRecipeController):
         return {"details": "task has been started"}
 
     @router.post("/test-scrape-url")
-    def test_parse_recipe_url(self, url: CreateRecipeByUrl):
+    def test_parse_recipe_url(self, url: ScrapeRecipeTest):
         # Debugger should produce the same result as the scraper sees before cleaning
         if scraped_data := RecipeScraperPackage(url.url).scrape_url():
             return scraped_data.schema.data
@@ -264,7 +272,7 @@ class RecipeController(BaseRecipeController):
     # Image and Assets
 
     @router.post("/{slug}/image", tags=["Recipe: Images and Assets"])
-    def scrape_image_url(self, slug: str, url: CreateRecipeByUrl):
+    def scrape_image_url(self, slug: str, url: ScrapeRecipe):
         recipe = self.mixins.get_one(slug)
         data_service = RecipeDataService(recipe.id)
         data_service.scrape_image(url.url)
@@ -8,6 +8,7 @@ from .recipe_image_types import *
 from .recipe_ingredient import *
 from .recipe_notes import *
 from .recipe_nutrition import *
+from .recipe_scraper import *
 from .recipe_settings import *
 from .recipe_share_token import *  # type: ignore
 from .recipe_step import *
@@ -41,13 +41,6 @@ class RecipeTool(RecipeTag):
     on_hand: bool = False
 
 
-class CreateRecipeByUrl(BaseModel):
-    url: str
-
-    class Config:
-        schema_extra = {"example": {"url": "https://myfavoriterecipes.com/recipes"}}
-
-
 class CreateRecipeBulk(BaseModel):
     url: str
     categories: list[RecipeCategory] = None
@@ -140,21 +133,21 @@ class Recipe(RecipeSummary):
         if not self.id:
             raise ValueError("Recipe has no ID")
 
-        dir = app_dirs.RECIPE_DATA_DIR.joinpath(str(self.id))
-        dir.mkdir(exist_ok=True, parents=True)
-        return dir
+        folder = app_dirs.RECIPE_DATA_DIR.joinpath(str(self.id))
+        folder.mkdir(exist_ok=True, parents=True)
+        return folder
 
     @property
     def asset_dir(self) -> Path:
-        dir = self.directory.joinpath("assets")
-        dir.mkdir(exist_ok=True, parents=True)
-        return dir
+        folder = self.directory.joinpath("assets")
+        folder.mkdir(exist_ok=True, parents=True)
+        return folder
 
     @property
     def image_dir(self) -> Path:
-        dir = self.directory.joinpath("images")
-        dir.mkdir(exist_ok=True, parents=True)
-        return dir
+        folder = self.directory.joinpath("images")
+        folder.mkdir(exist_ok=True, parents=True)
+        return folder
 
     class Config:
         orm_mode = True
@@ -28,6 +28,7 @@ class CategoryBase(CategoryIn):
 
 class CategoryOut(CategoryBase):
     slug: str
+    group_id: UUID4
 
     class Config:
         orm_mode = True
mealie/schema/recipe/recipe_scraper.py (new file, 18 lines)
@@ -0,0 +1,18 @@
+from mealie.schema._mealie.mealie_model import MealieModel
+
+
+class ScrapeRecipeTest(MealieModel):
+    url: str
+
+
+class ScrapeRecipe(MealieModel):
+    url: str
+    include_tags: bool = False
+
+    class Config:
+        schema_extra = {
+            "example": {
+                "url": "https://myfavoriterecipes.com/recipes",
+                "includeTags": True,
+            },
+        }
@@ -91,6 +91,10 @@ class RecipeService(BaseService):
         additional_attrs["user_id"] = user.id
         additional_attrs["group_id"] = user.group_id
 
+        if additional_attrs.get("tags"):
+            for i in range(len(additional_attrs.get("tags"))):
+                additional_attrs["tags"][i]["group_id"] = user.group_id
+
         if not additional_attrs.get("recipe_ingredient"):
             additional_attrs["recipe_ingredient"] = [RecipeIngredient(note=ingredient_note)]
 
@@ -297,3 +297,32 @@ def pretty_print_timedelta(t: timedelta, max_components=None, max_decimal_places
     if out_list == []:
         return "none"
     return " ".join(out_list)
+
+
+def clean_tags(data: str | list[str]) -> list[str]:
+    """
+    Gets keywords as a list or natural language list and returns them into a list of strings of individual tags
+    """
+    if data is None:
+        return []
+
+    if isinstance(data, list):
+        all_str = True
+        i = 0
+        while all_str and i < len(data):
+            all_str = isinstance(data[i], str)
+            i = i + 1
+
+        if all_str:
+            return data
+        return []
+
+    if isinstance(data, str):
+        tag_list = data.split(",")
+
+        for i in range(len(tag_list)):
+            tag_list[i] = tag_list[i].strip().capitalize()
+
+        return tag_list
+
+    return []
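For illustration, a minimal sketch of the behavior the new clean_tags cleaner implements; the mealie.services.scraper.cleaner import path is assumed from the surrounding hunks rather than shown in this diff.

from mealie.services.scraper import cleaner  # assumed module path

# A comma-separated keyword string is split, trimmed, and capitalized.
cleaner.clean_tags("dinner, vegetarian , quick")  # ["Dinner", "Vegetarian", "Quick"]

# A list passes through only when every element is a string.
cleaner.clean_tags(["Dinner", "Quick"])  # ["Dinner", "Quick"]
cleaner.clean_tags(["Dinner", 42])       # []

# None (and any other type) falls back to an empty list.
cleaner.clean_tags(None)  # []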
@@ -1,4 +1,5 @@
 from mealie.schema.recipe.recipe import Recipe
+from mealie.services.scraper.scraped_extras import ScrapedExtras
 
 from .scraper_strategies import ABCScraperStrategy, RecipeScraperOpenGraph, RecipeScraperPackage
 
@@ -20,16 +21,16 @@ class RecipeScraper:
 
         self.scrapers = scrapers
 
-    def scrape(self, url: str) -> Recipe | None:
+    def scrape(self, url: str) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
         """
         Scrapes a recipe from the web.
         """
 
         for scraper_type in self.scrapers:
             scraper = scraper_type(url)
-            recipe = scraper.parse()
+            recipe, extras = scraper.parse()
 
             if recipe is not None:
-                return recipe
+                return recipe, extras
 
-        return None
+        return None, None
mealie/services/scraper/scraped_extras.py (new file, 49 lines)
@@ -0,0 +1,49 @@
+from dataclasses import dataclass
+
+from pydantic import UUID4
+from slugify import slugify
+
+from mealie.repos.repository_factory import AllRepositories
+from mealie.schema.recipe import TagOut
+from mealie.schema.recipe.recipe_category import TagSave
+
+
+class NoContextException(Exception):
+    pass
+
+
+@dataclass(slots=True)
+class ScraperContext:
+    user_id: UUID4
+    group_id: UUID4
+    repos: AllRepositories
+
+
+class ScrapedExtras:
+    def __init__(self) -> None:
+        self._tags: list[str] = []
+
+    def set_tags(self, tags: list[str]) -> None:
+        self._tags = tags
+
+    def use_tags(self, ctx: ScraperContext) -> list[TagOut]:
+        if not self._tags:
+            return []
+
+        repo = ctx.repos.tags.by_group(ctx.group_id)
+
+        tags = []
+        for tag in self._tags:
+            slugify_tag = slugify(tag)
+
+            # Check if tag exists
+            if db_tag := repo.get_one(slugify_tag, "slug"):
+                tags.append(db_tag)
+                continue
+
+            save_data = TagSave(name=tag, group_id=ctx.group_id)
+            db_tag = repo.create(save_data)
+
+            tags.append(db_tag)
+
+        return tags
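As a rough sketch of how the new ScrapedExtras helper might be exercised in a pytest-style test; the database_repos, user, and group fixtures are assumptions for illustration, not part of this commit.

from mealie.services.scraper.scraped_extras import ScrapedExtras, ScraperContext


def test_use_tags_creates_missing_tags(database_repos, user, group):
    # Hypothetical fixtures: an AllRepositories instance plus a user and its group.
    ctx = ScraperContext(user.id, group.id, database_repos)

    extras = ScrapedExtras()
    extras.set_tags(["Dinner", "Vegetarian"])

    # use_tags looks each keyword up by slug and creates it when missing.
    tags = extras.use_tags(ctx)
    assert {tag.name for tag in tags} == {"Dinner", "Vegetarian"}

    # A second call finds the tags created above instead of duplicating them.
    assert len(extras.use_tags(ctx)) == 2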
@@ -8,6 +8,7 @@ from mealie.core.root_logger import get_logger
 from mealie.pkgs import cache
 from mealie.schema.recipe import Recipe
 from mealie.services.recipe.recipe_data_service import RecipeDataService
+from mealie.services.scraper.scraped_extras import ScrapedExtras
 
 from .recipe_scraper import RecipeScraper
 
@@ -18,7 +19,7 @@ class ParserErrors(str, Enum):
     CONNECTION_ERROR = "CONNECTION_ERROR"
 
 
-def create_from_url(url: str) -> Recipe:
+def create_from_url(url: str) -> tuple[Recipe, ScrapedExtras]:
     """Main entry point for generating a recipe from a URL. Pass in a URL and
     a Recipe object will be returned if successful.
 
@@ -29,12 +30,12 @@ def create_from_url(url: str) -> Recipe:
         Recipe: Recipe Object
     """
     scraper = RecipeScraper()
-    new_recipe = scraper.scrape(url)
-    new_recipe.id = uuid4()
+    new_recipe, extras = scraper.scrape(url)
 
     if not new_recipe:
         raise HTTPException(status.HTTP_400_BAD_REQUEST, {"details": ParserErrors.BAD_RECIPE_DATA.value})
 
+    new_recipe.id = uuid4()
     logger = get_logger()
     logger.info(f"Image {new_recipe.image}")
 
@@ -52,4 +53,4 @@ def create_from_url(url: str) -> Recipe:
         new_recipe.name = f"No Recipe Name Found - {str(uuid4())}"
         new_recipe.slug = slugify(new_recipe.name)
 
-    return new_recipe
+    return new_recipe, extras
@@ -10,6 +10,7 @@ from w3lib.html import get_base_url
 
 from mealie.core.root_logger import get_logger
 from mealie.schema.recipe.recipe import Recipe, RecipeStep
+from mealie.services.scraper.scraped_extras import ScrapedExtras
 
 from . import cleaner
 
@@ -26,7 +27,7 @@ class ABCScraperStrategy(ABC):
         self.url = url
 
     @abstractmethod
-    def parse(self) -> Recipe | None:
+    def parse(self) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
         """Parse a recipe from a web URL.
 
         Args:
@@ -39,13 +40,15 @@ class ABCScraperStrategy(ABC):
 
 
 class RecipeScraperPackage(ABCScraperStrategy):
-    def clean_scraper(self, scraped_data: SchemaScraperFactory.SchemaScraper, url: str) -> Recipe:
+    def clean_scraper(self, scraped_data: SchemaScraperFactory.SchemaScraper, url: str) -> tuple[Recipe, ScrapedExtras]:
         def try_get_default(func_call: Callable | None, get_attr: str, default: Any, clean_func=None):
             value = default
-            try:
-                value = func_call()
-            except Exception:
-                self.logger.error(f"Error parsing recipe func_call for '{get_attr}'")
+
+            if func_call:
+                try:
+                    value = func_call()
+                except Exception:
+                    self.logger.error(f"Error parsing recipe func_call for '{get_attr}'")
 
             if value == default:
                 try:
@@ -58,7 +61,7 @@ class RecipeScraperPackage(ABCScraperStrategy):
 
             return value
 
-        def get_instructions() -> list[dict]:
+        def get_instructions() -> list[RecipeStep]:
             instruction_as_text = try_get_default(
                 scraped_data.instructions, "recipeInstructions", ["No Instructions Found"]
             )
@@ -78,7 +81,11 @@ class RecipeScraperPackage(ABCScraperStrategy):
             None, "cookTime", None, cleaner.clean_time
         )
 
-        return Recipe(
+        extras = ScrapedExtras()
+
+        extras.set_tags(try_get_default(None, "keywords", "", cleaner.clean_tags))
+
+        recipe = Recipe(
             name=try_get_default(scraped_data.title, "name", "No Name Found", cleaner.clean_string),
             slug="",
             image=try_get_default(None, "image", None),
@@ -93,6 +100,8 @@ class RecipeScraperPackage(ABCScraperStrategy):
             org_url=url,
         )
 
+        return recipe, extras
+
     def scrape_url(self) -> SchemaScraperFactory.SchemaScraper | Any | None:
         try:
             scraped_schema = scrape_me(self.url)
@@ -103,8 +112,8 @@ class RecipeScraperPackage(ABCScraperStrategy):
             self.logger.error("Recipe Scraper was unable to extract a recipe.")
             return None
 
-        except ConnectionError:
-            raise HTTPException(status.HTTP_400_BAD_REQUEST, {"details": "CONNECTION_ERROR"})
+        except ConnectionError as e:
+            raise HTTPException(status.HTTP_400_BAD_REQUEST, {"details": "CONNECTION_ERROR"}) from e
 
         # Check to see if the recipe is valid
         try:
@@ -123,7 +132,7 @@ class RecipeScraperPackage(ABCScraperStrategy):
             self.logger.debug(f"Recipe Scraper [Package] was unable to extract a recipe from {self.url}")
             return None
 
-    def parse(self) -> Recipe | None:
+    def parse(self):
         """
         Parse a recipe from a given url.
         """
@@ -177,7 +186,7 @@ class RecipeScraperOpenGraph(ABCScraperStrategy):
             "extras": [],
         }
 
-    def parse(self) -> Recipe | None:
+    def parse(self):
         """
         Parse a recipe from a given url.
         """
@@ -188,4 +197,4 @@ class RecipeScraperOpenGraph(ABCScraperStrategy):
         if og_data is None:
             return None
 
-        return Recipe(**og_data)
+        return Recipe(**og_data), ScrapedExtras()