feat: Structured Yields (#4489)

Co-authored-by: Kuchenpirat <24235032+Kuchenpirat@users.noreply.github.com>
This commit is contained in:
Michael Genson
2024-11-20 08:46:27 -06:00
committed by GitHub
parent c8cd68b4f0
commit 327da02fc8
39 changed files with 1018 additions and 551 deletions

View File

@@ -89,7 +89,8 @@ class RecipeModel(SqlAlchemyBase, BaseMixins):
cook_time: Mapped[str | None] = mapped_column(sa.String)
recipe_yield: Mapped[str | None] = mapped_column(sa.String)
recipeCuisine: Mapped[str | None] = mapped_column(sa.String)
recipe_yield_quantity: Mapped[float] = mapped_column(sa.Float, index=True, default=0)
recipe_servings: Mapped[float] = mapped_column(sa.Float, index=True, default=0)
assets: Mapped[list[RecipeAsset]] = orm.relationship("RecipeAsset", cascade="all, delete-orphan")
nutrition: Mapped[Nutrition] = orm.relationship("Nutrition", uselist=False, cascade="all, delete-orphan")
@@ -131,7 +132,6 @@ class RecipeModel(SqlAlchemyBase, BaseMixins):
notes: Mapped[list[Note]] = orm.relationship("Note", cascade="all, delete-orphan")
org_url: Mapped[str | None] = mapped_column(sa.String)
extras: Mapped[list[ApiExtras]] = orm.relationship("ApiExtras", cascade="all, delete-orphan")
is_ocr_recipe: Mapped[bool | None] = mapped_column(sa.Boolean, default=False)
# Time Stamp Properties
date_added: Mapped[date | None] = mapped_column(sa.Date, default=get_utc_today)
@@ -167,6 +167,10 @@ class RecipeModel(SqlAlchemyBase, BaseMixins):
},
)
# Deprecated
recipeCuisine: Mapped[str | None] = mapped_column(sa.String)
is_ocr_recipe: Mapped[bool | None] = mapped_column(sa.Boolean, default=False)
@validates("name")
def validate_name(self, _, name):
assert name != ""

View File

@@ -8,6 +8,14 @@
"recipe-defaults": {
"ingredient-note": "1 Cup Flour",
"step-text": "Recipe steps as well as other fields in the recipe page support markdown syntax.\n\n**Add a link**\n\n[My Link](https://demo.mealie.io)\n"
},
"servings-text": {
"makes": "Makes",
"serves": "Serves",
"serving": "Serving",
"servings": "Servings",
"yield": "Yield",
"yields": "Yields"
}
},
"mealplan": {

View File

@@ -29,3 +29,9 @@ def local_provider(accept_language: str | None = Header(None)) -> Translator:
factory = _load_factory()
accept_language = accept_language or "en-US"
return factory.get(accept_language)
@lru_cache
def get_all_translations(key: str) -> dict[str, str]:
    """
    Look up ``key`` in every locale the provider factory supports.

    Returns a mapping of locale name to that locale's translation of ``key``.
    Results are memoised per key by ``lru_cache``, so repeated calls with the
    same key hand back the same dict object.
    """
    factory = _load_factory()

    translations: dict[str, str] = {}
    for locale in factory.supported_locales:
        translations[locale] = factory.get(locale).t(key)
    return translations

View File

@@ -1,4 +1,5 @@
from dataclasses import dataclass, field
from functools import cached_property
from pathlib import Path
from .json_provider import JsonProvider
@@ -10,7 +11,7 @@ class InUseProvider:
locks: int
@dataclass(slots=True)
@dataclass
class ProviderFactory:
directory: Path
fallback_locale: str = "en-US"
@@ -22,6 +23,10 @@ class ProviderFactory:
def fallback_file(self) -> Path:
return self.directory / self.filename_format.format(locale=self.fallback_locale, format="json")
@cached_property
def supported_locales(self) -> list[str]:
    """
    List the locales that have a JSON translation file in ``self.directory``.

    The locale name is the file stem of every file matching
    ``filename_format`` with a ``*`` wildcard in the locale slot. The
    directory is only scanned on first access; ``cached_property`` stores
    the result on the instance afterwards.
    """
    locale_glob = self.filename_format.format(locale="*", format="json")
    return [locale_file.stem for locale_file in self.directory.glob(locale_glob)]
def _load(self, locale: str) -> JsonProvider:
filename = self.filename_format.format(locale=locale, format="json")
path = self.directory / filename

View File

@@ -116,7 +116,7 @@ def content_with_meta(group_slug: str, recipe: Recipe) -> str:
"prepTime": recipe.prep_time,
"cookTime": recipe.cook_time,
"totalTime": recipe.total_time,
"recipeYield": recipe.recipe_yield,
"recipeYield": recipe.recipe_yield_display,
"recipeIngredient": ingredients,
"recipeInstructions": [i.text for i in recipe.recipe_instructions] if recipe.recipe_instructions else [],
"recipeCategory": [c.name for c in recipe.recipe_category] if recipe.recipe_category else [],

View File

@@ -91,6 +91,8 @@ class RecipeSummary(MealieModel):
name: str | None = None
slug: Annotated[str, Field(validate_default=True)] = ""
image: Any | None = None
recipe_servings: float = 0
recipe_yield_quantity: float = 0
recipe_yield: str | None = None
total_time: str | None = None
@@ -122,6 +124,10 @@ class RecipeSummary(MealieModel):
return val
@property
def recipe_yield_display(self) -> str:
    """
    Human-readable yield, e.g. ``"4.0 pies"``.

    Parts that are unset are left out: a quantity of 0 (the field default)
    and a missing or empty ``recipe_yield`` contribute nothing, so a recipe
    without yield information renders as an empty string rather than the
    literal text ``"0 None"``.
    """
    parts: list[str] = []
    if self.recipe_yield_quantity:
        parts.append(str(self.recipe_yield_quantity))
    if self.recipe_yield:
        parts.append(self.recipe_yield)
    return " ".join(parts).strip()
@classmethod
def loader_options(cls) -> list[LoaderOption]:
return [

View File

@@ -92,10 +92,8 @@ class TandoorMigrator(BaseMigrator):
recipe_data.pop("working_time", 0), recipe_data.pop("waiting_time", 0)
)
serving_size = recipe_data.pop("servings", 0)
serving_text = recipe_data.pop("servings_text", "")
if serving_size and serving_text:
recipe_data["recipeYield"] = f"{serving_size} {serving_text}"
recipe_data["recipeYieldQuantity"] = recipe_data.pop("servings", 0)
recipe_data["recipeYield"] = recipe_data.pop("servings_text", "")
try:
recipe_image_path = next(source_dir.glob("image.*"))

View File

@@ -1,23 +0,0 @@
import re
# Leading text of 1-6 characters, whitespace, then a parenthesised group followed by whitespace.
compiled_match = re.compile(r"(.){1,6}\s\((.[^\(\)])+\)\s")
# Locates the parenthesised group itself so it can be cut out.
compiled_search = re.compile(r"\((.[^\(])+\)")


def move_parens_to_end(ing_str) -> str:
    """
    Relocate a leading parenthesised group to the end of the string.

    The move only happens when the start of the string matches
    ``compiled_match``; the first span found by ``compiled_search`` is then
    cut out and appended after a single space. Any other input is returned
    unchanged.
    """
    if compiled_match.match(ing_str) is None:
        return ing_str

    found = compiled_search.search(ing_str)
    if found is None:
        return ing_str

    open_idx, close_idx = found.span()
    parenthetical = ing_str[open_idx:close_idx]
    return f"{ing_str[:open_idx]}{ing_str[close_idx:]} {parenthetical}"
def check_char(char, *eql) -> bool:
    """Return True when ``char`` equals at least one of the extra positional arguments."""
    for candidate in eql:
        if char == candidate:
            return True
    return False

View File

@@ -3,7 +3,7 @@ import unicodedata
from pydantic import BaseModel, ConfigDict
from .._helpers import check_char, move_parens_to_end
from ..parser_utils import check_char, move_parens_to_end
class BruteParsedIngredient(BaseModel):

View File

@@ -1,5 +1,6 @@
import re
import unicodedata
from mealie.services.parser_services.parser_utils import convert_vulgar_fractions_to_regular_fractions
replace_abbreviations = {
"cup": " cup ",
@@ -29,23 +30,6 @@ def remove_periods(string: str) -> str:
return re.sub(r"(?<!\d)\.(?!\d)", "", string)
def replace_fraction_unicode(string: str):
    """
    Replace Unicode vulgar-fraction characters with ASCII ``n/d`` text.

    Every character whose Unicode name starts with "VULGAR FRACTION" is
    replaced by a space followed by ``numerator/denominator`` (the space keeps
    mixed numbers such as "1½" from fusing into "11/2"). All fractions in the
    string are converted, not only the first kind encountered. Strings without
    any vulgar fraction are returned untouched.
    """
    converted: list[str] = []
    found_fraction = False

    for c in string:
        # Code points without a name yield "" here and are copied through as-is.
        if unicodedata.name(c, "").startswith("VULGAR FRACTION"):
            # NFKC decomposes the character into numerator, U+2044 FRACTION SLASH, denominator.
            numerator, _, denominator = unicodedata.normalize("NFKC", c).partition("\u2044")
            converted.append(f" {numerator}/{denominator}")
            found_fraction = True
        else:
            converted.append(c)

    if not found_fraction:
        return string

    # The inserted leading space can land next to an existing one; collapse the pair.
    return "".join(converted).replace("  ", " ")
def wrap_or_clause(string: str):
"""
Attempts to wrap or clauses in ()
@@ -75,7 +59,7 @@ def pre_process_string(string: str) -> str:
"""
string = string.lower()
string = replace_fraction_unicode(string)
string = convert_vulgar_fractions_to_regular_fractions(string)
string = remove_periods(string)
string = replace_common_abbreviations(string)

View File

@@ -0,0 +1,111 @@
import re
from fractions import Fraction
# Leading text of 1-6 characters, whitespace, then a parenthesised group followed by whitespace.
compiled_match = re.compile(r"(.){1,6}\s\((.[^\(\)])+\)\s")
# Locates the parenthesised group itself so it can be cut out.
compiled_search = re.compile(r"\((.[^\(])+\)")


def move_parens_to_end(ing_str) -> str:
    """
    Relocate a leading parenthesised group to the end of the string.

    The move only happens when the start of the string matches
    ``compiled_match``; the first span found by ``compiled_search`` is then
    cut out and appended after a single space. Any other input is returned
    unchanged.
    """
    if compiled_match.match(ing_str) is None:
        return ing_str

    found = compiled_search.search(ing_str)
    if found is None:
        return ing_str

    open_idx, close_idx = found.span()
    parenthetical = ing_str[open_idx:close_idx]
    return f"{ing_str[:open_idx]}{ing_str[close_idx:]} {parenthetical}"
def check_char(char, *eql) -> bool:
    """Return True when ``char`` equals at least one of the extra positional arguments."""
    for candidate in eql:
        if char == candidate:
            return True
    return False
def convert_vulgar_fractions_to_regular_fractions(text: str) -> str:
    """
    Replace single-character vulgar fractions (e.g. U+00BD) with ASCII ``n/d`` text.

    Each replacement is prefixed with a space so mixed numbers stay readable
    ("1½" becomes "1 1/2", not "11/2"). Leading and trailing whitespace is
    stripped from the result.
    """
    # Keys are written as escapes so they survive tools that drop non-Latin-1 characters.
    vulgar_fractions = {
        "\u00bc": "1/4",
        "\u00bd": "1/2",
        "\u00be": "3/4",
        "\u2150": "1/7",
        "\u2151": "1/9",
        "\u2152": "1/10",
        "\u2153": "1/3",
        "\u2154": "2/3",
        "\u2155": "1/5",
        "\u2156": "2/5",
        "\u2157": "3/5",
        "\u2158": "4/5",
        "\u2159": "1/6",
        "\u215a": "5/6",
        "\u215b": "1/8",
        "\u215c": "3/8",
        "\u215d": "5/8",
        "\u215e": "7/8",
    }

    # if we don't add a space in front of the fraction, mixed fractions will be broken
    # e.g. "1½" -> "11/2"
    translation = str.maketrans(
        {vulgar_fraction: f" {regular_fraction}" for vulgar_fraction, regular_fraction in vulgar_fractions.items()}
    )
    return text.translate(translation).strip()
def extract_quantity_from_string(source_str: str) -> tuple[float, str]:
    """
    Pull the first recognisable quantity out of a string.

    Three notations are tried in order over the whole string: a mixed
    fraction ("1 1/2"), a plain fraction ("1/2"), then an integer or decimal
    number. The first notation that matches wins; the matched text is removed
    and the stripped remainder is returned alongside the quantity.

    Returns:
        tuple[float, str]: the quantity and the remaining text. When the input
        is blank the result is ``(0, "")``. When nothing matches, or a matched
        fraction has a zero denominator, the quantity is 0 and the text is the
        stripped input with vulgar fractions converted.
    """
    cleaned = source_str.strip()
    if not cleaned:
        return 0, ""

    cleaned = convert_vulgar_fractions_to_regular_fractions(cleaned)

    def _from_mixed_fraction(found: re.Match) -> float:
        whole, numerator, denominator = (int(part) for part in found.groups())
        return whole + float(Fraction(numerator, denominator))

    def _from_fraction(found: re.Match) -> float:
        numerator, denominator = (int(part) for part in found.groups())
        return float(Fraction(numerator, denominator))

    def _from_number(found: re.Match) -> float:
        return float(found.group())

    # Order matters: most specific notation first.
    notations = (
        (re.compile(r"(\d+)\s+(\d+)/(\d+)"), _from_mixed_fraction),
        (re.compile(r"(\d+)/(\d+)"), _from_fraction),
        (re.compile(r"\d+(\.\d+)?"), _from_number),
    )

    for pattern, to_quantity in notations:
        found = pattern.search(cleaned)
        if found is None:
            continue

        try:
            amount = to_quantity(found)
        except ZeroDivisionError:
            # A zero denominator ends the search; later notations are not tried.
            break

        leftover = (cleaned[: found.start()] + cleaned[found.end() :]).strip()
        return amount, leftover

    # Nothing usable was found: report 0 alongside the cleaned input.
    return 0, cleaned

View File

@@ -10,8 +10,9 @@ from datetime import datetime, timedelta
from slugify import slugify
from mealie.core.root_logger import get_logger
from mealie.lang.providers import Translator
from mealie.lang.providers import Translator, get_all_translations
from mealie.schema.recipe.recipe import Recipe
from mealie.services.parser_services.parser_utils import extract_quantity_from_string
logger = get_logger("recipe-scraper")
@@ -51,18 +52,21 @@ def clean(recipe_data: Recipe | dict, translator: Translator, url=None) -> Recip
recipe_data = recipe_data_dict
recipe_data["slug"] = slugify(recipe_data.get("name", ""))
recipe_data["description"] = clean_string(recipe_data.get("description", ""))
# Times
recipe_data["prepTime"] = clean_time(recipe_data.get("prepTime"), translator)
recipe_data["performTime"] = clean_time(recipe_data.get("performTime"), translator)
recipe_data["totalTime"] = clean_time(recipe_data.get("totalTime"), translator)
recipe_data["recipeServings"], recipe_data["recipeYieldQuantity"], recipe_data["recipeYield"] = clean_yield(
recipe_data.get("recipeYield")
)
recipe_data["recipeCategory"] = clean_categories(recipe_data.get("recipeCategory", []))
recipe_data["recipeYield"] = clean_yield(recipe_data.get("recipeYield"))
recipe_data["recipeIngredient"] = clean_ingredients(recipe_data.get("recipeIngredient", []))
recipe_data["recipeInstructions"] = clean_instructions(recipe_data.get("recipeInstructions", []))
recipe_data["image"] = clean_image(recipe_data.get("image"))[0]
recipe_data["slug"] = slugify(recipe_data.get("name", ""))
recipe_data["orgURL"] = url or recipe_data.get("orgURL")
recipe_data["notes"] = clean_notes(recipe_data.get("notes"))
recipe_data["rating"] = clean_int(recipe_data.get("rating"))
@@ -324,7 +328,31 @@ def clean_notes(notes: typing.Any) -> list[dict] | None:
return parsed_notes
def clean_yield(yld: str | list[str] | None) -> str:
@functools.lru_cache
def _get_servings_options() -> set[str]:
    """
    Collect the servings-related phrases from every available translation.

    Each phrase is stripped and lower-cased. The set is built once and then
    served from the ``lru_cache``.
    """
    translation_keys = (
        "recipe.servings-text.makes",
        "recipe.servings-text.serves",
        "recipe.servings-text.serving",
        "recipe.servings-text.servings",
        "recipe.servings-text.yield",
        "recipe.servings-text.yields",
    )
    return {
        phrase.strip().lower()
        for translation_key in translation_keys
        for phrase in get_all_translations(translation_key).values()
    }
def _is_serving_string(txt: str) -> bool:
    """Return True when ``txt`` contains any known servings phrase (case-insensitive substring match)."""
    normalized = txt.strip().lower()
    return any(option in normalized for option in _get_servings_options())
def clean_yield(yields: str | list[str] | None) -> tuple[float, float, str]:
"""
yield_amount attempts to parse out the yield amount from a recipe.
@@ -333,15 +361,34 @@ def clean_yield(yld: str | list[str] | None) -> str:
- `["4 servings", "4 Pies"]` - returns the last value
Returns:
float: The servings, if it can be parsed else 0
float: The yield quantity, if it can be parsed else 0
str: The yield amount, if it can be parsed else an empty string
"""
if not yld:
return ""
servings_qty: float = 0
yld_qty: float = 0
yld_str = ""
if isinstance(yld, list):
return yld[-1]
if not yields:
return servings_qty, yld_qty, yld_str
return yld
if not isinstance(yields, list):
yields = [yields]
for yld in yields:
if not yld:
continue
if not isinstance(yld, str):
yld = str(yld)
qty, txt = extract_quantity_from_string(yld)
if qty and _is_serving_string(yld):
servings_qty = qty
else:
yld_qty = qty
yld_str = txt
return servings_qty, yld_qty, yld_str
def clean_time(time_entry: str | timedelta | None, translator: Translator) -> None | str: