mirror of
https://github.com/mealie-recipes/mealie.git
synced 2025-12-10 04:15:38 -05:00
feat: Improved Ingredient Matching (#2535)
* added normalization to foods and units
* changed search to reference new normalized fields
* fix tests
* added parsed food matching to backend
* prevent pagination from ordering when searching
* added extra fuzzy matching to sqlite ing matching
* added tests
* only apply search ordering when order_by is null
* enabled post-search fuzzy matching for postgres
* fixed postgres fuzzy search test
* idk why this is failing
* 🤦
* simplified frontend ing matching
and restored automatic unit creation
* tightened food fuzzy threshold
* change to rapidfuzz
* sped up fuzzy matching with process
* fixed units not matching by abbreviation
* fast return for exact matches
* replace db searching with pure fuzz
* added fuzzy normalization
* tightened unit fuzzy matching thresh
* cleaned up comments/var names
* ran matching logic through the dryer
* oops
* simplified order by application logic
This commit is contained in:
@@ -1,20 +1,32 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from fractions import Fraction
|
||||
from typing import TypeVar
|
||||
|
||||
from pydantic import UUID4, BaseModel
|
||||
from rapidfuzz import fuzz, process
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from mealie.core.root_logger import get_logger
|
||||
from mealie.db.models.recipe.ingredient import IngredientFoodModel, IngredientUnitModel
|
||||
from mealie.repos.all_repositories import get_repositories
|
||||
from mealie.repos.repository_factory import AllRepositories
|
||||
from mealie.schema.recipe import RecipeIngredient
|
||||
from mealie.schema.recipe.recipe_ingredient import (
|
||||
MAX_INGREDIENT_DENOMINATOR,
|
||||
CreateIngredientFood,
|
||||
CreateIngredientUnit,
|
||||
IngredientConfidence,
|
||||
IngredientFood,
|
||||
IngredientUnit,
|
||||
ParsedIngredient,
|
||||
RegisteredParser,
|
||||
)
|
||||
from mealie.schema.response.pagination import PaginationQuery
|
||||
|
||||
from . import brute, crfpp
|
||||
|
||||
logger = get_logger(__name__)
|
||||
T = TypeVar("T", bound=BaseModel)
|
||||
|
||||
|
||||
class ABCIngredientParser(ABC):
|
||||
@@ -22,6 +34,53 @@ class ABCIngredientParser(ABC):
|
||||
Abstract class for ingredient parsers.
|
||||
"""
|
||||
|
||||
def __init__(self, group_id: UUID4, session: Session) -> None:
|
||||
self.group_id = group_id
|
||||
self.session = session
|
||||
|
||||
self._foods_by_name: dict[str, IngredientFood] | None = None
|
||||
self._units_by_name: dict[str, IngredientUnit] | None = None
|
||||
|
||||
@property
|
||||
def _repos(self) -> AllRepositories:
|
||||
return get_repositories(self.session)
|
||||
|
||||
@property
|
||||
def foods_by_normalized_name(self) -> dict[str, IngredientFood]:
|
||||
if self._foods_by_name is None:
|
||||
foods_repo = self._repos.ingredient_foods.by_group(self.group_id)
|
||||
|
||||
query = PaginationQuery(page=1, per_page=-1)
|
||||
all_foods = foods_repo.page_all(query).items
|
||||
self._foods_by_name = {IngredientFoodModel.normalize(food.name): food for food in all_foods if food.name}
|
||||
|
||||
return self._foods_by_name
|
||||
|
||||
@property
|
||||
def units_by_normalized_name_or_abbreviation(self) -> dict[str, IngredientUnit]:
|
||||
if self._units_by_name is None:
|
||||
units_repo = self._repos.ingredient_units.by_group(self.group_id)
|
||||
|
||||
query = PaginationQuery(page=1, per_page=-1)
|
||||
all_units = units_repo.page_all(query).items
|
||||
self._units_by_name = {
|
||||
IngredientUnitModel.normalize(unit.name): unit for unit in all_units if unit.name
|
||||
} | {IngredientUnitModel.normalize(unit.abbreviation): unit for unit in all_units if unit.abbreviation}
|
||||
|
||||
return self._units_by_name
|
||||
|
||||
@property
|
||||
def food_fuzzy_match_threshold(self) -> int:
|
||||
"""Minimum threshold to fuzzy match against a database food search"""
|
||||
|
||||
return 85
|
||||
|
||||
@property
|
||||
def unit_fuzzy_match_threshold(self) -> int:
|
||||
"""Minimum threshold to fuzzy match against a database unit search"""
|
||||
|
||||
return 70
|
||||
|
||||
@abstractmethod
|
||||
def parse_one(self, ingredient_string: str) -> ParsedIngredient:
|
||||
...
|
||||
@@ -30,19 +89,64 @@ class ABCIngredientParser(ABC):
|
||||
def parse(self, ingredients: list[str]) -> list[ParsedIngredient]:
|
||||
...
|
||||
|
||||
@classmethod
|
||||
def find_match(cls, match_value: str, *, store_map: dict[str, T], fuzzy_match_threshold: int = 0) -> T | None:
|
||||
# check for literal matches
|
||||
if match_value in store_map:
|
||||
return store_map[match_value]
|
||||
|
||||
# fuzzy match against food store
|
||||
fuzz_result = process.extractOne(match_value, store_map.keys(), scorer=fuzz.ratio)
|
||||
if fuzz_result is None:
|
||||
return None
|
||||
|
||||
choice, score, _ = fuzz_result
|
||||
if score < fuzzy_match_threshold:
|
||||
return None
|
||||
else:
|
||||
return store_map[choice]
|
||||
|
||||
def find_food_match(self, food: IngredientFood | CreateIngredientFood) -> IngredientFood | None:
|
||||
if isinstance(food, IngredientFood):
|
||||
return food
|
||||
|
||||
match_value = IngredientFoodModel.normalize(food.name)
|
||||
return self.find_match(
|
||||
match_value,
|
||||
store_map=self.foods_by_normalized_name,
|
||||
fuzzy_match_threshold=self.food_fuzzy_match_threshold,
|
||||
)
|
||||
|
||||
def find_unit_match(self, unit: IngredientUnit | CreateIngredientUnit) -> IngredientUnit | None:
|
||||
if isinstance(unit, IngredientUnit):
|
||||
return unit
|
||||
|
||||
match_value = IngredientUnitModel.normalize(unit.name)
|
||||
return self.find_match(
|
||||
match_value,
|
||||
store_map=self.units_by_normalized_name_or_abbreviation,
|
||||
fuzzy_match_threshold=self.unit_fuzzy_match_threshold,
|
||||
)
|
||||
|
||||
def find_ingredient_match(self, ingredient: ParsedIngredient) -> ParsedIngredient:
|
||||
if ingredient.ingredient.food and (food_match := self.find_food_match(ingredient.ingredient.food)):
|
||||
ingredient.ingredient.food = food_match
|
||||
|
||||
if ingredient.ingredient.unit and (unit_match := self.find_unit_match(ingredient.ingredient.unit)):
|
||||
ingredient.ingredient.unit = unit_match
|
||||
|
||||
return ingredient
|
||||
|
||||
|
||||
class BruteForceParser(ABCIngredientParser):
|
||||
"""
|
||||
Brute force ingredient parser.
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
pass
|
||||
|
||||
def parse_one(self, ingredient: str) -> ParsedIngredient:
|
||||
bfi = brute.parse(ingredient)
|
||||
|
||||
return ParsedIngredient(
|
||||
parsed_ingredient = ParsedIngredient(
|
||||
input=ingredient,
|
||||
ingredient=RecipeIngredient(
|
||||
unit=CreateIngredientUnit(name=bfi.unit),
|
||||
@@ -53,6 +157,8 @@ class BruteForceParser(ABCIngredientParser):
|
||||
),
|
||||
)
|
||||
|
||||
return self.find_ingredient_match(parsed_ingredient)
|
||||
|
||||
def parse(self, ingredients: list[str]) -> list[ParsedIngredient]:
|
||||
return [self.parse_one(ingredient) for ingredient in ingredients]
|
||||
|
||||
@@ -62,9 +168,6 @@ class NLPParser(ABCIngredientParser):
|
||||
Class for CRFPP ingredient parsers.
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
pass
|
||||
|
||||
def _crf_to_ingredient(self, crf_model: crfpp.CRFIngredient) -> ParsedIngredient:
|
||||
ingredient = None
|
||||
|
||||
@@ -87,7 +190,7 @@ class NLPParser(ABCIngredientParser):
|
||||
note=crf_model.input,
|
||||
)
|
||||
|
||||
return ParsedIngredient(
|
||||
parsed_ingredient = ParsedIngredient(
|
||||
input=crf_model.input,
|
||||
ingredient=ingredient,
|
||||
confidence=IngredientConfidence(
|
||||
@@ -97,6 +200,8 @@ class NLPParser(ABCIngredientParser):
|
||||
),
|
||||
)
|
||||
|
||||
return self.find_ingredient_match(parsed_ingredient)
|
||||
|
||||
def parse(self, ingredients: list[str]) -> list[ParsedIngredient]:
|
||||
crf_models = crfpp.convert_list_to_crf_model(ingredients)
|
||||
return [self._crf_to_ingredient(crf_model) for crf_model in crf_models]
|
||||
@@ -112,9 +217,9 @@ __registrar = {
|
||||
}
|
||||
|
||||
|
||||
def get_parser(parser: RegisteredParser) -> ABCIngredientParser:
|
||||
def get_parser(parser: RegisteredParser, group_id: UUID4, session: Session) -> ABCIngredientParser:
|
||||
"""
|
||||
get_parser returns an ingrdeint parser based on the string enum value
|
||||
passed in.
|
||||
"""
|
||||
return __registrar.get(parser, NLPParser)()
|
||||
return __registrar.get(parser, NLPParser)(group_id, session)
|
||||
|
||||
Reference in New Issue
Block a user