feat: Improved Ingredient Matching (#2535)

* added normalization to foods and units

* changed search to reference new normalized fields

* fix tests

* added parsed food matching to backend

* prevent pagination from ordering when searching

* added extra fuzzy matching to sqlite ing matching

* added tests

* only apply search ordering when order_by is null

* enabled post-search fuzzy matching for postgres

* fixed postgres fuzzy search test

* idk why this is failing

* 🤦

* simplified frontend ing matching
and restored automatic unit creation

* tightened food fuzzy threshold

* change to rapidfuzz

* sped up fuzzy matching with process

* fixed units not matching by abbreviation

* fast return for exact matches

* replace db searching with pure fuzz

* added fuzzy normalization

* tightened unit fuzzy matching thresh

* cleaned up comments/var names

* ran matching logic through the dryer

* oops

* simplified order by application logic
This commit is contained in:
Michael Genson
2023-09-15 12:19:34 -05:00
committed by GitHub
parent 084ad4228b
commit 2dfbe9f08d
17 changed files with 738 additions and 97 deletions

View File

@@ -2,6 +2,7 @@ from datetime import datetime
from sqlalchemy import DateTime, Integer
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
from text_unidecode import unidecode
class SqlAlchemyBase(DeclarativeBase):
@@ -9,6 +10,10 @@ class SqlAlchemyBase(DeclarativeBase):
created_at: Mapped[datetime | None] = mapped_column(DateTime, default=datetime.now, index=True)
update_at: Mapped[datetime | None] = mapped_column(DateTime, default=datetime.now, onupdate=datetime.now)
@classmethod
def normalize(cls, val: str) -> str:
    """Return the canonical search form of ``val``.

    The text is passed through ``unidecode``, then lower-cased, then
    stripped of leading and trailing whitespace.
    """
    transliterated = unidecode(val)
    lowered = transliterated.lower()
    return lowered.strip()
class BaseMixins:
"""

View File

@@ -4,7 +4,6 @@ import sqlalchemy as sa
from sqlalchemy import Boolean, Float, ForeignKey, Integer, String, event, orm
from sqlalchemy.orm import Mapped, mapped_column
from sqlalchemy.orm.session import Session
from text_unidecode import unidecode
from mealie.db.models._model_base import BaseMixins, SqlAlchemyBase
from mealie.db.models.labels import MultiPurposeLabel
@@ -34,9 +33,56 @@ class IngredientUnitModel(SqlAlchemyBase, BaseMixins):
"RecipeIngredientModel", back_populates="unit"
)
# Automatically updated by sqlalchemy event, do not write to this manually
name_normalized: Mapped[str | None] = mapped_column(sa.String, index=True)
abbreviation_normalized: Mapped[str | None] = mapped_column(String, index=True)
@auto_init()
def __init__(self, **_) -> None:
pass
def __init__(self, session: Session, name: str | None = None, abbreviation: str | None = None, **_) -> None:
    """Seed the normalized search columns and build the index definitions.

    Args:
        session: Session whose bind name decides whether the
            PostgreSQL-only GIN (``gin_trgm_ops``) indices are added.
        name: Unit name; when given, its normalized form is stored in
            ``name_normalized``.
        abbreviation: Unit abbreviation; when given, its normalized form is
            stored in ``abbreviation_normalized``.
        **_: Any remaining keyword arguments are ignored by this method.
    """
    if name is not None:
        self.name_normalized = self.normalize(name)

    if abbreviation is not None:
        # Write to the normalized column, not to ``abbreviation`` itself:
        # assigning the normalized text to ``abbreviation`` would replace the
        # value the caller supplied and leave ``abbreviation_normalized``
        # unset by this method.
        self.abbreviation_normalized = self.normalize(abbreviation)

    # Base set of indices, created for every database backend.
    tableargs = [
        sa.Index(
            "ix_ingredient_units_name_normalized",
            "name_normalized",
            unique=False,
        ),
        sa.Index(
            "ix_ingredient_units_abbreviation_normalized",
            "abbreviation_normalized",
            unique=False,
        ),
    ]

    # GIN indices with the ``gin_trgm_ops`` operator class are only declared
    # when the session is bound to PostgreSQL.
    if session.get_bind().name == "postgresql":
        tableargs.extend(
            [
                sa.Index(
                    "ix_ingredient_units_name_normalized_gin",
                    "name_normalized",
                    unique=False,
                    postgresql_using="gin",
                    postgresql_ops={
                        "name_normalized": "gin_trgm_ops",
                    },
                ),
                sa.Index(
                    "ix_ingredient_units_abbreviation_normalized_gin",
                    "abbreviation_normalized",
                    unique=False,
                    postgresql_using="gin",
                    postgresql_ops={
                        "abbreviation_normalized": "gin_trgm_ops",
                    },
                ),
            ]
        )

    self.__table_args__ = tuple(tableargs)
class IngredientFoodModel(SqlAlchemyBase, BaseMixins):
@@ -57,10 +103,39 @@ class IngredientFoodModel(SqlAlchemyBase, BaseMixins):
label_id: Mapped[GUID | None] = mapped_column(GUID, ForeignKey("multi_purpose_labels.id"), index=True)
label: Mapped[MultiPurposeLabel | None] = orm.relationship(MultiPurposeLabel, uselist=False, back_populates="foods")
# Automatically updated by sqlalchemy event, do not write to this manually
name_normalized: Mapped[str | None] = mapped_column(sa.String, index=True)
@api_extras
@auto_init()
def __init__(self, **_) -> None:
pass
def __init__(self, session: Session, name: str | None = None, **_) -> None:
    """Seed ``name_normalized`` and build the index definitions for foods.

    Args:
        session: Session whose bind name decides whether the
            PostgreSQL-only GIN (``gin_trgm_ops``) index is added.
        name: Food name; when given, its normalized form is stored in
            ``name_normalized``.
        **_: Any remaining keyword arguments are ignored by this method.
    """
    if name is not None:
        self.name_normalized = self.normalize(name)

    # Index that is declared for every database backend.
    indices = [
        sa.Index("ix_ingredient_foods_name_normalized", "name_normalized", unique=False),
    ]

    bound_to_postgres = session.get_bind().name == "postgresql"
    if bound_to_postgres:
        # Extra GIN index, declared only for PostgreSQL.
        gin_index = sa.Index(
            "ix_ingredient_foods_name_normalized_gin",
            "name_normalized",
            unique=False,
            postgresql_using="gin",
            postgresql_ops={"name_normalized": "gin_trgm_ops"},
        )
        indices.append(gin_index)

    self.__table_args__ = tuple(indices)
class RecipeIngredientModel(SqlAlchemyBase, BaseMixins):
@@ -92,10 +167,10 @@ class RecipeIngredientModel(SqlAlchemyBase, BaseMixins):
def __init__(self, session: Session, note: str | None = None, orginal_text: str | None = None, **_) -> None:
# SQLAlchemy events do not seem to register things that are set during auto_init
if note is not None:
self.note_normalized = unidecode(note).lower().strip()
self.note_normalized = self.normalize(note)
if orginal_text is not None:
self.orginal_text = unidecode(orginal_text).lower().strip()
self.orginal_text = self.normalize(orginal_text)
tableargs = [ # base set of indices
sa.Index(
@@ -136,17 +211,41 @@ class RecipeIngredientModel(SqlAlchemyBase, BaseMixins):
self.__table_args__ = tuple(tableargs)
@event.listens_for(RecipeIngredientModel.note, "set")
def receive_note(target: RecipeIngredientModel, value: str, oldvalue, initiator):
@event.listens_for(IngredientUnitModel.name, "set")
def receive_unit_name(target: IngredientUnitModel, value: str | None, oldvalue, initiator):
if value is not None:
target.note_normalized = unidecode(value).lower().strip()
target.name_normalized = IngredientUnitModel.normalize(value)
else:
target.name_normalized = None
@event.listens_for(IngredientUnitModel.abbreviation, "set")
def receive_unit_abbreviation(target: IngredientUnitModel, value: str | None, oldvalue, initiator):
    """Keep ``abbreviation_normalized`` in step with ``abbreviation``.

    Registered as a "set" listener on ``IngredientUnitModel.abbreviation``:
    a ``None`` value clears the normalized column, any other value is stored
    in its normalized form. ``oldvalue`` and ``initiator`` are unused.
    """
    target.abbreviation_normalized = None if value is None else IngredientUnitModel.normalize(value)
@event.listens_for(IngredientFoodModel.name, "set")
def receive_food_name(target: IngredientFoodModel, value: str | None, oldvalue, initiator):
    """Keep ``name_normalized`` in step with a food's ``name``.

    Registered as a "set" listener on ``IngredientFoodModel.name``.
    ``oldvalue`` and ``initiator`` are unused.
    """
    if value is None:
        # Nothing to normalize: clear the derived column as well.
        target.name_normalized = None
        return

    target.name_normalized = IngredientFoodModel.normalize(value)
@event.listens_for(RecipeIngredientModel.note, "set")
def receive_ingredient_note(target: RecipeIngredientModel, value: str | None, oldvalue, initiator):
    """Keep ``note_normalized`` in step with an ingredient's ``note``.

    Registered as a "set" listener on ``RecipeIngredientModel.note``: the
    normalized column receives the normalized note, or ``None`` when the note
    is cleared. ``oldvalue`` and ``initiator`` are unused.
    """
    normalized_note = RecipeIngredientModel.normalize(value) if value is not None else None
    target.note_normalized = normalized_note
@event.listens_for(RecipeIngredientModel.original_text, "set")
def receive_original_text(target: RecipeIngredientModel, value: str, oldvalue, initiator):
def receive_ingredient_original_text(target: RecipeIngredientModel, value: str | None, oldvalue, initiator):
if value is not None:
target.original_text_normalized = unidecode(value).lower().strip()
target.original_text_normalized = RecipeIngredientModel.normalize(value)
else:
target.original_text_normalized = None

View File

@@ -6,7 +6,6 @@ import sqlalchemy.orm as orm
from sqlalchemy import event
from sqlalchemy.ext.orderinglist import ordering_list
from sqlalchemy.orm import Mapped, mapped_column, validates
from text_unidecode import unidecode
from mealie.db.models._model_utils.guid import GUID
@@ -189,10 +188,10 @@ class RecipeModel(SqlAlchemyBase, BaseMixins):
# SQLAlchemy events do not seem to register things that are set during auto_init
if name is not None:
self.name_normalized = unidecode(name).lower().strip()
self.name_normalized = self.normalize(name)
if description is not None:
self.description_normalized = unidecode(description).lower().strip()
self.description_normalized = self.normalize(description)
tableargs = [ # base set of indices
sa.UniqueConstraint("slug", "group_id", name="recipe_slug_group_id_key"),
@@ -237,12 +236,12 @@ class RecipeModel(SqlAlchemyBase, BaseMixins):
@event.listens_for(RecipeModel.name, "set")
def receive_name(target: RecipeModel, value: str, oldvalue, initiator):
target.name_normalized = unidecode(value).lower().strip()
target.name_normalized = RecipeModel.normalize(value)
@event.listens_for(RecipeModel.description, "set")
def receive_description(target: RecipeModel, value: str, oldvalue, initiator):
if value is not None:
target.description_normalized = unidecode(value).lower().strip()
target.description_normalized = RecipeModel.normalize(value)
else:
target.description_normalized = None