feat: search tokenization, handling of quoted literal search, and postgres fuzziness (#2351)

* Creating postgres migration script and starting to set up to detect database

* non-working placeholders for postgres pg_trgm

* First draft of some indexes

* non-working commit of postgres indexing

* Further non-working edits to db-centric fuzzy search

* update alembic for extensions

* More non-working setup

* Move db type check to init_db

* fix typo in db name check

* Add sqlite token search and postgres full text search

* reorder search to hit exact matches faster

* Add settings and docs for POSTGRES_LANGUAGE (full text search)

* Use user-specified POSTGRES_LANGUAGE in search

* fix fuzzy search typo

* Remove full text search and instead order by trigram match

* cleaner adding of indices, remove fulltext

* Cleanup old import of getting app settings

* Fix typo in index

* Fix some alembic fuzzy typos

* Remove diagnostic printing from alembic migration

* Fix mixed up commutator for trigram operator and relax criteria

* forgot to remove query debug

* sort only on name

* token and fuzzy search tests

* Refactor recipe search test to avoid rare random string cross-matches.

* Add ability to quote parts of search for exact match

* Remove internal punctuation, unless it's quoted for literal search

* Add tests for special character removal and literal search

* Remove the outer double quotes from searches, but leave internal single quotes alone.

* Update tests to avoid intra-test name collisions

* Fixing leftovers highlighted by lint

* cleanup linting and mypy errors

* Fix test cross-matching on dirty db (leftovers from bulk import)

* forgot to cleanup something when debugging mypy errors

* re-order pg_trgm loading in postgres

* address comments
This commit is contained in:
Jacob Corn
2023-05-28 19:46:53 +02:00
committed by GitHub
parent 27ebb4c462
commit 7e0d29afc7
7 changed files with 304 additions and 43 deletions

View File

@@ -92,6 +92,9 @@ def main():
logger.info("Migration needed. Performing migration...")
command.upgrade(alembic_cfg, "head")
if session.get_bind().name == "postgresql": # needed for fuzzy search and fast GIN text indices
session.execute(text("CREATE EXTENSION IF NOT EXISTS pg_trgm;"))
db = get_repositories(session)
if db.users.get_all():

View File

@@ -1,7 +1,9 @@
from typing import TYPE_CHECKING
import sqlalchemy as sa
from sqlalchemy import Boolean, Float, ForeignKey, Integer, String, event, orm
from sqlalchemy.orm import Mapped, mapped_column
from sqlalchemy.orm.session import Session
from text_unidecode import unidecode
from mealie.db.models._model_base import BaseMixins, SqlAlchemyBase
@@ -87,7 +89,7 @@ class RecipeIngredientModel(SqlAlchemyBase, BaseMixins):
original_text_normalized: Mapped[str | None] = mapped_column(String, index=True)
@auto_init()
def __init__(self, note: str | None = None, orginal_text: str | None = None, **_) -> None:
def __init__(self, session: Session, note: str | None = None, orginal_text: str | None = None, **_) -> None:
# SQLAlchemy events do not seem to register things that are set during auto_init
if note is not None:
self.note_normalized = unidecode(note).lower().strip()
@@ -95,13 +97,51 @@ class RecipeIngredientModel(SqlAlchemyBase, BaseMixins):
if orginal_text is not None:
self.orginal_text = unidecode(orginal_text).lower().strip()
tableargs = [ # base set of indices
sa.Index(
"ix_recipes_ingredients_note_normalized",
"note_normalized",
unique=False,
),
sa.Index(
"ix_recipes_ingredients_original_text_normalized",
"original_text_normalized",
unique=False,
),
]
if session.get_bind().name == "postgresql":
tableargs.extend(
[
sa.Index(
"ix_recipes_ingredients_note_normalized_gin",
"note_normalized",
unique=False,
postgresql_using="gin",
postgresql_ops={
"note_normalized": "gin_trgm_ops",
},
),
sa.Index(
"ix_recipes_ingredients_original_text_normalized_gin",
"original_text",
unique=False,
postgresql_using="gin",
postgresql_ops={
"original_text_normalized": "gin_trgm_ops",
},
),
]
)
# add indices
self.__table_args__ = tuple(tableargs)
@event.listens_for(RecipeIngredientModel.note, "set")
def receive_note(target: RecipeIngredientModel, value: str, oldvalue, initiator):
if value is not None:
target.name_normalized = unidecode(value).lower().strip()
target.note_normalized = unidecode(value).lower().strip()
else:
target.name_normalized = None
target.note_normalized = None
@event.listens_for(RecipeIngredientModel.original_text, "set")

View File

@@ -35,7 +35,9 @@ if TYPE_CHECKING:
class RecipeModel(SqlAlchemyBase, BaseMixins):
__tablename__ = "recipes"
__table_args__ = (sa.UniqueConstraint("slug", "group_id", name="recipe_slug_group_id_key"),)
__table_args__: tuple[sa.UniqueConstraint, ...] = (
sa.UniqueConstraint("slug", "group_id", name="recipe_slug_group_id_key"),
)
id: Mapped[GUID] = mapped_column(GUID, primary_key=True, default=GUID.generate)
slug: Mapped[str | None] = mapped_column(sa.String, index=True)
@@ -192,6 +194,46 @@ class RecipeModel(SqlAlchemyBase, BaseMixins):
if description is not None:
self.description_normalized = unidecode(description).lower().strip()
tableargs = [ # base set of indices
sa.UniqueConstraint("slug", "group_id", name="recipe_slug_group_id_key"),
sa.Index(
"ix_recipes_name_normalized",
"name_normalized",
unique=False,
),
sa.Index(
"ix_recipes_description_normalized",
"description_normalized",
unique=False,
),
]
if session.get_bind().name == "postgresql":
tableargs.extend(
[
sa.Index(
"ix_recipes_name_normalized_gin",
"name_normalized",
unique=False,
postgresql_using="gin",
postgresql_ops={
"name_normalized": "gin_trgm_ops",
},
),
sa.Index(
"ix_recipes_description_normalized_gin",
"description_normalized",
unique=False,
postgresql_using="gin",
postgresql_ops={
"description_normalized": "gin_trgm_ops",
},
),
]
)
# add indices
self.__table_args__ = tuple(tableargs)
@event.listens_for(RecipeModel.name, "set")
def receive_name(target: RecipeModel, value: str, oldvalue, initiator):