feat: Generalize Search to Other Models (#2472)

* generalized search logic to SearchFilter
* added default search behavior for all models
* fix for schema overrides
* added search support to several models
* fix for label search
* tests and fixes
* add config for normalizing characters
* dramatically simplified search tests
* bark bark
* fix normalization bug
* tweaked tests
* maybe this time?

---------

Co-authored-by: Hayden <64056131+hay-kot@users.noreply.github.com>
2026-02-27 18:23:15 -05:00 · 2023-08-20 13:30:21 -05:00
parent 76ae0bafc7
commit 99372aa2b6
16 changed files with 521 additions and 250 deletions
--- a/mealie/repos/repository_generic.py
+++ b/mealie/repos/repository_generic.py
@@ -16,6 +16,7 @@ from mealie.db.models._model_base import SqlAlchemyBase
 from mealie.schema._mealie import MealieModel
 from mealie.schema.response.pagination import OrderDirection, PaginationBase, PaginationQuery
 from mealie.schema.response.query_filter import QueryFilter
+from mealie.schema.response.query_search import SearchFilter

 Schema = TypeVar("Schema", bound=MealieModel)
 Model = TypeVar("Model", bound=SqlAlchemyBase)
@@ -291,7 +292,7 @@ class RepositoryGeneric(Generic[Schema, Model]):
            q = self._query(override_schema=eff_schema).filter(attribute_name == attr_match)
            return [eff_schema.from_orm(x) for x in self.session.execute(q).scalars().all()]

-    def page_all(self, pagination: PaginationQuery, override=None) -> PaginationBase[Schema]:
+    def page_all(self, pagination: PaginationQuery, override=None, search: str | None = None) -> PaginationBase[Schema]:
        """
        pagination is a method to interact with the filtered database table and return a paginated result
        using the PaginationBase that provides several data points that are needed to manage pagination
@@ -302,12 +303,16 @@ class RepositoryGeneric(Generic[Schema, Model]):
        as the override, as the type system is not able to infer the result of this method.
        """
        eff_schema = override or self.schema
-
+        # Copy this, because calling methods (e.g. tests) might rely on it not getting mutated
+        pagination_result = pagination.copy()
        q = self._query(override_schema=eff_schema, with_options=False)

        fltr = self._filter_builder()
        q = q.filter_by(**fltr)
-        q, count, total_pages = self.add_pagination_to_query(q, pagination)
+        if search:
+            q = self.add_search_to_query(q, eff_schema, search)
+
+        q, count, total_pages = self.add_pagination_to_query(q, pagination_result)

        # Apply options late, so they do not get used for counting
        q = q.options(*eff_schema.loader_options())
@@ -318,8 +323,8 @@ class RepositoryGeneric(Generic[Schema, Model]):
            self.session.rollback()
            raise e
        return PaginationBase(
-            page=pagination.page,
-            per_page=pagination.per_page,
+            page=pagination_result.page,
+            per_page=pagination_result.per_page,
            total=count,
            total_pages=total_pages,
            items=[eff_schema.from_orm(s) for s in data],
@@ -392,3 +397,7 @@ class RepositoryGeneric(Generic[Schema, Model]):
                query = query.order_by(case_stmt)

        return query.limit(pagination.per_page).offset((pagination.page - 1) * pagination.per_page), count, total_pages
+
+    def add_search_to_query(self, query: Select, schema: type[Schema], search: str) -> Select:
+        search_filter = SearchFilter(self.session, search, schema._normalize_search)
+        return search_filter.filter_query_by_search(query, schema, self.model)
--- a/mealie/repos/repository_recipes.py
+++ b/mealie/repos/repository_recipes.py
@@ -5,10 +5,9 @@ from uuid import UUID

 from pydantic import UUID4
 from slugify import slugify
-from sqlalchemy import Select, and_, desc, func, or_, select, text
+from sqlalchemy import and_, func, select
 from sqlalchemy.exc import IntegrityError
 from sqlalchemy.orm import joinedload
-from text_unidecode import unidecode

 from mealie.db.models.recipe.category import Category
 from mealie.db.models.recipe.ingredient import RecipeIngredientModel
@@ -18,13 +17,7 @@ from mealie.db.models.recipe.tag import Tag
 from mealie.db.models.recipe.tool import Tool
 from mealie.schema.cookbook.cookbook import ReadCookBook
 from mealie.schema.recipe import Recipe
-from mealie.schema.recipe.recipe import (
-    RecipeCategory,
-    RecipePagination,
-    RecipeSummary,
-    RecipeTag,
-    RecipeTool,
-)
+from mealie.schema.recipe.recipe import RecipeCategory, RecipePagination, RecipeSummary, RecipeTag, RecipeTool
 from mealie.schema.recipe.recipe_category import CategoryBase, TagBase
 from mealie.schema.response.pagination import PaginationQuery

@@ -151,98 +144,7 @@ class RepositoryRecipes(RepositoryGeneric[Recipe, RecipeModel]):
        additional_ids = self.session.execute(select(model.id).filter(model.slug.in_(slugs))).scalars().all()
        return ids + additional_ids

-    def _add_search_to_query(self, query: Select, search: str) -> Select:
-        """
-        0. fuzzy search (postgres only) and tokenized search are performed separately
-        1. take search string and do a little pre-normalization
-        2. look for internal quoted strings and keep them together as "literal" parts of the search
-        3. remove special characters from each non-literal search string
-        4. token search looks for any individual exact hit in name, description, and ingredients
-        5. fuzzy search looks for trigram hits in name, description, and ingredients
-        6. Sort order is determined by closeness to the recipe name
-        Should search also look at tags?
-        """
-
-        normalized_search = unidecode(search).lower().strip()
-        punctuation = "!\#$%&()*+,-./:;<=>?@[\\]^_`{|}~"  # string.punctuation with ' & " removed
-        # keep quoted phrases together as literal portions of the search string
-        literal = False
-        quoted_regex = re.compile(r"""(["'])(?:(?=(\\?))\2.)*?\1""")  # thank you stack exchange!
-        removequotes_regex = re.compile(r"""['"](.*)['"]""")
-        if quoted_regex.search(normalized_search):
-            literal = True
-            temp = normalized_search
-            quoted_search_list = [match.group() for match in quoted_regex.finditer(temp)]  # all quoted strings
-            quoted_search_list = [removequotes_regex.sub("\\1", x) for x in quoted_search_list]  # remove outer quotes
-            temp = quoted_regex.sub("", temp)  # remove all quoted strings, leaving just non-quoted
-            temp = temp.translate(
-                str.maketrans(punctuation, " " * len(punctuation))
-            )  # punctuation->spaces for splitting, but only on unquoted strings
-            unquoted_search_list = temp.split()  # all unquoted strings
-            normalized_search_list = quoted_search_list + unquoted_search_list
-        else:
-            #
-            normalized_search = normalized_search.translate(str.maketrans(punctuation, " " * len(punctuation)))
-            normalized_search_list = normalized_search.split()
-        normalized_search_list = [x.strip() for x in normalized_search_list]  # remove padding whitespace inside quotes
-        # I would prefer to just do this in the recipe_ingredient.any part of the main query, but it turns out
-        # that at least sqlite wont use indexes for that correctly anymore and takes a big hit, so prefiltering it is
-        if (self.session.get_bind().name == "postgresql") & (literal is False):  # fuzzy search
-            ingredient_ids = (
-                self.session.execute(
-                    select(RecipeIngredientModel.id).filter(
-                        or_(
-                            RecipeIngredientModel.note_normalized.op("%>")(normalized_search),
-                            RecipeIngredientModel.original_text_normalized.op("%>")(normalized_search),
-                        )
-                    )
-                )
-                .scalars()
-                .all()
-            )
-        else:  # exact token search
-            ingredient_ids = (
-                self.session.execute(
-                    select(RecipeIngredientModel.id).filter(
-                        or_(
-                            *[RecipeIngredientModel.note_normalized.like(f"%{ns}%") for ns in normalized_search_list],
-                            *[
-                                RecipeIngredientModel.original_text_normalized.like(f"%{ns}%")
-                                for ns in normalized_search_list
-                            ],
-                        )
-                    )
-                )
-                .scalars()
-                .all()
-            )
-
-        if (self.session.get_bind().name == "postgresql") & (literal is False):  # fuzzy search
-            # default = 0.7 is too strict for effective fuzzing
-            self.session.execute(text("set pg_trgm.word_similarity_threshold = 0.5;"))
-            q = query.filter(
-                or_(
-                    RecipeModel.name_normalized.op("%>")(normalized_search),
-                    RecipeModel.description_normalized.op("%>")(normalized_search),
-                    RecipeModel.recipe_ingredient.any(RecipeIngredientModel.id.in_(ingredient_ids)),
-                )
-            ).order_by(  # trigram ordering could be too slow on million record db, but is fine with thousands.
-                func.least(
-                    RecipeModel.name_normalized.op("<->>")(normalized_search),
-                )
-            )
-        else:  # exact token search
-            q = query.filter(
-                or_(
-                    *[RecipeModel.name_normalized.like(f"%{ns}%") for ns in normalized_search_list],
-                    *[RecipeModel.description_normalized.like(f"%{ns}%") for ns in normalized_search_list],
-                    RecipeModel.recipe_ingredient.any(RecipeIngredientModel.id.in_(ingredient_ids)),
-                )
-            ).order_by(desc(RecipeModel.name_normalized.like(f"%{normalized_search}%")))
-
-        return q
-
-    def page_all(
+    def page_all(  # type: ignore
        self,
        pagination: PaginationQuery,
        override=None,
@@ -299,7 +201,7 @@ class RepositoryRecipes(RepositoryGeneric[Recipe, RecipeModel]):
            )
            q = q.filter(*filters)
        if search:
-            q = self._add_search_to_query(q, search)
+            q = self.add_search_to_query(q, self.schema, search)

        q, count, total_pages = self.add_pagination_to_query(q, pagination_result)