feat: Generalize Search to Other Models (#2472)

* generalized search logic to SearchFilter

* added default search behavior for all models

* fix for schema overrides

* added search support to several models

* fix for label search

* tests and fixes

* add config for normalizing characters

* dramatically simplified search tests

* bark bark

* fix normalization bug

* tweaked tests

* maybe this time?

---------

Co-authored-by: Hayden <64056131+hay-kot@users.noreply.github.com>
This commit is contained in:
Michael Genson
2023-08-20 13:30:21 -05:00
committed by GitHub
parent 76ae0bafc7
commit 99372aa2b6
16 changed files with 521 additions and 250 deletions

View File

@@ -16,6 +16,7 @@ from mealie.db.models._model_base import SqlAlchemyBase
from mealie.schema._mealie import MealieModel
from mealie.schema.response.pagination import OrderDirection, PaginationBase, PaginationQuery
from mealie.schema.response.query_filter import QueryFilter
from mealie.schema.response.query_search import SearchFilter
# Type parameters for RepositoryGeneric: the schema type must derive from MealieModel,
# the database model type from SqlAlchemyBase.
Schema = TypeVar("Schema", bound=MealieModel)
Model = TypeVar("Model", bound=SqlAlchemyBase)
@@ -291,7 +292,7 @@ class RepositoryGeneric(Generic[Schema, Model]):
q = self._query(override_schema=eff_schema).filter(attribute_name == attr_match)
return [eff_schema.from_orm(x) for x in self.session.execute(q).scalars().all()]
def page_all(self, pagination: PaginationQuery, override=None) -> PaginationBase[Schema]:
def page_all(self, pagination: PaginationQuery, override=None, search: str | None = None) -> PaginationBase[Schema]:
"""
pagination is a method to interact with the filtered database table and return a paginated result
using the PaginationBase that provides several data points that are needed to manage pagination
@@ -302,12 +303,16 @@ class RepositoryGeneric(Generic[Schema, Model]):
as the override, as the type system is not able to infer the result of this method.
"""
eff_schema = override or self.schema
# Copy this, because calling methods (e.g. tests) might rely on it not getting mutated
pagination_result = pagination.copy()
q = self._query(override_schema=eff_schema, with_options=False)
fltr = self._filter_builder()
q = q.filter_by(**fltr)
q, count, total_pages = self.add_pagination_to_query(q, pagination)
if search:
q = self.add_search_to_query(q, eff_schema, search)
q, count, total_pages = self.add_pagination_to_query(q, pagination_result)
# Apply options late, so they do not get used for counting
q = q.options(*eff_schema.loader_options())
@@ -318,8 +323,8 @@ class RepositoryGeneric(Generic[Schema, Model]):
self.session.rollback()
raise e
return PaginationBase(
page=pagination.page,
per_page=pagination.per_page,
page=pagination_result.page,
per_page=pagination_result.per_page,
total=count,
total_pages=total_pages,
items=[eff_schema.from_orm(s) for s in data],
@@ -392,3 +397,7 @@ class RepositoryGeneric(Generic[Schema, Model]):
query = query.order_by(case_stmt)
return query.limit(pagination.per_page).offset((pagination.page - 1) * pagination.per_page), count, total_pages
def add_search_to_query(self, query: Select, schema: type[Schema], search: str) -> Select:
    """
    Narrow `query` to the rows that match the `search` string.

    Builds a SearchFilter from this repository's session, the search text and the
    schema's `_normalize_search` setting, then has it filter `query` for `schema`
    against `self.model`. Returns the Select produced by the filter.
    """
    return SearchFilter(self.session, search, schema._normalize_search).filter_query_by_search(
        query, schema, self.model
    )

View File

@@ -5,10 +5,9 @@ from uuid import UUID
from pydantic import UUID4
from slugify import slugify
from sqlalchemy import Select, and_, desc, func, or_, select, text
from sqlalchemy import and_, func, select
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import joinedload
from text_unidecode import unidecode
from mealie.db.models.recipe.category import Category
from mealie.db.models.recipe.ingredient import RecipeIngredientModel
@@ -18,13 +17,7 @@ from mealie.db.models.recipe.tag import Tag
from mealie.db.models.recipe.tool import Tool
from mealie.schema.cookbook.cookbook import ReadCookBook
from mealie.schema.recipe import Recipe
from mealie.schema.recipe.recipe import (
RecipeCategory,
RecipePagination,
RecipeSummary,
RecipeTag,
RecipeTool,
)
from mealie.schema.recipe.recipe import RecipeCategory, RecipePagination, RecipeSummary, RecipeTag, RecipeTool
from mealie.schema.recipe.recipe_category import CategoryBase, TagBase
from mealie.schema.response.pagination import PaginationQuery
@@ -151,98 +144,7 @@ class RepositoryRecipes(RepositoryGeneric[Recipe, RecipeModel]):
additional_ids = self.session.execute(select(model.id).filter(model.slug.in_(slugs))).scalars().all()
return ids + additional_ids
def _add_search_to_query(self, query: Select, search: str) -> Select:
    """
    Restrict `query` to recipes matching the free-text `search` and order the hits.

    0. fuzzy search (postgres only) and tokenized search are performed separately
    1. take search string and do a little pre-normalization
    2. look for internal quoted strings and keep them together as "literal" parts of the search
    3. remove special characters from each non-literal search string
    4. token search looks for any individual exact hit in name, description, and ingredients
    5. fuzzy search looks for trigram hits in name, description, and ingredients
    6. Sort order is determined by closeness to the recipe name
    Should search also look at tags?

    `query` is the Select to extend and `search` is the raw user input; the returned
    Select carries the additional filter and ordering. Two statements are executed on
    `self.session` along the way: the ingredient-id prefilter and, on the fuzzy path,
    the pg_trgm threshold change.
    """
    normalized_search = unidecode(search).lower().strip()

    # string.punctuation with ' & " removed (the backslash is written once, as a valid escape)
    punctuation = "!#$%&()*+,-./:;<=>?@[\\]^_`{|}~"
    punctuation_to_spaces = str.maketrans(punctuation, " " * len(punctuation))

    # keep quoted phrases together as literal portions of the search string
    quoted_regex = re.compile(r"""(["'])(?:(?=(\\?))\2.)*?\1""")  # thank you stack exchange!
    removequotes_regex = re.compile(r"""['"](.*)['"]""")

    literal = quoted_regex.search(normalized_search) is not None
    if literal:
        # all quoted strings, with their outer quotes removed
        quoted_search_list = [
            removequotes_regex.sub("\\1", match.group()) for match in quoted_regex.finditer(normalized_search)
        ]
        # remove all quoted strings, leaving just the non-quoted remainder
        unquoted = quoted_regex.sub("", normalized_search)
        # punctuation->spaces for splitting, but only on unquoted strings
        unquoted_search_list = unquoted.translate(punctuation_to_spaces).split()
        normalized_search_list = quoted_search_list + unquoted_search_list
    else:
        normalized_search = normalized_search.translate(punctuation_to_spaces)
        normalized_search_list = normalized_search.split()

    # remove padding whitespace inside quotes
    normalized_search_list = [x.strip() for x in normalized_search_list]

    # fuzzy (trigram) search only on postgres, and only when nothing was quoted
    use_fuzzy = self.session.get_bind().name == "postgresql" and not literal

    # I would prefer to just do this in the recipe_ingredient.any part of the main query, but it turns out
    # that at least sqlite wont use indexes for that correctly anymore and takes a big hit, so prefiltering it is
    if use_fuzzy:
        ingredient_filter = or_(
            RecipeIngredientModel.note_normalized.op("%>")(normalized_search),
            RecipeIngredientModel.original_text_normalized.op("%>")(normalized_search),
        )
    else:  # exact token search
        ingredient_filter = or_(
            *[RecipeIngredientModel.note_normalized.like(f"%{ns}%") for ns in normalized_search_list],
            *[RecipeIngredientModel.original_text_normalized.like(f"%{ns}%") for ns in normalized_search_list],
        )
    ingredient_ids = (
        self.session.execute(select(RecipeIngredientModel.id).filter(ingredient_filter)).scalars().all()
    )

    if use_fuzzy:
        # default = 0.7 is too strict for effective fuzzing
        # NOTE(review): this runs after the ingredient prefilter above has already executed, so that
        # prefilter uses whatever threshold was in effect beforehand -- confirm this ordering is intended
        self.session.execute(text("set pg_trgm.word_similarity_threshold = 0.5;"))
        return query.filter(
            or_(
                RecipeModel.name_normalized.op("%>")(normalized_search),
                RecipeModel.description_normalized.op("%>")(normalized_search),
                RecipeModel.recipe_ingredient.any(RecipeIngredientModel.id.in_(ingredient_ids)),
            )
        ).order_by(  # trigram ordering could be too slow on million record db, but is fine with thousands.
            func.least(
                RecipeModel.name_normalized.op("<->>")(normalized_search),
            )
        )

    # exact token search
    return query.filter(
        or_(
            *[RecipeModel.name_normalized.like(f"%{ns}%") for ns in normalized_search_list],
            *[RecipeModel.description_normalized.like(f"%{ns}%") for ns in normalized_search_list],
            RecipeModel.recipe_ingredient.any(RecipeIngredientModel.id.in_(ingredient_ids)),
        )
    ).order_by(desc(RecipeModel.name_normalized.like(f"%{normalized_search}%")))
def page_all(
def page_all( # type: ignore
self,
pagination: PaginationQuery,
override=None,
@@ -299,7 +201,7 @@ class RepositoryRecipes(RepositoryGeneric[Recipe, RecipeModel]):
)
q = q.filter(*filters)
if search:
q = self._add_search_to_query(q, search)
q = self.add_search_to_query(q, self.schema, search)
q, count, total_pages = self.add_pagination_to_query(q, pagination_result)