Mirror of https://github.com/mealie-recipes/mealie.git (synced 2026-01-05 08:31:25 -05:00)
feat: Generalize Search to Other Models (#2472)
* generalized search logic to SearchFilter
* added default search behavior for all models
* fix for schema overrides
* added search support to several models
* fix for label search
* tests and fixes
* add config for normalizing characters
* dramatically simplified search tests
* bark bark
* fix normalization bug
* tweaked tests
* maybe this time?

---------

Co-authored-by: Hayden <64056131+hay-kot@users.noreply.github.com>
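Editor's note: in practical terms, the change below means any repository derived from RepositoryGeneric can take a free-text term alongside pagination. A minimal usage sketch under that assumption; `repo` is a placeholder for any concrete repository instance and `name` is an illustrative field, neither is taken from this diff:

from mealie.schema.response.pagination import PaginationQuery

# `repo` stands in for any repository built on RepositoryGeneric (recipes, foods, labels, ...).
pagination = PaginationQuery(page=1, per_page=20)
page = repo.page_all(pagination, search="chocolate cake")

# PaginationBase exposes page, per_page, total, total_pages, and items (see the return block below).
print(page.total, page.total_pages)
for item in page.items:
    print(item.name)  # assumes the schema has a `name` field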
@@ -16,6 +16,7 @@ from mealie.db.models._model_base import SqlAlchemyBase
 from mealie.schema._mealie import MealieModel
 from mealie.schema.response.pagination import OrderDirection, PaginationBase, PaginationQuery
 from mealie.schema.response.query_filter import QueryFilter
+from mealie.schema.response.query_search import SearchFilter

 Schema = TypeVar("Schema", bound=MealieModel)
 Model = TypeVar("Model", bound=SqlAlchemyBase)
@@ -291,7 +292,7 @@ class RepositoryGeneric(Generic[Schema, Model]):
         q = self._query(override_schema=eff_schema).filter(attribute_name == attr_match)
         return [eff_schema.from_orm(x) for x in self.session.execute(q).scalars().all()]

-    def page_all(self, pagination: PaginationQuery, override=None) -> PaginationBase[Schema]:
+    def page_all(self, pagination: PaginationQuery, override=None, search: str | None = None) -> PaginationBase[Schema]:
         """
         pagination is a method to interact with the filtered database table and return a paginated result
         using the PaginationBase that provides several data points that are needed to manage pagination
@@ -302,12 +303,16 @@ class RepositoryGeneric(Generic[Schema, Model]):
         as the override, as the type system is not able to infer the result of this method.
         """
         eff_schema = override or self.schema
-
+        # Copy this, because calling methods (e.g. tests) might rely on it not getting mutated
+        pagination_result = pagination.copy()
         q = self._query(override_schema=eff_schema, with_options=False)

         fltr = self._filter_builder()
         q = q.filter_by(**fltr)
-        q, count, total_pages = self.add_pagination_to_query(q, pagination)
+        if search:
+            q = self.add_search_to_query(q, eff_schema, search)
+
+        q, count, total_pages = self.add_pagination_to_query(q, pagination_result)

         # Apply options late, so they do not get used for counting
         q = q.options(*eff_schema.loader_options())
@@ -318,8 +323,8 @@ class RepositoryGeneric(Generic[Schema, Model]):
             self.session.rollback()
             raise e
         return PaginationBase(
-            page=pagination.page,
-            per_page=pagination.per_page,
+            page=pagination_result.page,
+            per_page=pagination_result.per_page,
             total=count,
             total_pages=total_pages,
             items=[eff_schema.from_orm(s) for s in data],
@@ -392,3 +397,7 @@ class RepositoryGeneric(Generic[Schema, Model]):
             query = query.order_by(case_stmt)

         return query.limit(pagination.per_page).offset((pagination.page - 1) * pagination.per_page), count, total_pages
+
+    def add_search_to_query(self, query: Select, schema: type[Schema], search: str) -> Select:
+        search_filter = SearchFilter(self.session, search, schema._normalize_search)
+        return search_filter.filter_query_by_search(query, schema, self.model)
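Editor's note: the SearchFilter class itself (mealie.schema.response.query_search) is not part of this excerpt; only its constructor and filter_query_by_search call appear above. A simplified sketch of that interface is shown below, with a placeholder body rather than the real implementation; the exact nature of schema._normalize_search (flag or hook for the "config for normalizing characters" mentioned in the commit message) is not visible here either. Taking the session in the constructor is consistent with the removed recipe code further down, which issues session-level statements such as the pg_trgm threshold setting.

from sqlalchemy import Select, or_
from sqlalchemy.orm import Session


class SearchFilter:
    """Sketch of the interface used by RepositoryGeneric.add_search_to_query.

    Only the constructor and filter_query_by_search signatures are taken from the
    diff above; the body is an illustrative stand-in, not mealie's implementation.
    """

    def __init__(self, session: Session, search: str, normalize_search=False) -> None:
        self.session = session
        self.search = search.strip().lower()
        self.normalize_search = normalize_search  # value of schema._normalize_search; type not shown in this excerpt

    def filter_query_by_search(self, query: Select, schema, model) -> Select:
        # Placeholder token search: the real class encapsulates the tokenized/fuzzy
        # logic that previously lived in RepositoryRecipes._add_search_to_query.
        tokens = self.search.split()
        if not tokens:
            return query
        # Assumes the model exposes a searchable `name` column; purely illustrative.
        return query.filter(or_(*[model.name.like(f"%{token}%") for token in tokens]))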
@@ -5,10 +5,9 @@ from uuid import UUID

 from pydantic import UUID4
 from slugify import slugify
-from sqlalchemy import Select, and_, desc, func, or_, select, text
+from sqlalchemy import and_, func, select
 from sqlalchemy.exc import IntegrityError
 from sqlalchemy.orm import joinedload
-from text_unidecode import unidecode

 from mealie.db.models.recipe.category import Category
 from mealie.db.models.recipe.ingredient import RecipeIngredientModel
@@ -18,13 +17,7 @@ from mealie.db.models.recipe.tag import Tag
 from mealie.db.models.recipe.tool import Tool
 from mealie.schema.cookbook.cookbook import ReadCookBook
 from mealie.schema.recipe import Recipe
-from mealie.schema.recipe.recipe import (
-    RecipeCategory,
-    RecipePagination,
-    RecipeSummary,
-    RecipeTag,
-    RecipeTool,
-)
+from mealie.schema.recipe.recipe import RecipeCategory, RecipePagination, RecipeSummary, RecipeTag, RecipeTool
 from mealie.schema.recipe.recipe_category import CategoryBase, TagBase
 from mealie.schema.response.pagination import PaginationQuery

@@ -151,98 +144,7 @@ class RepositoryRecipes(RepositoryGeneric[Recipe, RecipeModel]):
         additional_ids = self.session.execute(select(model.id).filter(model.slug.in_(slugs))).scalars().all()
         return ids + additional_ids

-    def _add_search_to_query(self, query: Select, search: str) -> Select:
-        """
-        0. fuzzy search (postgres only) and tokenized search are performed separately
-        1. take search string and do a little pre-normalization
-        2. look for internal quoted strings and keep them together as "literal" parts of the search
-        3. remove special characters from each non-literal search string
-        4. token search looks for any individual exact hit in name, description, and ingredients
-        5. fuzzy search looks for trigram hits in name, description, and ingredients
-        6. Sort order is determined by closeness to the recipe name
-        Should search also look at tags?
-        """
-
-        normalized_search = unidecode(search).lower().strip()
-        punctuation = "!\#$%&()*+,-./:;<=>?@[\\]^_`{|}~"  # string.punctuation with ' & " removed
-        # keep quoted phrases together as literal portions of the search string
-        literal = False
-        quoted_regex = re.compile(r"""(["'])(?:(?=(\\?))\2.)*?\1""")  # thank you stack exchange!
-        removequotes_regex = re.compile(r"""['"](.*)['"]""")
-        if quoted_regex.search(normalized_search):
-            literal = True
-            temp = normalized_search
-            quoted_search_list = [match.group() for match in quoted_regex.finditer(temp)]  # all quoted strings
-            quoted_search_list = [removequotes_regex.sub("\\1", x) for x in quoted_search_list]  # remove outer quotes
-            temp = quoted_regex.sub("", temp)  # remove all quoted strings, leaving just non-quoted
-            temp = temp.translate(
-                str.maketrans(punctuation, " " * len(punctuation))
-            )  # punctuation->spaces for splitting, but only on unquoted strings
-            unquoted_search_list = temp.split()  # all unquoted strings
-            normalized_search_list = quoted_search_list + unquoted_search_list
-        else:
-            #
-            normalized_search = normalized_search.translate(str.maketrans(punctuation, " " * len(punctuation)))
-            normalized_search_list = normalized_search.split()
-        normalized_search_list = [x.strip() for x in normalized_search_list]  # remove padding whitespace inside quotes
-        # I would prefer to just do this in the recipe_ingredient.any part of the main query, but it turns out
-        # that at least sqlite wont use indexes for that correctly anymore and takes a big hit, so prefiltering it is
-        if (self.session.get_bind().name == "postgresql") & (literal is False):  # fuzzy search
-            ingredient_ids = (
-                self.session.execute(
-                    select(RecipeIngredientModel.id).filter(
-                        or_(
-                            RecipeIngredientModel.note_normalized.op("%>")(normalized_search),
-                            RecipeIngredientModel.original_text_normalized.op("%>")(normalized_search),
-                        )
-                    )
-                )
-                .scalars()
-                .all()
-            )
-        else:  # exact token search
-            ingredient_ids = (
-                self.session.execute(
-                    select(RecipeIngredientModel.id).filter(
-                        or_(
-                            *[RecipeIngredientModel.note_normalized.like(f"%{ns}%") for ns in normalized_search_list],
-                            *[
-                                RecipeIngredientModel.original_text_normalized.like(f"%{ns}%")
-                                for ns in normalized_search_list
-                            ],
-                        )
-                    )
-                )
-                .scalars()
-                .all()
-            )
-
-        if (self.session.get_bind().name == "postgresql") & (literal is False):  # fuzzy search
-            # default = 0.7 is too strict for effective fuzzing
-            self.session.execute(text("set pg_trgm.word_similarity_threshold = 0.5;"))
-            q = query.filter(
-                or_(
-                    RecipeModel.name_normalized.op("%>")(normalized_search),
-                    RecipeModel.description_normalized.op("%>")(normalized_search),
-                    RecipeModel.recipe_ingredient.any(RecipeIngredientModel.id.in_(ingredient_ids)),
-                )
-            ).order_by(  # trigram ordering could be too slow on million record db, but is fine with thousands.
-                func.least(
-                    RecipeModel.name_normalized.op("<->>")(normalized_search),
-                )
-            )
-        else:  # exact token search
-            q = query.filter(
-                or_(
-                    *[RecipeModel.name_normalized.like(f"%{ns}%") for ns in normalized_search_list],
-                    *[RecipeModel.description_normalized.like(f"%{ns}%") for ns in normalized_search_list],
-                    RecipeModel.recipe_ingredient.any(RecipeIngredientModel.id.in_(ingredient_ids)),
-                )
-            ).order_by(desc(RecipeModel.name_normalized.like(f"%{normalized_search}%")))
-
-        return q
-
-    def page_all(
+    def page_all(  # type: ignore
         self,
         pagination: PaginationQuery,
         override=None,
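Editor's note: the removed docstring above describes the tokenization rules (steps 1-3): quoted phrases are kept together as literal tokens, and punctuation is stripped from the rest before splitting. A standalone sketch of that pre-processing is shown below; it reuses the regexes from the deleted helper rather than the new SearchFilter, whose internals are not part of this excerpt.

import re
import string

from text_unidecode import unidecode

# Same patterns as the removed _add_search_to_query above.
quoted_regex = re.compile(r"""(["'])(?:(?=(\\?))\2.)*?\1""")
removequotes_regex = re.compile(r"""['"](.*)['"]""")
# string.punctuation with ' and " removed, matching the intent of the removed constant.
punctuation = string.punctuation.replace("'", "").replace('"', "")


def tokenize(search: str) -> list[str]:
    """Split a search string into tokens, keeping quoted phrases intact."""
    normalized = unidecode(search).lower().strip()
    quoted = [removequotes_regex.sub("\\1", m.group()) for m in quoted_regex.finditer(normalized)]
    # Strip quoted phrases out, then turn punctuation into spaces on the unquoted remainder only.
    unquoted = quoted_regex.sub("", normalized).translate(str.maketrans(punctuation, " " * len(punctuation)))
    return [token.strip() for token in quoted + unquoted.split()]


# "chocolate cake" stays a single literal token; the rest splits on whitespace/punctuation.
print(tokenize('"chocolate cake" flour-less'))  # ['chocolate cake', 'flour', 'less']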
@@ -299,7 +201,7 @@ class RepositoryRecipes(RepositoryGeneric[Recipe, RecipeModel]):
         )
         q = q.filter(*filters)
         if search:
-            q = self._add_search_to_query(q, search)
+            q = self.add_search_to_query(q, self.schema, search)

         q, count, total_pages = self.add_pagination_to_query(q, pagination_result)

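Editor's note: on PostgreSQL, the removed code relies on the pg_trgm word-similarity operators, %> for filtering and <->> for distance-based ordering, and lowers the session-level word_similarity_threshold to 0.5 because the stricter default misses short queries. A minimal standalone sketch of that pattern follows; the RecipeModel import path is assumed (the model is used but not imported within this excerpt), and the single-column ordering is a simplification of the func.least() ordering shown above.

from sqlalchemy import Select, select, text
from sqlalchemy.orm import Session

from mealie.db.models.recipe.recipe import RecipeModel  # assumed import path


def fuzzy_recipe_query(session: Session, search: str) -> Select:
    # Loosen the pg_trgm word-similarity threshold for this session, as the removed code does.
    session.execute(text("set pg_trgm.word_similarity_threshold = 0.5;"))
    return (
        select(RecipeModel)
        .filter(RecipeModel.name_normalized.op("%>")(search))  # word-similarity match
        .order_by(RecipeModel.name_normalized.op("<->>")(search))  # closest name first
    )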