From d340fdd9df943e906f37d806c404fff6ff97a08c Mon Sep 17 00:00:00 2001
From: Michael Genson <71845777+michael-genson@users.noreply.github.com>
Date: Sun, 10 May 2026 21:23:57 -0500
Subject: [PATCH] fix: Update backend normalization to match search
 normalization logic (#7603)

Co-authored-by: Copilot <copilot@github.com>
---
 ...427796f7b6_more_aggresive_normalization.py | 76 +++++++++++++++++++
 mealie/db/models/_model_base.py               |  9 ++-
 mealie/schema/response/query_search.py        |  4 +-
 .../repository_tests/test_search.py           | 34 +++++++++
 .../backup_v2_tests/test_backup_v2.py         | 13 ----
 5 files changed, 120 insertions(+), 16 deletions(-)
 create mode 100644 mealie/alembic/versions/2026-05-10-18.44.53_c7427796f7b6_more_aggresive_normalization.py

diff --git a/mealie/alembic/versions/2026-05-10-18.44.53_c7427796f7b6_more_aggresive_normalization.py b/mealie/alembic/versions/2026-05-10-18.44.53_c7427796f7b6_more_aggresive_normalization.py
new file mode 100644
index 000000000..59c889133
--- /dev/null
+++ b/mealie/alembic/versions/2026-05-10-18.44.53_c7427796f7b6_more_aggresive_normalization.py
@@ -0,0 +1,76 @@
+"""more aggresive normalization
+
+Revision ID: c7427796f7b6
+Revises: 4395a04f7784
+Create Date: 2026-05-10 18:44:53.159775
+
+"""
+
+from sqlalchemy import orm, text
+
+from alembic import op
+from mealie.db.models._model_base import SqlAlchemyBase
+
+
+# revision identifiers, used by Alembic.
+revision = "c7427796f7b6"
+down_revision: str | None = "4395a04f7784"
+branch_labels: str | tuple[str, ...] | None = None
+depends_on: str | tuple[str, ...] | None = None
+
+
+def _update_table(session: orm.Session, table: str, columns: list[str], source_columns: list[str]) -> None:
+    """Re-normalize all rows in `table`, reading raw values from `source_columns` and writing to `columns`."""
+    rows = session.execute(text(f"SELECT id, {', '.join(source_columns)} FROM {table}")).fetchall()
+    for row in rows:
+        id_ = row[0]
+        updates = {}
+        for col, src in zip(columns, source_columns, strict=True):
+            val = row[source_columns.index(src) + 1]
+            updates[col] = SqlAlchemyBase.normalize(val) if val is not None else None
+
+        set_clause = ", ".join(f"{col} = :{col}" for col in columns)
+        session.execute(text(f"UPDATE {table} SET {set_clause} WHERE id = :id"), {**updates, "id": id_})
+    session.commit()
+
+
+def update_normalization() -> None:
+    bind = op.get_bind()
+    session = orm.Session(bind=bind)
+
+    # recipes: name_normalized, description_normalized
+    _update_table(session, "recipes", ["name_normalized", "description_normalized"], ["name", "description"])
+
+    # recipe ingredients: note_normalized, original_text_normalized
+    _update_table(
+        session,
+        "recipes_ingredients",
+        ["note_normalized", "original_text_normalized"],
+        ["note", "original_text"],
+    )
+
+    # ingredient units: name, plural_name, abbreviation, plural_abbreviation
+    _update_table(
+        session,
+        "ingredient_units",
+        ["name_normalized", "plural_name_normalized", "abbreviation_normalized", "plural_abbreviation_normalized"],
+        ["name", "plural_name", "abbreviation", "plural_abbreviation"],
+    )
+
+    # ingredient foods: name, plural_name
+    _update_table(session, "ingredient_foods", ["name_normalized", "plural_name_normalized"], ["name", "plural_name"])
+
+    # unit aliases
+    _update_table(session, "ingredient_units_aliases", ["name_normalized"], ["name"])
+
+    # food aliases
+    _update_table(session, "ingredient_foods_aliases", ["name_normalized"], ["name"])
+
+
+def upgrade():
+    # no table changes, this is a data migration
+    update_normalization()
+
+
+def downgrade():
+    pass
diff --git a/mealie/db/models/_model_base.py b/mealie/db/models/_model_base.py
index 4298a838b..ef9b7cacc 100644
--- a/mealie/db/models/_model_base.py
+++ b/mealie/db/models/_model_base.py
@@ -1,3 +1,4 @@
+import string
 from datetime import datetime
 
 from sqlalchemy import Integer
@@ -6,6 +7,12 @@ from text_unidecode import unidecode
 
 from ._model_utils.datetime import NaiveDateTime, get_utc_now
 
+# Punctuation characters replaced with spaces during text normalization.
+# Mirrors SearchFilter in query_search.py: string.punctuation minus apostrophe and
+# double-quote, which are reserved for quoted literal searches.
+NORMALIZE_PUNCTUATION = string.punctuation.replace("'", "").replace('"', "")
+_NORMALIZE_PUNCTUATION_TABLE = str.maketrans(NORMALIZE_PUNCTUATION, " " * len(NORMALIZE_PUNCTUATION))
+
 
 class SqlAlchemyBase(DeclarativeBase):
     id: Mapped[int] = mapped_column(Integer, primary_key=True)
@@ -20,7 +27,7 @@ class SqlAlchemyBase(DeclarativeBase):
     def normalize(cls, val: str) -> str:
         # We cap the length to 255 to prevent indexes from being too long; see:
         # https://www.postgresql.org/docs/current/btree.html
-        return unidecode(val).lower().strip()[:255]
+        return unidecode(val).translate(_NORMALIZE_PUNCTUATION_TABLE).lower().strip()[:255]
 
 
 class BaseMixins:
diff --git a/mealie/schema/response/query_search.py b/mealie/schema/response/query_search.py
index 65d40310f..61ee7c61b 100644
--- a/mealie/schema/response/query_search.py
+++ b/mealie/schema/response/query_search.py
@@ -4,7 +4,7 @@ from sqlalchemy import Select
 from sqlalchemy.orm import Session
 from text_unidecode import unidecode
 
-from ...db.models._model_base import SqlAlchemyBase
+from ...db.models._model_base import NORMALIZE_PUNCTUATION, SqlAlchemyBase
 from .._mealie import MealieModel, SearchType
 
 
@@ -16,7 +16,7 @@ class SearchFilter:
     3. remove special characters from each non-literal search string
     """
 
-    punctuation = r"!\#$%&()*+,-./:;<=>?@[\\]^_`{|}~"  # string.punctuation with ' & " removed
+    punctuation = NORMALIZE_PUNCTUATION
     quoted_regex = re.compile(r"""(["'])(?:(?=(\\?))\2.)*?\1""")
     remove_quotes_regex = re.compile(r"""['"](.*)['"]""")
 
diff --git a/tests/unit_tests/repository_tests/test_search.py b/tests/unit_tests/repository_tests/test_search.py
index 461d28d32..e9aec7aeb 100644
--- a/tests/unit_tests/repository_tests/test_search.py
+++ b/tests/unit_tests/repository_tests/test_search.py
@@ -3,10 +3,12 @@ from datetime import UTC, datetime
 import pytest
 from sqlalchemy.orm import Session
 
+from mealie.db.models._model_base import SqlAlchemyBase
 from mealie.repos.all_repositories import get_repositories
 from mealie.repos.repository_factory import AllRepositories
 from mealie.schema.recipe.recipe_ingredient import IngredientUnit, SaveIngredientUnit
 from mealie.schema.response.pagination import OrderDirection, PaginationQuery
+from mealie.schema.response.query_search import SearchFilter
 from mealie.schema.user.user import GroupBase
 from tests.utils.factories import random_int, random_string
 
@@ -137,3 +139,35 @@ def test_random_order_search(
         pagination.pagination_seed = str(datetime.now(UTC))
         random_ordered.append(repo.page_all(pagination, search="unit").items)
     assert not all(i == random_ordered[0] for i in random_ordered)
+
+
+@pytest.mark.parametrize(
+    "name, expected",
+    [
+        ("Gluten-Free Bread", "gluten free bread"),
+        ("Mac & Cheese", "mac   cheese"),
+        ("Chicken/Rice Bowl", "chicken rice bowl"),
+        ("Rátàtôuile", "ratatouile"),
+        ("Mom's Pasta", "mom's pasta"),
+    ],
+)
+def test_normalize_strips_punctuation(name: str, expected: str):
+    assert SqlAlchemyBase.normalize(name) == expected
+
+
+@pytest.mark.parametrize(
+    "name",
+    [
+        "Gluten-Free Bread",
+        "Mac & Cheese",
+        "Chicken/Rice Bowl",
+        "Rátàtôuile",
+        "Mom's Pasta",
+    ],
+)
+def test_search_normalize_symmetric_with_store_normalize(name: str):
+    """SearchFilter._normalize_search and SqlAlchemyBase.normalize must produce the same
+    output for the same input, otherwise stored values and search queries won't match."""
+    stored = SqlAlchemyBase.normalize(name)
+    searched = SearchFilter._normalize_search(name, normalize_characters=True)
+    assert stored == searched, f"Normalization mismatch for {name!r}: stored={stored!r}, searched={searched!r}"
diff --git a/tests/unit_tests/services_tests/backup_v2_tests/test_backup_v2.py b/tests/unit_tests/services_tests/backup_v2_tests/test_backup_v2.py
index 22f25d33d..c367df2d9 100644
--- a/tests/unit_tests/services_tests/backup_v2_tests/test_backup_v2.py
+++ b/tests/unit_tests/services_tests/backup_v2_tests/test_backup_v2.py
@@ -1,6 +1,4 @@
-import filecmp
 import statistics
-from pathlib import Path
 from typing import Any
 
 from sqlalchemy.orm import Session
@@ -32,17 +30,6 @@ def dict_sorter(d: dict) -> Any:
     return next((d[key] for key in possible_keys if d.get(key)), 1)
 
 
-# For Future Use
-def match_file_tree(path_a: Path, path_b: Path):
-    if path_a.is_dir() and path_b.is_dir():
-        for a_file in path_a.iterdir():
-            b_file = path_b.joinpath(a_file.name)
-            assert b_file.exists()
-            match_file_tree(a_file, b_file)
-    else:
-        assert filecmp.cmp(path_a, path_b)
-
-
 def test_database_backup():
     backup_v2 = BackupV2()
     path_to_backup = backup_v2.backup()