fix: restore recipe notes during JSON import (#7017)

Co-authored-by: Michael Genson <genson.michael@gmail.com>
Co-authored-by: Michael Genson <71845777+michael-genson@users.noreply.github.com>
2026-07-19 06:00:16 -04:00 · 2026-03-26 21:04:59 +01:00
parent e52a887e30
commit 449e3baa07
3 changed files with 118 additions and 1 deletions
--- a/mealie/services/scraper/scraper_strategies.py
+++ b/mealie/services/scraper/scraper_strategies.py
@@ -230,6 +230,28 @@ class RecipeScraperPackage(ABCScraperStrategy):
            except TypeError:
                return []

+        def get_notes() -> list[RecipeNote]:
+            """Build RecipeNote objects from the "notes" list, skipping non-dict or text-less entries."""
+            notes_data = try_get_default(None, "notes", None)
+
+            if not notes_data or not isinstance(notes_data, list):
+                return []  # missing, empty, or non-list "notes" yields no notes
+
+            cleaned_notes = []
+            for note in notes_data:
+                if not isinstance(note, dict):
+                    continue  # non-dict entries (e.g. plain strings) are dropped
+
+                if text := note.get("text"):
+                    cleaned_notes.append(
+                        RecipeNote(
+                            title=cleaner.clean_string(note.get("title", "")),
+                            text=cleaner.clean_string(text),
+                        )
+                    )
+
+            return cleaned_notes
+
        cook_time = try_get_default(
            None, "performTime", None, cleaner.clean_time, translator=self.translator
        ) or try_get_default(scraped_data.cook_time, "cookTime", None, cleaner.clean_time, translator=self.translator)
@@ -261,6 +283,7 @@ class RecipeScraperPackage(ABCScraperStrategy):
            ),
            perform_time=cook_time,
            org_url=url or try_get_default(None, "url", None, cleaner.clean_string),
+            notes=get_notes(),
        )

        return recipe, extras
--- a/tests/unit_tests/services_tests/scraper_tests/test_cleaner.py
+++ b/tests/unit_tests/services_tests/scraper_tests/test_cleaner.py
@@ -3,10 +3,11 @@ import re
 from pathlib import Path

 import pytest
+from recipe_scrapers import scrape_html

 from mealie.lang.providers import get_locale_provider
 from mealie.services.scraper import cleaner
-from mealie.services.scraper.scraper_strategies import RecipeScraperOpenGraph
+from mealie.services.scraper.scraper_strategies import RecipeScraperOpenGraph, RecipeScraperPackage
 from tests import data as test_data

 # https://github.com/django/django/blob/stable/1.3.x/django/core/validators.py#L45
@@ -57,3 +58,33 @@ def test_html_with_recipe_data():
    assert recipe_data["orgURL"] == url
    assert len(recipe_data["description"]) > 100
    assert url_validation_regex.match(recipe_data["image"])
+
+
+def test_clean_scraper_preserves_notes():
+    """Regression test: "notes" must survive RecipeScraperPackage.clean_scraper (previously dropped silently)."""
+    ld_json = json.dumps(
+        {
+            "@context": "https://schema.org",
+            "@type": "Recipe",
+            "name": "Test Recipe",
+            "recipeIngredient": ["1 cup flour"],
+            "recipeInstructions": [{"@type": "HowToStep", "text": "Mix everything together"}],
+            "notes": [
+                {"title": "Storage Tip", "text": "Keep refrigerated up to 3 days"},
+                {"title": "Variation", "text": "Add chili flakes for extra heat"},
+            ],
+        }
+    )
+    html = RecipeScraperPackage.ld_json_to_html(ld_json)
+    scraped = scrape_html(html, org_url="https://example.com", supported_only=False)
+    translator = get_locale_provider()
+    strategy = RecipeScraperPackage("https://example.com", translator)
+
+    recipe, _ = strategy.clean_scraper(scraped, "https://example.com")
+
+    assert recipe.notes is not None
+    assert len(recipe.notes) == 2
+    assert recipe.notes[0].title == "Storage Tip"
+    assert recipe.notes[0].text == "Keep refrigerated up to 3 days"
+    assert recipe.notes[1].title == "Variation"
+    assert recipe.notes[1].text == "Add chili flakes for extra heat"
--- a/tests/unit_tests/services_tests/scraper_tests/test_cleaner_parts.py
+++ b/tests/unit_tests/services_tests/scraper_tests/test_cleaner_parts.py
@@ -672,6 +672,69 @@ def test_cleaner_clean_nutrition(case: CleanerCase):
    assert case.expected == result


+clean_notes_test_cases = (  # input/expected pairs for cleaner.clean_notes
+    CleanerCase(
+        test_id="valid dicts with title and text",
+        input=[
+            {"title": "Storage Tip", "text": "Keep refrigerated up to 3 days"},
+            {"title": "Variation", "text": "Add chili flakes for extra heat"},
+        ],
+        expected=[
+            {"title": "Storage Tip", "text": "Keep refrigerated up to 3 days"},
+            {"title": "Variation", "text": "Add chili flakes for extra heat"},
+        ],
+    ),
+    CleanerCase(
+        test_id="dict missing title gets empty title",
+        input=[{"text": "A note without a title"}],
+        expected=[{"title": "", "text": "A note without a title"}],
+    ),
+    CleanerCase(
+        test_id="dict missing text is skipped",
+        input=[{"title": "Only title, no text"}],
+        expected=[],
+    ),
+    CleanerCase(
+        test_id="plain string becomes note with empty title",
+        input=["A plain text note"],  # bare string entry rather than a dict
+        expected=[{"title": "", "text": "A plain text note"}],
+    ),
+    CleanerCase(
+        test_id="mixed valid and invalid entries",
+        input=[
+            {"title": "Valid", "text": "Has both fields"},
+            {"title": "No text"},
+            "Plain string note",
+        ],
+        expected=[
+            {"title": "Valid", "text": "Has both fields"},
+            {"title": "", "text": "Plain string note"},
+        ],
+    ),
+    CleanerCase(
+        test_id="empty list",
+        input=[],
+        expected=[],
+    ),
+    CleanerCase(
+        test_id="non-list returns None",
+        input="not a list",
+        expected=None,  # None here, whereas an empty list input expects []
+    ),
+    CleanerCase(
+        test_id="none returns None",
+        input=None,
+        expected=None,
+    ),
+)
+
+
+@pytest.mark.parametrize("case", clean_notes_test_cases, ids=(x.test_id for x in clean_notes_test_cases))
+def test_cleaner_clean_notes(case: CleanerCase) -> None:
+    result = cleaner.clean_notes(case.input)
+    assert case.expected == result  # expected is a list of title/text dicts, or None for non-list input
+
+
@pytest.mark.parametrize(
    "t,max_components,max_decimal_places,expected",
    [