diff --git a/mealie/services/scraper/scraper_strategies.py b/mealie/services/scraper/scraper_strategies.py
index b33a441d6..c05ef59d2 100644
--- a/mealie/services/scraper/scraper_strategies.py
+++ b/mealie/services/scraper/scraper_strategies.py
@@ -230,6 +230,39 @@ class RecipeScraperPackage(ABCScraperStrategy):
             except TypeError:
                 return []
 
+        def get_notes() -> list[RecipeNote]:
+            """Build RecipeNote objects from the "notes" value of the scraped recipe data.
+
+            Each entry may be a dict with "text" and an optional "title", or a
+            plain string, which becomes the text of an untitled note. Entries of
+            any other type, and entries without text, are skipped.
+            """
+            notes_data = try_get_default(None, "notes", None)
+
+            if not notes_data or not isinstance(notes_data, list):
+                return []
+
+            cleaned_notes = []
+            for note in notes_data:
+                if isinstance(note, str):
+                    # A bare string carries no title; keep it as the note text instead of dropping it.
+                    title, text = "", note
+                elif isinstance(note, dict):
+                    # "title" may be absent or explicitly null; normalise both to an empty string.
+                    title, text = note.get("title") or "", note.get("text")
+                else:
+                    continue
+
+                if text:
+                    cleaned_notes.append(
+                        RecipeNote(
+                            title=cleaner.clean_string(title),
+                            text=cleaner.clean_string(text),
+                        )
+                    )
+
+            return cleaned_notes
+
         cook_time = try_get_default(
             None, "performTime", None, cleaner.clean_time, translator=self.translator
         ) or try_get_default(scraped_data.cook_time, "cookTime", None, cleaner.clean_time, translator=self.translator)
@@ -261,6 +294,7 @@ class RecipeScraperPackage(ABCScraperStrategy):
             ),
             perform_time=cook_time,
             org_url=url or try_get_default(None, "url", None, cleaner.clean_string),
+            notes=get_notes(),
         )
 
         return recipe, extras
diff --git a/tests/unit_tests/services_tests/scraper_tests/test_cleaner.py b/tests/unit_tests/services_tests/scraper_tests/test_cleaner.py
index 1c761aee6..35c87de48 100644
--- a/tests/unit_tests/services_tests/scraper_tests/test_cleaner.py
+++ b/tests/unit_tests/services_tests/scraper_tests/test_cleaner.py
@@ -3,10 +3,11 @@ import re
 from pathlib import Path
 
 import pytest
+from recipe_scrapers import scrape_html
 
 from mealie.lang.providers import get_locale_provider
 from mealie.services.scraper import cleaner
-from mealie.services.scraper.scraper_strategies import RecipeScraperOpenGraph
+from mealie.services.scraper.scraper_strategies import RecipeScraperOpenGraph, RecipeScraperPackage
 from tests import data as test_data
 
 # https://github.com/django/django/blob/stable/1.3.x/django/core/validators.py#L45
@@ -57,3 +58,33 @@ def test_html_with_recipe_data():
     assert recipe_data["orgURL"] == url
     assert len(recipe_data["description"]) > 100
     assert url_validation_regex.match(recipe_data["image"])
+
+
+def test_clean_scraper_preserves_notes():
+    """Regression test: notes must survive the RecipeScraperPackage pipeline (previously dropped silently)."""
+    ld_json = json.dumps(
+        {
+            "@context": "https://schema.org",
+            "@type": "Recipe",
+            "name": "Test Recipe",
+            "recipeIngredient": ["1 cup flour"],
+            "recipeInstructions": [{"@type": "HowToStep", "text": "Mix everything together"}],
+            "notes": [
+                {"title": "Storage Tip", "text": "Keep refrigerated up to 3 days"},
+                {"title": "Variation", "text": "Add chili flakes for extra heat"},
+            ],
+        }
+    )
+    html = RecipeScraperPackage.ld_json_to_html(ld_json)
+    scraped = scrape_html(html, org_url="https://example.com", supported_only=False)
+    translator = get_locale_provider()
+    strategy = RecipeScraperPackage("https://example.com", translator)
+
+    recipe, _ = strategy.clean_scraper(scraped, "https://example.com")
+
+    assert recipe.notes is not None
+    assert len(recipe.notes) == 2
+    assert recipe.notes[0].title == "Storage Tip"
+    assert recipe.notes[0].text == "Keep refrigerated up to 3 days"
+    assert recipe.notes[1].title == "Variation"
+    assert recipe.notes[1].text == "Add chili flakes for extra heat"
diff --git a/tests/unit_tests/services_tests/scraper_tests/test_cleaner_parts.py b/tests/unit_tests/services_tests/scraper_tests/test_cleaner_parts.py
index ee6b689f6..c984bc173 100644
--- a/tests/unit_tests/services_tests/scraper_tests/test_cleaner_parts.py
+++ b/tests/unit_tests/services_tests/scraper_tests/test_cleaner_parts.py
@@ -672,6 +672,69 @@ def test_cleaner_clean_nutrition(case: CleanerCase):
     assert case.expected == result
 
 
+clean_notes_test_cases = (
+    CleanerCase(
+        test_id="valid dicts with title and text",
+        input=[
+            {"title": "Storage Tip", "text": "Keep refrigerated up to 3 days"},
+            {"title": "Variation", "text": "Add chili flakes for extra heat"},
+        ],
+        expected=[
+            {"title": "Storage Tip", "text": "Keep refrigerated up to 3 days"},
+            {"title": "Variation", "text": "Add chili flakes for extra heat"},
+        ],
+    ),
+    CleanerCase(
+        test_id="dict missing title gets empty title",
+        input=[{"text": "A note without a title"}],
+        expected=[{"title": "", "text": "A note without a title"}],
+    ),
+    CleanerCase(
+        test_id="dict missing text is skipped",
+        input=[{"title": "Only title, no text"}],
+        expected=[],
+    ),
+    CleanerCase(
+        test_id="plain string becomes note with empty title",
+        input=["A plain text note"],
+        expected=[{"title": "", "text": "A plain text note"}],
+    ),
+    CleanerCase(
+        test_id="mixed valid and invalid entries",
+        input=[
+            {"title": "Valid", "text": "Has both fields"},
+            {"title": "No text"},
+            "Plain string note",
+        ],
+        expected=[
+            {"title": "Valid", "text": "Has both fields"},
+            {"title": "", "text": "Plain string note"},
+        ],
+    ),
+    CleanerCase(
+        test_id="empty list",
+        input=[],
+        expected=[],
+    ),
+    CleanerCase(
+        test_id="non-list returns None",
+        input="not a list",
+        expected=None,
+    ),
+    CleanerCase(
+        test_id="none returns None",
+        input=None,
+        expected=None,
+    ),
+)
+
+
+@pytest.mark.parametrize("case", clean_notes_test_cases, ids=(x.test_id for x in clean_notes_test_cases))
+def test_cleaner_clean_notes(case: CleanerCase) -> None:
+    result = cleaner.clean_notes(case.input)
+    assert case.expected == result
+
+
 @pytest.mark.parametrize(
     "t,max_components,max_decimal_places,expected",
     [