fix: restore recipe notes during JSON import (#7017)

Co-authored-by: Michael Genson <genson.michael@gmail.com>
Co-authored-by: Michael Genson <71845777+michael-genson@users.noreply.github.com>
This commit is contained in:
Sebastian
2026-03-26 21:04:59 +01:00
committed by GitHub
parent e52a887e30
commit 449e3baa07
3 changed files with 118 additions and 1 deletions

View File

@@ -230,6 +230,28 @@ class RecipeScraperPackage(ABCScraperStrategy):
except TypeError:
return []
def get_notes() -> list[RecipeNote]:
    """Extract notes from schema.org recipe data and convert to RecipeNote objects.

    Reads the "notes" entry via ``try_get_default``. Each entry may be either a
    dict with a "text" and an optional "title", or a plain string, which is
    treated as the text of an untitled note. Entries of any other type, and
    entries without text, are skipped.

    Returns:
        The cleaned notes in source order; an empty list when "notes" is
        missing, empty, or not a list.
    """
    notes_data = try_get_default(None, "notes", None)
    if not notes_data or not isinstance(notes_data, list):
        return []

    cleaned_notes = []
    for note in notes_data:
        if isinstance(note, str):
            # Plain-string notes carry no title; keep the text instead of dropping it
            title, text = "", note
        elif isinstance(note, dict):
            # "title" may be absent or explicitly null in the JSON; both mean "untitled"
            title, text = note.get("title") or "", note.get("text")
        else:
            continue

        # A note without text has nothing to display, so it is skipped
        if not text:
            continue

        cleaned_notes.append(
            RecipeNote(
                title=cleaner.clean_string(title),
                text=cleaner.clean_string(text),
            )
        )

    return cleaned_notes
cook_time = try_get_default(
None, "performTime", None, cleaner.clean_time, translator=self.translator
) or try_get_default(scraped_data.cook_time, "cookTime", None, cleaner.clean_time, translator=self.translator)
@@ -261,6 +283,7 @@ class RecipeScraperPackage(ABCScraperStrategy):
),
perform_time=cook_time,
org_url=url or try_get_default(None, "url", None, cleaner.clean_string),
notes=get_notes(),
)
return recipe, extras

View File

@@ -3,10 +3,11 @@ import re
from pathlib import Path
import pytest
from recipe_scrapers import scrape_html
from mealie.lang.providers import get_locale_provider
from mealie.services.scraper import cleaner
from mealie.services.scraper.scraper_strategies import RecipeScraperOpenGraph
from mealie.services.scraper.scraper_strategies import RecipeScraperOpenGraph, RecipeScraperPackage
from tests import data as test_data
# https://github.com/django/django/blob/stable/1.3.x/django/core/validators.py#L45
@@ -57,3 +58,33 @@ def test_html_with_recipe_data():
assert recipe_data["orgURL"] == url
assert len(recipe_data["description"]) > 100
assert url_validation_regex.match(recipe_data["image"])
def test_clean_scraper_preserves_notes():
    """Regression test: notes must survive the RecipeScraperPackage pipeline (previously dropped silently)."""
    source_url = "https://example.com"
    recipe_schema = {
        "@context": "https://schema.org",
        "@type": "Recipe",
        "name": "Test Recipe",
        "recipeIngredient": ["1 cup flour"],
        "recipeInstructions": [{"@type": "HowToStep", "text": "Mix everything together"}],
        "notes": [
            {"title": "Storage Tip", "text": "Keep refrigerated up to 3 days"},
            {"title": "Variation", "text": "Add chili flakes for extra heat"},
        ],
    }

    # Wrap the ld+json in HTML so it goes through the same scraping path as a fetched page
    page = RecipeScraperPackage.ld_json_to_html(json.dumps(recipe_schema))
    scraper = scrape_html(page, org_url=source_url, supported_only=False)
    package_strategy = RecipeScraperPackage(source_url, get_locale_provider())

    recipe, _ = package_strategy.clean_scraper(scraper, source_url)

    assert recipe.notes is not None
    assert len(recipe.notes) == 2

    storage_note, variation_note = recipe.notes
    assert storage_note.title == "Storage Tip"
    assert storage_note.text == "Keep refrigerated up to 3 days"
    assert variation_note.title == "Variation"
    assert variation_note.text == "Add chili flakes for extra heat"

View File

@@ -672,6 +672,69 @@ def test_cleaner_clean_nutrition(case: CleanerCase):
assert case.expected == result
# Cases for cleaner.clean_notes; each row below is (test_id, input, expected).
clean_notes_test_cases = tuple(
    CleanerCase(test_id=test_id, input=raw_notes, expected=expected)
    for test_id, raw_notes, expected in (
        (
            "valid dicts with title and text",
            [
                {"title": "Storage Tip", "text": "Keep refrigerated up to 3 days"},
                {"title": "Variation", "text": "Add chili flakes for extra heat"},
            ],
            [
                {"title": "Storage Tip", "text": "Keep refrigerated up to 3 days"},
                {"title": "Variation", "text": "Add chili flakes for extra heat"},
            ],
        ),
        (
            "dict missing title gets empty title",
            [{"text": "A note without a title"}],
            [{"title": "", "text": "A note without a title"}],
        ),
        (
            "dict missing text is skipped",
            [{"title": "Only title, no text"}],
            [],
        ),
        (
            "plain string becomes note with empty title",
            ["A plain text note"],
            [{"title": "", "text": "A plain text note"}],
        ),
        (
            "mixed valid and invalid entries",
            [
                {"title": "Valid", "text": "Has both fields"},
                {"title": "No text"},
                "Plain string note",
            ],
            [
                {"title": "Valid", "text": "Has both fields"},
                {"title": "", "text": "Plain string note"},
            ],
        ),
        (
            "empty list",
            [],
            [],
        ),
        (
            "non-list returns None",
            "not a list",
            None,
        ),
        (
            "none returns None",
            None,
            None,
        ),
    )
)
@pytest.mark.parametrize(
    "case",
    clean_notes_test_cases,
    ids=[notes_case.test_id for notes_case in clean_notes_test_cases],
)
def test_cleaner_clean_notes(case: CleanerCase) -> None:
    """Check that cleaner.clean_notes returns the expected value for each notes case."""
    cleaned = cleaner.clean_notes(case.input)
    assert case.expected == cleaned
@pytest.mark.parametrize(
"t,max_components,max_decimal_places,expected",
[