Merge branch 'mealie-next' into fix/translation-issues-when-scraping

This commit is contained in:
Michael Genson
2023-12-11 13:01:26 -06:00
committed by GitHub
74 changed files with 505 additions and 672 deletions

View File

@@ -90,10 +90,10 @@ def clean_image(image: str | list | dict | None = None, default: str = "no image
image attempts to parse the image field from a recipe and return a string. Currently
Supported Structures:
- `https://exmaple.com` - A string
- `{ "url": "https://exmaple.com" }` - A dictionary with a `url` key
- `["https://exmaple.com"]` - A list of strings
- `[{ "url": "https://exmaple.com" }]` - A list of dictionaries with a `url` key
- `https://example.com` - A string
- `{ "url": "https://example.com" }` - A dictionary with a `url` key
- `["https://example.com"]` - A list of strings
- `[{ "url": "https://example.com" }]` - A list of dictionaries with a `url` key
Raises:
TypeError: If the image field is not a supported type a TypeError is raised.
@@ -113,8 +113,11 @@ def clean_image(image: str | list | dict | None = None, default: str = "no image
return [x["url"] for x in image]
case {"url": str(image)}:
return [image]
case [{"@id": str(_)}, *_]:
return [x["@id"] for x in image]
case _:
raise TypeError(f"Unexpected type for image: {type(image)}, {image}")
logger.exception(f"Unexpected type for image: {type(image)}, {image}")
return [default]
def clean_instructions(steps_object: list | dict | str, default: list | None = None) -> list[dict]:

View File

@@ -212,10 +212,6 @@ class RecipeScraperPackage(ABCScraperStrategy):
class RecipeScraperOpenGraph(ABCScraperStrategy):
"""
Abstract class for all recipe parsers.
"""
async def get_html(self, url: str) -> str:
return await safe_scrape_html(url)
@@ -245,7 +241,7 @@ class RecipeScraperOpenGraph(ABCScraperStrategy):
"recipeIngredient": ["Could not detect ingredients"],
"recipeInstructions": [{"text": "Could not detect instructions"}],
"slug": slugify(og_field(properties, "og:title")),
"orgURL": og_field(properties, "og:url"),
"orgURL": self.url,
"categories": [],
"tags": og_fields(properties, "og:article:tag"),
"dateAdded": None,