feat: Migrate from Copy Me That (#2212)

* implemented copymethat migration

* added migration tree

* added translation support

* genericized example jpgs

* added test data

* fixed test archive

* switched recipe create to service
added test for timeline event creation

* linting

* lxml go brrr
This commit is contained in:
Michael Genson
2023-03-12 15:37:24 -05:00
committed by GitHub
parent 3ce8fa9492
commit 3118b0e423
17 changed files with 288 additions and 36 deletions

View File

@@ -1,4 +1,5 @@
from .chowdown import *
from .copymethat import *
from .mealie_alpha import *
from .nextcloud import *
from .paprika import *

View File

@@ -5,6 +5,7 @@ from uuid import UUID
from pydantic import UUID4
from mealie.core import root_logger
from mealie.core.exceptions import UnexpectedNone
from mealie.repos.all_repositories import AllRepositories
from mealie.schema.recipe import Recipe
from mealie.schema.recipe.recipe_settings import RecipeSettings
@@ -16,6 +17,7 @@ from mealie.schema.reports.reports import (
ReportSummary,
ReportSummaryStatus,
)
from mealie.services.recipe.recipe_service import RecipeService
from mealie.services.scraper import cleaner
from .._base_service import BaseService
@@ -38,17 +40,27 @@ class BaseMigrator(BaseService):
self.archive = archive
self.db = db
self.session = session
self.user_id = user_id
self.group_id = group_id
self.add_migration_tag = add_migration_tag
user = db.users.get_one(user_id)
if not user:
raise UnexpectedNone(f"Cannot find user {user_id}")
group = db.groups.get_one(group_id)
if not group:
raise UnexpectedNone(f"Cannot find group {group_id}")
self.user = user
self.group = group
self.name = "migration"
self.report_entries = []
self.logger = root_logger.get_logger()
self.helpers = DatabaseMigrationHelpers(self.db, self.session, self.group_id, self.user_id)
self.helpers = DatabaseMigrationHelpers(self.db, self.session, self.group.id, self.user.id)
self.recipe_service = RecipeService(db, user, group)
super().__init__()
@@ -60,7 +72,7 @@ class BaseMigrator(BaseService):
name=report_name,
category=ReportCategory.migration,
status=ReportSummaryStatus.in_progress,
group_id=self.group_id,
group_id=self.group.id,
)
self.report = self.db.group_reports.create(report_to_save)
@@ -117,25 +129,23 @@ class BaseMigrator(BaseService):
return_vars: list[tuple[str, UUID4, bool]] = []
group = self.db.groups.get_one(self.group_id)
if not group or not group.preferences:
if not self.group.preferences:
raise ValueError("Group preferences not found")
default_settings = RecipeSettings(
public=group.preferences.recipe_public,
show_nutrition=group.preferences.recipe_show_nutrition,
show_assets=group.preferences.recipe_show_assets,
landscape_view=group.preferences.recipe_landscape_view,
disable_comments=group.preferences.recipe_disable_comments,
disable_amount=group.preferences.recipe_disable_amount,
public=self.group.preferences.recipe_public,
show_nutrition=self.group.preferences.recipe_show_nutrition,
show_assets=self.group.preferences.recipe_show_assets,
landscape_view=self.group.preferences.recipe_landscape_view,
disable_comments=self.group.preferences.recipe_disable_comments,
disable_amount=self.group.preferences.recipe_disable_amount,
)
for recipe in validated_recipes:
recipe.settings = default_settings
recipe.user_id = self.user_id
recipe.group_id = self.group_id
recipe.user_id = self.user.id
recipe.group_id = self.group.id
if recipe.tags:
recipe.tags = self.helpers.get_or_set_tags(x.name for x in recipe.tags)
@@ -151,7 +161,7 @@ class BaseMigrator(BaseService):
exception: str | Exception = ""
status = False
try:
recipe = self.db.recipes.create(recipe)
recipe = self.recipe_service.create_one(recipe)
status = True
except Exception as inst:

View File

@@ -0,0 +1,123 @@
import tempfile
import zipfile
from datetime import datetime
from pathlib import Path
from bs4 import BeautifulSoup
from mealie.schema.reports.reports import ReportEntryCreate
from ._migration_base import BaseMigrator
from .utils.migration_alias import MigrationAlias
from .utils.migration_helpers import import_image
def parse_recipe_tags(tags: list) -> list[str]:
    """Filter a raw tag list down to valid tag strings.

    Drops falsy entries, non-string entries, and the "Tags:" header
    element that Copy Me That includes in its exports.
    """
    return [tag for tag in tags if tag and isinstance(tag, str) and "Tags:" not in tag]
class CopyMeThatMigrator(BaseMigrator):
    """Migrator for Copy Me That HTML export archives.

    The export is a zip containing HTML files; each recipe is a
    ``<div class="recipe">`` whose child elements carry recipe fields,
    keyed by element id (lists become arrays, other tags become text).
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        self.name = "copymethat"

        self.key_aliases = [
            # "made_this" is only a marker element in the export; record the import time instead
            MigrationAlias(key="last_made", alias="made_this", func=lambda x: datetime.now()),
            MigrationAlias(key="notes", alias="recipeNotes"),
            MigrationAlias(key="orgURL", alias="original_link"),
            MigrationAlias(key="rating", alias="ratingValue"),
            MigrationAlias(key="recipeIngredient", alias="recipeIngredients"),
            # source renders e.g. "servings:4"; re-space to "servings: 4"
            MigrationAlias(key="recipeYield", alias="servings", func=lambda x: x.replace(":", ": ")),
        ]

    def _process_recipe_document(self, source_dir: Path, soup: BeautifulSoup) -> dict:
        """Reads a single recipe's HTML and converts it to a dictionary"""

        recipe_dict: dict = {}
        recipe_tags: list[str] = []
        for tag in soup.find_all():
            # the recipe image tag has no id, so we parse it directly
            if tag.name == "img" and "recipeImage" in tag.get("class", []):
                if image_path := tag.get("src"):
                    recipe_dict["image"] = str(source_dir.joinpath(image_path))

                continue

            # tags (internally named categories) are not in a list, and don't have ids
            if tag.name == "span" and "recipeCategory" in tag.get("class", []):
                recipe_tag = tag.get_text(strip=True)
                if "Tags:" not in recipe_tag:
                    recipe_tags.append(recipe_tag)

                continue

            # add only elements with an id to the recipe dictionary
            if not (tag_id := tag.get("id")):
                continue

            # for lists, store the list items as an array (e.g. for recipe instructions)
            if tag.name in ["ul", "ol"]:
                recipe_dict[tag_id] = [item.get_text(strip=True) for item in tag.find_all("li", recursive=False)]
                continue

            # for all other tags, write the text directly to the recipe data
            recipe_dict[tag_id] = tag.get_text(strip=True)

        if recipe_tags:
            recipe_dict["tags"] = recipe_tags

        return recipe_dict

    def _migrate(self) -> None:
        """Extract the archive, parse every recipe div, and import the results.

        Parse failures are recorded as report entries rather than aborting,
        since many recipes live in one HTML file.
        """
        with tempfile.TemporaryDirectory() as tmpdir:
            with zipfile.ZipFile(self.archive) as zip_file:
                zip_file.extractall(tmpdir)

            source_dir = Path(tmpdir)

            recipes_as_dicts: list[dict] = []
            for recipes_data_file in source_dir.glob("*.html"):
                with open(recipes_data_file, encoding="utf-8") as f:
                    soup = BeautifulSoup(f, "lxml")
                    for recipe_data in soup.find_all("div", class_="recipe"):
                        try:
                            recipes_as_dicts.append(self._process_recipe_document(source_dir, recipe_data))

                        # since recipes are stored in one large file, we keep going on error
                        except Exception as e:
                            self.report_entries.append(
                                ReportEntryCreate(
                                    report_id=self.report_id,
                                    success=False,
                                    message="Failed to parse recipe",
                                    exception=f"{type(e).__name__}: {e}",
                                )
                            )

            recipes = [self.clean_recipe_dictionary(x) for x in recipes_as_dicts]
            results = self.import_recipes_to_database(recipes)
            recipe_lookup = {r.slug: r for r in recipes}
            for slug, recipe_id, status in results:
                if not status:
                    continue

                # NOTE: dict.get never raises StopIteration; the previous
                # try/except here was dead code left over from a next()-based lookup
                r = recipe_lookup.get(slug)
                if not r or not r.image:
                    continue

                import_image(r.image, recipe_id)

View File

@@ -81,10 +81,17 @@ def glob_walker(directory: Path, glob_str: str, return_parent=True) -> list[Path
return matches
def import_image(src: str | Path, recipe_id: UUID4):
    """Copy a migrated recipe's image into the image directory.

    Accepts either a string or a ``Path``; silently no-ops when the
    source file does not exist. Minification is done in bulk after the
    migration completes, so the image is written as-is here.
    """
    if isinstance(src, str):
        src = Path(src)

    # best-effort: a missing image should not fail the migration
    if not src.exists():
        return

    data_service = RecipeDataService(recipe_id=recipe_id)
    data_service.write_image(src, src.suffix)

View File

@@ -49,7 +49,9 @@ def clean(recipe_data: dict, url=None) -> dict:
recipe_data["recipeInstructions"] = clean_instructions(recipe_data.get("recipeInstructions", []))
recipe_data["image"] = clean_image(recipe_data.get("image"))[0]
recipe_data["slug"] = slugify(recipe_data.get("name", ""))
recipe_data["orgURL"] = url
recipe_data["orgURL"] = url or recipe_data.get("orgURL")
recipe_data["notes"] = clean_notes(recipe_data.get("notes"))
recipe_data["rating"] = clean_int(recipe_data.get("rating"))
return recipe_data
@@ -255,6 +257,48 @@ def clean_ingredients(ingredients: list | str | None, default: list | None = Non
raise TypeError(f"Unexpected type for ingredients: {type(ingredients)}, {ingredients}")
def clean_int(val: str | int | None, min: int | None = None, max: int | None = None):
if val is None or isinstance(val, int):
return val
filtered_val = "".join(c for c in val if c.isnumeric())
if not filtered_val:
return None
val = int(filtered_val)
if min is None or max is None:
return val
if not (min <= val <= max):
return None
return val
def clean_notes(notes: typing.Any) -> list[dict] | None:
if not isinstance(notes, list):
return None
parsed_notes: list[dict] = []
for note in notes:
if not isinstance(note, (str, dict)):
continue
if isinstance(note, dict):
if "text" not in note:
continue
if "title" not in note:
note["title"] = ""
parsed_notes.append(note)
continue
parsed_notes.append({"title": "", "text": note})
return parsed_notes
def clean_yield(yld: str | list[str] | None) -> str:
"""
yield_amount attempts to parse out the yield amount from a recipe.