feat: Paprika/Mealie Migration support (#873)

* feat:  paprika support - partial

* feat:  add full paprika support

* re-organize data directory

* add data directory auto-gen

* rewrite migration tests

* remove print statements

* remove hard-coded paths

* add auto-tag support

* add mealie migration support

add looking for migration button
This commit is contained in:
Hayden
2021-12-09 19:52:53 -09:00
committed by GitHub
parent 5839992c19
commit 8d77f4b31e
76 changed files with 718 additions and 4056 deletions

View File

@@ -229,8 +229,12 @@ class AccessModel(Generic[T, D]):
result = self.session.query(self.sql_model).filter_by(**{self.primary_key: primary_key_value}).one()
results_as_model = self.schema.from_orm(result)
self.session.delete(result)
self.session.commit()
try:
self.session.delete(result)
self.session.commit()
except Exception as e:
self.session.rollback()
raise e
if self.observers:
self.update_observers()

View File

@@ -31,7 +31,7 @@ class Tag(SqlAlchemyBase, BaseMixins):
assert name != ""
return name
def __init__(self, name, session=None) -> None:
def __init__(self, name, **_) -> None:
self.name = name.strip()
self.slug = slugify(self.name)

View File

@@ -14,6 +14,7 @@ router = UserAPIRouter()
@router.post("", response_model=ReportSummary)
def start_data_migration(
add_migration_tag: bool = Form(False),
migration_type: SupportedMigrations = Form(...),
archive: UploadFile = File(...),
temp_path: str = Depends(temporary_zip_path),
@@ -23,4 +24,4 @@ def start_data_migration(
with temp_path.open("wb") as buffer:
shutil.copyfileobj(archive.file, buffer)
return gm_service.migrate(migration_type, temp_path)
return gm_service.migrate(migration_type, add_migration_tag, temp_path)

View File

@@ -6,6 +6,8 @@ from fastapi_camelcase import CamelModel
class SupportedMigrations(str, enum.Enum):
nextcloud = "nextcloud"
chowdown = "chowdown"
paprika = "paprika"
mealie_alpha = "mealie_alpha"
class DataMigrationCreate(CamelModel):

View File

@@ -11,6 +11,8 @@ from mealie.schema.reports.reports import ReportOut, ReportSummary
from mealie.services._base_http_service.http_services import UserHttpService
from mealie.services.events import create_group_event
from mealie.services.migrations import ChowdownMigrator, NextcloudMigrator
from mealie.services.migrations.mealie_alpha import MealieAlphaMigrator
from mealie.services.migrations.paprika import PaprikaMigrator
logger = get_logger(module=__name__)
@@ -24,14 +26,29 @@ class GroupMigrationService(UserHttpService[int, ReportOut]):
def dal(self):
raise NotImplementedError
def populate_item(self, id: UUID4) -> ReportOut:
def populate_item(self, _: UUID4) -> ReportOut:
return None
def migrate(self, migration: SupportedMigrations, archive: Path) -> ReportSummary:
def migrate(self, migration: SupportedMigrations, add_migration_tag: bool, archive: Path) -> ReportSummary:
args = {
"archive": archive,
"db": self.db,
"session": self.session,
"user_id": self.user.id,
"group_id": self.group_id,
"add_migration_tag": add_migration_tag,
}
if migration == SupportedMigrations.nextcloud:
self.migration_type = NextcloudMigrator(archive, self.db, self.session, self.user.id, self.group_id)
self.migration_type = NextcloudMigrator(**args)
if migration == SupportedMigrations.chowdown:
self.migration_type = ChowdownMigrator(archive, self.db, self.session, self.user.id, self.group_id)
self.migration_type = ChowdownMigrator(**args)
if migration == SupportedMigrations.paprika:
self.migration_type = PaprikaMigrator(**args)
if migration == SupportedMigrations.mealie_alpha:
self.migration_type = MealieAlphaMigrator(**args)
return self.migration_type.migrate(f"{migration.value.title()} Migration")

View File

@@ -16,6 +16,7 @@ from mealie.schema.reports.reports import (
from mealie.services.scraper import cleaner
from .._base_service import BaseService
from .utils.database_helpers import DatabaseMigrationHelpers
from .utils.migration_alias import MigrationAlias
@@ -26,17 +27,22 @@ class BaseMigrator(BaseService):
report_id: int
report: ReportOut
def __init__(self, archive: Path, db: Database, session, user_id: int, group_id: UUID):
def __init__(self, archive: Path, db: Database, session, user_id: int, group_id: UUID, add_migration_tag: bool):
self.archive = archive
self.db = db
self.session = session
self.user_id = user_id
self.group_id = group_id
self.add_migration_tag = add_migration_tag
self.name = "migration"
self.report_entries = []
self.logger = root_logger.get_logger()
self.helpers = DatabaseMigrationHelpers(self.db, self.session, self.group_id, self.user_id)
super().__init__()
def _migrate(self) -> None:
@@ -94,6 +100,8 @@ class BaseMigrator(BaseService):
Args:
validated_recipes (list[Recipe]):
"""
if self.add_migration_tag:
migration_tag = self.helpers.get_or_set_tags([self.name])[0]
return_vars = []
@@ -102,6 +110,9 @@ class BaseMigrator(BaseService):
recipe.user_id = self.user_id
recipe.group_id = self.group_id
if self.add_migration_tag:
recipe.tags.append(migration_tag)
exception = ""
status = False
try:
@@ -109,7 +120,7 @@ class BaseMigrator(BaseService):
status = True
except Exception as inst:
exception = inst
exception = str(inst)
self.logger.exception(inst)
self.session.rollback()
@@ -165,6 +176,17 @@ class BaseMigrator(BaseService):
dictionary and returns the result unpacked into a Recipe object
"""
recipe_dict = self.rewrite_alias(recipe_dict)
# Temporary hold out of recipe_dict
# temp_categories = recipe_dict["recipeCategory"]
# temp_tools = recipe_dict["tools"]
# temp_tasg = recipe_dict["tags"]
recipe_dict = cleaner.clean(recipe_dict, url=recipe_dict.get("org_url", None))
# Reassign after cleaning
# recipe_dict["recipeCategory"] = temp_categories
# recipe_dict["tools"] = temp_tools
# recipe_dict["tags"] = temp_tasg
return Recipe(**recipe_dict)

View File

@@ -1,9 +1,6 @@
import tempfile
import zipfile
from pathlib import Path
from uuid import UUID
from mealie.db.database import Database
from ._migration_base import BaseMigrator
from .utils.migration_alias import MigrationAlias
@@ -11,8 +8,10 @@ from .utils.migration_helpers import MigrationReaders, import_image, split_by_co
class ChowdownMigrator(BaseMigrator):
def __init__(self, archive: Path, db: Database, session, user_id: int, group_id: UUID):
super().__init__(archive, db, session, user_id, group_id)
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.name = "chowdown"
self.key_aliases = [
MigrationAlias(key="name", alias="title", func=None),

View File

@@ -0,0 +1,87 @@
import shutil
import tempfile
import zipfile
from pathlib import Path
from mealie.schema.recipe.recipe import Recipe
from ._migration_base import BaseMigrator
from .utils.migration_alias import MigrationAlias
from .utils.migration_helpers import MigrationReaders, split_by_comma
class MealieAlphaMigrator(BaseMigrator):
    """Migrator that imports recipes from a zip archive of Mealie alpha JSON files.

    Every non-hidden ``*.json`` file in the archive that carries a ``slug`` key
    is treated as one recipe. After a recipe is imported successfully, the
    directory that contained its JSON file is copied to the recipe's data
    directory.
    """

    def __init__(self, **kwargs):
        """Forward all keyword arguments to ``BaseMigrator`` and set the migration name."""
        super().__init__(**kwargs)

        self.name = "mealie_alpha"

        self.key_aliases = [
            MigrationAlias(key="name", alias="title", func=None),
            MigrationAlias(key="recipeIngredient", alias="ingredients", func=None),
            MigrationAlias(key="recipeInstructions", alias="directions", func=None),
            MigrationAlias(key="tags", alias="tags", func=split_by_comma),
        ]

    def _convert_to_new_schema(self, recipe: dict) -> Recipe:
        """Reshape an alpha-format recipe dictionary in place and build a ``Recipe``.

        Args:
            recipe: Recipe data as read from the exported JSON file. It is mutated.

        Returns:
            The ``Recipe`` built from the adjusted dictionary.
        """
        # "categories" moves to "recipeCategory". Blank names are dropped while
        # moving; filtering afterwards would never see the already-removed key.
        categories = recipe.get("categories")
        if categories:
            if isinstance(categories, list):
                categories = [cat for cat in categories if cat != ""]
            recipe["recipeCategory"] = categories
            del recipe["categories"]

        # Remove each legacy key independently so a missing "_id" does not
        # leave "date_added" behind.
        recipe.pop("_id", None)
        recipe.pop("date_added", None)

        # Migration from list to Object Type Data
        tags = recipe.get("tags")
        if isinstance(tags, list):
            recipe["tags"] = [tag for tag in tags if tag != ""]

        # A list-valued "extras" is replaced by an empty mapping; a missing key
        # is left alone instead of raising KeyError.
        if isinstance(recipe.get("extras"), list):
            recipe["extras"] = {}

        # Comments are always reset to an empty list.
        recipe["comments"] = []

        return Recipe(**recipe)

    def _migrate(self) -> None:
        """Extract the archive, import every recipe and copy the assets of successful imports."""
        with tempfile.TemporaryDirectory() as tmpdir:
            with zipfile.ZipFile(self.archive) as zip_file:
                zip_file.extractall(tmpdir)

            temp_path = Path(tmpdir)

            recipe_lookup: dict[str, Path] = {}
            recipes_as_dicts = []

            for json_path in temp_path.rglob("**/[!.]*.json"):
                if (data := MigrationReaders.json(json_path)) is None:
                    continue

                # JSON files without a slug cannot be recipes; skipping them
                # keeps one stray file from aborting the whole migration.
                if not isinstance(data, dict) or "slug" not in data:
                    continue

                recipes_as_dicts.append(data)
                recipe_lookup[data["slug"]] = json_path.parent

            recipes = [self._convert_to_new_schema(data) for data in recipes_as_dicts]

            results = self.import_recipes_to_database(recipes)

            recipe_model_lookup = {recipe.slug: recipe for recipe in recipes}

            for slug, status in results:
                if not status:
                    continue

                model = recipe_model_lookup.get(slug)
                source_dir = recipe_lookup.get(slug)

                # Either lookup can miss if the reported slug differs from the
                # slug stored in the exported file.
                if model is None or source_dir is None:
                    self.logger.error(f"Could not locate exported files for {slug}, skipping asset copy")
                    continue

                dest_dir = model.directory

                # copytree requires that the destination does not exist yet.
                if dest_dir.exists():
                    shutil.rmtree(dest_dir)

                shutil.copytree(source_dir, dest_dir)

View File

@@ -3,12 +3,9 @@ import zipfile
from dataclasses import dataclass
from pathlib import Path
from typing import Optional
from uuid import UUID
from slugify import slugify
from mealie.db.database import Database
from ._migration_base import BaseMigrator
from .utils.migration_alias import MigrationAlias
from .utils.migration_helpers import MigrationReaders, glob_walker, import_image, split_by_comma
@@ -40,8 +37,10 @@ class NextcloudDir:
class NextcloudMigrator(BaseMigrator):
def __init__(self, archive: Path, db: Database, session, user_id: int, group_id: UUID):
super().__init__(archive, db, session, user_id, group_id)
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.name = "nextcloud"
self.key_aliases = [
MigrationAlias(key="tags", alias="keywords", func=split_by_comma),

View File

@@ -0,0 +1,93 @@
import base64
import io
import json
import tempfile
import zipfile
from gzip import GzipFile
from pathlib import Path
import regex as re
from slugify import slugify
from mealie.schema.recipe import RecipeNote
from ._migration_base import BaseMigrator
from .utils.migration_alias import MigrationAlias
from .utils.migration_helpers import import_image
def paprika_recipes(file: Path):
    """Yield every recipe contained in a Paprika export archive.

    The archive is a zip file; each ``*.paprikarecipe`` member is a
    gzip-compressed JSON document. Members whose file name starts with a dot
    are excluded by the glob pattern.

    Args:
        file: Path to the zip archive.

    Yields:
        The parsed JSON content of each recipe file.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        with zipfile.ZipFile(file) as zip_file:
            zip_file.extractall(tmpdir)

        for recipe_path in Path(tmpdir).glob("**/[!.]*.paprikarecipe"):
            # GzipFile's first positional parameter is the file name, not the
            # mode, so both are passed by keyword.
            with GzipFile(filename=recipe_path, mode="rb") as recipe_json:
                recipe = json.load(recipe_json)

            yield recipe
class PaprikaMigrator(BaseMigrator):
    """Migrator that imports recipes, and their embedded photos, from a Paprika export archive."""

    def __init__(self, **kwargs):
        """Forward all keyword arguments to ``BaseMigrator`` and register the Paprika key aliases."""
        super().__init__(**kwargs)

        self.name = "paprika"

        # Matches a leading "1. " style list number on a direction.
        re_num_list = re.compile(r"^\d+\.\s")

        self.key_aliases = [
            MigrationAlias(key="recipeIngredient", alias="ingredients", func=lambda x: x.split("\n")),
            MigrationAlias(key="orgUrl", alias="source_url", func=None),
            MigrationAlias(key="performTime", alias="cook_time", func=None),
            MigrationAlias(key="recipeYield", alias="servings", func=None),
            MigrationAlias(key="image", alias="image_url", func=None),
            # Keep only the text before the first space. partition() returns the
            # whole value when there is no space, instead of dropping its last
            # character as slicing with find() == -1 would.
            MigrationAlias(key="dateAdded", alias="created", func=lambda x: x.partition(" ")[0]),
            MigrationAlias(
                key="notes",
                alias="notes",
                func=lambda x: [RecipeNote(title="", text=x)] if x else [],
            ),
            MigrationAlias(
                key="recipeCategory",
                alias="categories",
                func=self.helpers.get_or_set_category,
            ),
            MigrationAlias(
                key="recipeInstructions",
                alias="directions",
                func=lambda x: [{"text": re_num_list.sub("", s)} for s in x.split("\n\n")],
            ),
        ]

    def _migrate(self) -> None:
        """Import every named recipe from the archive, then import the photo of each successful one."""
        recipe_image_urls = {}

        recipes = []
        for recipe in paprika_recipes(self.archive):
            if "name" not in recipe:
                continue

            recipe_model = self.clean_recipe_dictionary(recipe)

            if "photo_data" in recipe:
                recipe_image_urls[slugify(recipe["name"])] = recipe["photo_data"]

            recipes.append(recipe_model)

        results = self.import_recipes_to_database(recipes)

        for slug, status in results:
            if not status:
                continue

            # A recipe without photo data has nothing to import and is not an error.
            photo_data = recipe_image_urls.get(slug)
            if not photo_data:
                continue

            try:
                # Images are stored as base64 encoded strings, so we need to decode them before importing.
                image_bytes = base64.b64decode(photo_data)

                with tempfile.NamedTemporaryFile(suffix=".jpeg") as temp_file:
                    temp_file.write(image_bytes)
                    # import_image receives only the path, so buffered bytes
                    # must reach the file before it is called.
                    temp_file.flush()
                    import_image(Path(temp_file.name), slug)
            except Exception as e:
                self.logger.error(f"Failed to download image for {slug}: {e}")

View File

@@ -0,0 +1,51 @@
from typing import TypeVar
from pydantic import BaseModel
from slugify import slugify
from sqlalchemy.orm import Session
from mealie.db.data_access_layer.access_model_factory import AccessModel
from mealie.db.database import Database
from mealie.schema.recipe import RecipeCategory
from mealie.schema.recipe.recipe import RecipeTag
T = TypeVar("T", bound=BaseModel)
class DatabaseMigrationHelpers:
    """Look up categories and tags by slug during a migration, creating the ones that are missing."""

    def __init__(self, db: Database, session: Session, group_id: int, user_id: int) -> None:
        self.group_id = group_id
        self.user_id = user_id
        self.session = session
        self.db = db

    def _get_or_set_generic(self, accessor: AccessModel, items: list[str], out_model: type[T]) -> list[dict]:
        """
        Utility method for getting or setting categories or tags. This will only work for those two cases.

        This is probably a bad implementation of this pattern.

        Args:
            accessor: Data access object used to look up and create the items.
            items: Names of the categories or tags to fetch or create.
            out_model: Schema class used both as the lookup schema and to build new items.

        Returns:
            One dictionary (``item.dict()``) per non-blank name, in input order.
        """
        items_out = []

        for item_name in items:
            # A blank name has an empty slug and cannot be stored, so it is skipped.
            if not item_name or not item_name.strip():
                continue

            slug_lookup = slugify(item_name)

            item_model = accessor.get_one(slug_lookup, "slug", override_schema=out_model)

            if not item_model:
                item_model = accessor.create(
                    out_model(
                        name=item_name,
                        slug=slug_lookup,
                    )
                )

            items_out.append(item_model.dict())

        return items_out

    def get_or_set_category(self, categories: list[str]) -> list[dict]:
        """Return the dictionary form of each named category, creating missing categories."""
        return self._get_or_set_generic(self.db.categories, categories, RecipeCategory)

    def get_or_set_tags(self, tags: list[str]) -> list[dict]:
        """Return the dictionary form of each named tag, creating missing tags."""
        return self._get_or_set_generic(self.db.tags, tags, RecipeTag)

View File

@@ -56,10 +56,20 @@ def clean_string(text: str) -> str:
def category(category: str):
if isinstance(category, list) and len(category) > 0 and isinstance(category[0], dict):
# If the category is a list of dicts, it's probably from a migration
# validate that the required fields are present
valid = []
for cat in category:
if "name" in cat and "slug" in cat:
valid.append(cat)
return valid
if isinstance(category, str) and category != "":
return [category]
else:
return []
return []
def clean_html(raw_html):
@@ -201,7 +211,7 @@ def yield_amount(yld) -> str:
def clean_time(time_entry):
if time_entry is None:
if time_entry is None or time_entry == "" or time_entry == " ":
return None
elif isinstance(time_entry, timedelta):
return pretty_print_timedelta(time_entry)
@@ -214,13 +224,11 @@ def clean_time(time_entry):
return pretty_print_timedelta(time_delta_object)
except ValueError:
logger.error(f"Could not parse time_entry `{time_entry}`")
return str(time_entry)
else:
return str(time_entry)
# ! TODO: Cleanup Code Below
def parse_duration(iso_duration):
"""Parses an ISO 8601 duration string into a datetime.timedelta instance.
Args:
@@ -253,8 +261,9 @@ def parse_duration(iso_duration):
def pretty_print_timedelta(t: timedelta, max_components=None, max_decimal_places=2):
"""
Print a pretty string for a timedelta.
For example datetime.timedelta(days=2, seconds=17280) will be printed as '2 days 4 Hours 48 Minutes'. Setting max_components to e.g. 1 will change this to '2.2 days', where the
number of decimal points can also be set.
For example datetime.timedelta(days=2, seconds=17280) will be printed as '2 days 4 Hours 48 Minutes'.
Setting max_components to e.g. 1 will change this to '2.2 days', where the number of decimal
points can also be set.
"""
time_scale_names_dict = {
timedelta(days=365): "year",