From 326bb1eb8ea9d5de7cd0a34fa659e74f2d0d0418 Mon Sep 17 00:00:00 2001
From: Michael Genson <71845777+michael-genson@users.noreply.github.com>
Date: Fri, 12 Dec 2025 18:30:49 -0600
Subject: [PATCH] feat: Reprocess image user script (#6704)

---
 .../documentation/getting-started/updating.md |  17 ++
 mealie/pkgs/img/minify.py                     |  20 +-
 mealie/scripts/reprocess_images.py            | 190 ++++++++++++++++++
 mealie/services/recipe/recipe_data_service.py |   4 +-
 4 files changed, 220 insertions(+), 11 deletions(-)
 create mode 100644 mealie/scripts/reprocess_images.py

diff --git a/docs/docs/documentation/getting-started/updating.md b/docs/docs/documentation/getting-started/updating.md
index 6bca130f2..89889de91 100644
--- a/docs/docs/documentation/getting-started/updating.md
+++ b/docs/docs/documentation/getting-started/updating.md
@@ -9,6 +9,23 @@
 - Create a Backup and Download from the UI
 - Upgrade
 
+!!! info "Improved Image Processing"
+    Starting with :octicons-tag-24: v3.7.0, we updated our image processing algorithm to improve image quality and compression. Newly processed images can be up to 40-50% smaller on disk while providing higher-resolution thumbnails. To take advantage of these improvements on older recipes, you can run our image-processing script:
+
+    ```shell
+    docker exec -it mealie bash
+    python /opt/mealie/lib64/python3.12/site-packages/mealie/scripts/reprocess_images.py
+    ```
+
+    ### Options
+    - `--workers N`: Number of worker threads (default: 2, safe for low-powered devices)
+    - `--force-all`: Reprocess all recipes regardless of current image state
+
+    ### Example
+    ```shell
+    python /opt/mealie/lib64/python3.12/site-packages/mealie/scripts/reprocess_images.py --workers 8
+    ```
+
 ## Upgrading to Mealie v1 or later
 
 If you are upgrading from pre-v1.0.0 to v1.0.0 or later (v2.0.0, etc.), make sure you read [Migrating to Mealie v1](./migrating-to-mealie-v1.md)!
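
To spot-check a single recipe after the script runs, you can apply the same heuristic the script itself uses (see `check_if_tiny_image_is_old` in `reprocess_images.py` below): old-style tiny thumbnails are exactly 300x300, while reprocessed ones are larger. A minimal sketch, assuming a conventional Mealie data layout; the recipe directory path is a placeholder you must adjust for your deployment:

```python
# Minimal sketch: check one recipe's tiny thumbnail after reprocessing.
# Mirrors check_if_tiny_image_is_old from reprocess_images.py below.
from pathlib import Path

from PIL import Image

# Placeholder path; point this at a real recipe's image directory.
recipe_images = Path("/app/data/recipes/<recipe-id>/images")

with Image.open(recipe_images / "tiny-original.webp") as img:
    # Old processing produced exactly 300x300 tiny images.
    is_old = (img.width, img.height) == (300, 300)
    print("still old-style" if is_old else "reprocessed")
```
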
diff --git a/mealie/pkgs/img/minify.py b/mealie/pkgs/img/minify.py
index c528543c2..e21af345e 100644
--- a/mealie/pkgs/img/minify.py
+++ b/mealie/pkgs/img/minify.py
@@ -44,10 +44,10 @@ class ABCMinifier(ABC):
     def __init__(self, purge=False, opts: MinifierOptions | None = None, logger: Logger | None = None):
         self._purge = purge
         self._opts = opts or MinifierOptions()
-        self._logger = logger or Logger("Minifier")
+        self.logger = logger or Logger("Minifier")
 
     def get_image_sizes(self, org_img: Path, min_img: Path, tiny_img: Path):
-        self._logger.info(
+        self.logger.info(
             f"{org_img.name} Minified: {sizeof_fmt(org_img)} -> {sizeof_fmt(min_img)} -> {sizeof_fmt(tiny_img)}"
         )
 
@@ -162,7 +162,7 @@ class PillowMinifier(ABCMinifier):
         tiny_dest = image_path.parent.joinpath("tiny-original.webp")
 
         if not force and min_dest.exists() and tiny_dest.exists() and org_dest.exists():
-            self._logger.info(f"{image_path.name} already exists in all formats")
+            self.logger.info(f"{image_path.name} already exists in all formats")
             return
 
         success = False
@@ -171,35 +171,35 @@
             with Image.open(image_path) as img:
                 if self._opts.original:
                     if not force and org_dest.exists():
-                        self._logger.info(f"{org_dest} already exists")
+                        self.logger.info(f"{org_dest} already exists")
                     else:
                         original = img.copy()
                         original.thumbnail((2048, 2048), Image.LANCZOS)
                         result_path = PillowMinifier.to_webp(dest=org_dest, quality=80, img=original)
-                        self._logger.info(f"{result_path} created")
+                        self.logger.info(f"{result_path} created")
                         success = True
 
                 if self._opts.miniature:
                     if not force and min_dest.exists():
-                        self._logger.info(f"{min_dest} already exists")
+                        self.logger.info(f"{min_dest} already exists")
                     else:
                         mini = img.copy()
                         mini.thumbnail((1024, 1024), Image.LANCZOS)
                         result_path = PillowMinifier.to_webp(dest=min_dest, quality=80, img=mini)
-                        self._logger.info(f"{result_path} created")
+                        self.logger.info(f"{result_path} created")
                         success = True
 
                 if self._opts.tiny:
                     if not force and tiny_dest.exists():
-                        self._logger.info(f"{tiny_dest} already exists")
+                        self.logger.info(f"{tiny_dest} already exists")
                     else:
                         tiny = PillowMinifier.crop_center(img.copy(), size=(300, 300))
                         result_path = PillowMinifier.to_webp(dest=tiny_dest, quality=80, img=tiny)
-                        self._logger.info(f"{result_path} created")
+                        self.logger.info(f"{result_path} created")
                         success = True
 
         except Exception as e:
-            self._logger.error(f"[ERROR] Failed to minify {image_path.name}. Error: {e}")
+            self.logger.error(f"[ERROR] Failed to minify {image_path.name}. Error: {e}")
             raise
 
         if self._purge and success:
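
The `_logger` to `logger` rename makes the minifier's logger a public attribute, which is what allows the new script (and `RecipeDataService` below) to inject a quieter logger. A minimal sketch of standalone use, assuming you supply a real image path:

```python
# Minimal sketch: run PillowMinifier directly with an injected logger so
# per-image INFO output is suppressed during bulk work.
import logging
from pathlib import Path

from mealie.pkgs.img.minify import PillowMinifier

quiet = logging.getLogger("minifier")
quiet.setLevel(logging.WARNING)

minifier = PillowMinifier(purge=False, logger=quiet)
minifier.minify(Path("original.webp"), force=True)  # placeholder path
```
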
Error: {e}") raise if self._purge and success: diff --git a/mealie/scripts/reprocess_images.py b/mealie/scripts/reprocess_images.py new file mode 100644 index 000000000..f9b516242 --- /dev/null +++ b/mealie/scripts/reprocess_images.py @@ -0,0 +1,190 @@ +import argparse +import threading +from concurrent.futures import ThreadPoolExecutor, as_completed +from pathlib import Path + +import sqlalchemy as sa +from PIL import Image +from pydantic import UUID4 + +from mealie.core import root_logger +from mealie.db.db_setup import session_context +from mealie.db.models.recipe import RecipeModel +from mealie.services.recipe.recipe_data_service import RecipeDataService + +logger = root_logger.get_logger() +minifier_logger = root_logger.get_logger("minifier") +minifier_logger.setLevel("WARNING") + +NON_ORIGINAL_FILENAMES = {"min-original.webp", "tiny-original.webp"} + + +def check_if_tiny_image_is_old(image_path: Path) -> bool: + with Image.open(image_path) as img: + # This will miss images which were originally smaller than 300x300, + # but we probably don't care about those anyway + return img.width == 300 and img.height == 300 + + +def check_needs_reprocess(recipe_id: UUID4) -> bool: + """ + Check if a recipe's images need reprocessing by examining tiny image dimensions. + New processing creates 600x600 tiny images, old processing created 300x300. + + Returns True if needs reprocessing (has old 300x300 tiny image or missing images). + """ + + try: + service = RecipeDataService(recipe_id) + tiny_path = service.dir_image / "tiny-original.webp" + original_path = service.dir_image / "original.webp" + + if not original_path.exists(): + return False # Cannot reprocess without original image + + if not tiny_path.exists(): + return True # Needs reprocessing if tiny image is missing + + except Exception: + logger.error(f"Failed to access recipe {recipe_id} images for reprocessing check; skipping") + return False + + try: + return check_if_tiny_image_is_old(tiny_path) + except Exception: + logger.error(f"Failed to open tiny image for recipe {recipe_id}; assuming reprocessing needed") + return False + + +def fetch_recipe_ids(force_all: bool = False) -> set[UUID4]: + logger.info("Fetching recipes for image reprocessing") + + with session_context() as session: + result = session.execute(sa.text(f"SELECT id FROM {RecipeModel.__tablename__}")) + + recipe_ids = {UUID4(row[0]) for row in result} + if force_all: + logger.info("!!Force processing all recipes regardless of current image state") + return recipe_ids + + else: + return {recipe_id for recipe_id in recipe_ids if check_needs_reprocess(recipe_id)} + + +def reprocess_recipe_images(recipe_id: UUID4, force_all: bool = False) -> None: + service = RecipeDataService(recipe_id, logger=minifier_logger) + original_image = service.dir_image / "original.webp" + if not original_image.exists(): + # Double-check that original image exists. 
+        logger.error(f"Original image missing for recipe {recipe_id}; cannot reprocess")
+        return
+
+    # Reprocess recipe images
+    for image_filename in NON_ORIGINAL_FILENAMES:
+        image_file = service.dir_image / image_filename
+        image_file.unlink(missing_ok=True)
+
+    service.minifier.minify(original_image, force=True)
+
+    # Reprocess timeline event images
+    timeline_dir = service.dir_image_timeline
+    if not timeline_dir.exists():
+        return
+
+    for event_dir in timeline_dir.iterdir():
+        try:
+            if not event_dir.is_dir():
+                continue
+
+            event_original = event_dir / "original.webp"
+            if not event_original.exists():
+                continue
+
+            event_tiny = event_dir / "tiny-original.webp"
+            if not force_all and (event_tiny.exists() and not check_if_tiny_image_is_old(event_tiny)):
+                continue
+
+            for image_filename in NON_ORIGINAL_FILENAMES:
+                image_file = event_dir / image_filename
+                image_file.unlink(missing_ok=True)
+
+            service.minifier.minify(event_original, force=True)
+        except Exception:
+            # Silently skip these; they're less important, and there could be many of them, which could spam the logs
+            continue
+
+
+def process_recipe(recipe_id: UUID4, force_all: bool = False) -> tuple[UUID4, bool]:
+    """Process a single recipe's images, returning (recipe_id, success)"""
+    try:
+        reprocess_recipe_images(recipe_id, force_all)
+        return recipe_id, True
+    except Exception:
+        logger.exception(f"Failed to reprocess images for recipe {recipe_id}")
+        return recipe_id, False
+
+
+def process_all_recipes(recipe_ids: set[UUID4], force_all: bool = False, max_workers: int = 2) -> set[UUID4]:
+    """Process all given recipe IDs concurrently, returning the set of failed recipe IDs."""
+    failed_recipe_ids: set[UUID4] = set()
+    progress_freq = 20 if len(recipe_ids) <= 1000 else 100
+    progress_lock = threading.Lock()
+    completed_count = 0
+
+    with ThreadPoolExecutor(max_workers=max_workers) as executor:
+        future_to_recipe = {
+            executor.submit(process_recipe, recipe_id, force_all): recipe_id for recipe_id in recipe_ids
+        }
+
+        for future in as_completed(future_to_recipe):
+            recipe_id, success = future.result()
+            if not success:
+                failed_recipe_ids.add(recipe_id)
+
+            # Progress reporting
+            with progress_lock:
+                completed_count += 1
+                if completed_count % progress_freq == 0:
+                    perc = (completed_count / len(recipe_ids)) * 100
+                    logger.info(f"{perc:.2f}% complete ({completed_count}/{len(recipe_ids)})")
+
+    return failed_recipe_ids
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(
+        description="Reprocess old recipe images to improve compression and upgrade quality"
+    )
+    parser.add_argument("--workers", type=int, default=2, help="Number of worker threads (default: 2)")
+    parser.add_argument(
+        "--force-all", action="store_true", help="Reprocess all recipes regardless of current image state"
+    )
+    args = parser.parse_args()
+    workers: int = max(1, args.workers)
+    force_all: bool = args.force_all
+
+    recipe_ids = fetch_recipe_ids(force_all=force_all)
+    if not recipe_ids:
+        logger.info("No recipes need image reprocessing. Exiting...")
+        exit(0)
+
+    confirmed = input(
+        f"Found {len(recipe_ids)} {'recipe' if len(recipe_ids) == 1 else 'recipes'} "
+        f"needing image reprocessing (using {workers} {'worker' if workers == 1 else 'workers'}). Proceed? (y/n) "
+    )
+    if confirmed.lower() != "y":
+        print("aborting")  # noqa
+        exit(0)
+
+    logger.info("Starting image reprocessing...")
+    failed_recipe_ids = process_all_recipes(recipe_ids, force_all, max_workers=workers)
+
+    logger.info(f"Image reprocessing complete. {len(recipe_ids) - len(failed_recipe_ids)} successfully processed")
+    if failed_recipe_ids:
+        logger.error(f"Failed recipes: {', '.join(str(rid) for rid in failed_recipe_ids)}")
+
+    exit(0)
+
+
+if __name__ == "__main__":
+    main()
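
Because the script is an importable module, its helpers can also be reused for a dry run that only counts affected recipes without touching any images; a minimal sketch, run from the same environment as the script itself:

```python
# Minimal sketch: count the recipes the script would reprocess, without
# modifying anything. Reuses fetch_recipe_ids from reprocess_images.py above.
from mealie.scripts.reprocess_images import fetch_recipe_ids

recipe_ids = fetch_recipe_ids(force_all=False)
print(f"{len(recipe_ids)} recipe(s) would be reprocessed")
```
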
(y/n) " + ) + if confirmed.lower() != "y": + print("aborting") # noqa + exit(0) + + logger.info("Starting image reprocessing...") + failed_recipe_ids = process_all_recipes(recipe_ids, force_all, max_workers=workers) + + logger.info(f"Image reprocessing complete. {len(recipe_ids) - len(failed_recipe_ids)} successfully processed") + if failed_recipe_ids: + logger.error(f"Failed recipes: {', '.join(str(rid) for rid in failed_recipe_ids)}") + + exit(0) + + +if __name__ == "__main__": + main() diff --git a/mealie/services/recipe/recipe_data_service.py b/mealie/services/recipe/recipe_data_service.py index be4611bfa..9eb864773 100644 --- a/mealie/services/recipe/recipe_data_service.py +++ b/mealie/services/recipe/recipe_data_service.py @@ -1,5 +1,6 @@ import asyncio import shutil +from logging import Logger from pathlib import Path from httpx import AsyncClient, Response @@ -60,7 +61,7 @@ class InvalidDomainError(Exception): class RecipeDataService(BaseService): minifier: img.ABCMinifier - def __init__(self, recipe_id: UUID4) -> None: + def __init__(self, recipe_id: UUID4, logger: Logger | None = None) -> None: """ RecipeDataService is a service that consolidates the reading/writing actions related to assets, and images for a recipe. @@ -68,6 +69,7 @@ class RecipeDataService(BaseService): super().__init__() self.recipe_id = recipe_id + self.logger = logger or self.logger self.minifier = img.PillowMinifier(purge=True, logger=self.logger) self.dir_data = Recipe.directory_from_id(self.recipe_id)