mealie/mealie/services/scraper/recipe_bulk_scraper.py
Last commit: b8dd0fc97c chore(deps): update dependency mypy to v1.8.0 (#2943), 2024-01-11 04:44:23 +00:00, by renovate[bot] with Michael Genson


import asyncio

from pydantic import UUID4

from mealie.repos.repository_factory import AllRepositories
from mealie.schema.recipe.recipe import CreateRecipeByUrlBulk, Recipe
from mealie.schema.reports.reports import (
    ReportCategory,
    ReportCreate,
    ReportEntryCreate,
    ReportEntryOut,
    ReportOut,
    ReportSummaryStatus,
)
from mealie.schema.user.user import GroupInDB
from mealie.services._base_service import BaseService
from mealie.services.recipe.recipe_service import RecipeService
from mealie.services.scraper.scraper import create_from_url


class RecipeBulkScraperService(BaseService):
    report_entries: list[ReportEntryCreate]

    def __init__(self, service: RecipeService, repos: AllRepositories, group: GroupInDB) -> None:
        self.service = service
        self.repos = repos
        self.group = group
        # `scrape` lazily creates the report, so start with an explicit None;
        # without this attribute, the `if self.report is None` check in
        # `scrape` would raise AttributeError
        self.report: ReportOut | None = None
        self.report_entries = []

        super().__init__()
def get_report_id(self) -> UUID4:
import_report = ReportCreate(
name="Bulk Import",
category=ReportCategory.bulk_import,
status=ReportSummaryStatus.in_progress,
group_id=self.group.id,
)
self.report = self.repos.group_reports.create(import_report)
return self.report.id
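
    # NOTE (sketch): callers are expected to grab the report id first so it can
    # be handed back to the client, then run `scrape` in the background while
    # the client polls the report. See the driver sketch at the end of the file.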

    def _add_error_entry(self, message: str, exception: str = "") -> None:
        self.report_entries.append(
            ReportEntryCreate(
                report_id=self.report.id,
                success=False,
                message=message,
                exception=exception,
            )
        )
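
    # NOTE: entries are only buffered in memory here; nothing is persisted
    # until `_save_all_entries` runs at the end of `scrape`, so an import that
    # dies midway leaves the report stuck in `in_progress`.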

    def _save_all_entries(self) -> None:
        is_success = True
        is_failure = True

        new_entries: list[ReportEntryOut] = []
        for entry in self.report_entries:
            # a single successful entry means the run was not a total failure,
            # and a single failed entry means it was not a total success
            if is_failure and entry.success:
                is_failure = False

            if is_success and not entry.success:
                is_success = False

            new_entries.append(self.repos.group_report_entries.create(entry))

        if is_success:
            self.report.status = ReportSummaryStatus.success

        if is_failure:
            self.report.status = ReportSummaryStatus.failure

        if not is_success and not is_failure:
            self.report.status = ReportSummaryStatus.partial

        self.report.entries = new_entries
        self.repos.group_reports.update(self.report.id, self.report)
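
    # Worked example of the status aggregation above (illustrative entries):
    #   [success, success] -> ReportSummaryStatus.success
    #   [failure, failure] -> ReportSummaryStatus.failure
    #   [success, failure] -> ReportSummaryStatus.partial
    # With no entries at all, both flags stay True: the status is first set to
    # success and then immediately overwritten to failure.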

    async def scrape(self, urls: CreateRecipeByUrlBulk) -> None:
        # cap concurrent scrapes so a large bulk import doesn't overwhelm the
        # host or the scraped sites
        sem = asyncio.Semaphore(3)

        async def _do(url: str) -> Recipe | None:
            async with sem:
                try:
                    recipe, _ = await create_from_url(url)
                    return recipe
                except Exception as e:
                    self.service.logger.error(f"failed to scrape url during bulk url import {url}")
                    self.service.logger.exception(e)
                    self._add_error_entry(f"failed to scrape url {url}", str(e))
                    return None

        if self.report is None:
            self.get_report_id()

        tasks = [_do(b.url) for b in urls.imports]
        results = await asyncio.gather(*tasks, return_exceptions=True)

        for b, recipe in zip(urls.imports, results, strict=True):
            # `_do` already logged and reported scrape failures; skip them here
            if not recipe or isinstance(recipe, BaseException):
                continue

            if b.tags:
                recipe.tags = b.tags

            if b.categories:
                recipe.recipe_category = b.categories

            try:
                self.service.create_one(recipe)
            except Exception as e:
                self.service.logger.error(f"Failed to save recipe to database during bulk url import {b.url}")
                self.service.logger.exception(e)
                self._add_error_entry(f"Failed to save recipe to database during bulk url import {b.url}", str(e))
                continue

            self.report_entries.append(
                ReportEntryCreate(
                    report_id=self.report.id,
                    success=True,
                    message=f"Successfully imported recipe {recipe.name}",
                    exception="",
                )
            )

        self._save_all_entries()
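

A minimal driver sketch, assuming service, repos, and group come from the
application's usual dependency wiring; the function name and the final print
are illustrative, not part of this module:

async def run_bulk_import(
    service: RecipeService,
    repos: AllRepositories,
    group: GroupInDB,
    bulk: CreateRecipeByUrlBulk,
) -> None:
    # hypothetical helper: create the report up front (in the real API its id
    # is returned to the client for polling), then scrape; the service
    # finalizes the report status itself via _save_all_entries
    scraper = RecipeBulkScraperService(service, repos, group)
    report_id = scraper.get_report_id()
    await scraper.scrape(bulk)
    print(f"bulk import finished, report id: {report_id}")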