| 
									
										
										
										
											2023-12-07 11:08:47 -06:00
										 |  |  | import asyncio | 
					
						
							| 
									
										
										
										
											2023-01-29 01:43:27 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-05-25 19:33:58 -08:00
										 |  |  | from pydantic import UUID4 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | from mealie.repos.repository_factory import AllRepositories | 
					
						
							| 
									
										
										
										
											2023-01-29 01:43:27 +01:00
										 |  |  | from mealie.schema.recipe.recipe import CreateRecipeByUrlBulk, Recipe | 
					
						
							| 
									
										
										
										
											2023-12-07 11:08:47 -06:00
										 |  |  | from mealie.schema.reports.reports import ( | 
					
						
							|  |  |  |     ReportCategory, | 
					
						
							|  |  |  |     ReportCreate, | 
					
						
							|  |  |  |     ReportEntryCreate, | 
					
						
							|  |  |  |     ReportEntryOut, | 
					
						
							|  |  |  |     ReportSummaryStatus, | 
					
						
							|  |  |  | ) | 
					
						
							| 
									
										
										
										
											2022-05-25 19:33:58 -08:00
										 |  |  | from mealie.schema.user.user import GroupInDB | 
					
						
							|  |  |  | from mealie.services._base_service import BaseService | 
					
						
							|  |  |  | from mealie.services.recipe.recipe_service import RecipeService | 
					
						
							|  |  |  | from mealie.services.scraper.scraper import create_from_url | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class RecipeBulkScraperService(BaseService): | 
					
						
							|  |  |  |     report_entries: list[ReportEntryCreate] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def __init__(self, service: RecipeService, repos: AllRepositories, group: GroupInDB) -> None: | 
					
						
							|  |  |  |         self.service = service | 
					
						
							|  |  |  |         self.repos = repos | 
					
						
							|  |  |  |         self.group = group | 
					
						
							|  |  |  |         self.report_entries = [] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         super().__init__() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def get_report_id(self) -> UUID4: | 
					
						
							|  |  |  |         import_report = ReportCreate( | 
					
						
							|  |  |  |             name="Bulk Import", | 
					
						
							|  |  |  |             category=ReportCategory.bulk_import, | 
					
						
							|  |  |  |             status=ReportSummaryStatus.in_progress, | 
					
						
							|  |  |  |             group_id=self.group.id, | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         self.report = self.repos.group_reports.create(import_report) | 
					
						
							|  |  |  |         return self.report.id | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def _add_error_entry(self, message: str, exception: str = "") -> None: | 
					
						
							|  |  |  |         self.report_entries.append( | 
					
						
							|  |  |  |             ReportEntryCreate( | 
					
						
							|  |  |  |                 report_id=self.report.id, | 
					
						
							|  |  |  |                 success=False, | 
					
						
							|  |  |  |                 message=message, | 
					
						
							|  |  |  |                 exception=exception, | 
					
						
							|  |  |  |             ) | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def _save_all_entries(self) -> None: | 
					
						
							|  |  |  |         is_success = True | 
					
						
							|  |  |  |         is_failure = True | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-12-07 11:08:47 -06:00
										 |  |  |         new_entries: list[ReportEntryOut] = [] | 
					
						
							| 
									
										
										
										
											2022-05-25 19:33:58 -08:00
										 |  |  |         for entry in self.report_entries: | 
					
						
							|  |  |  |             if is_failure and entry.success: | 
					
						
							|  |  |  |                 is_failure = False | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             if is_success and not entry.success: | 
					
						
							|  |  |  |                 is_success = False | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-12-07 11:08:47 -06:00
										 |  |  |             new_entries.append(self.repos.group_report_entries.create(entry)) | 
					
						
							| 
									
										
										
										
											2022-05-25 19:33:58 -08:00
										 |  |  | 
 | 
					
						
							|  |  |  |         if is_success: | 
					
						
							|  |  |  |             self.report.status = ReportSummaryStatus.success | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if is_failure: | 
					
						
							|  |  |  |             self.report.status = ReportSummaryStatus.failure | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if not is_success and not is_failure: | 
					
						
							|  |  |  |             self.report.status = ReportSummaryStatus.partial | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-12-07 11:08:47 -06:00
										 |  |  |         self.report.entries = new_entries | 
					
						
							| 
									
										
										
										
											2022-05-25 19:33:58 -08:00
										 |  |  |         self.repos.group_reports.update(self.report.id, self.report) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-01-29 01:43:27 +01:00
										 |  |  |     async def scrape(self, urls: CreateRecipeByUrlBulk) -> None: | 
					
						
							| 
									
										
										
										
											2023-12-07 11:08:47 -06:00
										 |  |  |         sem = asyncio.Semaphore(3) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-01-29 01:43:27 +01:00
										 |  |  |         async def _do(url: str) -> Recipe | None: | 
					
						
							| 
									
										
										
										
											2023-12-07 11:08:47 -06:00
										 |  |  |             async with sem: | 
					
						
							|  |  |  |                 try: | 
					
						
							|  |  |  |                     recipe, _ = await create_from_url(url) | 
					
						
							|  |  |  |                     return recipe | 
					
						
							|  |  |  |                 except Exception as e: | 
					
						
							|  |  |  |                     self.service.logger.error(f"failed to scrape url during bulk url import {url}") | 
					
						
							|  |  |  |                     self.service.logger.exception(e) | 
					
						
							|  |  |  |                     self._add_error_entry(f"failed to scrape url {url}", str(e)) | 
					
						
							|  |  |  |                     return None | 
					
						
							| 
									
										
										
										
											2023-01-29 01:43:27 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |         if self.report is None: | 
					
						
							|  |  |  |             self.get_report_id() | 
					
						
							|  |  |  |         tasks = [_do(b.url) for b in urls.imports] | 
					
						
							| 
									
										
										
										
											2023-12-07 11:08:47 -06:00
										 |  |  |         results = await asyncio.gather(*tasks, return_exceptions=True) | 
					
						
							| 
									
										
										
										
											2023-01-29 01:43:27 +01:00
										 |  |  |         for b, recipe in zip(urls.imports, results, strict=True): | 
					
						
							| 
									
										
										
										
											2024-01-11 04:44:23 +00:00
										 |  |  |             if not recipe or isinstance(recipe, BaseException): | 
					
						
							| 
									
										
										
										
											2022-06-15 18:19:52 -08:00
										 |  |  |                 continue | 
					
						
							| 
									
										
										
										
											2022-05-25 19:33:58 -08:00
										 |  |  | 
 | 
					
						
							|  |  |  |             if b.tags: | 
					
						
							|  |  |  |                 recipe.tags = b.tags | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             if b.categories: | 
					
						
							|  |  |  |                 recipe.recipe_category = b.categories | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             try: | 
					
						
							|  |  |  |                 self.service.create_one(recipe) | 
					
						
							|  |  |  |             except Exception as e: | 
					
						
							|  |  |  |                 self.service.logger.error(f"Failed to save recipe to database during bulk url import {b.url}") | 
					
						
							|  |  |  |                 self.service.logger.exception(e) | 
					
						
							| 
									
										
										
										
											2022-06-15 18:19:52 -08:00
										 |  |  |                 self._add_error_entry(f"Failed to save recipe to database during bulk url import {b.url}", str(e)) | 
					
						
							|  |  |  |                 continue | 
					
						
							| 
									
										
										
										
											2022-05-25 19:33:58 -08:00
										 |  |  | 
 | 
					
						
							|  |  |  |             self.report_entries.append( | 
					
						
							|  |  |  |                 ReportEntryCreate( | 
					
						
							|  |  |  |                     report_id=self.report.id, | 
					
						
							|  |  |  |                     success=True, | 
					
						
							|  |  |  |                     message=f"Successfully imported recipe {recipe.name}", | 
					
						
							|  |  |  |                     exception="", | 
					
						
							|  |  |  |                 ) | 
					
						
							|  |  |  |             ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         self._save_all_entries() |