mirror of
				https://github.com/mealie-recipes/mealie.git
				synced 2025-10-31 10:13:32 -04:00 
			
		
		
		
	fix: Bulk URL Import Fixes (#2796)
* allow exceptions when fetching content * removed extra bracket on import text * added more fault tolerance and limited concurrency * fix entries not being saved to report * disable clicking into in-progress import * conditionally render expansion
This commit is contained in:
		| @@ -49,6 +49,10 @@ export default defineComponent({ | |||||||
|     ]; |     ]; | ||||||
|  |  | ||||||
|     function handleRowClick(item: ReportSummary) { |     function handleRowClick(item: ReportSummary) { | ||||||
|  |       if (item.status === "in-progress") { | ||||||
|  |         return; | ||||||
|  |       } | ||||||
|  |  | ||||||
|       router.push(`/group/reports/${item.id}`); |       router.push(`/group/reports/${item.id}`); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -121,7 +121,7 @@ | |||||||
|             <template #icon> |             <template #icon> | ||||||
|               {{ $globals.icons.database }} |               {{ $globals.icons.database }} | ||||||
|             </template> |             </template> | ||||||
|             {{ $t('general.import') }}} |             {{ $t('general.import') }} | ||||||
|           </BaseButton> |           </BaseButton> | ||||||
|           <BaseButton |           <BaseButton | ||||||
|             color="info" |             color="info" | ||||||
|   | |||||||
| @@ -21,7 +21,7 @@ | |||||||
|           {{ $d(Date.parse(item.timestamp), "short") }} |           {{ $d(Date.parse(item.timestamp), "short") }} | ||||||
|         </template> |         </template> | ||||||
|         <template #expanded-item="{ headers, item }"> |         <template #expanded-item="{ headers, item }"> | ||||||
|           <td class="pa-6" :colspan="headers.length">{{ item.exception }}</td> |           <td v-if="item.exception" class="pa-6" :colspan="headers.length">{{ item.exception }}</td> | ||||||
|         </template> |         </template> | ||||||
|       </v-data-table> |       </v-data-table> | ||||||
|     </v-container> |     </v-container> | ||||||
|   | |||||||
| @@ -12,14 +12,17 @@ from mealie.services._base_service import BaseService | |||||||
| _FIREFOX_UA = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:86.0) Gecko/20100101 Firefox/86.0" | _FIREFOX_UA = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:86.0) Gecko/20100101 Firefox/86.0" | ||||||
|  |  | ||||||
|  |  | ||||||
| async def gather_with_concurrency(n, *coros): | async def gather_with_concurrency(n, *coros, ignore_exceptions=False): | ||||||
|     semaphore = asyncio.Semaphore(n) |     semaphore = asyncio.Semaphore(n) | ||||||
|  |  | ||||||
|     async def sem_coro(coro): |     async def sem_coro(coro): | ||||||
|         async with semaphore: |         async with semaphore: | ||||||
|             return await coro |             return await coro | ||||||
|  |  | ||||||
|     return await asyncio.gather(*(sem_coro(c) for c in coros)) |     results = await asyncio.gather(*(sem_coro(c) for c in coros), return_exceptions=ignore_exceptions) | ||||||
|  |     if ignore_exceptions: | ||||||
|  |         results = [r for r in results if not isinstance(r, Exception)] | ||||||
|  |     return results | ||||||
|  |  | ||||||
|  |  | ||||||
| async def largest_content_len(urls: list[str]) -> tuple[str, int]: | async def largest_content_len(urls: list[str]) -> tuple[str, int]: | ||||||
| @@ -31,7 +34,7 @@ async def largest_content_len(urls: list[str]) -> tuple[str, int]: | |||||||
|  |  | ||||||
|     async with AsyncClient() as client: |     async with AsyncClient() as client: | ||||||
|         tasks = [do(client, url) for url in urls] |         tasks = [do(client, url) for url in urls] | ||||||
|         responses: list[Response] = await gather_with_concurrency(10, *tasks) |         responses: list[Response] = await gather_with_concurrency(10, *tasks, ignore_exceptions=True) | ||||||
|         for response in responses: |         for response in responses: | ||||||
|             len_int = int(response.headers.get("Content-Length", 0)) |             len_int = int(response.headers.get("Content-Length", 0)) | ||||||
|             if len_int > largest_len: |             if len_int > largest_len: | ||||||
|   | |||||||
| @@ -1,10 +1,16 @@ | |||||||
| from asyncio import gather | import asyncio | ||||||
|  |  | ||||||
| from pydantic import UUID4 | from pydantic import UUID4 | ||||||
|  |  | ||||||
| from mealie.repos.repository_factory import AllRepositories | from mealie.repos.repository_factory import AllRepositories | ||||||
| from mealie.schema.recipe.recipe import CreateRecipeByUrlBulk, Recipe | from mealie.schema.recipe.recipe import CreateRecipeByUrlBulk, Recipe | ||||||
| from mealie.schema.reports.reports import ReportCategory, ReportCreate, ReportEntryCreate, ReportSummaryStatus | from mealie.schema.reports.reports import ( | ||||||
|  |     ReportCategory, | ||||||
|  |     ReportCreate, | ||||||
|  |     ReportEntryCreate, | ||||||
|  |     ReportEntryOut, | ||||||
|  |     ReportSummaryStatus, | ||||||
|  | ) | ||||||
| from mealie.schema.user.user import GroupInDB | from mealie.schema.user.user import GroupInDB | ||||||
| from mealie.services._base_service import BaseService | from mealie.services._base_service import BaseService | ||||||
| from mealie.services.recipe.recipe_service import RecipeService | from mealie.services.recipe.recipe_service import RecipeService | ||||||
| @@ -47,6 +53,7 @@ class RecipeBulkScraperService(BaseService): | |||||||
|         is_success = True |         is_success = True | ||||||
|         is_failure = True |         is_failure = True | ||||||
|  |  | ||||||
|  |         new_entries: list[ReportEntryOut] = [] | ||||||
|         for entry in self.report_entries: |         for entry in self.report_entries: | ||||||
|             if is_failure and entry.success: |             if is_failure and entry.success: | ||||||
|                 is_failure = False |                 is_failure = False | ||||||
| @@ -54,7 +61,7 @@ class RecipeBulkScraperService(BaseService): | |||||||
|             if is_success and not entry.success: |             if is_success and not entry.success: | ||||||
|                 is_success = False |                 is_success = False | ||||||
|  |  | ||||||
|             self.repos.group_report_entries.create(entry) |             new_entries.append(self.repos.group_report_entries.create(entry)) | ||||||
|  |  | ||||||
|         if is_success: |         if is_success: | ||||||
|             self.report.status = ReportSummaryStatus.success |             self.report.status = ReportSummaryStatus.success | ||||||
| @@ -65,15 +72,19 @@ class RecipeBulkScraperService(BaseService): | |||||||
|         if not is_success and not is_failure: |         if not is_success and not is_failure: | ||||||
|             self.report.status = ReportSummaryStatus.partial |             self.report.status = ReportSummaryStatus.partial | ||||||
|  |  | ||||||
|  |         self.report.entries = new_entries | ||||||
|         self.repos.group_reports.update(self.report.id, self.report) |         self.repos.group_reports.update(self.report.id, self.report) | ||||||
|  |  | ||||||
|     async def scrape(self, urls: CreateRecipeByUrlBulk) -> None: |     async def scrape(self, urls: CreateRecipeByUrlBulk) -> None: | ||||||
|  |         sem = asyncio.Semaphore(3) | ||||||
|  |  | ||||||
|         async def _do(url: str) -> Recipe | None: |         async def _do(url: str) -> Recipe | None: | ||||||
|  |             async with sem: | ||||||
|                 try: |                 try: | ||||||
|                     recipe, _ = await create_from_url(url) |                     recipe, _ = await create_from_url(url) | ||||||
|                     return recipe |                     return recipe | ||||||
|                 except Exception as e: |                 except Exception as e: | ||||||
|                 self.service.logger.error(f"failed to scrape url during bulk url import {b.url}") |                     self.service.logger.error(f"failed to scrape url during bulk url import {url}") | ||||||
|                     self.service.logger.exception(e) |                     self.service.logger.exception(e) | ||||||
|                     self._add_error_entry(f"failed to scrape url {url}", str(e)) |                     self._add_error_entry(f"failed to scrape url {url}", str(e)) | ||||||
|                     return None |                     return None | ||||||
| @@ -81,9 +92,9 @@ class RecipeBulkScraperService(BaseService): | |||||||
|         if self.report is None: |         if self.report is None: | ||||||
|             self.get_report_id() |             self.get_report_id() | ||||||
|         tasks = [_do(b.url) for b in urls.imports] |         tasks = [_do(b.url) for b in urls.imports] | ||||||
|         results = await gather(*tasks) |         results = await asyncio.gather(*tasks, return_exceptions=True) | ||||||
|         for b, recipe in zip(urls.imports, results, strict=True): |         for b, recipe in zip(urls.imports, results, strict=True): | ||||||
|             if not recipe: |             if not recipe or isinstance(recipe, Exception): | ||||||
|                 continue |                 continue | ||||||
|  |  | ||||||
|             if b.tags: |             if b.tags: | ||||||
|   | |||||||
| @@ -172,7 +172,7 @@ class RecipeScraperPackage(ABCScraperStrategy): | |||||||
|         try: |         try: | ||||||
|             scraped_schema = scrape_html(recipe_html, org_url=self.url) |             scraped_schema = scrape_html(recipe_html, org_url=self.url) | ||||||
|         except (NoSchemaFoundInWildMode, AttributeError): |         except (NoSchemaFoundInWildMode, AttributeError): | ||||||
|             self.logger.error("Recipe Scraper was unable to extract a recipe.") |             self.logger.error(f"Recipe Scraper was unable to extract a recipe from {self.url}") | ||||||
|             return None |             return None | ||||||
|  |  | ||||||
|         except ConnectionError as e: |         except ConnectionError as e: | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user