mirror of
https://github.com/mealie-recipes/mealie.git
synced 2026-04-14 08:55:34 -04:00
fix: Don't continue parsing recipes with errored HTTP status codes (#7230)
This commit is contained in:
@@ -44,6 +44,10 @@ class RecipeScraper:
|
||||
"""
|
||||
|
||||
raw_html = html or await safe_scrape_html(url)
|
||||
|
||||
if not raw_html:
|
||||
return None, None
|
||||
|
||||
for ScraperClass in self.scrapers:
|
||||
scraper = ScraperClass(url, self.translator, raw_html=raw_html)
|
||||
if not scraper.can_scrape():
|
||||
|
||||
@@ -70,8 +70,8 @@ async def safe_scrape_html(url: str) -> str:
|
||||
headers=user_agents_manager.get_scrape_headers(user_agent),
|
||||
follow_redirects=True,
|
||||
) as resp:
|
||||
- if resp.status_code == status.HTTP_403_FORBIDDEN:
- logger.debug(f'403 Forbidden with User-Agent: "{user_agent}"')
+ if resp.status_code >= status.HTTP_400_BAD_REQUEST:
+ logger.debug(f'Error status code {resp.status_code} with User-Agent: "{user_agent}"')
|
||||
continue
|
||||
|
||||
start_time = time.time()
|
||||
|
||||
Reference in New Issue
Block a user