From 6a3b38a31e437447568ef5a19a2b3bf3841ab7bd Mon Sep 17 00:00:00 2001
From: Dean Malan <delenamalan@gmail.com>
Date: Thu, 12 Mar 2026 15:58:40 +0200
Subject: [PATCH] fix: Don't continue parsing recipes with errored HTTP status
 codes (#7230)

---
 mealie/services/scraper/recipe_scraper.py          |  4 ++++
 mealie/services/scraper/scraper_strategies.py      |  4 ++--
 .../test_recipe_create_from_video.py               |  2 +-
 .../user_recipe_tests/test_recipe_crud.py          | 14 +++++++-------
 4 files changed, 14 insertions(+), 10 deletions(-)
diff --git a/mealie/services/scraper/recipe_scraper.py b/mealie/services/scraper/recipe_scraper.py
index 5223378aa..f4bca66df 100644
--- a/mealie/services/scraper/recipe_scraper.py
+++ b/mealie/services/scraper/recipe_scraper.py
@@ -44,6 +44,10 @@ class RecipeScraper:
         """
 
         raw_html = html or await safe_scrape_html(url)
+
+        if not raw_html:
+            return None, None
+
         for ScraperClass in self.scrapers:
             scraper = ScraperClass(url, self.translator, raw_html=raw_html)
             if not scraper.can_scrape():
diff --git a/mealie/services/scraper/scraper_strategies.py b/mealie/services/scraper/scraper_strategies.py
index b15c9c09c..03057723b 100644
--- a/mealie/services/scraper/scraper_strategies.py
+++ b/mealie/services/scraper/scraper_strategies.py
@@ -70,8 +70,8 @@ async def safe_scrape_html(url: str) -> str:
                 headers=user_agents_manager.get_scrape_headers(user_agent),
                 follow_redirects=True,
             ) as resp:
-                if resp.status_code == status.HTTP_403_FORBIDDEN:
-                    logger.debug(f'403 Forbidden with User-Agent: "{user_agent}"')
+                if resp.status_code >= status.HTTP_400_BAD_REQUEST:
+                    logger.debug(f'Error status code {resp.status_code} with User-Agent: "{user_agent}"')
                     continue
 
                 start_time = time.time()
diff --git a/tests/integration_tests/user_recipe_tests/test_recipe_create_from_video.py b/tests/integration_tests/user_recipe_tests/test_recipe_create_from_video.py
index d54443086..9313ad55b 100644
--- a/tests/integration_tests/user_recipe_tests/test_recipe_create_from_video.py
+++ b/tests/integration_tests/user_recipe_tests/test_recipe_create_from_video.py
@@ -33,7 +33,7 @@ def video_scraper_setup(monkeypatch: pytest.MonkeyPatch):
 
     # Prevent any real HTTP calls during scraping
     async def mock_safe_scrape_html(url: str) -> str:
-        return ""
+        return "<html></html>"
 
     monkeypatch.setattr(recipe_scraper_module, "safe_scrape_html", mock_safe_scrape_html)
 
diff --git a/tests/integration_tests/user_recipe_tests/test_recipe_crud.py b/tests/integration_tests/user_recipe_tests/test_recipe_crud.py
index 7203527df..ba1974649 100644
--- a/tests/integration_tests/user_recipe_tests/test_recipe_crud.py
+++ b/tests/integration_tests/user_recipe_tests/test_recipe_crud.py
@@ -14,11 +14,11 @@ from bs4 import BeautifulSoup
 from fastapi.testclient import TestClient
 from httpx import Response
 from pytest import MonkeyPatch
-from recipe_scrapers._abstract import AbstractScraper
 from recipe_scrapers._schemaorg import SchemaOrg
 from recipe_scrapers.plugins import SchemaOrgFillPlugin
 from slugify import slugify
 
+import mealie.services.scraper.recipe_scraper as recipe_scraper_module
 from mealie.db.models.recipe import RecipeModel
 from mealie.pkgs.safehttp.transport import AsyncSafeTransport
 from mealie.schema.cookbook.cookbook import SaveCookBook
@@ -102,12 +102,12 @@ def test_create_by_url(
     monkeypatch: MonkeyPatch,
 ):
     for recipe_data in recipe_test_data:
-        # Override init function for AbstractScraper to use the test html instead of calling the url
-        monkeypatch.setattr(
-            AbstractScraper,
-            "__init__",
-            get_init(recipe_data.html_file),
-        )
+        # Avoid real HTTP calls; HTML must be non-empty, else the scraper returns (None, None) early
+        async def mock_safe_scrape_html(url: str) -> str:
+            return "<html></html>"
+
+        monkeypatch.setattr(recipe_scraper_module, "safe_scrape_html", mock_safe_scrape_html)
+
         # Override the get_html method of the RecipeScraperOpenGraph to return the test html
         for scraper_cls in DEFAULT_SCRAPER_STRATEGIES:
             monkeypatch.setattr(