mirror of
				https://github.com/mealie-recipes/mealie.git
				synced 2025-10-31 02:03:35 -04:00 
			
		
		
		
	fix: remove network calls from tests (#2055)
* abstracted scraper get_html method
* applied mock to all scrapers
* fixed incorrect var reference
This commit is contained in:
		| @@ -3,6 +3,8 @@ from mealie.services.scraper.scraped_extras import ScrapedExtras | ||||
|  | ||||
| from .scraper_strategies import ABCScraperStrategy, RecipeScraperOpenGraph, RecipeScraperPackage | ||||
|  | ||||
| DEFAULT_SCRAPER_STRATEGIES: list[type[ABCScraperStrategy]] = [RecipeScraperPackage, RecipeScraperOpenGraph] | ||||
|  | ||||
|  | ||||
| class RecipeScraper: | ||||
|     """ | ||||
| @@ -14,10 +16,7 @@ class RecipeScraper: | ||||
|  | ||||
|     def __init__(self, scrapers: list[type[ABCScraperStrategy]] | None = None) -> None: | ||||
|         if scrapers is None: | ||||
|             scrapers = [ | ||||
|                 RecipeScraperPackage, | ||||
|                 RecipeScraperOpenGraph, | ||||
|             ] | ||||
|             scrapers = DEFAULT_SCRAPER_STRATEGIES | ||||
|  | ||||
|         self.scrapers = scrapers | ||||
|  | ||||
|   | ||||
| @@ -81,6 +81,10 @@ class ABCScraperStrategy(ABC): | ||||
|         self.logger = get_logger() | ||||
|         self.url = url | ||||
|  | ||||
|     @abstractmethod | ||||
|     async def get_html(self, url: str) -> str: | ||||
|         ... | ||||
|  | ||||
|     @abstractmethod | ||||
|     async def parse(self) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]: | ||||
|         """Parse a recipe from a web URL. | ||||
| @@ -95,6 +99,9 @@ class ABCScraperStrategy(ABC): | ||||
|  | ||||
|  | ||||
| class RecipeScraperPackage(ABCScraperStrategy): | ||||
|     async def get_html(self, url: str) -> str: | ||||
|         return await safe_scrape_html(url) | ||||
|  | ||||
|     def clean_scraper(self, scraped_data: SchemaScraperFactory.SchemaScraper, url: str) -> tuple[Recipe, ScrapedExtras]: | ||||
|         def try_get_default(func_call: Callable | None, get_attr: str, default: Any, clean_func=None): | ||||
|             value = default | ||||
| @@ -160,7 +167,8 @@ class RecipeScraperPackage(ABCScraperStrategy): | ||||
|         return recipe, extras | ||||
|  | ||||
|     async def scrape_url(self) -> SchemaScraperFactory.SchemaScraper | Any | None: | ||||
|         recipe_html = await safe_scrape_html(self.url) | ||||
|         recipe_html = await self.get_html(self.url) | ||||
|  | ||||
|         try: | ||||
|             scraped_schema = scrape_html(recipe_html, org_url=self.url) | ||||
|         except (NoSchemaFoundInWildMode, AttributeError): | ||||
| @@ -204,8 +212,8 @@ class RecipeScraperOpenGraph(ABCScraperStrategy): | ||||
|     Abstract class for all recipe parsers. | ||||
|     """ | ||||
|  | ||||
|     async def get_html(self) -> str: | ||||
|         return await safe_scrape_html(self.url) | ||||
|     async def get_html(self, url: str) -> str: | ||||
|         return await safe_scrape_html(url) | ||||
|  | ||||
|     def get_recipe_fields(self, html) -> dict | None: | ||||
|         """ | ||||
| @@ -245,7 +253,7 @@ class RecipeScraperOpenGraph(ABCScraperStrategy): | ||||
|         """ | ||||
|         Parse a recipe from a given url. | ||||
|         """ | ||||
|         html = await self.get_html() | ||||
|         html = await self.get_html(self.url) | ||||
|  | ||||
|         og_data = self.get_recipe_fields(html) | ||||
|  | ||||
|   | ||||
| @@ -1,6 +1,5 @@ | ||||
| import json | ||||
| from pathlib import Path | ||||
| from typing import Optional, Union | ||||
|  | ||||
| import pytest | ||||
| from bs4 import BeautifulSoup | ||||
| @@ -12,7 +11,7 @@ from slugify import slugify | ||||
|  | ||||
| from mealie.schema.recipe.recipe import RecipeCategory | ||||
| from mealie.services.recipe.recipe_data_service import RecipeDataService | ||||
| from mealie.services.scraper.scraper_strategies import RecipeScraperOpenGraph | ||||
| from mealie.services.scraper.recipe_scraper import DEFAULT_SCRAPER_STRATEGIES | ||||
| from tests import data, utils | ||||
| from tests.utils import api_routes | ||||
| from tests.utils.factories import random_string | ||||
| @@ -31,9 +30,9 @@ def get_init(html_path: Path): | ||||
|     def init_override( | ||||
|         self, | ||||
|         url, | ||||
|         proxies: Optional[str] = None, | ||||
|         timeout: Optional[Union[float, tuple, None]] = None, | ||||
|         wild_mode: Optional[bool] = False, | ||||
|         proxies: str | None = None, | ||||
|         timeout: float | tuple | None = None, | ||||
|         wild_mode: bool | None = False, | ||||
|         **_, | ||||
|     ): | ||||
|         page_data = html_path.read_bytes() | ||||
| @@ -48,7 +47,7 @@ def get_init(html_path: Path): | ||||
|  | ||||
|  | ||||
| def open_graph_override(html: str): | ||||
|     def get_html(self) -> str: | ||||
|     async def get_html(self, url: str) -> str: | ||||
|         return html | ||||
|  | ||||
|     return get_html | ||||
| @@ -68,11 +67,12 @@ def test_create_by_url( | ||||
|         get_init(recipe_data.html_file), | ||||
|     ) | ||||
|     # Override the get_html method of all scraper strategies to return the test html | ||||
|     monkeypatch.setattr( | ||||
|         RecipeScraperOpenGraph, | ||||
|         "get_html", | ||||
|         open_graph_override(recipe_data.html_file.read_text()), | ||||
|     ) | ||||
|     for scraper_cls in DEFAULT_SCRAPER_STRATEGIES: | ||||
|         monkeypatch.setattr( | ||||
|             scraper_cls, | ||||
|             "get_html", | ||||
|             open_graph_override(recipe_data.html_file.read_text()), | ||||
|         ) | ||||
|     # Skip image downloader | ||||
|     monkeypatch.setattr( | ||||
|         RecipeDataService, | ||||
| @@ -113,12 +113,13 @@ def test_create_by_url_with_tags( | ||||
|         "__init__", | ||||
|         get_init(html_file), | ||||
|     ) | ||||
|     # Override the get_html method of the RecipeScraperOpenGraph to return the test html | ||||
|     monkeypatch.setattr( | ||||
|         RecipeScraperOpenGraph, | ||||
|         "get_html", | ||||
|         open_graph_override(html_file.read_text()), | ||||
|     ) | ||||
|     # Override the get_html method of all scraper strategies to return the test html | ||||
|     for scraper_cls in DEFAULT_SCRAPER_STRATEGIES: | ||||
|         monkeypatch.setattr( | ||||
|             scraper_cls, | ||||
|             "get_html", | ||||
|             open_graph_override(html_file.read_text()), | ||||
|         ) | ||||
|     # Skip image downloader | ||||
|     monkeypatch.setattr( | ||||
|         RecipeDataService, | ||||
| @@ -198,7 +199,7 @@ def test_read_update( | ||||
|     assert len(recipe["recipeCategory"]) == len(recipe_categories) | ||||
|  | ||||
|     test_name = [x.name for x in recipe_categories] | ||||
|     for cats in zip(recipe["recipeCategory"], recipe_categories): | ||||
|     for cats in zip(recipe["recipeCategory"], recipe_categories, strict=False): | ||||
|         assert cats[0]["name"] in test_name | ||||
|  | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user