mirror of
				https://github.com/mealie-recipes/mealie.git
				synced 2025-10-31 02:03:35 -04:00 
			
		
		
		
	fix: remove network calls from tests (#2055)
* Abstracted the scraper `get_html` method
* Applied the mock to all scrapers
* Fixed an incorrect variable reference
This commit is contained in the branches listed on the original repository page.
		| @@ -3,6 +3,8 @@ from mealie.services.scraper.scraped_extras import ScrapedExtras | |||||||
|  |  | ||||||
| from .scraper_strategies import ABCScraperStrategy, RecipeScraperOpenGraph, RecipeScraperPackage | from .scraper_strategies import ABCScraperStrategy, RecipeScraperOpenGraph, RecipeScraperPackage | ||||||
|  |  | ||||||
|  | DEFAULT_SCRAPER_STRATEGIES: list[type[ABCScraperStrategy]] = [RecipeScraperPackage, RecipeScraperOpenGraph] | ||||||
|  |  | ||||||
|  |  | ||||||
| class RecipeScraper: | class RecipeScraper: | ||||||
|     """ |     """ | ||||||
| @@ -14,10 +16,7 @@ class RecipeScraper: | |||||||
|  |  | ||||||
|     def __init__(self, scrapers: list[type[ABCScraperStrategy]] | None = None) -> None: |     def __init__(self, scrapers: list[type[ABCScraperStrategy]] | None = None) -> None: | ||||||
|         if scrapers is None: |         if scrapers is None: | ||||||
|             scrapers = [ |             scrapers = DEFAULT_SCRAPER_STRATEGIES | ||||||
|                 RecipeScraperPackage, |  | ||||||
|                 RecipeScraperOpenGraph, |  | ||||||
|             ] |  | ||||||
|  |  | ||||||
|         self.scrapers = scrapers |         self.scrapers = scrapers | ||||||
|  |  | ||||||
|   | |||||||
| @@ -81,6 +81,10 @@ class ABCScraperStrategy(ABC): | |||||||
|         self.logger = get_logger() |         self.logger = get_logger() | ||||||
|         self.url = url |         self.url = url | ||||||
|  |  | ||||||
|  |     @abstractmethod | ||||||
|  |     async def get_html(self, url: str) -> str: | ||||||
|  |         ... | ||||||
|  |  | ||||||
|     @abstractmethod |     @abstractmethod | ||||||
|     async def parse(self) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]: |     async def parse(self) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]: | ||||||
|         """Parse a recipe from a web URL. |         """Parse a recipe from a web URL. | ||||||
| @@ -95,6 +99,9 @@ class ABCScraperStrategy(ABC): | |||||||
|  |  | ||||||
|  |  | ||||||
| class RecipeScraperPackage(ABCScraperStrategy): | class RecipeScraperPackage(ABCScraperStrategy): | ||||||
|  |     async def get_html(self, url: str) -> str: | ||||||
|  |         return await safe_scrape_html(url) | ||||||
|  |  | ||||||
|     def clean_scraper(self, scraped_data: SchemaScraperFactory.SchemaScraper, url: str) -> tuple[Recipe, ScrapedExtras]: |     def clean_scraper(self, scraped_data: SchemaScraperFactory.SchemaScraper, url: str) -> tuple[Recipe, ScrapedExtras]: | ||||||
|         def try_get_default(func_call: Callable | None, get_attr: str, default: Any, clean_func=None): |         def try_get_default(func_call: Callable | None, get_attr: str, default: Any, clean_func=None): | ||||||
|             value = default |             value = default | ||||||
| @@ -160,7 +167,8 @@ class RecipeScraperPackage(ABCScraperStrategy): | |||||||
|         return recipe, extras |         return recipe, extras | ||||||
|  |  | ||||||
|     async def scrape_url(self) -> SchemaScraperFactory.SchemaScraper | Any | None: |     async def scrape_url(self) -> SchemaScraperFactory.SchemaScraper | Any | None: | ||||||
|         recipe_html = await safe_scrape_html(self.url) |         recipe_html = await self.get_html(self.url) | ||||||
|  |  | ||||||
|         try: |         try: | ||||||
|             scraped_schema = scrape_html(recipe_html, org_url=self.url) |             scraped_schema = scrape_html(recipe_html, org_url=self.url) | ||||||
|         except (NoSchemaFoundInWildMode, AttributeError): |         except (NoSchemaFoundInWildMode, AttributeError): | ||||||
| @@ -204,8 +212,8 @@ class RecipeScraperOpenGraph(ABCScraperStrategy): | |||||||
|     Abstract class for all recipe parsers. |     Abstract class for all recipe parsers. | ||||||
|     """ |     """ | ||||||
|  |  | ||||||
|     async def get_html(self) -> str: |     async def get_html(self, url: str) -> str: | ||||||
|         return await safe_scrape_html(self.url) |         return await safe_scrape_html(url) | ||||||
|  |  | ||||||
|     def get_recipe_fields(self, html) -> dict | None: |     def get_recipe_fields(self, html) -> dict | None: | ||||||
|         """ |         """ | ||||||
| @@ -245,7 +253,7 @@ class RecipeScraperOpenGraph(ABCScraperStrategy): | |||||||
|         """ |         """ | ||||||
|         Parse a recipe from a given url. |         Parse a recipe from a given url. | ||||||
|         """ |         """ | ||||||
|         html = await self.get_html() |         html = await self.get_html(self.url) | ||||||
|  |  | ||||||
|         og_data = self.get_recipe_fields(html) |         og_data = self.get_recipe_fields(html) | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1,6 +1,5 @@ | |||||||
| import json | import json | ||||||
| from pathlib import Path | from pathlib import Path | ||||||
| from typing import Optional, Union |  | ||||||
|  |  | ||||||
| import pytest | import pytest | ||||||
| from bs4 import BeautifulSoup | from bs4 import BeautifulSoup | ||||||
| @@ -12,7 +11,7 @@ from slugify import slugify | |||||||
|  |  | ||||||
| from mealie.schema.recipe.recipe import RecipeCategory | from mealie.schema.recipe.recipe import RecipeCategory | ||||||
| from mealie.services.recipe.recipe_data_service import RecipeDataService | from mealie.services.recipe.recipe_data_service import RecipeDataService | ||||||
| from mealie.services.scraper.scraper_strategies import RecipeScraperOpenGraph | from mealie.services.scraper.recipe_scraper import DEFAULT_SCRAPER_STRATEGIES | ||||||
| from tests import data, utils | from tests import data, utils | ||||||
| from tests.utils import api_routes | from tests.utils import api_routes | ||||||
| from tests.utils.factories import random_string | from tests.utils.factories import random_string | ||||||
| @@ -31,9 +30,9 @@ def get_init(html_path: Path): | |||||||
|     def init_override( |     def init_override( | ||||||
|         self, |         self, | ||||||
|         url, |         url, | ||||||
|         proxies: Optional[str] = None, |         proxies: str | None = None, | ||||||
|         timeout: Optional[Union[float, tuple, None]] = None, |         timeout: float | tuple | None = None, | ||||||
|         wild_mode: Optional[bool] = False, |         wild_mode: bool | None = False, | ||||||
|         **_, |         **_, | ||||||
|     ): |     ): | ||||||
|         page_data = html_path.read_bytes() |         page_data = html_path.read_bytes() | ||||||
| @@ -48,7 +47,7 @@ def get_init(html_path: Path): | |||||||
|  |  | ||||||
|  |  | ||||||
| def open_graph_override(html: str): | def open_graph_override(html: str): | ||||||
|     def get_html(self) -> str: |     async def get_html(self, url: str) -> str: | ||||||
|         return html |         return html | ||||||
|  |  | ||||||
|     return get_html |     return get_html | ||||||
| @@ -68,8 +67,9 @@ def test_create_by_url( | |||||||
|         get_init(recipe_data.html_file), |         get_init(recipe_data.html_file), | ||||||
|     ) |     ) | ||||||
|     # Override the get_html method of the RecipeScraperOpenGraph to return the test html |     # Override the get_html method of the RecipeScraperOpenGraph to return the test html | ||||||
|  |     for scraper_cls in DEFAULT_SCRAPER_STRATEGIES: | ||||||
|         monkeypatch.setattr( |         monkeypatch.setattr( | ||||||
|         RecipeScraperOpenGraph, |             scraper_cls, | ||||||
|             "get_html", |             "get_html", | ||||||
|             open_graph_override(recipe_data.html_file.read_text()), |             open_graph_override(recipe_data.html_file.read_text()), | ||||||
|         ) |         ) | ||||||
| @@ -113,9 +113,10 @@ def test_create_by_url_with_tags( | |||||||
|         "__init__", |         "__init__", | ||||||
|         get_init(html_file), |         get_init(html_file), | ||||||
|     ) |     ) | ||||||
|     # Override the get_html method of the RecipeScraperOpenGraph to return the test html |     # Override the get_html method of all scraper strategies to return the test html | ||||||
|  |     for scraper_cls in DEFAULT_SCRAPER_STRATEGIES: | ||||||
|         monkeypatch.setattr( |         monkeypatch.setattr( | ||||||
|         RecipeScraperOpenGraph, |             scraper_cls, | ||||||
|             "get_html", |             "get_html", | ||||||
|             open_graph_override(html_file.read_text()), |             open_graph_override(html_file.read_text()), | ||||||
|         ) |         ) | ||||||
| @@ -198,7 +199,7 @@ def test_read_update( | |||||||
|     assert len(recipe["recipeCategory"]) == len(recipe_categories) |     assert len(recipe["recipeCategory"]) == len(recipe_categories) | ||||||
|  |  | ||||||
|     test_name = [x.name for x in recipe_categories] |     test_name = [x.name for x in recipe_categories] | ||||||
|     for cats in zip(recipe["recipeCategory"], recipe_categories): |     for cats in zip(recipe["recipeCategory"], recipe_categories, strict=False): | ||||||
|         assert cats[0]["name"] in test_name |         assert cats[0]["name"] in test_name | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user