mirror of
				https://github.com/mealie-recipes/mealie.git
				synced 2025-10-31 02:03:35 -04:00 
			
		
		
		
	fix: remove network calls from tests (#2055)
* abstracted scraper get_html method
* applied mock to all scrapers
* fixed incorrect var reference
This commit is contained in:
		| @@ -3,6 +3,8 @@ from mealie.services.scraper.scraped_extras import ScrapedExtras | ||||
|  | ||||
| from .scraper_strategies import ABCScraperStrategy, RecipeScraperOpenGraph, RecipeScraperPackage | ||||
|  | ||||
| DEFAULT_SCRAPER_STRATEGIES: list[type[ABCScraperStrategy]] = [RecipeScraperPackage, RecipeScraperOpenGraph] | ||||
|  | ||||
|  | ||||
| class RecipeScraper: | ||||
|     """ | ||||
| @@ -14,10 +16,7 @@ class RecipeScraper: | ||||
|  | ||||
|     def __init__(self, scrapers: list[type[ABCScraperStrategy]] | None = None) -> None: | ||||
|         if scrapers is None: | ||||
|             scrapers = [ | ||||
|                 RecipeScraperPackage, | ||||
|                 RecipeScraperOpenGraph, | ||||
|             ] | ||||
|             scrapers = DEFAULT_SCRAPER_STRATEGIES | ||||
|  | ||||
|         self.scrapers = scrapers | ||||
|  | ||||
|   | ||||
| @@ -81,6 +81,10 @@ class ABCScraperStrategy(ABC): | ||||
|         self.logger = get_logger() | ||||
|         self.url = url | ||||
|  | ||||
|     @abstractmethod | ||||
|     async def get_html(self, url: str) -> str: | ||||
|         ... | ||||
|  | ||||
|     @abstractmethod | ||||
|     async def parse(self) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]: | ||||
|         """Parse a recipe from a web URL. | ||||
| @@ -95,6 +99,9 @@ class ABCScraperStrategy(ABC): | ||||
|  | ||||
|  | ||||
| class RecipeScraperPackage(ABCScraperStrategy): | ||||
|     async def get_html(self, url: str) -> str: | ||||
|         return await safe_scrape_html(url) | ||||
|  | ||||
|     def clean_scraper(self, scraped_data: SchemaScraperFactory.SchemaScraper, url: str) -> tuple[Recipe, ScrapedExtras]: | ||||
|         def try_get_default(func_call: Callable | None, get_attr: str, default: Any, clean_func=None): | ||||
|             value = default | ||||
| @@ -160,7 +167,8 @@ class RecipeScraperPackage(ABCScraperStrategy): | ||||
|         return recipe, extras | ||||
|  | ||||
|     async def scrape_url(self) -> SchemaScraperFactory.SchemaScraper | Any | None: | ||||
|         recipe_html = await safe_scrape_html(self.url) | ||||
|         recipe_html = await self.get_html(self.url) | ||||
|  | ||||
|         try: | ||||
|             scraped_schema = scrape_html(recipe_html, org_url=self.url) | ||||
|         except (NoSchemaFoundInWildMode, AttributeError): | ||||
| @@ -204,8 +212,8 @@ class RecipeScraperOpenGraph(ABCScraperStrategy): | ||||
|     Abstract class for all recipe parsers. | ||||
|     """ | ||||
|  | ||||
|     async def get_html(self) -> str: | ||||
|         return await safe_scrape_html(self.url) | ||||
|     async def get_html(self, url: str) -> str: | ||||
|         return await safe_scrape_html(url) | ||||
|  | ||||
|     def get_recipe_fields(self, html) -> dict | None: | ||||
|         """ | ||||
| @@ -245,7 +253,7 @@ class RecipeScraperOpenGraph(ABCScraperStrategy): | ||||
|         """ | ||||
|         Parse a recipe from a given url. | ||||
|         """ | ||||
|         html = await self.get_html() | ||||
|         html = await self.get_html(self.url) | ||||
|  | ||||
|         og_data = self.get_recipe_fields(html) | ||||
|  | ||||
|   | ||||
| @@ -1,6 +1,5 @@ | ||||
| import json | ||||
| from pathlib import Path | ||||
| from typing import Optional, Union | ||||
|  | ||||
| import pytest | ||||
| from bs4 import BeautifulSoup | ||||
| @@ -12,7 +11,7 @@ from slugify import slugify | ||||
|  | ||||
| from mealie.schema.recipe.recipe import RecipeCategory | ||||
| from mealie.services.recipe.recipe_data_service import RecipeDataService | ||||
| from mealie.services.scraper.scraper_strategies import RecipeScraperOpenGraph | ||||
| from mealie.services.scraper.recipe_scraper import DEFAULT_SCRAPER_STRATEGIES | ||||
| from tests import data, utils | ||||
| from tests.utils import api_routes | ||||
| from tests.utils.factories import random_string | ||||
| @@ -31,9 +30,9 @@ def get_init(html_path: Path): | ||||
|     def init_override( | ||||
|         self, | ||||
|         url, | ||||
|         proxies: Optional[str] = None, | ||||
|         timeout: Optional[Union[float, tuple, None]] = None, | ||||
|         wild_mode: Optional[bool] = False, | ||||
|         proxies: str | None = None, | ||||
|         timeout: float | tuple | None = None, | ||||
|         wild_mode: bool | None = False, | ||||
|         **_, | ||||
|     ): | ||||
|         page_data = html_path.read_bytes() | ||||
| @@ -48,7 +47,7 @@ def get_init(html_path: Path): | ||||
|  | ||||
|  | ||||
| def open_graph_override(html: str): | ||||
|     def get_html(self) -> str: | ||||
|     async def get_html(self, url: str) -> str: | ||||
|         return html | ||||
|  | ||||
|     return get_html | ||||
| @@ -68,11 +67,12 @@ def test_create_by_url( | ||||
|         get_init(recipe_data.html_file), | ||||
|     ) | ||||
|     # Override the get_html method of all scraper strategies to return the test html | ||||
|     monkeypatch.setattr( | ||||
|         RecipeScraperOpenGraph, | ||||
|         "get_html", | ||||
|         open_graph_override(recipe_data.html_file.read_text()), | ||||
|     ) | ||||
|     for scraper_cls in DEFAULT_SCRAPER_STRATEGIES: | ||||
|         monkeypatch.setattr( | ||||
|             scraper_cls, | ||||
|             "get_html", | ||||
|             open_graph_override(recipe_data.html_file.read_text()), | ||||
|         ) | ||||
|     # Skip image downloader | ||||
|     monkeypatch.setattr( | ||||
|         RecipeDataService, | ||||
| @@ -113,12 +113,13 @@ def test_create_by_url_with_tags( | ||||
|         "__init__", | ||||
|         get_init(html_file), | ||||
|     ) | ||||
|     # Override the get_html method of the RecipeScraperOpenGraph to return the test html | ||||
|     monkeypatch.setattr( | ||||
|         RecipeScraperOpenGraph, | ||||
|         "get_html", | ||||
|         open_graph_override(html_file.read_text()), | ||||
|     ) | ||||
|     # Override the get_html method of all scraper strategies to return the test html | ||||
|     for scraper_cls in DEFAULT_SCRAPER_STRATEGIES: | ||||
|         monkeypatch.setattr( | ||||
|             scraper_cls, | ||||
|             "get_html", | ||||
|             open_graph_override(html_file.read_text()), | ||||
|         ) | ||||
|     # Skip image downloader | ||||
|     monkeypatch.setattr( | ||||
|         RecipeDataService, | ||||
| @@ -198,7 +199,7 @@ def test_read_update( | ||||
|     assert len(recipe["recipeCategory"]) == len(recipe_categories) | ||||
|  | ||||
|     test_name = [x.name for x in recipe_categories] | ||||
|     for cats in zip(recipe["recipeCategory"], recipe_categories): | ||||
|     for cats in zip(recipe["recipeCategory"], recipe_categories, strict=False): | ||||
|         assert cats[0]["name"] in test_name | ||||
|  | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user