mirror of
				https://github.com/mealie-recipes/mealie.git
				synced 2025-10-31 02:03:35 -04:00 
			
		
		
		
	fix: remove network calls from tests (#2055)
* Abstracted the scraper `get_html` method
* Applied the mock to all scrapers
* Fixed an incorrect variable reference
This commit is contained in the branches listed on the original repository page.
		| @@ -3,6 +3,8 @@ from mealie.services.scraper.scraped_extras import ScrapedExtras | |||||||
|  |  | ||||||
| from .scraper_strategies import ABCScraperStrategy, RecipeScraperOpenGraph, RecipeScraperPackage | from .scraper_strategies import ABCScraperStrategy, RecipeScraperOpenGraph, RecipeScraperPackage | ||||||
|  |  | ||||||
|  | DEFAULT_SCRAPER_STRATEGIES: list[type[ABCScraperStrategy]] = [RecipeScraperPackage, RecipeScraperOpenGraph] | ||||||
|  |  | ||||||
|  |  | ||||||
| class RecipeScraper: | class RecipeScraper: | ||||||
|     """ |     """ | ||||||
| @@ -14,10 +16,7 @@ class RecipeScraper: | |||||||
|  |  | ||||||
|     def __init__(self, scrapers: list[type[ABCScraperStrategy]] | None = None) -> None: |     def __init__(self, scrapers: list[type[ABCScraperStrategy]] | None = None) -> None: | ||||||
|         if scrapers is None: |         if scrapers is None: | ||||||
|             scrapers = [ |             scrapers = DEFAULT_SCRAPER_STRATEGIES | ||||||
|                 RecipeScraperPackage, |  | ||||||
|                 RecipeScraperOpenGraph, |  | ||||||
|             ] |  | ||||||
|  |  | ||||||
|         self.scrapers = scrapers |         self.scrapers = scrapers | ||||||
|  |  | ||||||
|   | |||||||
| @@ -81,6 +81,10 @@ class ABCScraperStrategy(ABC): | |||||||
|         self.logger = get_logger() |         self.logger = get_logger() | ||||||
|         self.url = url |         self.url = url | ||||||
|  |  | ||||||
|  |     @abstractmethod | ||||||
|  |     async def get_html(self, url: str) -> str: | ||||||
|  |         ... | ||||||
|  |  | ||||||
|     @abstractmethod |     @abstractmethod | ||||||
|     async def parse(self) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]: |     async def parse(self) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]: | ||||||
|         """Parse a recipe from a web URL. |         """Parse a recipe from a web URL. | ||||||
| @@ -95,6 +99,9 @@ class ABCScraperStrategy(ABC): | |||||||
|  |  | ||||||
|  |  | ||||||
| class RecipeScraperPackage(ABCScraperStrategy): | class RecipeScraperPackage(ABCScraperStrategy): | ||||||
|  |     async def get_html(self, url: str) -> str: | ||||||
|  |         return await safe_scrape_html(url) | ||||||
|  |  | ||||||
|     def clean_scraper(self, scraped_data: SchemaScraperFactory.SchemaScraper, url: str) -> tuple[Recipe, ScrapedExtras]: |     def clean_scraper(self, scraped_data: SchemaScraperFactory.SchemaScraper, url: str) -> tuple[Recipe, ScrapedExtras]: | ||||||
|         def try_get_default(func_call: Callable | None, get_attr: str, default: Any, clean_func=None): |         def try_get_default(func_call: Callable | None, get_attr: str, default: Any, clean_func=None): | ||||||
|             value = default |             value = default | ||||||
| @@ -160,7 +167,8 @@ class RecipeScraperPackage(ABCScraperStrategy): | |||||||
|         return recipe, extras |         return recipe, extras | ||||||
|  |  | ||||||
|     async def scrape_url(self) -> SchemaScraperFactory.SchemaScraper | Any | None: |     async def scrape_url(self) -> SchemaScraperFactory.SchemaScraper | Any | None: | ||||||
|         recipe_html = await safe_scrape_html(self.url) |         recipe_html = await self.get_html(self.url) | ||||||
|  |  | ||||||
|         try: |         try: | ||||||
|             scraped_schema = scrape_html(recipe_html, org_url=self.url) |             scraped_schema = scrape_html(recipe_html, org_url=self.url) | ||||||
|         except (NoSchemaFoundInWildMode, AttributeError): |         except (NoSchemaFoundInWildMode, AttributeError): | ||||||
| @@ -204,8 +212,8 @@ class RecipeScraperOpenGraph(ABCScraperStrategy): | |||||||
|     Abstract class for all recipe parsers. |     Abstract class for all recipe parsers. | ||||||
|     """ |     """ | ||||||
|  |  | ||||||
|     async def get_html(self) -> str: |     async def get_html(self, url: str) -> str: | ||||||
|         return await safe_scrape_html(self.url) |         return await safe_scrape_html(url) | ||||||
|  |  | ||||||
|     def get_recipe_fields(self, html) -> dict | None: |     def get_recipe_fields(self, html) -> dict | None: | ||||||
|         """ |         """ | ||||||
| @@ -245,7 +253,7 @@ class RecipeScraperOpenGraph(ABCScraperStrategy): | |||||||
|         """ |         """ | ||||||
|         Parse a recipe from a given url. |         Parse a recipe from a given url. | ||||||
|         """ |         """ | ||||||
|         html = await self.get_html() |         html = await self.get_html(self.url) | ||||||
|  |  | ||||||
|         og_data = self.get_recipe_fields(html) |         og_data = self.get_recipe_fields(html) | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1,6 +1,5 @@ | |||||||
| import json | import json | ||||||
| from pathlib import Path | from pathlib import Path | ||||||
| from typing import Optional, Union |  | ||||||
|  |  | ||||||
| import pytest | import pytest | ||||||
| from bs4 import BeautifulSoup | from bs4 import BeautifulSoup | ||||||
| @@ -12,7 +11,7 @@ from slugify import slugify | |||||||
|  |  | ||||||
| from mealie.schema.recipe.recipe import RecipeCategory | from mealie.schema.recipe.recipe import RecipeCategory | ||||||
| from mealie.services.recipe.recipe_data_service import RecipeDataService | from mealie.services.recipe.recipe_data_service import RecipeDataService | ||||||
| from mealie.services.scraper.scraper_strategies import RecipeScraperOpenGraph | from mealie.services.scraper.recipe_scraper import DEFAULT_SCRAPER_STRATEGIES | ||||||
| from tests import data, utils | from tests import data, utils | ||||||
| from tests.utils import api_routes | from tests.utils import api_routes | ||||||
| from tests.utils.factories import random_string | from tests.utils.factories import random_string | ||||||
| @@ -31,9 +30,9 @@ def get_init(html_path: Path): | |||||||
|     def init_override( |     def init_override( | ||||||
|         self, |         self, | ||||||
|         url, |         url, | ||||||
|         proxies: Optional[str] = None, |         proxies: str | None = None, | ||||||
|         timeout: Optional[Union[float, tuple, None]] = None, |         timeout: float | tuple | None = None, | ||||||
|         wild_mode: Optional[bool] = False, |         wild_mode: bool | None = False, | ||||||
|         **_, |         **_, | ||||||
|     ): |     ): | ||||||
|         page_data = html_path.read_bytes() |         page_data = html_path.read_bytes() | ||||||
| @@ -48,7 +47,7 @@ def get_init(html_path: Path): | |||||||
|  |  | ||||||
|  |  | ||||||
| def open_graph_override(html: str): | def open_graph_override(html: str): | ||||||
|     def get_html(self) -> str: |     async def get_html(self, url: str) -> str: | ||||||
|         return html |         return html | ||||||
|  |  | ||||||
|     return get_html |     return get_html | ||||||
| @@ -68,8 +67,9 @@ def test_create_by_url( | |||||||
|         get_init(recipe_data.html_file), |         get_init(recipe_data.html_file), | ||||||
|     ) |     ) | ||||||
|     # Override the get_html method of the RecipeScraperOpenGraph to return the test html |     # Override the get_html method of the RecipeScraperOpenGraph to return the test html | ||||||
|  |     for scraper_cls in DEFAULT_SCRAPER_STRATEGIES: | ||||||
|         monkeypatch.setattr( |         monkeypatch.setattr( | ||||||
|         RecipeScraperOpenGraph, |             scraper_cls, | ||||||
|             "get_html", |             "get_html", | ||||||
|             open_graph_override(recipe_data.html_file.read_text()), |             open_graph_override(recipe_data.html_file.read_text()), | ||||||
|         ) |         ) | ||||||
| @@ -113,9 +113,10 @@ def test_create_by_url_with_tags( | |||||||
|         "__init__", |         "__init__", | ||||||
|         get_init(html_file), |         get_init(html_file), | ||||||
|     ) |     ) | ||||||
|     # Override the get_html method of the RecipeScraperOpenGraph to return the test html |     # Override the get_html method of all scraper strategies to return the test html | ||||||
|  |     for scraper_cls in DEFAULT_SCRAPER_STRATEGIES: | ||||||
|         monkeypatch.setattr( |         monkeypatch.setattr( | ||||||
|         RecipeScraperOpenGraph, |             scraper_cls, | ||||||
|             "get_html", |             "get_html", | ||||||
|             open_graph_override(html_file.read_text()), |             open_graph_override(html_file.read_text()), | ||||||
|         ) |         ) | ||||||
| @@ -198,7 +199,7 @@ def test_read_update( | |||||||
|     assert len(recipe["recipeCategory"]) == len(recipe_categories) |     assert len(recipe["recipeCategory"]) == len(recipe_categories) | ||||||
|  |  | ||||||
|     test_name = [x.name for x in recipe_categories] |     test_name = [x.name for x in recipe_categories] | ||||||
|     for cats in zip(recipe["recipeCategory"], recipe_categories): |     for cats in zip(recipe["recipeCategory"], recipe_categories, strict=False): | ||||||
|         assert cats[0]["name"] in test_name |         assert cats[0]["name"] in test_name | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user