Mirror of https://github.com/mealie-recipes/mealie.git

	Feature/Capture Scraper Improvement PRs (#749)
* capture #732 changes
* capture PR #733
* capture PR #736
* capture pr #745

Co-authored-by: Hayden <hay-kot@pm.me>

@@ -4,7 +4,6 @@ from fastapi import Depends, File
 from fastapi.datastructures import UploadFile
 from fastapi.encoders import jsonable_encoder
 from fastapi.responses import JSONResponse
-from scrape_schema_recipe import scrape_url
 from sqlalchemy.orm.session import Session
 from starlette.responses import FileResponse
 
@@ -16,7 +15,7 @@ from mealie.routes.routers import UserAPIRouter
 from mealie.schema.recipe import CreateRecipeByURL, Recipe, RecipeImageTypes
 from mealie.schema.recipe.recipe import CreateRecipe, RecipeSummary
 from mealie.services.recipe.recipe_service import RecipeService
-from mealie.services.scraper.scraper import create_from_url
+from mealie.services.scraper.scraper import create_from_url, scrape_from_url
 
 user_router = UserAPIRouter()
 logger = get_logger()
@@ -44,8 +43,11 @@ def parse_recipe_url(url: CreateRecipeByURL, recipe_service: RecipeService = Dep
 
 @user_router.post("/test-scrape-url")
 def test_parse_recipe_url(url: CreateRecipeByURL):
-    # TODO: Replace with more current implementation of testing schema
-    return scrape_url(url.url)
+    # Debugger should produce the same result as the scraper sees before cleaning
+    scraped_data = scrape_from_url(url.url)
+    if scraped_data:
+        return scraped_data.schema.data
+    return "recipe_scrapers was unable to scrape this URL"
 
 
 @user_router.post("/create-from-zip", status_code=201)
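
The reworked /test-scrape-url route above now returns the raw schema.org payload that recipe_scrapers extracted (scraped_data.schema.data) before any of Mealie's cleaning runs. A rough standalone sketch of the same inspection outside FastAPI; the helper name and the placeholder URL are illustrative and not part of the commit:

from recipe_scrapers import scrape_me

def raw_schema_for(url: str) -> dict:
    """Return the uncleaned schema.org dict for a recipe page, mirroring /test-scrape-url."""
    scraper = scrape_me(url)        # raises for unsupported or unreachable sites
    return scraper.schema.data      # same payload the debug endpoint returns

# Example call (placeholder URL; a real, supported recipe page is needed):
# print(raw_schema_for("https://example.com/some-recipe").get("name"))
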
@@ -44,10 +44,24 @@ def write_image(recipe_slug: str, file_data: bytes, extension: str) -> Path:
 def scrape_image(image_url: str, slug: str) -> Path:
     logger.info(f"Image URL: {image_url}")
     if isinstance(image_url, str):  # Handles String Types
-        image_url = image_url
+        pass
 
     if isinstance(image_url, list):  # Handles List Types
-        image_url = image_url[0]
+        # Multiple images have been defined in the schema - usually different resolutions
+        # Typically would be in smallest->biggest order, but can't be certain so test each.
+        # 'Google will pick the best image to display in Search results based on the aspect ratio and resolution.'
+
+        all_image_requests = []
+        for url in image_url:
+            try:
+                r = requests.get(url, stream=True, headers={"User-Agent": ""})
+            except Exception:
+                logger.exception(f"Image {url} could not be requested")
+                continue
+            if r.status_code == 200:
+                all_image_requests.append((url, r))
+
+        image_url, _ = max(all_image_requests, key=lambda url_r: len(url_r[1].content), default=("", 0))
 
     if isinstance(image_url, dict):  # Handles Dictionary Types
         for key in image_url:
@@ -70,6 +84,6 @@ def scrape_image(image_url: str, slug: str) -> Path:
 
         filename.unlink(missing_ok=True)
 
-        return slug
+        return Path(slug)
 
     return None
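
The list branch above now fetches every candidate image and keeps the one with the largest response body. A minimal sketch of that selection strategy in isolation; the function name, the timeout, and the narrower exception type are my additions, not part of the commit:

import requests

def pick_largest_image(candidate_urls: list[str]) -> str:
    """Download each candidate and keep the URL whose body is largest; '' if none succeed."""
    best_url, best_size = "", -1
    for url in candidate_urls:
        try:
            # Same empty User-Agent override as the hunk above.
            response = requests.get(url, stream=True, headers={"User-Agent": ""}, timeout=10)
        except requests.RequestException:
            continue
        if response.status_code == 200 and len(response.content) > best_size:
            best_url, best_size = url, len(response.content)
    return best_url
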
@@ -2,7 +2,7 @@ import html
 import json
 import re
 from datetime import datetime, timedelta
-from typing import List
+from typing import List, Optional
 
 from slugify import slugify
 
@@ -43,9 +43,13 @@ def clean_string(text: str) -> str:
     if isinstance(text, list):
         text = text[0]
 
+    print(type(text))
+
     if text == "" or text is None:
         return ""
 
+    print(text)
+
     cleaned_text = html.unescape(text)
     cleaned_text = re.sub("<[^<]+?>", "", cleaned_text)
     cleaned_text = re.sub(" +", " ", cleaned_text)
@@ -67,6 +71,40 @@ def clean_html(raw_html):
     return re.sub(cleanr, "", raw_html)
 
 
+def clean_nutrition(nutrition: Optional[dict]) -> dict[str, str]:
+    # Assumes that all units are supplied in grams, except sodium which may be in mg.
+
+    # Fn only expects a dict[str,str]. Other structures should not be parsed.
+    if not isinstance(nutrition, dict):
+        return {}
+
+    # Allow for commas as decimals (common in Europe)
+    # Compile once for efficiency
+    re_match_digits = re.compile(r"\d+([.,]\d+)?")
+
+    output_nutrition = {}
+    for key, val in nutrition.items():
+        # If the val contains digits matching the regex, add the first match to the output dict.
+        # Handle unexpected datastructures safely.
+        try:
+            if matched_digits := re_match_digits.search(val):
+                output_nutrition[key] = matched_digits.group(0)
+        except Exception:
+            continue
+
+    output_nutrition = {key: val.replace(",", ".") for key, val in output_nutrition.items()}
+
+    if "sodiumContent" in nutrition and "m" not in nutrition["sodiumContent"] and "g" in nutrition["sodiumContent"]:
+        # Sodium is in grams. Parse its value, multiply by 1k and return to string.
+        try:
+            output_nutrition["sodiumContent"] = str(float(output_nutrition["sodiumContent"]) * 1000)
+        except ValueError:
+            # Could not parse sodium content as float, so don't touch it.
+            pass
+
+    return output_nutrition
+
+
 def image(image=None) -> str:
     if not image:
         return "no image"
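
A quick worked example (mine, not from the diff) of what the clean_nutrition regex keeps: only the first run of digits with an optional decimal part, where the decimal separator may be a comma; commas are swapped for dots afterwards, and gram-denominated sodium is scaled to milligrams.

import re

re_match_digits = re.compile(r"\d+([.,]\d+)?")

assert re_match_digits.search("105 kcal").group(0) == "105"
assert re_match_digits.search("5,2 g").group(0) == "5,2"     # becomes "5.2" after the comma-to-dot replace
assert re_match_digits.search("10.1.2g").group(0) == "10.1"  # only the first numeric token survives
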
@@ -167,9 +205,11 @@ def clean_time(time_entry):
     elif isinstance(time_entry, datetime):
         print(time_entry)
     elif isinstance(time_entry, str):
-        if re.match("PT.*H.*M", time_entry):
+        try:
             time_delta_object = parse_duration(time_entry)
             return pretty_print_timedelta(time_delta_object)
+        except ValueError:
+            logger.error(f"Could not parse time_entry `{time_entry}`")
     else:
         return str(time_entry)
 
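
The reason for swapping the regex guard for try/except: the old pattern required both an hour and a minute component, so many valid ISO 8601 durations were never parsed at all. A small illustration (mine, not from the commit):

import re

assert re.match("PT.*H.*M", "PT2H30M") is not None  # only this shape was accepted before
assert re.match("PT.*H.*M", "PT30M") is None         # minutes only: previously never parsed
assert re.match("PT.*H.*M", "PT3H") is None          # hours only: previously never parsed
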
@@ -184,48 +224,34 @@ def parse_duration(iso_duration):
     Returns:
         a datetime.timedelta instance
     """
-    m = re.match(r"^P(?:(\d+)Y)?(?:(\d+)M)?(?:(\d+)D)?T(?:(\d+)H)?(?:(\d+)M)?(?:(\d+(?:.\d+)?)S)?$", iso_duration)
+    m = re.match(
+        r"^P((\d+)Y)?((\d+)M)?((?P<days>\d+)D)?"
+        r"T((?P<hours>\d+)H)?((?P<minutes>\d+)M)?((?P<seconds>\d+(?:\.\d+)?)S)?$",
+        iso_duration,
+    )
     if m is None:
         raise ValueError("invalid ISO 8601 duration string")
 
-    days = 0
-    hours = 0
-    minutes = 0
-    seconds = 0.0
-
     # Years and months are not being utilized here, as there is not enough
     # information provided to determine which year and which month.
     # Python's time_delta class stores durations as days, seconds and
     # microseconds internally, and therefore we'd have to
     # convert parsed years and months to specific number of days.
 
-    if m[3]:
-        days = int(m[3])
-    if m[4]:
-        hours = int(m[4])
-    if m[5]:
-        minutes = int(m[5])
-    if m[6]:
-        seconds = float(m[6])
+    times = {"days": 0, "hours": 0, "minutes": 0, "seconds": 0}
+    for unit, _ in times.items():
+        if m.group(unit):
+            times[unit] = int(float(m.group(unit)))
 
-    return timedelta(days=days, hours=hours, minutes=minutes, seconds=seconds)
+    return timedelta(**times)
 
 
-def pretty_print_timedelta(t, max_components=None, max_decimal_places=2):
+def pretty_print_timedelta(t: timedelta, max_components=None, max_decimal_places=2):
     """
     Print a pretty string for a timedelta.
-    For example datetime.timedelta(days=2, seconds=17280) will be printed as '2 days, 4 hours, 48 minutes'. Setting max_components to e.g. 1 will change this to '2.2 days', where the
+    For example datetime.timedelta(days=2, seconds=17280) will be printed as '2 days 4 Hours 48 Minutes'. Setting max_components to e.g. 1 will change this to '2.2 days', where the
     number of decimal points can also be set.
     """
-    time_scales = [
-        timedelta(days=365),
-        timedelta(days=1),
-        timedelta(hours=1),
-        timedelta(minutes=1),
-        timedelta(seconds=1),
-        timedelta(microseconds=1000),
-        timedelta(microseconds=1),
-    ]
     time_scale_names_dict = {
         timedelta(days=365): "year",
         timedelta(days=1): "day",
@@ -236,9 +262,8 @@ def pretty_print_timedelta(t, max_components=None, max_decimal_places=2):
         timedelta(microseconds=1): "microsecond",
     }
     count = 0
-    txt = ""
-    first = True
-    for scale in time_scales:
+    out_list = []
+    for scale, scale_name in time_scale_names_dict.items():
         if t >= scale:
             count += 1
             n = t / scale if count == max_components else int(t / scale)
@@ -247,15 +272,9 @@ def pretty_print_timedelta(t, max_components=None, max_decimal_places=2):
             n_txt = str(round(n, max_decimal_places))
             if n_txt[-2:] == ".0":
                 n_txt = n_txt[:-2]
-            txt += "{}{} {}{}".format(
-                "" if first else " ",
-                n_txt,
-                time_scale_names_dict[scale],
-                "s" if n > 1 else "",
-            )
-            if first:
-                first = False
 
-    if len(txt) == 0:
-        txt = "none"
-    return txt
+            out_list.append(f"{n_txt} {scale_name}{'s' if n > 1 else ''}")
+
+    if out_list == []:
+        return "none"
+    return " ".join(out_list)
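
The rewritten parse_duration leans on named groups so the matched components map straight onto timedelta keyword arguments. A standalone sanity check of that idea using the same pattern as the hunk above; the `or 0` fallback is my shorthand for the loop in the diff:

import re
from datetime import timedelta

pattern = (
    r"^P((\d+)Y)?((\d+)M)?((?P<days>\d+)D)?"
    r"T((?P<hours>\d+)H)?((?P<minutes>\d+)M)?((?P<seconds>\d+(?:\.\d+)?)S)?$"
)

m = re.match(pattern, "P1DT2H30M")
times = {unit: int(float(m.group(unit) or 0)) for unit in ("days", "hours", "minutes", "seconds")}
assert timedelta(**times) == timedelta(days=1, hours=2, minutes=30)
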
@@ -1,6 +1,5 @@
-import json
 from enum import Enum
-from typing import Any, Callable
+from typing import Any, Callable, Optional
 from uuid import uuid4
 
 import requests
@@ -8,17 +7,11 @@ from fastapi import HTTPException, status
 from recipe_scrapers import NoSchemaFoundInWildMode, SchemaScraperFactory, WebsiteNotImplementedError, scrape_me
 from slugify import slugify
 
-from mealie.core.config import get_app_dirs
-
-app_dirs = get_app_dirs()
 from mealie.core.root_logger import get_logger
 from mealie.schema.recipe import Recipe, RecipeStep
 from mealie.services.image.image import scrape_image
 from mealie.services.scraper import cleaner, open_graph
 
-LAST_JSON = app_dirs.DEBUG_DIR.joinpath("last_recipe.json")
-
-
 logger = get_logger()
 
 
@@ -32,7 +25,14 @@ def create_from_url(url: str) -> Recipe:
     Returns:
         Recipe: Recipe Object
     """
-    new_recipe = scrape_from_url(url)
+    # Try the different scrapers in order.
+    if scraped_data := scrape_from_url(url):
+        new_recipe = clean_scraper(scraped_data, url)
+    elif og_dict := extract_open_graph_values(url):
+        new_recipe = Recipe(**og_dict)
+    else:
+        raise HTTPException(status.HTTP_400_BAD_REQUEST, {"details": ParserErrors.BAD_RECIPE_DATA.value})
+
     logger.info(f"Image {new_recipe.image}")
     new_recipe.image = download_image_for_recipe(new_recipe.slug, new_recipe.image)
 
@@ -49,16 +49,17 @@ class ParserErrors(str, Enum):
     CONNECTION_ERROR = "CONNECTION_ERROR"
 
 
-def extract_open_graph_values(url) -> Recipe:
+def extract_open_graph_values(url) -> Optional[dict]:
     r = requests.get(url)
     recipe = open_graph.basic_recipe_from_opengraph(r.text, url)
-
-    return Recipe(**recipe)
+    if recipe.get("name", "") == "":
+        return None
+    return recipe
 
 
-def scrape_from_url(url: str) -> Recipe:
-    """Entry function to generating are recipe obejct from a url
-    This will determine if a url can be parsed and raise an appropriate error keyword
+def scrape_from_url(url: str):
+    """Entry function to scrape a recipe from a url
+    This will determine if a url can be parsed and return None if not, to allow another parser to try.
     This keyword is used on the frontend to reference a localized string to present on the UI.
 
     Args:
@@ -68,7 +69,7 @@ def scrape_from_url(url: str) -> Recipe:
         HTTPException: 400_BAD_REQUEST - See ParserErrors Class for Key Details
 
     Returns:
-        Recipe: Recipe Model
+        Optional[Scraped schema for cleaning]
     """
     try:
         scraped_schema = scrape_me(url)
@@ -76,28 +77,26 @@ def scrape_from_url(url: str) -> Recipe:
         try:
             scraped_schema = scrape_me(url, wild_mode=True)
         except (NoSchemaFoundInWildMode, AttributeError):
-            recipe = extract_open_graph_values(url)
-            if recipe.name != "":
-                return recipe
-            raise HTTPException(status.HTTP_400_BAD_REQUEST, {"details": ParserErrors.BAD_RECIPE_DATA.value})
+            # Recipe_scraper was unable to extract a recipe.
+            return None
 
     except ConnectionError:
         raise HTTPException(status.HTTP_400_BAD_REQUEST, {"details": ParserErrors.CONNECTION_ERROR.value})
 
+    # Check to see if the recipe is valid
     try:
+        ingredients = scraped_schema.ingredients()
         instruct = scraped_schema.instructions()
     except Exception:
+        ingredients = []
        instruct = []
 
-    try:
-        ing = scraped_schema.ingredients()
-    except Exception:
-        ing = []
-
-    if not instruct and not ing:
-        raise HTTPException(status.HTTP_400_BAD_REQUEST, {"details": ParserErrors.NO_RECIPE_DATA.value})
-    else:
-        return clean_scraper(scraped_schema, url)
+    if instruct and ingredients:
+        return scraped_schema
+
+    # recipe_scrapers did not get a valid recipe.
+    # Return None to let another scraper try.
+    return None
 
 
 def clean_scraper(scraped_data: SchemaScraperFactory.SchemaScraper, url: str) -> Recipe:
@@ -135,17 +134,22 @@ def clean_scraper(scraped_data: SchemaScraperFactory.SchemaScraper, url: str) ->
         except TypeError:
             return []
 
+    cook_time = try_get_default(None, "performTime", None, cleaner.clean_time) or try_get_default(
+        None, "cookTime", None, cleaner.clean_time
+    )
+
     return Recipe(
         name=try_get_default(scraped_data.title, "name", "No Name Found", cleaner.clean_string),
         slug="",
-        image=try_get_default(scraped_data.image, "image", None),
+        image=try_get_default(None, "image", None),
         description=try_get_default(None, "description", "", cleaner.clean_string),
+        nutrition=try_get_default(None, "nutrition", None, cleaner.clean_nutrition),
         recipe_yield=try_get_default(scraped_data.yields, "recipeYield", "1", cleaner.clean_string),
        recipe_ingredient=try_get_default(scraped_data.ingredients, "recipeIngredient", [""], cleaner.ingredient),
         recipe_instructions=get_instructions(),
         total_time=try_get_default(None, "totalTime", None, cleaner.clean_time),
         prep_time=try_get_default(None, "prepTime", None, cleaner.clean_time),
-        perform_time=try_get_default(None, "performTime", None, cleaner.clean_time),
+        perform_time=cook_time,
         org_url=url,
     )
 
@@ -160,10 +164,3 @@ def download_image_for_recipe(slug, image_url) -> dict:
         img_name = None
 
     return img_name or "no image"
-
-
-def dump_last_json(recipe_data: dict):
-    with open(LAST_JSON, "w") as f:
-        f.write(json.dumps(recipe_data, indent=4, default=str))
-
-    return
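
The behavioural shift in this file is that scrape_from_url no longer raises when recipe_scrapers fails; it returns None so create_from_url can fall back to the Open Graph parser and only then raise BAD_RECIPE_DATA. A generic sketch of that chain-of-fallbacks pattern; the names are illustrative, not Mealie's API:

from typing import Callable, Optional

def first_successful(url: str, scrapers: list[Callable[[str], Optional[dict]]]) -> dict:
    """Try each scraper in order; the first non-None result wins."""
    for scrape in scrapers:
        if (result := scrape(url)) is not None:
            return result
    raise ValueError("no scraper could handle this URL")
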
							
								
								
									
poetry.lock (generated): diff suppressed because the file is too large.

@@ -23,7 +23,6 @@ python-slugify = "^4.0.1"
 requests = "^2.25.1"
 PyYAML = "^5.3.1"
 extruct = "^0.12.0"
-scrape-schema-recipe = "^0.1.3"
 python-multipart = "^0.0.5"
 fastapi-camelcase = "^1.0.2"
 bcrypt = "^3.2.0"
@@ -50,6 +49,7 @@ coverage = "^5.5"
 pydantic-to-typescript = "^1.0.7"
 rich = "^10.7.0"
 isort = "^5.9.3"
+regex = "2021.9.30" # TODO: Remove during Upgrade -> https://github.com/psf/black/issues/2524
 
 [build-system]
 requires = ["poetry-core>=1.0.0"]

@@ -1,5 +1,6 @@
 import json
 import re
+from datetime import timedelta
 
 import pytest
 
@@ -59,6 +60,24 @@ def test_clean_image():
     assert cleaner.image(["My URL!", "MY SECOND URL"]) == "My URL!"
 
 
+@pytest.mark.parametrize(
+    "nutrition,expected",
+    [
+        (None, {}),
+        ({"calories": "105 kcal"}, {"calories": "105"}),
+        ({"calories": "105 kcal 104 sugar"}, {"calories": "105"}),
+        ({"calories": ""}, {}),
+        ({"calories": ["not just a string"], "sugarContent": "but still tries 555.321"}, {"sugarContent": "555.321"}),
+        ({"sodiumContent": "5.1235g"}, {"sodiumContent": "5123.5"}),
+        ({"sodiumContent": "5mg"}, {"sodiumContent": "5"}),
+        ({"sodiumContent": "10oz"}, {"sodiumContent": "10"}),
+        ({"sodiumContent": "10.1.2g"}, {"sodiumContent": "10100.0"}),
+    ],
+)
+def test_clean_nutrition(nutrition, expected):
+    assert cleaner.clean_nutrition(nutrition) == expected
+
+
 @pytest.mark.parametrize(
     "instructions",
     [
@@ -90,9 +109,29 @@ def test_html_with_recipe_data():
     assert url_validation_regex.match(recipe_data["image"])
 
 
-def test_time_cleaner():
+@pytest.mark.parametrize(
+    "time_delta,expected",
+    [
+        ("PT2H30M", "2 Hours 30 Minutes"),
+        ("PT30M", "30 Minutes"),
+        ("PT3H", "3 Hours"),
+        ("P1DT1H1M1S", "1 day 1 Hour 1 Minute 1 Second"),
+        ("P1DT1H1M1.53S", "1 day 1 Hour 1 Minute 1 Second"),
+        ("PT-3H", None),
+        ("PT", "none"),
+    ],
+)
+def test_time_cleaner(time_delta, expected):
+    assert cleaner.clean_time(time_delta) == expected
+
 
-    my_time_delta = "PT2H30M"
-    return_delta = cleaner.clean_time(my_time_delta)
-
-    assert return_delta == "2 Hours 30 Minutes"
+@pytest.mark.parametrize(
+    "t,max_components,max_decimal_places,expected",
+    [
+        (timedelta(days=2, seconds=17280), None, 2, "2 days 4 Hours 48 Minutes"),
+        (timedelta(days=2, seconds=17280), 1, 2, "2.2 days"),
+        (timedelta(days=365), None, 2, "1 year"),
+    ],
+)
+def test_pretty_print_timedelta(t, max_components, max_decimal_places, expected):
+    assert cleaner.pretty_print_timedelta(t, max_components, max_decimal_places) == expected
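
One of the new pretty_print_timedelta cases is worth spelling out: with max_components=1 the first matching scale (days) is kept as a float instead of being truncated to an int, which is where "2.2 days" comes from. A small worked check of the arithmetic (mine, not part of the test suite):

from datetime import timedelta

t = timedelta(days=2, seconds=17280)   # 2 days plus 4.8 hours
assert t / timedelta(days=1) == 2.2    # rounded to max_decimal_places=2 -> "2.2 days"
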