Mirror of https://github.com/mealie-recipes/mealie.git, synced 2026-02-15 12:23:12 -05:00
Convert scraper to use async (#1915)
* add httpx dependency for async http requests
* rework scraper strategies to download recipe html asynchronously
* rework recipe_data_service to download recipe images asynchronously
* fix recipe_parser test, so it can use async results
* fix bulk import so that it also works with async scraper
* fix broken recipe_parser tests
* Fix issues found by scanners
* Add additional checks for ingredient and instruction count in test_create_by_url
* Revert changes in test recipe_data: since we are checking ingredients and instructions in test_create_url now, these would fail with the stored html of recipe data
* Add explicit type annotation in recipe_data_service.largest_content_len
* Fix typo in annotation
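The core of the change is that scraper strategies now fetch recipe HTML with httpx instead of a blocking HTTP client. A minimal sketch of that pattern, assuming a helper of this shape (the function name, timeout value, and error handling are illustrative assumptions, not code from this commit):

import httpx


async def safe_scrape_html(url: str, timeout: float = 15.0) -> str | None:
    """Download recipe page HTML without blocking the event loop.

    Illustrative sketch only; the real strategy code in mealie may differ.
    """
    try:
        async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client:
            response = await client.get(url)
            response.raise_for_status()
            return response.text
    except httpx.HTTPError:
        # Treat unreachable or failing pages as "no data" rather than raising.
        return None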
@@ -162,10 +162,10 @@ class RecipeController(BaseRecipeController):
     # URL Scraping Operations

     @router.post("/create-url", status_code=201, response_model=str)
-    def parse_recipe_url(self, req: ScrapeRecipe):
+    async def parse_recipe_url(self, req: ScrapeRecipe):
         """Takes in a URL and attempts to scrape data and load it into the database"""
         try:
-            recipe, extras = create_from_url(req.url)
+            recipe, extras = await create_from_url(req.url)
         except ForceTimeoutException as e:
             raise HTTPException(
                 status_code=408, detail=ErrorResponse.respond(message="Recipe Scraping Timed Out")
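The handler itself switches from `def` to `async def` because FastAPI runs plain `def` endpoints in a worker threadpool but `async def` endpoints directly on the event loop, so downstream network I/O has to be awaitable. A standalone illustration of that endpoint shape (not mealie code; route and names are made up for the example):

import asyncio

from fastapi import FastAPI

app = FastAPI()


@app.post("/demo-create-url", status_code=201)
async def demo_parse_recipe_url(url: str) -> str:
    # await hands control back to the event loop while the "download" runs,
    # so other requests are not blocked behind this one
    await asyncio.sleep(0.1)  # stand-in for an httpx request
    return f"scraped {url}"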
@@ -206,10 +206,10 @@ class RecipeController(BaseRecipeController):
         return {"reportId": report_id}

     @router.post("/test-scrape-url")
-    def test_parse_recipe_url(self, url: ScrapeRecipeTest):
+    async def test_parse_recipe_url(self, url: ScrapeRecipeTest):
         # Debugger should produce the same result as the scraper sees before cleaning
         try:
-            if scraped_data := RecipeScraperPackage(url.url).scrape_url():
+            if scraped_data := await RecipeScraperPackage(url.url).scrape_url():
                 return scraped_data.schema.data
         except ForceTimeoutException as e:
             raise HTTPException(
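Because `scrape_url` is now a coroutine, the recipe_parser tests mentioned in the commit message also have to await it. One common way to do that under pytest is the pytest-asyncio marker; whether mealie uses exactly this plugin and module path is an assumption in the sketch below:

import pytest

from mealie.services.scraper.scraper_strategies import RecipeScraperPackage  # assumed module path


@pytest.mark.asyncio  # requires the pytest-asyncio plugin; assumed test setup
async def test_scrape_url_returns_schema_data():
    # Illustrative test shape only: awaits the now-async scraper and checks its result.
    scraped = await RecipeScraperPackage("https://example.com/recipe").scrape_url()
    if scraped is not None:
        assert scraped.schema.data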
@@ -381,12 +381,12 @@ class RecipeController(BaseRecipeController):
     # Image and Assets

     @router.post("/{slug}/image", tags=["Recipe: Images and Assets"])
-    def scrape_image_url(self, slug: str, url: ScrapeRecipe):
+    async def scrape_image_url(self, slug: str, url: ScrapeRecipe):
         recipe = self.mixins.get_one(slug)
         data_service = RecipeDataService(recipe.id)

         try:
-            data_service.scrape_image(url.url)
+            await data_service.scrape_image(url.url)
         except NotAnImageError as e:
             raise HTTPException(
                 status_code=400,
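The image path follows the same pattern: `RecipeDataService.scrape_image` becomes a coroutine that downloads image bytes with httpx. The commit message also mentions an explicit type annotation in `recipe_data_service.largest_content_len`; only that name is taken from the commit, so the logic below is a hedged guess at what such a helper could look like, not the actual implementation:

import httpx


async def largest_content_len(urls: list[str]) -> tuple[str, int]:
    """Pick the candidate image URL with the biggest Content-Length header.

    Illustrative sketch; the real signature and behaviour in mealie may differ.
    """
    largest_url: str = ""
    largest_len: int = 0  # explicit annotation, as the commit message describes

    async with httpx.AsyncClient() as client:
        for url in urls:
            response = await client.head(url)
            length = int(response.headers.get("Content-Length", 0))
            if length > largest_len:
                largest_url, largest_len = url, length

    return largest_url, largest_len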