Convert scraper to use async (#1915)

* add httpx dependency for async HTTP requests

* rework scraper strategies to download recipe html asynchronously

* rework recipe_data_service to download recipe images asynchronously

* fix recipe_parser test, so it can use async results

* fix bulk import so that it also works with async scraper

* fix broken recipe_parser tests

* Fix issues found by scanners

* Add additional checks for ingredient and instruction count in test_create_by_url

* Revert changes in test recipe_data
Since we are now checking ingredients and instructions in test_create_by_url, these would fail with the stored HTML of the recipe data

* Add explicit type annotation in recipe_data_service.largest_content_len

* Fix typo in annotation
This commit is contained in:
Sören
2023-01-29 01:43:27 +01:00
committed by GitHub
parent 7275dd2696
commit 3415a9c310
11 changed files with 129 additions and 115 deletions

View File

@@ -22,6 +22,7 @@ bcrypt = "^4.0.1"
extruct = "^0.14.0"
fastapi = "^0.89.0"
gunicorn = "^20.1.0"
httpx = "^0.23.1"
lxml = "^4.7.1"
orjson = "^3.8.0"
passlib = "^1.7.4"
@@ -40,7 +41,6 @@ recipe-scrapers = "^14.26.0"
requests = "^2.25.1"
tzdata = "^2022.7"
uvicorn = {extras = ["standard"], version = "^0.20.0"}
httpx = "^0.23.1"
[tool.poetry.group.dev.dependencies]
black = "^21.12b0"
@@ -53,6 +53,7 @@ pre-commit = "^2.20.0"
pydantic-to-typescript = "^1.0.7"
pylint = "^2.6.0"
pytest = "^7.2.0"
pytest-asyncio = "^0.20.3"
rich = "^13.0.0"
ruff = "^0.0.221"
types-PyYAML = "^6.0.4"
@@ -61,6 +62,7 @@ types-python-slugify = "^6.0.0"
types-requests = "^2.27.12"
types-urllib3 = "^1.26.11"
[build-system]
build-backend = "poetry.core.masonry.api"
requires = ["poetry-core>=1.0.0"]