Use Open Graph metadata to make basic recipe cards when full recipe metadata is not available

This commit is contained in:
Richard Mitic
2021-01-10 20:15:43 +01:00
parent a4a33af1c3
commit 9818d567b9
5 changed files with 1944 additions and 29 deletions

View File

@@ -3,8 +3,11 @@ Helper script to download raw recipe data from a URL and dump it to disk.
The resulting files can be used as test input data.
"""
import sys, json
import sys, json, pprint
import requests
import extruct
from scrape_schema_recipe import scrape_url
from w3lib.html import get_base_url
for url in sys.argv[1:]:
try:
@@ -16,3 +19,9 @@ for url in sys.argv[1:]:
print(f"Saved {filename}")
except Exception as e:
print(f"Error for {url}: {e}")
print("Trying extruct instead")
pp = pprint.PrettyPrinter(indent=2)
r = requests.get(url)
base_url = get_base_url(r.text, r.url)
data = extruct.extract(r.text, base_url=base_url)
pp.pprint(data)