2021-04-02 21:54:46 -08:00
|
|
|
import shutil
from dataclasses import dataclass
from pathlib import Path
from typing import IO, Union

import requests

from mealie.core import root_logger
from mealie.schema.recipe import Recipe
from mealie.services.image import minify
|
|
|
|
|
|
2021-04-10 15:09:06 -08:00
|
|
|
# Module-level logger obtained from the application's central logging setup.
logger = root_logger.get_logger()
|
|
|
|
|
|
2021-04-02 21:54:46 -08:00
|
|
|
|
|
|
|
|
@dataclass
class ImageOptions:
    """File names under which the stored size variants of a recipe image live.

    All three defaults point at webp files; the prefixes distinguish the
    full-size original from its smaller derivatives.
    """

    # Full-size stored image.
    ORIGINAL_IMAGE: str = "original.webp"
    # Minified variant of the original.
    MINIFIED_IMAGE: str = "min-original.webp"
    # Smallest variant of the original.
    TINY_IMAGE: str = "tiny-original.webp"
|
2021-04-02 21:54:46 -08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
# Shared module-level instance holding the default image file names.
IMG_OPTIONS = ImageOptions()
|
|
|
|
|
|
|
|
|
|
|
2021-04-10 15:33:29 -08:00
|
|
|
def write_image(recipe_slug: str, file_data: Union[Path, bytes, IO], extension: str) -> Path:
    """Write a recipe's image to disk as ``original.<extension>`` and minify it.

    Args:
        recipe_slug: slug identifying the recipe; used to resolve the image directory.
        file_data: the image source. Despite the original ``bytes`` annotation the
            body accepts a ``Path`` to an existing file, raw ``bytes``, or a
            readable binary file-like object (e.g. ``requests`` raw stream).
        extension: file extension for the stored image; a leading dot is stripped.

    Returns:
        The path the image was written to.
    """
    image_dir = Recipe(slug=recipe_slug).image_dir
    extension = extension.replace(".", "")
    image_path = image_dir.joinpath(f"original.{extension}")
    # Remove any stale file first so the write below starts from scratch.
    image_path.unlink(missing_ok=True)

    if isinstance(file_data, Path):
        # Source already exists on disk - copy it, preserving metadata.
        shutil.copy2(file_data, image_path)
    elif isinstance(file_data, bytes):
        # "wb" instead of the original "ab": the target was just unlinked,
        # so write mode is equivalent but states the intent correctly.
        with open(image_path, "wb") as f:
            f.write(file_data)
    else:
        # Assume a readable binary stream.
        with open(image_path, "wb") as f:
            shutil.copyfileobj(file_data, f)

    # Produce the minified/tiny variants for the freshly written image.
    minify.minify_image(image_path, force=True)

    return image_path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def scrape_image(image_url: str, slug: str) -> Path:
    """Download a scraped recipe image and persist it via ``write_image``.

    Args:
        image_url: the scraped image field. Despite the ``str`` annotation this
            may be a plain URL string, a list of URLs and/or dicts, or a dict
            with a ``"url"`` key - scraped schema.org data is loosely typed.
        slug: recipe slug used to resolve the image directory.

    Returns:
        ``Path(slug)`` on success, ``None`` when no image could be fetched.
    """
    logger.info(f"Image URL: {image_url}")

    # Browser-like User-Agent; some image hosts reject requests without one.
    _FIREFOX_UA = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:86.0) Gecko/20100101 Firefox/86.0"

    if isinstance(image_url, str):  # Handles String Types
        pass

    if isinstance(image_url, list):  # Handles List Types
        # Multiple images have been defined in the schema - usually different resolutions
        # Typically would be in smallest->biggest order, but can't be certain so test each.
        # 'Google will pick the best image to display in Search results based on the aspect ratio and resolution.'

        all_image_requests = []
        for url in image_url:
            if isinstance(url, dict):
                url = url.get("url", "")
            try:
                r = requests.get(url, stream=True, headers={"User-Agent": _FIREFOX_UA})
            except Exception:
                # BUGFIX: message was missing its f-prefix, so the literal
                # text "{url}" was logged instead of the failing URL.
                logger.exception(f"Image {url} could not be requested")
                continue

            if r.status_code == 200:
                all_image_requests.append((url, r))

        # Keep the largest successful download. An empty list falls back to "",
        # which fails the request below and yields the None return path.
        image_url, _ = max(all_image_requests, key=lambda url_r: len(url_r[1].content), default=("", 0))

    if isinstance(image_url, dict):  # Handles Dictionary Types
        if "url" in image_url:
            image_url = image_url["url"]

    # Destination path keyed on the URL's extension.
    filename = slug + "." + image_url.split(".")[-1]
    filename = Recipe(slug=slug).image_dir.joinpath(filename)

    try:
        r = requests.get(image_url, stream=True, headers={"User-Agent": _FIREFOX_UA})
    except Exception:
        logger.exception("Fatal Image Request Exception")
        return None

    if r.status_code == 200:
        r.raw.decode_content = True
        logger.info(f"File Name Suffix {filename.suffix}")
        write_image(slug, r.raw, filename.suffix)

        # write_image stored the data under its own "original.<ext>" name;
        # drop any leftover file at the URL-derived path.
        filename.unlink(missing_ok=True)

        return Path(slug)

    return None
|