Mirror of https://github.com/mealie-recipes/mealie.git (synced 2025-12-15 23:05:23 -05:00)
security: multiple reported CVE fixes (#1515)
* update out-of-date license
* update typing / refactor
* fix arbitrary path injection
* use markdown sanitizer to prevent XSS (CWE-79)
* fix CWE-918 SSRF by validating url and mime type
* add security docs
* update recipe-scrapers
* resolve DoS from arbitrary url
* update changelog
* bump version
* add ref to #1506
* add #1511 to changelog
* use requests decoder
* actually fix encoding issue
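The markdown-sanitizer change for the XSS report (CWE-79) is not part of the hunks shown below. As a rough sketch of the general technique only — using the `markdown` and `bleach` packages as stand-ins, not necessarily what Mealie uses — user-supplied markdown can be rendered and then stripped of any tags or attributes outside an allowlist before it reaches the browser:

import bleach
import markdown  # stand-in renderer; any markdown-to-HTML step fits the same pattern

def render_untrusted_markdown(text: str) -> str:
    # Render first, then drop anything outside the allowlist so embedded
    # <script> tags or event-handler attributes in a recipe note cannot execute.
    raw_html = markdown.markdown(text)
    return bleach.clean(
        raw_html,
        tags=["p", "em", "strong", "ul", "ol", "li", "a", "code", "pre"],
        attributes={"a": ["href", "title"]},
    )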
@@ -1,6 +1,6 @@
 from pathlib import Path
 
-APP_VERSION = "v1.0.0beta-3"
+APP_VERSION = "v1.0.0beta-4"
 
 CWD = Path(__file__).parent
 BASE_DIR = CWD.parent.parent.parent
@@ -29,13 +29,13 @@ from mealie.schema.response.responses import ErrorResponse
 from mealie.services import urls
 from mealie.services.event_bus_service.event_bus_service import EventBusService, EventSource
 from mealie.services.event_bus_service.message_types import EventTypes
-from mealie.services.recipe.recipe_data_service import RecipeDataService
+from mealie.services.recipe.recipe_data_service import InvalidDomainError, NotAnImageError, RecipeDataService
 from mealie.services.recipe.recipe_service import RecipeService
 from mealie.services.recipe.template_service import TemplateService
 from mealie.services.scraper.recipe_bulk_scraper import RecipeBulkScraperService
 from mealie.services.scraper.scraped_extras import ScraperContext
 from mealie.services.scraper.scraper import create_from_url
-from mealie.services.scraper.scraper_strategies import RecipeScraperPackage
+from mealie.services.scraper.scraper_strategies import ForceTimeoutException, RecipeScraperPackage
 
 
 class BaseRecipeController(BaseUserController):
@@ -139,7 +139,12 @@ class RecipeController(BaseRecipeController):
     @router.post("/create-url", status_code=201, response_model=str)
     def parse_recipe_url(self, req: ScrapeRecipe):
         """Takes in a URL and attempts to scrape data and load it into the database"""
-        recipe, extras = create_from_url(req.url)
+        try:
+            recipe, extras = create_from_url(req.url)
+        except ForceTimeoutException as e:
+            raise HTTPException(
+                status_code=408, detail=ErrorResponse.respond(message="Recipe Scraping Timed Out")
+            ) from e
 
         if req.include_tags:
             ctx = ScraperContext(self.user.id, self.group_id, self.repos)
@@ -176,8 +181,13 @@ class RecipeController(BaseRecipeController):
     @router.post("/test-scrape-url")
     def test_parse_recipe_url(self, url: ScrapeRecipeTest):
         # Debugger should produce the same result as the scraper sees before cleaning
-        if scraped_data := RecipeScraperPackage(url.url).scrape_url():
-            return scraped_data.schema.data
+        try:
+            if scraped_data := RecipeScraperPackage(url.url).scrape_url():
+                return scraped_data.schema.data
+        except ForceTimeoutException as e:
+            raise HTTPException(
+                status_code=408, detail=ErrorResponse.respond(message="Recipe Scraping Timed Out")
+            ) from e
 
         return "recipe_scrapers was unable to scrape this URL"
@@ -314,7 +324,19 @@ class RecipeController(BaseRecipeController):
     def scrape_image_url(self, slug: str, url: ScrapeRecipe):
         recipe = self.mixins.get_one(slug)
         data_service = RecipeDataService(recipe.id)
-        data_service.scrape_image(url.url)
+
+        try:
+            data_service.scrape_image(url.url)
+        except NotAnImageError as e:
+            raise HTTPException(
+                status_code=400,
+                detail=ErrorResponse.respond("Url is not an image"),
+            ) from e
+        except InvalidDomainError as e:
+            raise HTTPException(
+                status_code=400,
+                detail=ErrorResponse.respond("Url is not from an allowed domain"),
+            ) from e
 
         recipe.image = cache.cache_key.new_key()
         self.service.update_one(recipe.slug, recipe)
@@ -338,13 +360,27 @@ class RecipeController(BaseRecipeController):
         file: UploadFile = File(...),
     ):
         """Upload a file to store as a recipe asset"""
-        file_name = f"{slugify(name)}.{extension}"
+        if "." in extension:
+            extension = extension.split(".")[-1]
+
+        file_slug = slugify(name)
+        if not extension or not file_slug:
+            raise HTTPException(status_code=400, detail="Missing required fields")
+
+        file_name = f"{file_slug}.{extension}"
         asset_in = RecipeAsset(name=name, icon=icon, file_name=file_name)
 
         recipe = self.mixins.get_one(slug)
 
         dest = recipe.asset_dir / file_name
 
+        # Ensure path is relative to the recipe's asset directory
+        if dest.absolute().parent != recipe.asset_dir:
+            raise HTTPException(
+                status_code=400,
+                detail=f"File name {file_name} or extension {extension} not valid",
+            )
+
         with dest.open("wb") as buffer:
             copyfileobj(file.file, buffer)
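The `dest.absolute().parent != recipe.asset_dir` guard above is the actual path-injection fix: once a user-supplied name or extension smuggles in a separator or `..`, the resolved parent no longer matches the asset directory and the upload is rejected. A minimal standalone sketch of the same check; the directory layout here is illustrative, not Mealie's real structure:

from pathlib import Path

def is_safe_asset_path(asset_dir: Path, file_name: str) -> bool:
    # Reject any file name whose parent directory is not exactly the asset dir.
    dest = asset_dir / file_name
    return dest.absolute().parent == asset_dir

assets = Path("/app/data/recipes/pasta/assets")        # hypothetical asset dir
print(is_safe_asset_path(assets, "photo.jpg"))          # True
print(is_safe_asset_path(assets, "../../../etc/evil"))  # False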
@@ -11,6 +11,14 @@ from mealie.services._base_service import BaseService
 _FIREFOX_UA = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:86.0) Gecko/20100101 Firefox/86.0"
 
 
+class NotAnImageError(Exception):
+    pass
+
+
+class InvalidDomainError(Exception):
+    pass
+
+
 class RecipeDataService(BaseService):
     minifier: img.ABCMinifier
@@ -56,8 +64,26 @@ class RecipeDataService(BaseService):
         return image_path
 
+    @staticmethod
+    def _validate_image_url(url: str) -> bool:
+        # sourcery skip: invert-any-all, use-any
+        """
+        Validates that the URL is of an allowed source and restricts certain sources to prevent
+        malicious images from being downloaded.
+        """
+        invalid_domains = {"127.0.0.1", "localhost"}
+        for domain in invalid_domains:
+            if domain in url:
+                return False
+
+        return True
+
     def scrape_image(self, image_url) -> None:
-        self.logger.info(f"Image URL: {image_url}")
+        self.logger.debug(f"Image URL: {image_url}")
+
+        if not self._validate_image_url(image_url):
+            self.logger.error(f"Invalid image URL: {image_url}")
+            raise InvalidDomainError(f"Invalid domain: {image_url}")
 
         if isinstance(image_url, str):  # Handles String Types
             pass
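`_validate_image_url` above is intentionally a small denylist aimed at the reported SSRF vector (CWE-918): it refuses any URL that mentions a loopback host before the service ever issues a request. A toy illustration of how the check behaves; the URLs are examples only:

# Mirrors the substring denylist added above, purely for illustration.
INVALID_DOMAINS = {"127.0.0.1", "localhost"}

def looks_allowed(url: str) -> bool:
    return not any(domain in url for domain in INVALID_DOMAINS)

print(looks_allowed("https://example.com/cake.jpg"))        # True  -> image is fetched
print(looks_allowed("http://localhost:9000/admin/backup"))  # False -> InvalidDomainError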
@@ -74,7 +100,7 @@ class RecipeDataService(BaseService):
             try:
                 r = requests.get(url, stream=True, headers={"User-Agent": _FIREFOX_UA})
             except Exception:
-                self.logger.exception("Image {url} could not be requested")
+                self.logger.exception(f"Image {url} could not be requested")
                 continue
             if r.status_code == 200:
                 all_image_requests.append((url, r))
@@ -100,9 +126,19 @@ class RecipeDataService(BaseService):
             self.logger.exception("Fatal Image Request Exception")
             return None
 
-        if r.status_code == 200:
-            r.raw.decode_content = True
-            self.logger.info(f"File Name Suffix {file_path.suffix}")
-            self.write_image(r.raw, file_path.suffix)
+        if r.status_code != 200:
+            # TODO: Probably should throw an exception in this case as well, but before these changes
+            # we were returning None if it failed anyways.
+            return None
 
-            file_path.unlink(missing_ok=True)
+        content_type = r.headers.get("content-type", "")
+
+        if "image" not in content_type:
+            self.logger.error(f"Content-Type: {content_type} is not an image")
+            raise NotAnImageError(f"Content-Type {content_type} is not an image")
+
+        r.raw.decode_content = True
+        self.logger.info(f"File Name Suffix {file_path.suffix}")
+        self.write_image(r.raw, file_path.suffix)
+
+        file_path.unlink(missing_ok=True)
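The content-type guard above is the mime-type half of the fix: even when the host passes validation, the response body is only written to disk if the server labels it as an image. A rough standalone sketch of the same idea, with illustrative error handling:

import requests

def fetch_image_bytes(url: str) -> bytes:
    r = requests.get(url, stream=True, timeout=10)
    r.raise_for_status()
    content_type = r.headers.get("content-type", "")
    if "image" not in content_type:
        # Same intent as NotAnImageError above: never persist non-image payloads.
        raise ValueError(f"Content-Type {content_type} is not an image")
    r.raw.decode_content = True  # let urllib3 undo gzip/deflate before reading
    return r.raw.read()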
@@ -1,10 +1,11 @@
+import time
 from abc import ABC, abstractmethod
 from typing import Any, Callable
 
 import extruct
 import requests
 from fastapi import HTTPException, status
-from recipe_scrapers import NoSchemaFoundInWildMode, SchemaScraperFactory, WebsiteNotImplementedError, scrape_me
+from recipe_scrapers import NoSchemaFoundInWildMode, SchemaScraperFactory, scrape_html
 from slugify import slugify
 from w3lib.html import get_base_url
@@ -14,6 +15,59 @@ from mealie.services.scraper.scraped_extras import ScrapedExtras
 from . import cleaner
 
+SCRAPER_TIMEOUT = 15
+
+
+class ForceTimeoutException(Exception):
+    pass
+
+
+def safe_scrape_html(url: str) -> str:
+    """
+    Scrapes the html from a url but will cancel the request
+    if the request takes longer than 15 seconds. This is used to mitigate
+    DDOS attacks from users providing a url with arbitrary large content.
+    """
+    resp = requests.get(url, timeout=SCRAPER_TIMEOUT, stream=True)
+
+    html_bytes = b""
+
+    start_time = time.time()
+
+    for chunk in resp.iter_content(chunk_size=1024):
+        html_bytes += chunk
+
+        if time.time() - start_time > SCRAPER_TIMEOUT:
+            raise ForceTimeoutException()
+
+    # =====================================
+    # Coppied from requests text property
+
+    # Try charset from content-type
+    content = None
+    encoding = resp.encoding
+
+    if not html_bytes:
+        return ""
+
+    # Fallback to auto-detected encoding.
+    if encoding is None:
+        encoding = resp.apparent_encoding
+
+    # Decode unicode from given encoding.
+    try:
+        content = str(html_bytes, encoding, errors="replace")
+    except (LookupError, TypeError):
+        # A LookupError is raised if the encoding was not found which could
+        # indicate a misspelling or similar mistake.
+        #
+        # A TypeError can be raised if encoding is None
+        #
+        # So we try blindly encoding.
+        content = str(html_bytes, errors="replace")
+
+    return content
+
+
 class ABCScraperStrategy(ABC):
     """
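A hypothetical caller of the helper added above; the URL is illustrative, and the routes in this commit actually translate the exception into an HTTP 408 rather than swallowing it:

try:
    html = safe_scrape_html("https://example.com/some-recipe")
except ForceTimeoutException:
    html = ""  # give up on slow or oversized responses instead of hanging the worker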
@@ -103,14 +157,13 @@ class RecipeScraperPackage(ABCScraperStrategy):
         return recipe, extras
 
     def scrape_url(self) -> SchemaScraperFactory.SchemaScraper | Any | None:
+        recipe_html = safe_scrape_html(self.url)
+
         try:
-            scraped_schema = scrape_me(self.url)
-        except (WebsiteNotImplementedError, AttributeError):
-            try:
-                scraped_schema = scrape_me(self.url, wild_mode=True)
-            except (NoSchemaFoundInWildMode, AttributeError):
-                self.logger.error("Recipe Scraper was unable to extract a recipe.")
-                return None
+            scraped_schema = scrape_html(recipe_html, org_url=self.url)
+        except (NoSchemaFoundInWildMode, AttributeError):
+            self.logger.error("Recipe Scraper was unable to extract a recipe.")
+            return None
 
         except ConnectionError as e:
             raise HTTPException(status.HTTP_400_BAD_REQUEST, {"details": "CONNECTION_ERROR"}) from e
@@ -150,7 +203,7 @@ class RecipeScraperOpenGraph(ABCScraperStrategy):
     """
 
     def get_html(self) -> str:
-        return requests.get(self.url).text
+        return safe_scrape_html(self.url)
 
     def get_recipe_fields(self, html) -> dict | None:
         """