mirror of
https://github.com/mealie-recipes/mealie.git
synced 2026-02-01 13:33:11 -05:00
feat: Remove OCR Support (#2838)
* remove ocr package
* remove tesseract
* remove OCR from app
* remove OCR from tests
* fix docs
This commit is contained in:
@@ -7,7 +7,6 @@ from . import (
|
||||
comments,
|
||||
explore,
|
||||
groups,
|
||||
ocr,
|
||||
organizers,
|
||||
parser,
|
||||
recipe,
|
||||
@@ -32,4 +31,3 @@ router.include_router(unit_and_foods.router)
|
||||
router.include_router(admin.router)
|
||||
router.include_router(validators.router)
|
||||
router.include_router(explore.router)
|
||||
router.include_router(ocr.router)
|
||||
|
||||
@@ -1,7 +0,0 @@
|
||||
from fastapi import APIRouter
|
||||
|
||||
from . import pytesseract
|
||||
|
||||
# Parent router for the OCR endpoints; every route registered below is served under "/ocr".
router = APIRouter(prefix="/ocr")

# Mount the routes declared by the sibling `pytesseract` module on the parent router.
router.include_router(pytesseract.router)
|
||||
@@ -1,37 +0,0 @@
|
||||
from fastapi import APIRouter, File
|
||||
|
||||
from mealie.routes._base import BaseUserController, controller
|
||||
from mealie.schema.ocr.ocr import OcrAssetReq, OcrTsvResponse
|
||||
from mealie.services.ocr.pytesseract import OcrService
|
||||
from mealie.services.recipe.recipe_data_service import RecipeDataService
|
||||
from mealie.services.recipe.recipe_service import RecipeService
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@controller(router)
class OCRController(BaseUserController):
    """Endpoints that run OCR over uploaded images and over stored recipe assets."""

    def __init__(self):
        self.ocr_service = OcrService()

    @router.post("/", response_model=str)
    def image_to_string(self, file: bytes = File(...)):
        """Return the text recognised in the uploaded image."""
        return self.ocr_service.image_to_string(file)

    @router.post("/file-to-tsv", response_model=list[OcrTsvResponse])
    def file_to_tsv(self, file: bytes = File(...)):
        """Run OCR on the uploaded image and return one entry per TSV row."""
        tsv = self.ocr_service.image_to_tsv(file)
        return self.ocr_service.format_tsv_output(tsv)

    @router.post("/asset-to-tsv", response_model=list[OcrTsvResponse])
    def asset_to_tsv(self, req: OcrAssetReq):
        """Run OCR on an asset of an existing recipe and return one entry per TSV row.

        Returns an empty list when the looked-up recipe has no id.
        """
        recipe_service = RecipeService(self.repos, self.user, self.group)
        recipe = recipe_service._get_recipe(req.recipe_slug)
        if recipe.id is None:
            return []

        data_service = RecipeDataService(recipe.id, recipe.group_id)
        # NOTE(review): `asset_name` comes straight from the request body and is joined onto the
        # asset directory without normalisation, so a value containing `..` (or an absolute path)
        # may resolve outside the recipe's asset folder -- confirm and validate before release.
        asset_path = data_service.dir_assets.joinpath(req.asset_name)

        # Read through a context manager so the handle is closed even if reading fails;
        # the previous bare `open()` was never closed.
        with open(asset_path, "rb") as asset_file:
            image_bytes = asset_file.read()

        tsv = self.ocr_service.image_to_tsv(image_bytes)
        return self.ocr_service.format_tsv_output(tsv)
|
||||
@@ -27,10 +27,7 @@ from mealie.schema.make_dependable import make_dependable
|
||||
from mealie.schema.recipe import Recipe, RecipeImageTypes, ScrapeRecipe
|
||||
from mealie.schema.recipe.recipe import CreateRecipe, CreateRecipeByUrlBulk, RecipeLastMade, RecipeSummary
|
||||
from mealie.schema.recipe.recipe_asset import RecipeAsset
|
||||
from mealie.schema.recipe.recipe_ingredient import RecipeIngredient
|
||||
from mealie.schema.recipe.recipe_scraper import ScrapeRecipeTest
|
||||
from mealie.schema.recipe.recipe_settings import RecipeSettings
|
||||
from mealie.schema.recipe.recipe_step import RecipeStep
|
||||
from mealie.schema.recipe.request_helpers import RecipeDuplicate, RecipeZipTokenResponse, UpdateImageResponse
|
||||
from mealie.schema.response import PaginationBase, PaginationQuery
|
||||
from mealie.schema.response.pagination import RecipeSearchQuery
|
||||
@@ -489,37 +486,3 @@ class RecipeController(BaseRecipeController):
|
||||
self.mixins.update_one(recipe, slug)
|
||||
|
||||
return asset_in
|
||||
|
||||
# ==================================================================================================================
|
||||
# OCR
|
||||
@router.post("/create-ocr", status_code=201, response_model=str)
def create_recipe_ocr(
    self, extension: str = Form(...), file: UploadFile = File(...), makefilerecipeimage: bool = Form(...)
):
    """Takes an image and creates a recipe based on the image"""
    # Placeholder recipe: one empty ingredient and one empty step, flagged as an OCR recipe
    # with assets shown.
    blank_ingredient = RecipeIngredient(note="", title=None, unit=None, food=None, original_text=None)
    blank_step = RecipeStep(text="")
    placeholder = Recipe(
        name="New OCR Recipe",
        recipe_ingredient=[blank_ingredient],
        recipe_instructions=[blank_step],
        is_ocr_recipe=True,
        settings=RecipeSettings(show_assets=True),
        id=None,
        image=None,
        recipe_yield=None,
        rating=None,
        orgURL=None,
        date_added=None,
        date_updated=None,
        created_at=None,
        update_at=None,
        nutrition=None,
    )
    created = self.service.create_one(placeholder)
    slug = created.slug

    # Store the upload as an asset of the new recipe.
    RecipeController.upload_recipe_asset(self, slug, "Original recipe image", "", extension, file)

    if not makefilerecipeimage:
        return slug

    # Rewind the upload so it can be read once more for the recipe image.
    upload_stream = file.file
    upload_stream.seek(0)
    self.update_recipe_image(slug, upload_stream.read(), extension)
    return slug
|
||||
|
||||
@@ -1,7 +0,0 @@
|
||||
# This file is auto-generated by gen_schema_exports.py
|
||||
from .ocr import OcrAssetReq, OcrTsvResponse
|
||||
|
||||
__all__ = [
|
||||
"OcrAssetReq",
|
||||
"OcrTsvResponse",
|
||||
]
|
||||
@@ -1,21 +0,0 @@
|
||||
from mealie.schema._mealie import MealieModel
|
||||
|
||||
|
||||
class OcrTsvResponse(MealieModel):
    # One parsed row of OCR TSV output. Every field has a default, so an instance can be
    # created empty and filled in column by column.

    # Integer columns: hierarchy counters
    level: int = 0
    page_num: int = 0
    block_num: int = 0
    par_num: int = 0
    line_num: int = 0
    word_num: int = 0

    # Integer columns: box position and size
    left: int = 0
    top: int = 0
    width: int = 0
    height: int = 0

    # Float confidence column and the recognised text
    conf: float = 0.0
    text: str = ""
|
||||
|
||||
|
||||
class OcrAssetReq(MealieModel):
    # Request body naming a stored recipe asset to run OCR on.

    # Slug of the recipe that owns the asset
    recipe_slug: str
    # Name of the asset within that recipe's asset directory
    asset_name: str
|
||||
@@ -128,7 +128,6 @@ class Recipe(RecipeSummary):
|
||||
assets: list[RecipeAsset] | None = []
|
||||
notes: list[RecipeNote] | None = []
|
||||
extras: dict | None = {}
|
||||
is_ocr_recipe: bool | None = False
|
||||
|
||||
comments: list[RecipeCommentOut] | None = []
|
||||
|
||||
|
||||
@@ -1,56 +0,0 @@
|
||||
from io import BytesIO
|
||||
|
||||
import pytesseract
|
||||
from PIL import Image
|
||||
|
||||
from mealie.schema.ocr.ocr import OcrTsvResponse
|
||||
from mealie.services._base_service import BaseService
|
||||
|
||||
|
||||
class OcrService(BaseService):
    """
    Class for ocr engines.
    """

    def image_to_string(self, image_data):
        """
        Returns a plain text translation of an image

        `image_data` may be the raw bytes of an image (as received from an upload) or anything
        `PIL.Image.open` accepts directly, such as a path or a binary file object.
        """
        # `Image.open` treats a bytes argument as a file name, not as image content, so raw
        # bytes must be wrapped in a file-like object first (as `image_to_tsv` already does).
        if isinstance(image_data, (bytes, bytearray)):
            image_data = BytesIO(image_data)

        with Image.open(image_data) as image:
            return pytesseract.image_to_string(image)

    def image_to_tsv(self, image_data, lang=None):
        """
        Returns the pytesseract default tsv output

        `image_data` is the raw bytes of the image; `lang` is forwarded to pytesseract and
        `None` leaves the language choice to pytesseract.
        """
        with Image.open(BytesIO(image_data)) as image:
            # `lang=None` is pytesseract's own default, so one call covers both cases.
            return pytesseract.image_to_data(image, lang=lang)

    def format_tsv_output(self, tsv: str) -> list[OcrTsvResponse]:
        """
        Returns a OcrTsvResponse from a default pytesseract tsv output

        The first line of `tsv` is read as the tab-separated column titles; each following
        non-blank line becomes one `OcrTsvResponse`. Columns that are not fields of
        `OcrTsvResponse` are ignored.
        """
        header, *rows = tsv.split("\n")
        titles = [title.strip() for title in header.split("\t")]
        known_fields = OcrTsvResponse.__fields__
        response: list[OcrTsvResponse] = []

        for row in rows:
            # Skip blank rows, including whitespace-only ones such as a stray "\r", which
            # would otherwise fail integer conversion.
            if not row.strip():
                continue

            line = OcrTsvResponse()
            for key, value in zip(titles, row.split("\t"), strict=False):
                cell = value.strip()
                if key == "text":
                    line.text = cell
                elif key == "conf":
                    line.conf = float(cell)
                elif key in known_fields:
                    # Every remaining known column is an integer field.
                    setattr(line, key, int(cell))

            response.append(line)

        return response
|
||||
Reference in New Issue
Block a user