feat (WIP): bring png OCR scanning support (#1670)

* Add pytesseract

* Add simple ocr endpoint

replace extension argument

* feat/ocr-editor gui

* fix frontend linting issues

* Add service unit tests

* Add split text modes & single ingredient/instruction editing

* make split mode really reactive

* Remove default step and ingredient

* make the linter happy

* Accept only image uploads

* Add automatic recipe title suggestion

* Correct regex

* fix incorrect array.map method usage

* make the linter happy again

* Swap route to use asset name

* Rearrange buttons

* fix test data

* feat: Allow making image the recipe image

* Add translation

* Make the linter happy

* Restrict function setPropertyValueByPath generic

* Restrict template literal type

* Add a more friendly icon to creation page

* update poetry lock file

* Correct sloppy ocr classes

* Make MyPy happy

* Rewrite safer tests

* Add tesseract to backend test CI container dependencies

* Make canvas element a component global

* Remove unwanted spaces in selected text

* Add way to know if recipe was created with ocr

* Access to ocr-editor for ocr recipes

* Update Alembic revision

* Make the frontend build

* Fix scrolling offset bug

* Allow creation of recipes with custom settings

* Fix rebasing mistakes

* Add format_tsv_output test

* Exclude the tests data directory only

* Enforce camelCase for frontend functions

* Remove import of unused component

* Fix type and class initialization

* Add multi-language support

* Highlight words in mount

* Fix image ratio bug

* Better ocr creation page

* Revert awkward feature to scroll in Selection mode

* Rebasing alembic migrations sux

* Remove obsolete getShared function

* Add function docstring

* Move down ocr creation option

* Make toolbar icons more generic

* Show help at the bottom of the page

* move ocr types to own file

* Use template ref for the canvas

* Use i18n.tc to get strings directly

* Correct naming mistake

* Move Ocr editor to own directory

* Create Ocr Editor parts

* Safeguard recipe properties access

* Add loading frontend animation due to longer request time

* minor cleanup chores

Co-authored-by: Miroito <alban.vachette@gmail.com>
This commit is contained in:
Hayden
2022-09-25 15:00:45 -08:00
committed by GitHub
parent a8f3922907
commit 39adea4ee3
44 changed files with 1659 additions and 34 deletions

View File

@@ -104,6 +104,7 @@ class RecipeModel(SqlAlchemyBase, BaseMixins):
rating = sa.Column(sa.Integer)
org_url = sa.Column(sa.String)
extras: list[ApiExtras] = orm.relationship("ApiExtras", cascade="all, delete-orphan")
is_ocr_recipe = sa.Column(sa.Boolean, default=False)
# Time Stamp Properties
date_added = sa.Column(sa.Date, default=datetime.date.today)

View File

@@ -7,6 +7,7 @@ from . import (
comments,
explore,
groups,
ocr,
organizers,
parser,
recipe,
@@ -31,3 +32,4 @@ router.include_router(unit_and_foods.router)
router.include_router(admin.router)
router.include_router(validators.router)
router.include_router(explore.router)
router.include_router(ocr.router)

View File

@@ -0,0 +1,7 @@
from fastapi import APIRouter
from . import pytesseract
# Parent router for the OCR endpoints; every route included below is served under "/ocr".
router = APIRouter(prefix="/ocr")
# Mount the routes declared in the sibling `pytesseract` routes module.
router.include_router(pytesseract.router)

View File

@@ -0,0 +1,37 @@
from io import BytesIO

from fastapi import APIRouter, File, HTTPException

from mealie.routes._base import BaseUserController, controller
from mealie.schema.ocr.ocr import OcrAssetReq, OcrTsvResponse
from mealie.services.ocr.pytesseract import OcrService
from mealie.services.recipe.recipe_data_service import RecipeDataService
from mealie.services.recipe.recipe_service import RecipeService
router = APIRouter()


@controller(router)
class OCRController(BaseUserController):
    """Endpoints that run OCR on uploaded images or on stored recipe assets."""

    def __init__(self):
        self.ocr_service = OcrService()

    @router.post("/", response_model=str)
    def image_to_string(self, file: bytes = File(...)):
        """Return the plain text recognised in the uploaded image."""
        # The upload arrives as raw bytes. Pillow's Image.open treats a bytes
        # argument as a file *name*, so pass a file-like object instead.
        return self.ocr_service.image_to_string(BytesIO(file))

    @router.post("/file-to-tsv", response_model=list[OcrTsvResponse])
    def file_to_tsv(self, file: bytes = File(...)):
        """Return the parsed OCR TSV rows for the uploaded image."""
        tsv = self.ocr_service.image_to_tsv(file)
        return self.ocr_service.format_tsv_output(tsv)

    @router.post("/asset-to-tsv", response_model=list[OcrTsvResponse])
    def asset_to_tsv(self, req: OcrAssetReq):
        """Return the parsed OCR TSV rows for an asset already stored on a recipe.

        Returns an empty list when the recipe has no id.

        Raises:
            HTTPException: 400 when ``req.asset_name`` points outside the
                recipe's assets directory, 404 when the asset file is missing.
        """
        recipe_service = RecipeService(self.repos, self.user, self.group)
        recipe = recipe_service._get_recipe(req.recipe_slug)

        if recipe.id is None:
            return []

        data_service = RecipeDataService(recipe.id, recipe.group_id)

        # `asset_name` comes straight from the request body: resolve it and make
        # sure it cannot escape the assets directory (e.g. "../../secret").
        # NOTE(review): assumes `dir_assets` is a pathlib.Path — the original
        # code already called `.joinpath()` on it.
        assets_dir = data_service.dir_assets.resolve()
        asset_path = assets_dir.joinpath(req.asset_name).resolve()
        if not asset_path.is_relative_to(assets_dir):
            raise HTTPException(status_code=400, detail="Invalid asset name")
        if not asset_path.is_file():
            raise HTTPException(status_code=404, detail="Asset not found")

        # read_bytes() opens and closes the file; the previous bare open() leaked the handle.
        tsv = self.ocr_service.image_to_tsv(asset_path.read_bytes())
        return self.ocr_service.format_tsv_output(tsv)

View File

@@ -33,7 +33,10 @@ from mealie.schema.recipe.recipe import (
RecipeSummary,
)
from mealie.schema.recipe.recipe_asset import RecipeAsset
from mealie.schema.recipe.recipe_ingredient import RecipeIngredient
from mealie.schema.recipe.recipe_scraper import ScrapeRecipeTest
from mealie.schema.recipe.recipe_settings import RecipeSettings
from mealie.schema.recipe.recipe_step import RecipeStep
from mealie.schema.recipe.request_helpers import RecipeZipTokenResponse, UpdateImageResponse
from mealie.schema.response.responses import ErrorResponse
from mealie.services import urls
@@ -435,3 +438,37 @@ class RecipeController(BaseRecipeController):
self.mixins.update_one(recipe, slug)
return asset_in
# ==================================================================================================================
# OCR
@router.post("/create-ocr", status_code=201, response_model=str)
def create_recipe_ocr(
    self,
    extension: str = Form(...),
    file: UploadFile = File(...),
    makefilerecipeimage: bool = Form(...),
):
    """Takes an image and creates a recipe based on the image"""
    # Placeholder content: one blank ingredient and one blank step, with the
    # recipe flagged as OCR-created and its assets shown.
    blank_ingredient = RecipeIngredient(note="", title=None, unit=None, food=None, original_text=None)
    blank_step = RecipeStep(text="")
    ocr_settings = RecipeSettings(show_assets=True)

    placeholder = Recipe(
        name="New OCR Recipe",
        recipe_ingredient=[blank_ingredient],
        recipe_instructions=[blank_step],
        is_ocr_recipe=True,
        settings=ocr_settings,
        id=None,
        image=None,
        recipe_yield=None,
        rating=None,
        orgURL=None,
        date_added=None,
        date_updated=None,
        created_at=None,
        update_at=None,
        nutrition=None,
    )
    created = self.service.create_one(placeholder)
    slug = created.slug

    # Attach the uploaded image to the new recipe as an asset.
    RecipeController.upload_recipe_asset(self, slug, "Original recipe image", "", extension, file)

    if not makefilerecipeimage:
        return slug

    # Rewind the upload so it can be read a second time for the recipe image.
    file.file.seek(0)
    image_bytes = file.file.read()
    self.update_recipe_image(slug, image_bytes, extension)
    return slug

View File

21
mealie/schema/ocr/ocr.py Normal file
View File

@@ -0,0 +1,21 @@
from mealie.schema._mealie import MealieModel
class OcrTsvResponse(MealieModel):
    # One row of OCR TSV output. Every field has a default, so an instance can
    # be created empty and then filled in column by column.
    # NOTE(review): the field names presumably mirror the column titles of
    # pytesseract's TSV output — confirm against the OCR service that fills them.

    # Integer columns.
    # NOTE(review): *_num look like hierarchy indices (page/block/paragraph/line/word) — confirm.
    level: int = 0
    page_num: int = 0
    block_num: int = 0
    par_num: int = 0
    line_num: int = 0
    word_num: int = 0
    # NOTE(review): presumably the bounding box of the recognised element, in pixels — confirm.
    left: int = 0
    top: int = 0
    width: int = 0
    height: int = 0
    # The only float field; NOTE(review): presumably the recognition confidence — confirm.
    conf: float = 0.0
    # The only string field; NOTE(review): presumably the recognised text — confirm.
    text: str = ""
class OcrAssetReq(MealieModel):
    # Request body naming a recipe asset.
    # NOTE(review): presumably consumed by the "asset-to-tsv" OCR route — confirm.
    recipe_slug: str  # NOTE(review): presumably the slug of the recipe owning the asset — confirm
    asset_name: str  # NOTE(review): presumably a file name within that recipe's assets — confirm

View File

@@ -141,10 +141,11 @@ class Recipe(RecipeSummary):
nutrition: Optional[Nutrition]
# Mealie Specific
settings: Optional[RecipeSettings] = RecipeSettings()
settings: Optional[RecipeSettings] = None
assets: Optional[list[RecipeAsset]] = []
notes: Optional[list[RecipeNote]] = []
extras: Optional[dict] = {}
is_ocr_recipe: Optional[bool] = False
comments: Optional[list[RecipeCommentOut]] = []

View File

View File

@@ -0,0 +1,56 @@
from io import BytesIO
import pytesseract
from PIL import Image
from mealie.schema.ocr.ocr import OcrTsvResponse
from mealie.services._base_service import BaseService
class OcrService(BaseService):
    """
    Class for ocr engines.
    """

    def image_to_string(self, image_data):
        """
        Returns a plain text translation of an image

        `image_data` may be the raw bytes of an image, or anything Pillow's
        `Image.open` accepts (a path or a binary file object).
        """
        # Pillow treats a bytes argument as a file *name*; wrap raw image bytes
        # in a file-like object, as image_to_tsv already does.
        if isinstance(image_data, (bytes, bytearray)):
            image_data = BytesIO(image_data)

        return pytesseract.image_to_string(Image.open(image_data))

    def image_to_tsv(self, image_data, lang=None):
        """
        Returns the pytesseract default tsv output

        `image_data` is the raw bytes of the image. `lang` is forwarded to
        pytesseract only when it is given.
        """
        options = {} if lang is None else {"lang": lang}
        return pytesseract.image_to_data(Image.open(BytesIO(image_data)), **options)

    def format_tsv_output(self, tsv: str) -> list[OcrTsvResponse]:
        """
        Returns a OcrTsvResponse from a default pytesseract tsv output

        The first line of `tsv` holds the tab separated column titles; every
        following non-blank line becomes one OcrTsvResponse. Columns whose title
        is not a field of OcrTsvResponse are ignored.

        Raises ValueError when a numeric column does not hold a number.
        """
        # str.split always yields at least one element, so the header unpacking
        # is safe even for an empty string.
        header, *rows = tsv.split("\n")
        titles = [title.strip() for title in header.split("\t")]

        response: list[OcrTsvResponse] = []

        for row in rows:
            # Skip blank rows, including whitespace-only ones such as a stray "\r".
            if not row.strip():
                continue

            line = OcrTsvResponse()
            for key, value in zip(titles, row.split("\t")):
                value = value.strip()
                if key == "text":
                    setattr(line, key, value)
                elif key == "conf":
                    setattr(line, key, float(value))
                elif key in OcrTsvResponse.__fields__:
                    # Every remaining known column is an integer field.
                    setattr(line, key, int(value))

            response.append(line)

        return response

View File

@@ -111,14 +111,18 @@ class RecipeService(BaseService):
additional_attrs=create_data.dict(),
)
data.settings = RecipeSettings(
public=self.group.preferences.recipe_public,
show_nutrition=self.group.preferences.recipe_show_nutrition,
show_assets=self.group.preferences.recipe_show_assets,
landscape_view=self.group.preferences.recipe_landscape_view,
disable_comments=self.group.preferences.recipe_disable_comments,
disable_amount=self.group.preferences.recipe_disable_amount,
)
if isinstance(create_data, CreateRecipe) or create_data.settings is None:
if self.group.preferences is not None:
data.settings = RecipeSettings(
public=self.group.preferences.recipe_public,
show_nutrition=self.group.preferences.recipe_show_nutrition,
show_assets=self.group.preferences.recipe_show_assets,
landscape_view=self.group.preferences.recipe_landscape_view,
disable_comments=self.group.preferences.recipe_disable_comments,
disable_amount=self.group.preferences.recipe_disable_amount,
)
else:
data.settings = RecipeSettings()
return self.repos.recipes.create(data)