mirror of
https://github.com/mealie-recipes/mealie.git
synced 2026-01-09 02:21:20 -05:00
feat: OpenAI Ingredient Parsing (#3581)
This commit is contained in:
6
mealie/services/openai/__init__.py
Normal file
6
mealie/services/openai/__init__.py
Normal file
@@ -0,0 +1,6 @@
|
||||
from .openai import OpenAIDataInjection, OpenAIService
|
||||
|
||||
__all__ = [
|
||||
"OpenAIDataInjection",
|
||||
"OpenAIService",
|
||||
]
|
||||
129
mealie/services/openai/openai.py
Normal file
129
mealie/services/openai/openai.py
Normal file
@@ -0,0 +1,129 @@
|
||||
import inspect
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
from textwrap import dedent
|
||||
|
||||
from openai import NOT_GIVEN, AsyncOpenAI
|
||||
from openai.resources.chat.completions import ChatCompletion
|
||||
from pydantic import BaseModel, field_validator
|
||||
|
||||
from mealie.core.config import get_app_settings
|
||||
|
||||
from .._base_service import BaseService
|
||||
|
||||
|
||||
class OpenAIDataInjection(BaseModel):
    """
    A labelled piece of data that is appended to a stored prompt.

    `value` is always stored as a string; the validator below serializes
    non-string input before the `str` field validation runs.
    """

    description: str
    value: str

    @field_validator("value", mode="before")
    @classmethod
    def parse_value(cls, value):
        """
        Coerce `value` into a string before field validation.

        Strings pass through untouched, Pydantic model instances are dumped to
        JSON, Pydantic model classes are replaced by their JSON schema, and
        anything else is serialized with compact `json.dumps`.

        Raises `ValueError` when `value` is falsy (None, "", [], {}, 0, ...).
        """

        if not value:
            raise ValueError("Value cannot be empty")
        if isinstance(value, str):
            return value

        # convert Pydantic models to JSON
        if isinstance(value, BaseModel):
            return value.model_dump_json()

        # convert Pydantic types to their JSON schema definition
        if inspect.isclass(value) and issubclass(value, BaseModel):
            value = value.model_json_schema()

        # attempt to convert object to JSON
        try:
            return json.dumps(value, separators=(",", ":"))
        except TypeError:
            # not JSON-serializable: hand the raw object back and let the
            # regular `str` field validation accept or reject it
            return value
|
||||
|
||||
|
||||
class OpenAIService(BaseService):
    """
    Async helper around the OpenAI chat completions API.

    Loads stored prompts from `PROMPTS_DIR` and sends system/user message
    pairs to the configured model.
    """

    PROMPTS_DIR = Path(os.path.dirname(os.path.abspath(__file__))) / "prompts"

    def __init__(self) -> None:
        """
        Read the OpenAI settings from the app settings.

        Raises `ValueError` when OpenAI is not enabled.
        """

        settings = get_app_settings()
        if not settings.OPENAI_ENABLED:
            raise ValueError("OpenAI is not enabled")

        self.model = settings.OPENAI_MODEL
        self.workers = settings.OPENAI_WORKERS
        self.send_db_data = settings.OPENAI_SEND_DATABASE_DATA

        # factory rather than a shared instance: every request builds its own client
        self.get_client = lambda: AsyncOpenAI(
            base_url=settings.OPENAI_BASE_URL,
            api_key=settings.OPENAI_API_KEY,
        )

        super().__init__()

    @classmethod
    def get_prompt(cls, name: str, data_injections: list[OpenAIDataInjection] | None = None) -> str:
        """
        Load stored prompt and inject data into it.

        Access prompts with dot notation.
        For example, to access `prompts/recipes/parse-recipe-ingredients.txt`, use
        `recipes.parse-recipe-ingredients`

        Each data injection is appended after the prompt as its own section:
        a `###` marker, the description, a `---` separator, a blank line and
        then the value.

        Raises `ValueError` when `name` is empty and `OSError` when the prompt
        file cannot be read.
        """

        if not name:
            raise ValueError("Prompt name cannot be empty")

        tree = name.split(".")
        prompt_dir = os.path.join(cls.PROMPTS_DIR, *tree[:-1], tree[-1] + ".txt")
        try:
            # read as UTF-8 explicitly so the result does not depend on the platform's default encoding
            with open(prompt_dir, encoding="utf-8") as f:
                content = f.read()
        except OSError as e:
            raise OSError(f"Unable to load prompt {name}") from e

        if not data_injections:
            return content

        content_parts = [content]
        for data_injection in data_injections:
            # Built line by line instead of dedenting an interpolated f-string:
            # dedent runs after interpolation, so a multi-line description or
            # value with a line at column 0 would leave the markers indented.
            content_parts.append(
                "\n".join(
                    [
                        "",
                        "###",
                        data_injection.description,
                        "---",
                        "",
                        data_injection.value,
                        "",
                    ]
                )
            )
        return "\n".join(content_parts)

    async def _get_raw_response(
        self, prompt: str, message: str, temperature=0.2, force_json_response=True
    ) -> ChatCompletion:
        """
        Send `prompt` as the system message and `message` as the user message
        and return the raw chat completion.

        When `force_json_response` is set, the `json_object` response format
        is requested; otherwise the parameter is left unset.
        """

        client = self.get_client()
        return await client.chat.completions.create(
            messages=[
                {
                    "role": "system",
                    "content": prompt,
                },
                {
                    "role": "user",
                    "content": message,
                },
            ],
            model=self.model,
            temperature=temperature,
            response_format={"type": "json_object"} if force_json_response else NOT_GIVEN,
        )

    async def get_response(self, prompt: str, message: str, temperature=0.2, force_json_response=True) -> str | None:
        """
        Send data to OpenAI and return the response message content

        Returns None when the response has no choices or when the request
        fails for any reason; failures are logged, not raised.
        """

        try:
            response = await self._get_raw_response(prompt, message, temperature, force_json_response)
            if not response.choices:
                return None
            return response.choices[0].message.content
        except Exception:
            # best-effort boundary: callers only ever see None on failure
            self.logger.exception("OpenAI Request Failed")
            return None
|
||||
@@ -0,0 +1,24 @@
|
||||
You are a bot that parses user input into recipe ingredients. You will receive a list of one or more ingredients, each containing one or more of the following components:
|
||||
- Food: the actual physical ingredient used in the recipe. For instance, if you receive "3 cups of onions, chopped", the food is "onions"
|
||||
- Unit: the unit of measurement for this ingredient. For instance, if you receive "2 lbs chicken breast", the unit is "lbs" (short for "pounds")
|
||||
- Quantity: the numerical representation of how much of this ingredient. For instance, if you receive "3 1/2 grams of minced garlic", the quantity is "3 1/2". Quantity may be represented as a whole number (integer), a float or decimal, or a fraction. You should output quantity in only whole numbers or floats, converting fractions into floats. Floats longer than 10 decimal places should be rounded to 10 decimal places.
|
||||
- Note: the rest of the text that represents more detail on how to prepare the ingredient. Anything that is not one of the above should be the note. For instance, if you receive "one can of butter beans, drained" the note would be "drained". If you receive "3 cloves of garlic peeled and finely chopped", the note would be "peeled and finely chopped"
|
||||
- Input: The input is simply the ingredient string you are processing as-is. It is forbidden to modify this at all, you must provide the input exactly as you received it
|
||||
|
||||
While parsing the ingredients, there are some things to keep in mind:
|
||||
- If you cannot accurately determine the quantity, unit, food, or note, you should place everything into the note field and leave everything else empty. It's better to err on the side of putting everything in the note field than being wrong
|
||||
- You may receive recipe ingredients from multiple different languages. You should adhere to the grammar rules of the input language when trying to parse the ingredient string
|
||||
- Sometimes foods or units will be in their singular, plural, or other grammatical forms. You must interpret all of them appropriately
|
||||
- Sometimes ingredients will have text in parentheses (like this). Parentheses typically indicate something that should appear in the notes. For example: an input of "3 potatoes (roughly chopped)" would parse "roughly chopped" into the notes. Notice that when this occurs, the parentheses are dropped, and you should use "roughly chopped" instead of "(roughly chopped)" in the note
|
||||
- It's possible for the input to contain typos. For instance, you might see the word "potatos" instead of "potatoes". If it is a common misspelling, you may correct it
|
||||
- Pay close attention to what can be considered a unit of measurement. There are common measurements such as tablespoon, teaspoon, and gram, abbreviations such as tsp, tbsp, and oz, and others such as sprig, can, bundle, bunch, unit, cube, package, and pinch
|
||||
- Sometimes quantities can be given as a range, such as "3-5" or "1 to 2" or "three or four". In this instance, choose the lower quantity; do not try to average or otherwise calculate the quantity. For instance, if the input is "2-3 lbs of chicken breast" the quantity should be "2"
|
||||
- Any text that does not appear in the unit or food must appear in the notes. No text should be left off. The only exception for this is if a quantity is converted from text into a number. For instance, if you convert "2 dozen" into the number "24", you should not put the word "dozen" into any other field
|
||||
|
||||
It is imperative that you do not create any data or otherwise make up any information. Failure to adhere to this rule is illegal and will result in harsh punishment. If you are unsure, place the entire string into the note section of the response. Do not make things up.
|
||||
|
||||
In addition to calculating the recipe ingredient fields, you are also responsible for including a confidence value. This value is a float between 0 and 1, where 1 is full confidence that the result is correct, and 0 is no confidence that the result is correct. If you're unable to parse anything, and you put the entire string in the notes, you should return 0 confidence. If you can easily parse the string into each component, then you should return a confidence of 1. If you have to guess which part is the unit and which part is the food, your confidence should be lower, such as 0.6. Even if there is no unit or note, if you're able to determine the food, you may use a higher confidence. If the entire ingredient consists of only a food, you can use a confidence of 1.
|
||||
|
||||
Below you will receive the JSON schema for your response. Your response must be in valid JSON in the below schema as provided. You must respond in this JSON schema; failure to do so is illegal. It is imperative that you follow the schema precisely to avoid punishment. You must follow the JSON schema.
|
||||
|
||||
The user message that you receive will be the list of one or more recipe ingredients for you to parse. Your response should have exactly one item for each item provided. For instance, if you receive 12 items to parse, then your response should be an array of 12 parsed items.
|
||||
161
mealie/services/parser_services/_base.py
Normal file
161
mealie/services/parser_services/_base.py
Normal file
@@ -0,0 +1,161 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import TypeVar
|
||||
|
||||
from pydantic import UUID4, BaseModel
|
||||
from rapidfuzz import fuzz, process
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from mealie.db.models.recipe.ingredient import IngredientFoodModel, IngredientUnitModel
|
||||
from mealie.repos.all_repositories import get_repositories
|
||||
from mealie.repos.repository_factory import AllRepositories
|
||||
from mealie.schema.recipe.recipe_ingredient import (
|
||||
CreateIngredientFood,
|
||||
CreateIngredientUnit,
|
||||
IngredientFood,
|
||||
IngredientUnit,
|
||||
ParsedIngredient,
|
||||
)
|
||||
from mealie.schema.response.pagination import PaginationQuery
|
||||
|
||||
# Value type of the lookup map handed to ABCIngredientParser.find_match (any Pydantic model)
T = TypeVar("T", bound=BaseModel)
|
||||
|
||||
|
||||
class ABCIngredientParser(ABC):
    """
    Base class for ingredient parsers.

    Subclasses implement `parse_one` and `parse`; this class provides the
    lookup of parsed foods and units against the ones stored for the group.
    """

    def __init__(self, group_id: UUID4, session: Session) -> None:
        self.group_id = group_id
        self.session = session

        # alias lookups are built on first access and reused afterwards
        self._foods_by_alias: dict[str, IngredientFood] | None = None
        self._units_by_alias: dict[str, IngredientUnit] | None = None

    @property
    def _repos(self) -> AllRepositories:
        return get_repositories(self.session)

    @property
    def foods_by_alias(self) -> dict[str, IngredientFood]:
        """Normalized name, plural name and alias of every food in the group, mapped to the food"""

        if self._foods_by_alias is not None:
            return self._foods_by_alias

        stored_foods = (
            self._repos.ingredient_foods.by_group(self.group_id).page_all(PaginationQuery(page=1, per_page=-1)).items
        )

        lookup: dict[str, IngredientFood] = {}
        for stored_food in stored_foods:
            # order matters: a later key overwrites an earlier one with the same normalized form
            labels = [stored_food.name, stored_food.plural_name]
            labels.extend(alias.name for alias in stored_food.aliases or [])
            for label in labels:
                if label:
                    lookup[IngredientFoodModel.normalize(label)] = stored_food

        self._foods_by_alias = lookup
        return lookup

    @property
    def units_by_alias(self) -> dict[str, IngredientUnit]:
        """Normalized name, plural name, abbreviations and alias of every unit in the group, mapped to the unit"""

        if self._units_by_alias is not None:
            return self._units_by_alias

        stored_units = (
            self._repos.ingredient_units.by_group(self.group_id).page_all(PaginationQuery(page=1, per_page=-1)).items
        )

        lookup: dict[str, IngredientUnit] = {}
        for stored_unit in stored_units:
            # order matters: a later key overwrites an earlier one with the same normalized form
            labels = [
                stored_unit.name,
                stored_unit.plural_name,
                stored_unit.abbreviation,
                stored_unit.plural_abbreviation,
            ]
            labels.extend(alias.name for alias in stored_unit.aliases or [])
            for label in labels:
                if label:
                    lookup[IngredientUnitModel.normalize(label)] = stored_unit

        self._units_by_alias = lookup
        return lookup

    @property
    def food_fuzzy_match_threshold(self) -> int:
        """Lowest fuzzy score at which a stored food still counts as a match"""

        return 85

    @property
    def unit_fuzzy_match_threshold(self) -> int:
        """Lowest fuzzy score at which a stored unit still counts as a match"""

        return 70

    @abstractmethod
    async def parse_one(self, ingredient_string: str) -> ParsedIngredient: ...

    @abstractmethod
    async def parse(self, ingredients: list[str]) -> list[ParsedIngredient]: ...

    @classmethod
    def find_match(cls, match_value: str, *, store_map: dict[str, T], fuzzy_match_threshold: int = 0) -> T | None:
        """Look `match_value` up in `store_map`: exact key first, then the best fuzzy key at or above the threshold"""

        # an exact key wins outright
        if match_value in store_map:
            return store_map[match_value]

        # otherwise take the closest key, provided it reaches the threshold
        best = process.extractOne(
            match_value, store_map.keys(), scorer=fuzz.ratio, score_cutoff=fuzzy_match_threshold
        )
        return None if best is None else store_map[best[0]]

    def find_food_match(self, food: IngredientFood | CreateIngredientFood | str) -> IngredientFood | None:
        """Resolve `food` to a stored food; an `IngredientFood` is returned as-is"""

        if isinstance(food, IngredientFood):
            return food

        if isinstance(food, str):
            raw_name = food
        else:
            raw_name = food.name

        return self.find_match(
            IngredientFoodModel.normalize(raw_name),
            store_map=self.foods_by_alias,
            fuzzy_match_threshold=self.food_fuzzy_match_threshold,
        )

    def find_unit_match(self, unit: IngredientUnit | CreateIngredientUnit | str) -> IngredientUnit | None:
        """Resolve `unit` to a stored unit; an `IngredientUnit` is returned as-is"""

        if isinstance(unit, IngredientUnit):
            return unit

        if isinstance(unit, str):
            raw_name = unit
        else:
            raw_name = unit.name

        return self.find_match(
            IngredientUnitModel.normalize(raw_name),
            store_map=self.units_by_alias,
            fuzzy_match_threshold=self.unit_fuzzy_match_threshold,
        )

    def find_ingredient_match(self, ingredient: ParsedIngredient) -> ParsedIngredient:
        """Swap the parsed food and unit for their stored counterparts where a match exists (mutates `ingredient`)"""

        parsed = ingredient.ingredient

        if parsed.food:
            stored_food = self.find_food_match(parsed.food)
            if stored_food:
                parsed.food = stored_food

        if parsed.unit:
            stored_unit = self.find_unit_match(parsed.unit)
            if stored_unit:
                parsed.unit = stored_unit

        # Parser might have wrongly split a food into a unit and food.
        # Retry with "<unit> <food>" as a single food name.
        if isinstance(parsed.food, CreateIngredientFood) and isinstance(parsed.unit, CreateIngredientUnit):
            combined_food = self.find_food_match(f"{parsed.unit.name} {parsed.food.name}")
            if combined_food:
                parsed.food = combined_food
                parsed.unit = None

        return ingredient
|
||||
@@ -1,173 +1,23 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from fractions import Fraction
|
||||
from typing import TypeVar
|
||||
|
||||
from pydantic import UUID4, BaseModel
|
||||
from rapidfuzz import fuzz, process
|
||||
from pydantic import UUID4
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from mealie.core.root_logger import get_logger
|
||||
from mealie.db.models.recipe.ingredient import IngredientFoodModel, IngredientUnitModel
|
||||
from mealie.repos.all_repositories import get_repositories
|
||||
from mealie.repos.repository_factory import AllRepositories
|
||||
from mealie.schema.recipe import RecipeIngredient
|
||||
from mealie.schema.recipe.recipe_ingredient import (
|
||||
MAX_INGREDIENT_DENOMINATOR,
|
||||
CreateIngredientFood,
|
||||
CreateIngredientUnit,
|
||||
IngredientConfidence,
|
||||
IngredientFood,
|
||||
IngredientUnit,
|
||||
ParsedIngredient,
|
||||
RegisteredParser,
|
||||
)
|
||||
from mealie.schema.response.pagination import PaginationQuery
|
||||
|
||||
from . import brute, crfpp
|
||||
from . import brute, crfpp, openai
|
||||
from ._base import ABCIngredientParser
|
||||
|
||||
logger = get_logger(__name__)
|
||||
T = TypeVar("T", bound=BaseModel)
|
||||
|
||||
|
||||
class ABCIngredientParser(ABC):
|
||||
"""
|
||||
Abstract class for ingredient parsers.
|
||||
"""
|
||||
|
||||
def __init__(self, group_id: UUID4, session: Session) -> None:
|
||||
self.group_id = group_id
|
||||
self.session = session
|
||||
|
||||
self._foods_by_alias: dict[str, IngredientFood] | None = None
|
||||
self._units_by_alias: dict[str, IngredientUnit] | None = None
|
||||
|
||||
@property
|
||||
def _repos(self) -> AllRepositories:
|
||||
return get_repositories(self.session)
|
||||
|
||||
@property
|
||||
def foods_by_alias(self) -> dict[str, IngredientFood]:
|
||||
if self._foods_by_alias is None:
|
||||
foods_repo = self._repos.ingredient_foods.by_group(self.group_id)
|
||||
query = PaginationQuery(page=1, per_page=-1)
|
||||
all_foods = foods_repo.page_all(query).items
|
||||
|
||||
foods_by_alias: dict[str, IngredientFood] = {}
|
||||
for food in all_foods:
|
||||
if food.name:
|
||||
foods_by_alias[IngredientFoodModel.normalize(food.name)] = food
|
||||
if food.plural_name:
|
||||
foods_by_alias[IngredientFoodModel.normalize(food.plural_name)] = food
|
||||
|
||||
for alias in food.aliases or []:
|
||||
if alias.name:
|
||||
foods_by_alias[IngredientFoodModel.normalize(alias.name)] = food
|
||||
|
||||
self._foods_by_alias = foods_by_alias
|
||||
|
||||
return self._foods_by_alias
|
||||
|
||||
@property
|
||||
def units_by_alias(self) -> dict[str, IngredientUnit]:
|
||||
if self._units_by_alias is None:
|
||||
units_repo = self._repos.ingredient_units.by_group(self.group_id)
|
||||
query = PaginationQuery(page=1, per_page=-1)
|
||||
all_units = units_repo.page_all(query).items
|
||||
|
||||
units_by_alias: dict[str, IngredientUnit] = {}
|
||||
for unit in all_units:
|
||||
if unit.name:
|
||||
units_by_alias[IngredientUnitModel.normalize(unit.name)] = unit
|
||||
if unit.plural_name:
|
||||
units_by_alias[IngredientUnitModel.normalize(unit.plural_name)] = unit
|
||||
if unit.abbreviation:
|
||||
units_by_alias[IngredientUnitModel.normalize(unit.abbreviation)] = unit
|
||||
if unit.plural_abbreviation:
|
||||
units_by_alias[IngredientUnitModel.normalize(unit.plural_abbreviation)] = unit
|
||||
|
||||
for alias in unit.aliases or []:
|
||||
if alias.name:
|
||||
units_by_alias[IngredientUnitModel.normalize(alias.name)] = unit
|
||||
|
||||
self._units_by_alias = units_by_alias
|
||||
|
||||
return self._units_by_alias
|
||||
|
||||
@property
|
||||
def food_fuzzy_match_threshold(self) -> int:
|
||||
"""Minimum threshold to fuzzy match against a database food search"""
|
||||
|
||||
return 85
|
||||
|
||||
@property
|
||||
def unit_fuzzy_match_threshold(self) -> int:
|
||||
"""Minimum threshold to fuzzy match against a database unit search"""
|
||||
|
||||
return 70
|
||||
|
||||
@abstractmethod
|
||||
def parse_one(self, ingredient_string: str) -> ParsedIngredient: ...
|
||||
|
||||
@abstractmethod
|
||||
def parse(self, ingredients: list[str]) -> list[ParsedIngredient]: ...
|
||||
|
||||
@classmethod
|
||||
def find_match(cls, match_value: str, *, store_map: dict[str, T], fuzzy_match_threshold: int = 0) -> T | None:
|
||||
# check for literal matches
|
||||
if match_value in store_map:
|
||||
return store_map[match_value]
|
||||
|
||||
# fuzzy match against food store
|
||||
fuzz_result = process.extractOne(
|
||||
match_value, store_map.keys(), scorer=fuzz.ratio, score_cutoff=fuzzy_match_threshold
|
||||
)
|
||||
if fuzz_result is None:
|
||||
return None
|
||||
|
||||
return store_map[fuzz_result[0]]
|
||||
|
||||
def find_food_match(self, food: IngredientFood | CreateIngredientFood | str) -> IngredientFood | None:
|
||||
if isinstance(food, IngredientFood):
|
||||
return food
|
||||
|
||||
food_name = food if isinstance(food, str) else food.name
|
||||
match_value = IngredientFoodModel.normalize(food_name)
|
||||
return self.find_match(
|
||||
match_value,
|
||||
store_map=self.foods_by_alias,
|
||||
fuzzy_match_threshold=self.food_fuzzy_match_threshold,
|
||||
)
|
||||
|
||||
def find_unit_match(self, unit: IngredientUnit | CreateIngredientUnit | str) -> IngredientUnit | None:
|
||||
if isinstance(unit, IngredientUnit):
|
||||
return unit
|
||||
|
||||
unit_name = unit if isinstance(unit, str) else unit.name
|
||||
match_value = IngredientUnitModel.normalize(unit_name)
|
||||
return self.find_match(
|
||||
match_value,
|
||||
store_map=self.units_by_alias,
|
||||
fuzzy_match_threshold=self.unit_fuzzy_match_threshold,
|
||||
)
|
||||
|
||||
def find_ingredient_match(self, ingredient: ParsedIngredient) -> ParsedIngredient:
|
||||
if ingredient.ingredient.food and (food_match := self.find_food_match(ingredient.ingredient.food)):
|
||||
ingredient.ingredient.food = food_match
|
||||
|
||||
if ingredient.ingredient.unit and (unit_match := self.find_unit_match(ingredient.ingredient.unit)):
|
||||
ingredient.ingredient.unit = unit_match
|
||||
|
||||
# Parser might have wrongly split a food into a unit and food.
|
||||
if isinstance(ingredient.ingredient.food, CreateIngredientFood) and isinstance(
|
||||
ingredient.ingredient.unit, CreateIngredientUnit
|
||||
):
|
||||
if food_match := self.find_food_match(
|
||||
f"{ingredient.ingredient.unit.name} {ingredient.ingredient.food.name}"
|
||||
):
|
||||
ingredient.ingredient.food = food_match
|
||||
ingredient.ingredient.unit = None
|
||||
|
||||
return ingredient
|
||||
|
||||
|
||||
class BruteForceParser(ABCIngredientParser):
|
||||
@@ -175,7 +25,7 @@ class BruteForceParser(ABCIngredientParser):
|
||||
Brute force ingredient parser.
|
||||
"""
|
||||
|
||||
def parse_one(self, ingredient: str) -> ParsedIngredient:
|
||||
async def parse_one(self, ingredient: str) -> ParsedIngredient:
|
||||
bfi = brute.parse(ingredient, self)
|
||||
|
||||
parsed_ingredient = ParsedIngredient(
|
||||
@@ -191,8 +41,8 @@ class BruteForceParser(ABCIngredientParser):
|
||||
|
||||
return self.find_ingredient_match(parsed_ingredient)
|
||||
|
||||
def parse(self, ingredients: list[str]) -> list[ParsedIngredient]:
|
||||
return [self.parse_one(ingredient) for ingredient in ingredients]
|
||||
async def parse(self, ingredients: list[str]) -> list[ParsedIngredient]:
|
||||
return [await self.parse_one(ingredient) for ingredient in ingredients]
|
||||
|
||||
|
||||
class NLPParser(ABCIngredientParser):
|
||||
@@ -234,18 +84,19 @@ class NLPParser(ABCIngredientParser):
|
||||
|
||||
return self.find_ingredient_match(parsed_ingredient)
|
||||
|
||||
def parse(self, ingredients: list[str]) -> list[ParsedIngredient]:
|
||||
async def parse(self, ingredients: list[str]) -> list[ParsedIngredient]:
|
||||
crf_models = crfpp.convert_list_to_crf_model(ingredients)
|
||||
return [self._crf_to_ingredient(crf_model) for crf_model in crf_models]
|
||||
|
||||
def parse_one(self, ingredient: str) -> ParsedIngredient:
|
||||
items = self.parse([ingredient])
|
||||
async def parse_one(self, ingredient_string: str) -> ParsedIngredient:
|
||||
items = await self.parse([ingredient_string])
|
||||
return items[0]
|
||||
|
||||
|
||||
__registrar = {
|
||||
__registrar: dict[RegisteredParser, type[ABCIngredientParser]] = {
|
||||
RegisteredParser.nlp: NLPParser,
|
||||
RegisteredParser.brute: BruteForceParser,
|
||||
RegisteredParser.openai: openai.OpenAIParser,
|
||||
}
|
||||
|
||||
|
||||
|
||||
5
mealie/services/parser_services/openai/__init__.py
Normal file
5
mealie/services/parser_services/openai/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
||||
from .parser import OpenAIParser
|
||||
|
||||
__all__ = [
|
||||
"OpenAIParser",
|
||||
]
|
||||
122
mealie/services/parser_services/openai/parser.py
Normal file
122
mealie/services/parser_services/openai/parser.py
Normal file
@@ -0,0 +1,122 @@
|
||||
import asyncio
|
||||
import json
|
||||
from collections.abc import Awaitable
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from mealie.schema.recipe.recipe_ingredient import (
|
||||
CreateIngredientFood,
|
||||
CreateIngredientUnit,
|
||||
IngredientConfidence,
|
||||
ParsedIngredient,
|
||||
RecipeIngredient,
|
||||
)
|
||||
from mealie.services.openai import OpenAIDataInjection, OpenAIService
|
||||
|
||||
from .._base import ABCIngredientParser
|
||||
|
||||
|
||||
class OpenAIIngredient(BaseModel):
|
||||
"""
|
||||
This class defines the JSON schema sent to OpenAI. Its schema is
|
||||
injected directly into the OpenAI prompt.
|
||||
"""
|
||||
|
||||
__doc__ = "" # we don't want to include the docstring in the JSON schema
|
||||
|
||||
# ingredient string this entry was parsed from; OpenAIParser._convert_ingredient
# copies it into RecipeIngredient.original_text and ParsedIngredient.input
input: str
|
||||
# passed on as IngredientConfidence.average during conversion
confidence: float | None = None
|
||||
|
||||
# parsed components; unit and food are plain names here and are wrapped in
# CreateIngredientUnit / CreateIngredientFood during conversion when non-empty
quantity: float | None = 0
|
||||
unit: str | None = None
|
||||
food: str | None = None
|
||||
note: str | None = None
|
||||
|
||||
|
||||
# Response payload: OpenAIParser._get_prompt injects this model as the response schema,
# and OpenAIParser._parse validates each OpenAI response against it.
class OpenAIIngredients(BaseModel):
|
||||
ingredients: list[OpenAIIngredient] = []
|
||||
|
||||
|
||||
class OpenAIParser(ABCIngredientParser):
    """
    Ingredient parser that sends the ingredient strings to OpenAI and converts
    the JSON it returns into `ParsedIngredient` objects.
    """

    def _convert_ingredient(self, openai_ing: OpenAIIngredient) -> ParsedIngredient:
        """Build a `ParsedIngredient` from one OpenAI result and run it through `find_ingredient_match`"""

        ingredient = RecipeIngredient(
            original_text=openai_ing.input,
            quantity=openai_ing.quantity,
            # empty/None names produce no unit or food at all
            unit=CreateIngredientUnit(name=openai_ing.unit) if openai_ing.unit else None,
            food=CreateIngredientFood(name=openai_ing.food) if openai_ing.food else None,
            note=openai_ing.note,
        )

        parsed_ingredient = ParsedIngredient(
            input=openai_ing.input,
            confidence=IngredientConfidence(average=openai_ing.confidence),
            ingredient=ingredient,
        )

        return self.find_ingredient_match(parsed_ingredient)

    def _get_prompt(self, service: OpenAIService) -> str:
        """
        Assemble the system prompt: the stored ingredient-parsing prompt, the
        response JSON schema and, when enabled on the service, the known units.
        """

        data_injections = [
            OpenAIDataInjection(
                description=(
                    "This is the JSON response schema. You must respond in valid JSON that follows this schema. "
                    "Your payload should be as compact as possible, eliminating unncessesary whitespace. Any fields "
                    "with default values which you do not populate should not be in the payload."
                ),
                value=OpenAIIngredients,
            ),
        ]

        # Skip the unit list when there are no units: OpenAIDataInjection rejects
        # empty values, which would otherwise fail the whole parse.
        if service.send_db_data and self.units_by_alias:
            data_injections.extend(
                [
                    OpenAIDataInjection(
                        description=(
                            "Below is a list of units found in the units database. While parsing, you should "
                            "reference this list when determining which part of the input is the unit. You may "
                            "find a unit in the input that does not exist in this list. This should not prevent "
                            "you from parsing that text as a unit, however it may lower your confidence level."
                        ),
                        # dict keys are already unique; keeping their order makes the prompt deterministic
                        value=list(self.units_by_alias),
                    ),
                ]
            )

        return service.get_prompt("recipes.parse-recipe-ingredients", data_injections=data_injections)

    @staticmethod
    def _chunk_messages(messages: list[str], n=1) -> list[list[str]]:
        """Split `messages` into consecutive chunks of at most `n` items; `n` below 1 is treated as 1"""

        if n < 1:
            n = 1
        return [messages[i : i + n] for i in range(0, len(messages), n)]

    async def _parse(self, ingredients: list[str]) -> OpenAIIngredients:
        """
        Send the ingredients to OpenAI in chunks and merge the responses.

        Raises `Exception` when no chunk produced a response.
        """

        service = OpenAIService()
        prompt = self._get_prompt(service)

        # chunk ingredients and send each chunk to its own worker
        # NOTE(review): `service.workers` is used as the chunk *size* here, not
        # as the number of concurrent requests - confirm this is intended
        ingredient_chunks = self._chunk_messages(ingredients, n=service.workers)
        tasks: list[Awaitable[str | None]] = []
        for ingredient_chunk in ingredient_chunks:
            message = json.dumps(ingredient_chunk, separators=(",", ":"))
            tasks.append(service.get_response(prompt, message, force_json_response=True))

        # re-combine chunks into one response
        responses_json = await asyncio.gather(*tasks)
        responses = [
            OpenAIIngredients.model_validate_json(response_json)
            for response_json in responses_json
            # get_response returns None for a failed or empty request; skip those
            # chunks instead of passing None to the validator
            if response_json
        ]
        if not responses:
            raise Exception("No response from OpenAI")

        return OpenAIIngredients(
            ingredients=[ingredient for response in responses for ingredient in response.ingredients]
        )

    async def parse_one(self, ingredient_string: str) -> ParsedIngredient:
        """Parse a single ingredient string"""

        items = await self.parse([ingredient_string])
        return items[0]

    async def parse(self, ingredients: list[str]) -> list[ParsedIngredient]:
        """Parse a list of ingredient strings"""

        response = await self._parse(ingredients)
        return [self._convert_ingredient(ing) for ing in response.ingredients]
|
||||
Reference in New Issue
Block a user