mirror of
https://github.com/mealie-recipes/mealie.git
synced 2026-04-05 12:35:35 -04:00
feat: include extra ingredients + amounts in NLP parser output (#7191)
This commit is contained in:
@@ -17,7 +17,9 @@
|
||||
"servings": "Servings",
|
||||
"yield": "Yield",
|
||||
"yields": "Yields"
|
||||
}
|
||||
},
|
||||
"and-amount": "and {amount}",
|
||||
"or-ingredient": "or {ingredient}"
|
||||
},
|
||||
"mealplan": {
|
||||
"no-recipes-match-your-rules": "No recipes match your rules"
|
||||
|
||||
@@ -12,11 +12,11 @@ router = APIRouter(prefix="/parser")
|
||||
class IngredientParserController(BaseUserController):
|
||||
@router.post("/ingredient", response_model=ParsedIngredient)
|
||||
async def parse_ingredient(self, ingredient: IngredientRequest):
|
||||
parser = get_parser(ingredient.parser, self.group_id, self.session)
|
||||
parser = get_parser(ingredient.parser, self.group_id, self.session, self.translator)
|
||||
response = await parser.parse([ingredient.ingredient])
|
||||
return response[0]
|
||||
|
||||
@router.post("/ingredients", response_model=list[ParsedIngredient])
|
||||
async def parse_ingredients(self, ingredients: IngredientsRequest):
|
||||
parser = get_parser(ingredients.parser, self.group_id, self.session)
|
||||
parser = get_parser(ingredients.parser, self.group_id, self.session, self.translator)
|
||||
return await parser.parse(ingredients.ingredients)
|
||||
|
||||
@@ -5,6 +5,7 @@ from rapidfuzz import fuzz, process
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from mealie.db.models.recipe.ingredient import IngredientFoodModel, IngredientUnitModel
|
||||
from mealie.lang.providers import Translator
|
||||
from mealie.repos.all_repositories import get_repositories
|
||||
from mealie.repos.repository_factory import AllRepositories
|
||||
from mealie.schema.recipe.recipe_ingredient import (
|
||||
@@ -126,11 +127,14 @@ class ABCIngredientParser(ABC):
|
||||
Abstract class for ingredient parsers.
|
||||
"""
|
||||
|
||||
def __init__(self, group_id: UUID4, session: Session) -> None:
|
||||
def __init__(self, group_id: UUID4, session: Session, translator: Translator) -> None:
|
||||
self.group_id = group_id
|
||||
self.session = session
|
||||
self.data_matcher = DataMatcher(self._repos, self.food_fuzzy_match_threshold, self.unit_fuzzy_match_threshold)
|
||||
|
||||
self.translator = translator
|
||||
self.t = self.translator.t
|
||||
|
||||
@property
|
||||
def _repos(self) -> AllRepositories:
|
||||
return get_repositories(self.session, group_id=self.group_id)
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
from dataclasses import dataclass, field
|
||||
from fractions import Fraction
|
||||
from itertools import zip_longest
|
||||
|
||||
from ingredient_parser import parse_ingredient
|
||||
from ingredient_parser.dataclasses import CompositeIngredientAmount, IngredientAmount
|
||||
@@ -7,6 +9,7 @@ from pydantic import UUID4
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from mealie.core.root_logger import get_logger
|
||||
from mealie.lang.providers import Translator
|
||||
from mealie.schema.recipe import RecipeIngredient
|
||||
from mealie.schema.recipe.recipe_ingredient import (
|
||||
CreateIngredientFood,
|
||||
@@ -70,13 +73,29 @@ class BruteForceParser(ABCIngredientParser):
|
||||
return [await self.parse_one(ingredient) for ingredient in ingredients]
|
||||
|
||||
|
||||
@dataclass
class _IngredientPart:
    """One amount/name pairing produced while converting NLP parser output.

    Every field defaults to an "empty" value so a part can be created first
    and filled in afterwards, piece by piece.
    """

    # quantity, unit text and food text for this part
    qty: float = 0
    unit: str = ""
    food: str = ""
    # additional amounts attached to this part beyond the primary one
    extra_amounts: list[IngredientAmount] = field(default_factory=list)
    # confidence recorded for each component (0 when nothing was set)
    qty_conf: float = 0
    unit_conf: float = 0
    food_conf: float = 0

    @property
    def avg_conf(self) -> float:
        """Return the unweighted mean of the quantity, unit and food confidences."""
        component_confs = (self.qty_conf, self.unit_conf, self.food_conf)
        return sum(component_confs) / len(component_confs)
|
||||
|
||||
|
||||
class NLPParser(ABCIngredientParser):
|
||||
"""
|
||||
Class for Ingredient Parser library
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
def _extract_amount(ingredient: IngredientParserParsedIngredient) -> IngredientAmount:
|
||||
@classmethod
|
||||
def _extract_amount(cls, ingredient: IngredientParserParsedIngredient) -> IngredientAmount:
|
||||
if not (ingredient_amounts := ingredient.amount):
|
||||
return IngredientAmount(
|
||||
quantity=Fraction(0), quantity_max=Fraction(0), unit="", text="", confidence=0, starting_index=-1
|
||||
@@ -88,8 +107,8 @@ class NLPParser(ABCIngredientParser):
|
||||
|
||||
return ingredient_amount
|
||||
|
||||
@staticmethod
|
||||
def _extract_quantity(ingredient_amount: IngredientAmount) -> tuple[float, float]:
|
||||
@classmethod
|
||||
def _extract_quantity(cls, ingredient_amount: IngredientAmount) -> tuple[float, float]:
|
||||
confidence = ingredient_amount.confidence
|
||||
|
||||
if isinstance(ingredient_amount.quantity, str):
|
||||
@@ -103,27 +122,19 @@ class NLPParser(ABCIngredientParser):
|
||||
|
||||
return qty, confidence
|
||||
|
||||
@staticmethod
|
||||
def _extract_unit(ingredient_amount: IngredientAmount) -> tuple[str, float]:
|
||||
@classmethod
|
||||
def _extract_unit(cls, ingredient_amount: IngredientAmount) -> tuple[str, float]:
|
||||
confidence = ingredient_amount.confidence
|
||||
unit = str(ingredient_amount.unit) if ingredient_amount.unit else ""
|
||||
return unit, confidence
|
||||
|
||||
@staticmethod
|
||||
def _extract_food(ingredient: IngredientParserParsedIngredient) -> tuple[str, float]:
|
||||
if not ingredient.name:
|
||||
return "", 0
|
||||
|
||||
ingredient_name = ingredient.name[0]
|
||||
confidence = ingredient_name.confidence
|
||||
food = ingredient_name.text
|
||||
|
||||
return food, confidence
|
||||
|
||||
@staticmethod
|
||||
def _extract_note(ingredient: IngredientParserParsedIngredient) -> tuple[str, float]:
|
||||
@classmethod
|
||||
def _extract_note(
|
||||
cls, ingredient: IngredientParserParsedIngredient, extra_amounts: list[IngredientAmount] | None = None
|
||||
) -> tuple[str, float]:
|
||||
confidences: list[float] = []
|
||||
note_parts: list[str] = []
|
||||
|
||||
if ingredient.size:
|
||||
note_parts.append(ingredient.size.text)
|
||||
confidences.append(ingredient.size.confidence)
|
||||
@@ -139,45 +150,103 @@ class NLPParser(ABCIngredientParser):
|
||||
|
||||
# average confidence among all note parts
|
||||
confidence = sum(confidences) / len(confidences) if confidences else 0
|
||||
|
||||
note = ", ".join(note_parts)
|
||||
note = note.replace("(", "").replace(")", "")
|
||||
|
||||
# insert extra amounts to the front of the notes with parenthesis
|
||||
if extra_amounts:
|
||||
amt_part = "(" + ", ".join([amount.text for amount in extra_amounts]) + ")"
|
||||
note = " ".join(filter(None, [amt_part, note]))
|
||||
|
||||
return note, confidence
|
||||
|
||||
def _convert_ingredient(self, ingredient: IngredientParserParsedIngredient) -> ParsedIngredient:
|
||||
ingredient_amount = self._extract_amount(ingredient)
|
||||
qty, qty_conf = self._extract_quantity(ingredient_amount)
|
||||
unit, unit_conf = self._extract_unit(ingredient_amount)
|
||||
food, food_conf = self._extract_food(ingredient)
|
||||
note, note_conf = self._extract_note(ingredient)
|
||||
ing_parts: list[_IngredientPart] = []
|
||||
|
||||
for amount, ing_name in zip_longest(ingredient.amount, ingredient.name, fillvalue=None):
|
||||
part = _IngredientPart()
|
||||
|
||||
if amount:
|
||||
if isinstance(amount, CompositeIngredientAmount):
|
||||
part.extra_amounts = list(amount.amounts[1:])
|
||||
amount = amount.amounts[0]
|
||||
|
||||
part.qty, part.qty_conf = self._extract_quantity(amount)
|
||||
part.unit, part.unit_conf = self._extract_unit(amount)
|
||||
|
||||
if ing_name:
|
||||
part.food = ing_name.text
|
||||
part.food_conf = ing_name.confidence
|
||||
|
||||
ing_parts.append(part)
|
||||
|
||||
note, note_conf = self._extract_note(ingredient, ing_parts[0].extra_amounts if ing_parts else None)
|
||||
|
||||
# Safeguard in case the parser outputs nothing
|
||||
if not ing_parts:
|
||||
ing_parts.append(_IngredientPart())
|
||||
|
||||
# average confidence for components which were parsed
|
||||
# uses ing_parts[0] since this is the primary ingredient
|
||||
primary = ing_parts[0]
|
||||
confidences: list[float] = []
|
||||
if qty:
|
||||
confidences.append(qty_conf)
|
||||
if unit:
|
||||
confidences.append(unit_conf)
|
||||
if food:
|
||||
confidences.append(food_conf)
|
||||
|
||||
if primary.qty:
|
||||
confidences.append(primary.qty_conf)
|
||||
if primary.unit:
|
||||
confidences.append(primary.unit_conf)
|
||||
if primary.food:
|
||||
confidences.append(primary.food_conf)
|
||||
if note:
|
||||
confidences.append(note_conf)
|
||||
if len(ing_parts) > 1:
|
||||
confidences.extend([part.avg_conf for part in ing_parts[1:]])
|
||||
|
||||
recipe_ingredients: list[RecipeIngredient] = []
|
||||
for i, part in enumerate(ing_parts):
|
||||
if not i:
|
||||
ing_note = note
|
||||
elif part.extra_amounts:
|
||||
# TODO: handle extra amounts when we add support for them
|
||||
# For now, just add them as a note ("and amt_1, and amt_2, and ...")
|
||||
ing_note = ", ".join(self.t("recipe.and-amount", amount=a.text) for a in part.extra_amounts)
|
||||
else:
|
||||
ing_note = None
|
||||
recipe_ingredients.append(
|
||||
RecipeIngredient(
|
||||
quantity=part.qty,
|
||||
unit=CreateIngredientUnit(name=part.unit) if part.unit else None,
|
||||
food=CreateIngredientFood(name=part.food) if part.food else None,
|
||||
note=ing_note,
|
||||
)
|
||||
)
|
||||
|
||||
primary_ingredient = recipe_ingredients[0] # there will always be at least one recipe ingredient
|
||||
extra_ingredients = recipe_ingredients[1:] if len(recipe_ingredients) > 1 else []
|
||||
|
||||
# TODO: handle extra ingredients when we support them
|
||||
# For now, just add them to the note ("or ing_1, or ing_2, or ...")
|
||||
if extra_ingredients:
|
||||
extras_note_parts = [
|
||||
self.t("recipe.or-ingredient", ingredient=extra_ing.display) for extra_ing in extra_ingredients
|
||||
]
|
||||
extras_note = ", ".join(extras_note_parts)
|
||||
primary_ingredient.note = " ".join(filter(None, [extras_note, primary_ingredient.note]))
|
||||
|
||||
# re-calculate display property since we modified the note
|
||||
primary_ingredient.display = primary_ingredient._format_display()
|
||||
|
||||
parsed_ingredient = ParsedIngredient(
|
||||
input=ingredient.sentence,
|
||||
confidence=IngredientConfidence(
|
||||
average=(sum(confidences) / len(confidences)) if confidences else 0,
|
||||
quantity=qty_conf,
|
||||
unit=unit_conf,
|
||||
food=food_conf,
|
||||
quantity=primary.qty_conf,
|
||||
unit=primary.unit_conf,
|
||||
food=primary.food_conf,
|
||||
comment=note_conf,
|
||||
),
|
||||
ingredient=RecipeIngredient(
|
||||
title="",
|
||||
quantity=qty,
|
||||
unit=CreateIngredientUnit(name=unit) if unit else None,
|
||||
food=CreateIngredientFood(name=food) if food else None,
|
||||
note=note,
|
||||
),
|
||||
ingredient=primary_ingredient,
|
||||
)
|
||||
|
||||
return self.find_ingredient_match(parsed_ingredient)
|
||||
@@ -197,9 +266,11 @@ __registrar: dict[RegisteredParser, type[ABCIngredientParser]] = {
|
||||
}
|
||||
|
||||
|
||||
def get_parser(parser: RegisteredParser, group_id: UUID4, session: Session) -> ABCIngredientParser:
|
||||
def get_parser(
|
||||
parser: RegisteredParser, group_id: UUID4, session: Session, translator: Translator
|
||||
) -> ABCIngredientParser:
|
||||
"""
|
||||
get_parser returns an ingredient parser based on the string enum value
|
||||
passed in.
|
||||
"""
|
||||
return __registrar.get(parser, NLPParser)(group_id, session)
|
||||
return __registrar.get(parser, NLPParser)(group_id, session, translator)
|
||||
|
||||
@@ -1,67 +0,0 @@
|
||||
import pytest
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from mealie.schema.recipe.recipe_ingredient import RegisteredParser
|
||||
from tests.unit_tests.test_ingredient_parser import TestIngredient
|
||||
from tests.utils import api_routes
|
||||
from tests.utils.fixture_schemas import TestUser
|
||||
|
||||
nlp_test_ingredients = [
|
||||
TestIngredient("½ cup all-purpose flour", 0.5, "cup", "all-purpose flour", ""),
|
||||
TestIngredient("1 ½ teaspoons ground black pepper", 1.5, "teaspoon", "black pepper", "ground"),
|
||||
TestIngredient("⅔ cup unsweetened flaked coconut", 0.667, "cup", "unsweetened flaked coconut", ""),
|
||||
TestIngredient("⅓ cup panko bread crumbs", 0.333, "cup", "panko bread crumbs", ""),
|
||||
TestIngredient("1/8 cup all-purpose flour", 0.125, "cup", "all-purpose flour", ""),
|
||||
TestIngredient("1/32 cup all-purpose flour", 0.031, "cup", "all-purpose flour", ""),
|
||||
TestIngredient("1 1/2 cups chopped onion ", 1.5, "cup", "onion", "chopped"),
|
||||
TestIngredient(
|
||||
"2 pounds russet potatoes, peeled, and cut into 3/4-inch cubes ",
|
||||
2,
|
||||
"pound",
|
||||
"russet potatoes",
|
||||
"peeled, and cut into 3/4 inch cubes",
|
||||
),
|
||||
TestIngredient("2 tablespoons (30ml) vegetable oil ", 2, "tablespoon", "vegetable oil", ""),
|
||||
TestIngredient("2 teaspoons salt (to taste) ", 2, "teaspoon", "salt", "to taste"),
|
||||
TestIngredient("2 cups chicken broth or beef broth ", 2, "cup", "chicken broth", ""),
|
||||
TestIngredient("1/2 cup", 0.5, "cup", "", ""),
|
||||
]
|
||||
|
||||
|
||||
def assert_ingredient(api_response: dict, test_ingredient: TestIngredient):
    """Check one parsed-ingredient API payload against the expected test case.

    A missing (falsy) unit or food in the payload is compared as an empty string.
    """
    ingredient = api_response["ingredient"]

    # read every field up front so a malformed payload fails before any comparison
    actual_quantity = ingredient["quantity"]
    unit_payload = ingredient["unit"]
    actual_unit = unit_payload["name"] if unit_payload else ""
    food_payload = ingredient["food"]
    actual_food = food_payload["name"] if food_payload else ""
    actual_note = ingredient["note"]

    assert actual_quantity == pytest.approx(test_ingredient.quantity)
    assert actual_unit == test_ingredient.unit
    assert actual_food == test_ingredient.food
    assert actual_note == test_ingredient.comments
|
||||
|
||||
|
||||
@pytest.mark.parametrize("test_ingredient", nlp_test_ingredients)
def test_recipe_ingredient_parser_nlp(api_client: TestClient, test_ingredient: TestIngredient, unique_user: TestUser):
    """Post a single ingredient to the NLP parser route and verify the parsed result."""
    response = api_client.post(
        api_routes.parser_ingredient,
        json={"parser": RegisteredParser.nlp, "ingredient": test_ingredient.input},
        headers=unique_user.token,
    )

    assert response.status_code == 200
    assert_ingredient(response.json(), test_ingredient)
|
||||
|
||||
|
||||
def test_recipe_ingredients_parser_nlp(api_client: TestClient, unique_user: TestUser):
    """Post all NLP test ingredients in one request and verify each parsed result.

    The response must contain exactly one entry per submitted ingredient, in order.
    """
    payload = {"parser": RegisteredParser.nlp, "ingredients": [x.input for x in nlp_test_ingredients]}
    response = api_client.post(api_routes.parser_ingredients, json=payload, headers=unique_user.token)
    assert response.status_code == 200

    api_ingredients = response.json()
    # a short (or long) response must fail the test instead of being silently truncated by zip
    assert len(api_ingredients) == len(nlp_test_ingredients)

    for api_ingredient, test_ingredient in zip(api_ingredients, nlp_test_ingredients, strict=True):
        assert_ingredient(api_ingredient, test_ingredient)
|
||||
|
||||
|
||||
@pytest.mark.skip("TODO: Implement")
def test_recipe_ingredient_parser_brute(api_client: TestClient):
    """Placeholder for the single-ingredient brute-force parser route test (skipped)."""
|
||||
|
||||
|
||||
@pytest.mark.skip("TODO: Implement")
def test_recipe_ingredients_parser_brute(api_client: TestClient):
    """Placeholder for the multi-ingredient brute-force parser route test (skipped)."""
|
||||
@@ -0,0 +1,93 @@
|
||||
import pytest
|
||||
from pydantic import UUID4
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from mealie.repos.all_repositories import get_repositories
|
||||
from mealie.repos.repository_factory import AllRepositories
|
||||
from mealie.schema.recipe.recipe_ingredient import (
|
||||
CreateIngredientFoodAlias,
|
||||
CreateIngredientUnitAlias,
|
||||
IngredientFood,
|
||||
IngredientUnit,
|
||||
SaveIngredientFood,
|
||||
SaveIngredientUnit,
|
||||
)
|
||||
from mealie.schema.user.user import GroupBase
|
||||
from tests.utils.factories import random_int, random_string
|
||||
|
||||
|
||||
@pytest.fixture()
def unique_local_group_id(unfiltered_database: AllRepositories) -> UUID4:
    """Create a new group with a random name and return its id.

    NOTE(review): the id is converted with ``str()`` before being returned, so the
    value is a string even though the annotation says ``UUID4`` - confirm which
    one callers are meant to rely on.
    """
    group = unfiltered_database.groups.create(GroupBase(name=random_string()))
    return str(group.id)
|
||||
|
||||
|
||||
@pytest.fixture()
def unique_db(session: Session, unique_local_group_id: str):
    """Return the repositories bound to ``session`` and scoped to the unique test group."""
    group_repos = get_repositories(session, group_id=unique_local_group_id)
    return group_repos
|
||||
|
||||
|
||||
@pytest.fixture()
def parsed_ingredient_data(
    unique_db: AllRepositories, unique_local_group_id: UUID4
) -> tuple[list[IngredientFood], list[IngredientUnit]]:
    """Populate the unique test group with foods and units for the parser tests.

    A fixed set of named foods and units (including plural names, abbreviations
    and aliases) is created first, followed by a batch of randomly named filler
    entries of each kind.

    Returns:
        A ``(foods, units)`` tuple containing everything that was created.
    """
    foods = unique_db.ingredient_foods.create_many(
        [
            SaveIngredientFood(name="potatoes", group_id=unique_local_group_id),
            SaveIngredientFood(name="onion", group_id=unique_local_group_id),
            SaveIngredientFood(name="green onion", group_id=unique_local_group_id),
            SaveIngredientFood(name="frozen pearl onions", group_id=unique_local_group_id),
            SaveIngredientFood(name="bell peppers", group_id=unique_local_group_id),
            SaveIngredientFood(name="red pepper flakes", group_id=unique_local_group_id),
            SaveIngredientFood(name="fresh ginger", group_id=unique_local_group_id),
            SaveIngredientFood(name="ground ginger", group_id=unique_local_group_id),
            SaveIngredientFood(name="ñör̃m̈ãl̈ĩz̈ẽm̈ẽ", group_id=unique_local_group_id),
            SaveIngredientFood(name="PluralFoodTest", plural_name="myfoodisplural", group_id=unique_local_group_id),
            SaveIngredientFood(
                name="IHaveAnAlias",
                group_id=unique_local_group_id,
                aliases=[CreateIngredientFoodAlias(name="thisismyalias")],
            ),
        ]
    )

    # randomly named filler foods
    foods.extend(
        unique_db.ingredient_foods.create_many(
            [
                SaveIngredientFood(name=f"{random_string()} food", group_id=unique_local_group_id)
                for _ in range(random_int(10, 15))
            ]
        )
    )

    units = unique_db.ingredient_units.create_many(
        [
            SaveIngredientUnit(name="Cups", group_id=unique_local_group_id),
            SaveIngredientUnit(name="Tablespoon", group_id=unique_local_group_id),
            SaveIngredientUnit(name="Teaspoon", group_id=unique_local_group_id),
            SaveIngredientUnit(name="Stalk", group_id=unique_local_group_id),
            SaveIngredientUnit(name="My Very Long Unit Name", abbreviation="mvlun", group_id=unique_local_group_id),
            SaveIngredientUnit(
                name="PluralUnitName",
                plural_name="abc123",
                abbreviation="doremiabc",
                plural_abbreviation="doremi123",
                group_id=unique_local_group_id,
            ),
            SaveIngredientUnit(
                name="IHaveAnAliasToo",
                group_id=unique_local_group_id,
                aliases=[CreateIngredientUnitAlias(name="thisismyalias")],
            ),
        ]
    )

    # randomly named filler units; these must go through the units repository
    # (they were previously written through the foods repository by mistake)
    units.extend(
        unique_db.ingredient_units.create_many(
            [
                SaveIngredientUnit(name=f"{random_string()} unit", group_id=unique_local_group_id)
                for _ in range(random_int(10, 15))
            ]
        )
    )

    return foods, units
|
||||
@@ -0,0 +1,351 @@
|
||||
import asyncio
|
||||
|
||||
import pytest
|
||||
from pydantic import UUID4
|
||||
|
||||
from mealie.db.db_setup import session_context
|
||||
from mealie.lang.providers import get_locale_provider
|
||||
from mealie.schema.recipe.recipe_ingredient import (
|
||||
CreateIngredientFood,
|
||||
CreateIngredientUnit,
|
||||
IngredientFood,
|
||||
IngredientUnit,
|
||||
ParsedIngredient,
|
||||
RecipeIngredient,
|
||||
)
|
||||
from mealie.services.parser_services import RegisteredParser, get_parser
|
||||
|
||||
|
||||
def build_parsed_ing(food: str | None, unit: str | None) -> ParsedIngredient:
    """Build a ``ParsedIngredient`` with no input text around the given food and unit names.

    A falsy ``food`` or ``unit`` leaves the corresponding attribute as ``None``.
    """
    recipe_ingredient = RecipeIngredient(unit=None, food=None)

    # food and unit are assigned after construction rather than passed to the constructor
    if food:
        recipe_ingredient.food = CreateIngredientFood(name=food)
    if unit:
        recipe_ingredient.unit = CreateIngredientUnit(name=unit)

    parsed = ParsedIngredient(input=None, ingredient=recipe_ingredient)
    return parsed
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "input, quantity, unit, food, comment",
    [
        pytest.param("1 theelepel koffie", 1, "theelepel", "koffie", "", id="1 theelepel koffie"),
        pytest.param("3 theelepels koffie", 3, "theelepels", "koffie", "", id="3 theelepels koffie"),
        pytest.param("1 eetlepel tarwe", 1, "eetlepel", "tarwe", "", id="1 eetlepel tarwe"),
        pytest.param("20 eetlepels bloem", 20, "eetlepels", "bloem", "", id="20 eetlepels bloem"),
        pytest.param("1 mespunt kaneel", 1, "mespunt", "kaneel", "", id="1 mespunt kaneel"),
        pytest.param("1 snuf(je) zout", 1, "snuf(je)", "zout", "", id="1 snuf(je) zout"),
        pytest.param(
            "2 tbsp minced cilantro, leaves and stems",
            2,
            "tbsp",
            "minced cilantro",
            "leaves and stems",
            id="2 tbsp minced cilantro, leaves and stems",
        ),
        pytest.param(
            "1 large yellow onion, coarsely chopped",
            1,
            "large",
            "yellow onion",
            "coarsely chopped",
            id="1 large yellow onion, coarsely chopped",
        ),
        pytest.param("1 1/2 tsp garam masala", 1.5, "tsp", "garam masala", "", id="1 1/2 tsp garam masala"),
        pytest.param(
            "2 cups mango chunks, (2 large mangoes) (fresh or frozen)",
            2,
            "Cups",
            "mango chunks, (2 large mangoes)",
            "fresh or frozen",
            id="2 cups mango chunks, (2 large mangoes) (fresh or frozen)",
        ),
        pytest.param("stalk onion", 0, "Stalk", "onion", "", id="stalk onion"),
        pytest.param("a stalk bell peppers", 0, "Stalk", "bell peppers", "", id="a stalk bell peppers"),
        pytest.param("a tablespoon unknownFood", 0, "Tablespoon", "unknownFood", "", id="a tablespoon unknownFood"),
        pytest.param(
            "stalk bell peppers, cut in pieces",
            0,
            "Stalk",
            "bell peppers",
            "cut in pieces",
            id="stalk bell peppers, cut in pieces",
        ),
        pytest.param(
            "a stalk bell peppers, cut in pieces",
            0,
            "Stalk",
            "bell peppers",
            "cut in pieces",
            id="a stalk bell peppers, cut in pieces",
        ),
        pytest.param("red pepper flakes", 0, "", "red pepper flakes", "", id="red pepper flakes"),
        pytest.param("1 bell peppers", 1, "", "bell peppers", "", id="1 bell peppers"),
        # the id now matches the input (it previously read "1 big stalk bell peppers")
        pytest.param("1 stalk bell peppers", 1, "Stalk", "bell peppers", "", id="1 stalk bell peppers"),
        pytest.param("a big stalk bell peppers", 0, "Stalk", "bell peppers", "", id="a big stalk bell peppers"),
        pytest.param(
            "1 bell peppers, cut in pieces", 1, "", "bell peppers", "cut in pieces", id="1 bell peppers, cut in pieces"
        ),
        pytest.param(
            "bell peppers, cut in pieces", 0, "", "bell peppers", "cut in pieces", id="bell peppers, cut in pieces"
        ),
    ],
)
def test_brute_parser(
    unique_local_group_id: UUID4,
    parsed_ingredient_data: tuple[list[IngredientFood], list[IngredientUnit]],  # required so database is populated
    input: str,
    quantity: int | float,
    unit: str,
    food: str,
    comment: str,
):
    """Parse ``input`` with the brute-force parser and compare each parsed component.

    For every component (quantity, unit, food, note): when the parser produced a
    truthy value it must equal the expectation, otherwise the expectation itself
    must be falsy.
    """
    with session_context() as session:
        loop = asyncio.get_event_loop()
        parser = get_parser(RegisteredParser.brute, unique_local_group_id, session, get_locale_provider())
        parsed = loop.run_until_complete(parser.parse_one(input))
        ing = parsed.ingredient

        if ing.quantity:
            assert ing.quantity == quantity
        else:
            assert not quantity
        if ing.unit:
            assert ing.unit.name == unit
        else:
            assert not unit
        if ing.food:
            assert ing.food.name == food
        else:
            assert not food
        if ing.note:
            assert ing.note == comment
        else:
            assert not comment
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "unit, food, expect_unit_match, expect_food_match, expected_avg",
    [
        pytest.param("Cups", "potatoes", True, True, 1.0, id="all matched"),
        pytest.param("Cups", "veryuniquefood", True, False, 0.75, id="unit matched only"),
        pytest.param("veryuniqueunit", "potatoes", False, True, 0.75, id="food matched only"),
        pytest.param("veryuniqueunit", "veryuniquefood", False, False, 0.5, id="neither matched"),
    ],
)
def test_brute_parser_confidence(
    unit: str,
    food: str,
    expect_unit_match: bool,
    expect_food_match: bool,
    expected_avg: float,
    unique_local_group_id: UUID4,
    parsed_ingredient_data: tuple[list[IngredientFood], list[IngredientUnit]],
):
    """Verify the confidence values the brute-force parser reports for ``1 <unit> <food>``.

    Quantity and comment confidence are expected to be 1; unit and food confidence
    are expected to be 1 when a match is expected (or the text is empty) and 0
    otherwise; the reported average must equal ``expected_avg``.
    """
    input_str = f"1 {unit} {food}"

    with session_context() as session:
        original_loop = asyncio.get_event_loop()
        # create the loop before entering ``try`` so the ``finally`` clause never
        # touches an unbound name if loop creation itself fails
        loop = asyncio.new_event_loop()
        try:
            asyncio.set_event_loop(loop)
            parser = get_parser(RegisteredParser.brute, unique_local_group_id, session, get_locale_provider())
            parsed = loop.run_until_complete(parser.parse_one(input_str))
        finally:
            # always restore the loop that was current before this test
            loop.close()
            asyncio.set_event_loop(original_loop)

    conf = parsed.confidence

    assert conf.quantity == 1
    assert conf.comment == 1
    assert conf.unit == (1 if expect_unit_match or not unit else 0)
    assert conf.food == (1 if expect_food_match or not food else 0)
    assert conf.average == expected_avg
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "input, expected_unit_name, expected_food_name, expect_unit_match, expect_food_match",
    (
        pytest.param(build_parsed_ing(unit="cup", food="potatoes"), "Cups", "potatoes", True, True, id="basic match"),
        pytest.param(  # this should work in sqlite since "potato" is contained within "potatoes"
            build_parsed_ing(unit="cup", food="potato"), "Cups", "potatoes", True, True, id="basic fuzzy match"
        ),
        pytest.param(
            build_parsed_ing(unit="tablespoon", food="onion"), "Tablespoon", "onion", True, True, id="nested match 1"
        ),
        pytest.param(
            build_parsed_ing(unit="teaspoon", food="green onion"),
            "Teaspoon",
            "green onion",
            True,
            True,
            id="nested match 2",
        ),
        pytest.param(
            build_parsed_ing(unit="cup", food="gren onion"), "Cups", "green onion", True, True, id="nested match 3"
        ),
        pytest.param(
            build_parsed_ing(unit="stalk", food="very unique"), "Stalk", "very unique", True, False, id="no food match"
        ),
        pytest.param(build_parsed_ing(unit="cup", food=None), "Cups", None, True, False, id="no food input"),
        pytest.param(
            build_parsed_ing(unit="very unique", food="fresh ginger"),
            "very unique",
            "fresh ginger",
            False,
            True,
            id="no unit match",
        ),
        pytest.param(build_parsed_ing(unit=None, food="potatoes"), None, "potatoes", False, True, id="no unit input"),
        pytest.param(
            build_parsed_ing(unit="very unique", food="very unique"),
            "very unique",
            "very unique",
            False,
            False,
            id="no matches",
        ),
        pytest.param(build_parsed_ing(unit=None, food=None), None, None, False, False, id="no input"),
        pytest.param(
            build_parsed_ing(unit="mvlun", food="potatoes"),
            "My Very Long Unit Name",
            "potatoes",
            True,
            True,
            id="unit abbreviation",
        ),
        pytest.param(
            build_parsed_ing(unit=None, food="n̅ōr̅m̄a̅l̄i̅z̄e̅m̄e̅"),
            None,
            "ñör̃m̈ãl̈ĩz̈ẽm̈ẽ",
            False,
            True,
            id="normalization",
        ),
        pytest.param(
            build_parsed_ing(unit=None, food="myfoodisplural"),
            None,
            "PluralFoodTest",
            False,
            True,
            id="plural food name",
        ),
        pytest.param(
            build_parsed_ing(unit="abc123", food=None), "PluralUnitName", None, True, False, id="plural unit name"
        ),
        pytest.param(
            build_parsed_ing(unit="doremi123", food=None),
            "PluralUnitName",
            None,
            True,
            False,
            id="plural unit abbreviation",
        ),
        pytest.param(
            build_parsed_ing(unit=None, food="thisismyalias"), None, "IHaveAnAlias", False, True, id="food alias"
        ),
        pytest.param(
            build_parsed_ing(unit="thisismyalias", food=None), "IHaveAnAliasToo", None, True, False, id="unit alias"
        ),
    ),
)
def test_parser_ingredient_match(
    expected_food_name: str | None,
    expected_unit_name: str | None,
    expect_food_match: bool,
    expect_unit_match: bool,
    input: ParsedIngredient,
    parsed_ingredient_data: tuple[list[IngredientFood], list[IngredientUnit]],  # required so database is populated
    unique_local_group_id: UUID4,
):
    """Run ``find_ingredient_match`` on a pre-built ingredient and check the food and unit results.

    For both food and unit the test checks the resulting name (or ``None``) and the
    resulting type: the stored model type when a match is expected, the ``Create*``
    type when a name is present without a match, and ``None`` otherwise.
    """
    with session_context() as session:
        parser = get_parser(RegisteredParser.brute, unique_local_group_id, session, get_locale_provider())
        matched = parser.find_ingredient_match(input)

        # --- food ---
        food = matched.ingredient.food
        if not expected_food_name:
            assert matched.ingredient.food is None
        else:
            assert food and food.name == expected_food_name

        if expect_food_match:
            assert isinstance(food, IngredientFood)
        elif food and food.name:
            assert isinstance(food, CreateIngredientFood)
        else:
            assert food is None

        # --- unit ---
        unit = matched.ingredient.unit
        if not expected_unit_name:
            assert matched.ingredient.unit is None
        else:
            assert unit and unit.name == expected_unit_name

        if expect_unit_match:
            assert isinstance(unit, IngredientUnit)
        elif unit and unit.name:
            assert isinstance(unit, CreateIngredientUnit)
        else:
            assert unit is None
|
||||
@@ -0,0 +1,113 @@
|
||||
import asyncio
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
|
||||
import pytest
|
||||
from pydantic import UUID4
|
||||
from rapidfuzz import fuzz
|
||||
from text_unidecode import unidecode
|
||||
|
||||
from mealie.db.db_setup import session_context
|
||||
from mealie.lang.providers import get_locale_provider
|
||||
from mealie.services.parser_services import RegisteredParser, get_parser
|
||||
|
||||
|
||||
@dataclass
class TestIngredient:
    """Expected parse result for one raw ingredient string used by the parser tests."""

    # The class name starts with "Test", so pytest would try to collect it and warn
    # about its generated __init__; opt out of collection explicitly. This is a plain
    # class attribute (no annotation), so it is not a dataclass field.
    __test__ = False

    input: str  # raw ingredient text handed to the parser
    quantity: float  # expected quantity
    unit: str  # expected unit name ("" when none is expected)
    food: str  # expected food name ("" when none is expected)
    comments: str  # expected note text ("" when none is expected)
|
||||
|
||||
|
||||
def normalize(val: str) -> str:
    """Return ``val`` transliterated with ``unidecode``, lowercased, stripped, and reduced to a-z, 0-9 and whitespace."""
    ascii_text = unidecode(val)
    cleaned = ascii_text.lower().strip()
    # drop every character that is not a lowercase letter, a digit or whitespace
    return re.sub(r"[^a-z0-9\s]", "", cleaned)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "test_ingredient",
    [
        TestIngredient("½ cup all-purpose flour", 0.5, "cup", "all-purpose flour", ""),
        TestIngredient("1 ½ teaspoons ground black pepper", 1.5, "teaspoon", "black pepper", "ground"),
        TestIngredient("⅔ cup unsweetened flaked coconut", 0.667, "cup", "unsweetened flaked coconut", ""),
        TestIngredient("⅓ cup panko bread crumbs", 0.333, "cup", "panko bread crumbs", ""),
        TestIngredient("1/8 cup all-purpose flour", 0.125, "cup", "all-purpose flour", ""),
        TestIngredient("1/32 cup all-purpose flour", 0.031, "cup", "all-purpose flour", ""),
        TestIngredient("1 1/2 cups chopped onion ", 1.5, "cup", "onion", "chopped"),
        TestIngredient(
            "2 pounds russet potatoes, peeled, and cut into 3/4-inch cubes ",
            2,
            "pound",
            "russet potatoes",
            "peeled, and cut into 3/4 inch cubes",
        ),
        TestIngredient("2 teaspoons salt (to taste) ", 2, "teaspoon", "salt", "to taste"),
        TestIngredient("1/2 cup", 0.5, "cup", "", ""),
    ],
)
def test_nlp_parser(unique_local_group_id: UUID4, test_ingredient: TestIngredient):
    """Parse one ingredient string with the NLP parser and compare it to the expected components.

    The quantity is compared approximately. For unit, food and note: a truthy parsed
    value must equal the expectation, otherwise the expectation must itself be falsy.
    """
    with session_context() as session:
        event_loop = asyncio.get_event_loop()
        nlp_parser = get_parser(RegisteredParser.nlp, unique_local_group_id, session, get_locale_provider())
        result = event_loop.run_until_complete(nlp_parser.parse_one(test_ingredient.input))
        parsed_ing = result.ingredient

        assert parsed_ing.quantity == pytest.approx(test_ingredient.quantity)

        if not parsed_ing.unit:
            assert not test_ingredient.unit
        else:
            assert parsed_ing.unit.name == test_ingredient.unit

        if not parsed_ing.food:
            assert not test_ingredient.food
        else:
            assert parsed_ing.food.name == test_ingredient.food

        if not parsed_ing.note:
            assert not test_ingredient.comments
        else:
            assert parsed_ing.note == test_ingredient.comments
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ("source_str", "expected_str"),
    [
        ("2 teaspoon chopped fresh or dried rosemary", "2 teaspoon fresh rosemary or dried rosemary chopped"),
        ("153 grams 00 flour (1 cup plus 1 tablespoon)", "153 gram 00 flour or 1 cup and 1 tablespoon"),
        (
            "153 grams all-purpose flour (1 cup plus 1 tablespoon and 2 teaspoons)",
            "153 gram all-purpose flour or 1 cup plus 1 tablespoon and 2 teaspoons",
        ),
        ("2 cups chicken broth or beef broth", "2 cup chicken broth or beef broth"),
        ("2 tablespoons (30ml) vegetable oil", "2 tablespoon vegetable oil or 30 milliliter"),
        ("1 cup fresh basil or 2 tablespoons dried basil", "1 cup fresh basil or 2 tablespoons dried basil"),
    ],
)
@pytest.mark.asyncio
async def test_nlp_parser_keeps_all_text(unique_local_group_id: UUID4, source_str: str, expected_str: str):
    """Check that the parsed ingredient's display text stays close to the expected full text."""
    with session_context() as session:
        nlp_parser = get_parser(RegisteredParser.nlp, unique_local_group_id, session, get_locale_provider())
        result = await nlp_parser.parse_one(source_str)

    ing = result.ingredient

    # Parser output may drift slightly over time, so require a close fuzzy match
    # instead of exact equality. fuzz.ratio yields a number from 0 to 100, where
    # 100 means the two strings are identical.
    score = fuzz.ratio(ing.display, expected_str)
    assert score >= 90, f"'{ing.display}' does not sufficiently match expected '{expected_str}'"
|
||||
@@ -0,0 +1,311 @@
|
||||
import asyncio
|
||||
import json
|
||||
from typing import cast
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
from pydantic import UUID4
|
||||
|
||||
from mealie.db.db_setup import session_context
|
||||
from mealie.lang.providers import get_locale_provider
|
||||
from mealie.schema.openai.recipe_ingredient import OpenAIIngredient, OpenAIIngredients
|
||||
from mealie.schema.recipe.recipe import Recipe
|
||||
from mealie.schema.recipe.recipe_ingredient import (
|
||||
CreateIngredientFood,
|
||||
CreateIngredientUnit,
|
||||
IngredientFood,
|
||||
IngredientUnit,
|
||||
ParsedIngredient,
|
||||
RecipeIngredient,
|
||||
SaveIngredientFood,
|
||||
)
|
||||
from mealie.services.openai import OpenAIService
|
||||
from mealie.services.parser_services import RegisteredParser, get_parser
|
||||
from tests.utils.factories import random_int, random_string
|
||||
from tests.utils.fixture_schemas import TestUser
|
||||
|
||||
|
||||
def test_openai_parser(
    unique_local_group_id: UUID4,
    parsed_ingredient_data: tuple[list[IngredientFood], list[IngredientUnit]],  # required so database is populated
    monkeypatch: pytest.MonkeyPatch,
):
    """Run the OpenAI parser against a mocked service and check count and ordering of results."""
    ingredient_count = random_int(10, 20)

    async def mock_get_response(self, prompt: str, message: str, *args, **kwargs) -> OpenAIIngredients | None:
        # Answer with one randomly filled ingredient per entry of the JSON-encoded request.
        fake_ingredients = []
        for _ in json.loads(message):
            fake_ingredients.append(
                OpenAIIngredient(
                    quantity=random_int(0, 10),
                    unit=random_string(),
                    food=random_string(),
                    note=random_string(),
                )
            )
        return OpenAIIngredients(ingredients=fake_ingredients)

    monkeypatch.setattr(OpenAIService, "get_response", mock_get_response)

    with session_context() as session:
        event_loop = asyncio.get_event_loop()
        openai_parser = get_parser(RegisteredParser.openai, unique_local_group_id, session, get_locale_provider())

        inputs = [random_string() for _ in range(ingredient_count)]
        parsed = event_loop.run_until_complete(openai_parser.parse(inputs))

        # The service is mocked, so the parsed values themselves are meaningless;
        # only verify that parsing succeeds and that input order is preserved.
        assert len(parsed) == ingredient_count
        for raw_text, result in zip(inputs, parsed, strict=True):
            assert result.input == raw_text
|
||||
|
||||
|
||||
def test_openai_parser_sanitize_output(
    unique_local_group_id: UUID4,
    unique_user: TestUser,
    parsed_ingredient_data: tuple[list[IngredientFood], list[IngredientUnit]],  # required so database is populated
    monkeypatch: pytest.MonkeyPatch,
):
    """Check that a null character in the raw OpenAI response is stripped and the result can be saved."""

    async def mock_get_raw_response(self, prompt: str, content: list[dict], response_schema) -> MagicMock:
        # Embed a null character in the JSON payload to exercise the preprocessing step
        tainted = OpenAIIngredient(
            quantity=random_int(0, 10),
            unit="",
            food="there is a null character here: \x00",
            note="",
        )
        payload = OpenAIIngredients(ingredients=[tainted])

        # Shape the mock like an OpenAI chat response: choices[0].message.content
        choice = MagicMock()
        choice.message.content = payload.model_dump_json()
        raw_response = MagicMock()
        raw_response.choices = [choice]
        return raw_response

    # Patch the raw response (not the parsed one) so the service's own processing runs before the model is loaded
    monkeypatch.setattr(OpenAIService, "_get_raw_response", mock_get_raw_response)

    with session_context() as session:
        event_loop = asyncio.get_event_loop()
        openai_parser = get_parser(RegisteredParser.openai, unique_local_group_id, session, get_locale_provider())

        parsed = event_loop.run_until_complete(openai_parser.parse([""]))
        assert len(parsed) == 1
        parsed_ing = cast(ParsedIngredient, parsed[0])
        assert parsed_ing.ingredient.food
        assert parsed_ing.ingredient.food.name == "there is a null character here: "

    # The sanitized ingredient must be usable to create a recipe
    assert isinstance(parsed_ing.ingredient.food, CreateIngredientFood)
    saved_food = unique_user.repos.ingredient_foods.create(
        parsed_ing.ingredient.food.cast(SaveIngredientFood, group_id=unique_user.group_id)
    )
    parsed_ing.ingredient.food = saved_food
    recipe = Recipe(
        user_id=unique_user.user_id,
        group_id=unique_user.group_id,
        name=random_string(),
        recipe_ingredient=[parsed_ing.ingredient],
    )
    unique_user.repos.recipes.create(recipe)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "original_text,quantity,unit,food,note,qty_range,unit_range,food_range,note_range",
    [
        pytest.param(
            "2 cups flour",
            2.0,
            "Cups",
            "flour",
            "",
            (1.0, 1.0),
            (1.0, 1.0),
            (1.0, 1.0),
            (1.0, 1.0),
            id="perfect_match_all_components",
        ),
        pytest.param(
            "2 cups flour",
            3.0,
            "Cups",
            "flour",
            "",
            (0.0, 0.0),
            (1.0, 1.0),
            (1.0, 1.0),
            (1.0, 1.0),
            id="quantity_mismatch",
        ),
        pytest.param(
            "2 cups flour",
            2.0,
            None,
            "flour",
            "",
            (1.0, 1.0),
            (0.4, 0.9),
            (1.0, 1.0),
            (1.0, 1.0),
            id="missing_unit_fallback",
        ),
        pytest.param(
            "2 cups flour",
            2.0,
            "Cups",
            None,
            "",
            (1.0, 1.0),
            (1.0, 1.0),
            (0.4, 0.9),
            (1.0, 1.0),
            id="missing_food_fallback",
        ),
        pytest.param(
            "2 cups flour sifted fresh",
            2.0,
            "Cups",
            "flour",
            "sifted fresh",
            (1.0, 1.0),
            (1.0, 1.0),
            (1.0, 1.0),
            (0.8, 1.0),
            id="note_full_match",
        ),
        pytest.param(
            "2 cups flour sifted",
            2.0,
            "Cups",
            "flour",
            "sifted chopped",
            (1.0, 1.0),
            (1.0, 1.0),
            (1.0, 1.0),
            (0.4, 0.6),
            id="note_partial_match",
        ),
        pytest.param(
            "2 cups flour",
            2.0,
            "Cups",
            "flour",
            "chopped minced",
            (1.0, 1.0),
            (1.0, 1.0),
            (1.0, 1.0),
            (0.0, 0.0),
            id="note_no_match",
        ),
        pytest.param(
            "1.5 tsp salt kosher",
            1.0,
            None,
            None,
            "kosher fine",
            (0.0, 0.0),
            (0.3, 0.7),
            (0.3, 0.7),
            (0.4, 0.6),
            id="multiple_issues",
        ),
        pytest.param(
            "",
            1.0,
            "Cups",
            "flour",
            "fresh",
            (0.0, 0.0),
            (1.0, 1.0),
            (1.0, 1.0),
            (0.0, 0.0),
            id="empty_original_text",
        ),
        pytest.param(
            "salt",
            0.0,
            None,
            "salt",
            "",
            (1.0, 1.0),
            (1.0, 1.0),
            (1.0, 1.0),
            (1.0, 1.0),
            id="zero_quantity_match",
        ),
    ],
)
def test_openai_parser_confidence(
    original_text: str,
    quantity: float | None,
    unit: str | None,
    food: str | None,
    note: str,
    qty_range: tuple[float, float],
    unit_range: tuple[float, float],
    food_range: tuple[float, float],
    note_range: tuple[float, float],
    unique_local_group_id: UUID4,
    parsed_ingredient_data: tuple[list[IngredientFood], list[IngredientUnit]],  # required so database is populated
):
    """Exercise ``OpenAIParser._calculate_confidence`` and check each score against an allowed range."""

    with session_context() as session:
        from mealie.services.parser_services.openai.parser import OpenAIParser

        generic_parser = get_parser(RegisteredParser.openai, unique_local_group_id, session, get_locale_provider())
        parser = cast(OpenAIParser, generic_parser)

        # Build the ingredient under test; empty/None components are passed as None
        unit_model = CreateIngredientUnit(name=unit) if unit else None
        food_model = CreateIngredientFood(name=food) if food else None
        ingredient = RecipeIngredient(
            original_text=original_text,
            quantity=quantity,
            unit=unit_model,
            food=food_model,
            note=note if note else None,
        )

        confidence = parser._calculate_confidence(original_text, ingredient)

        # Every score is expected to be populated by the method
        assert confidence.quantity is not None, "Quantity confidence should not be None"
        assert confidence.unit is not None, "Unit confidence should not be None"
        assert confidence.food is not None, "Food confidence should not be None"
        assert confidence.comment is not None, "Comment confidence should not be None"
        assert confidence.average is not None, "Average confidence should not be None"

        # Scores are checked against inclusive ranges to tolerate fuzzy-matching variability
        low, high = qty_range
        assert low <= confidence.quantity <= high, (
            f"Quantity confidence out of range: expected {qty_range}, got {confidence.quantity}"
        )

        low, high = unit_range
        assert low <= confidence.unit <= high, (
            f"Unit confidence out of range: expected {unit_range}, got {confidence.unit}"
        )

        low, high = food_range
        assert low <= confidence.food <= high, (
            f"Food confidence out of range: expected {food_range}, got {confidence.food}"
        )

        low, high = note_range
        assert low <= confidence.comment <= high, (
            f"Note confidence out of range: expected {note_range}, got {confidence.comment}"
        )

        # The average must be the mean of the four component scores
        expected_avg = (confidence.quantity + confidence.unit + confidence.food + confidence.comment) / 4
        assert abs(confidence.average - expected_avg) < 0.001, (
            f"Average confidence mismatch: expected {expected_avg}, got {confidence.average}"
        )
|
||||
@@ -1,737 +0,0 @@
|
||||
import asyncio
|
||||
import json
|
||||
from dataclasses import dataclass
|
||||
from typing import cast
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
from pydantic import UUID4
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from mealie.db.db_setup import session_context
|
||||
from mealie.repos.all_repositories import get_repositories
|
||||
from mealie.repos.repository_factory import AllRepositories
|
||||
from mealie.schema.openai.recipe_ingredient import OpenAIIngredient, OpenAIIngredients
|
||||
from mealie.schema.recipe.recipe import Recipe
|
||||
from mealie.schema.recipe.recipe_ingredient import (
|
||||
CreateIngredientFood,
|
||||
CreateIngredientFoodAlias,
|
||||
CreateIngredientUnit,
|
||||
CreateIngredientUnitAlias,
|
||||
IngredientFood,
|
||||
IngredientUnit,
|
||||
ParsedIngredient,
|
||||
RecipeIngredient,
|
||||
SaveIngredientFood,
|
||||
SaveIngredientUnit,
|
||||
)
|
||||
from mealie.schema.user.user import GroupBase
|
||||
from mealie.services.openai import OpenAIService
|
||||
from mealie.services.parser_services import RegisteredParser, get_parser
|
||||
from tests.utils.factories import random_int, random_string
|
||||
from tests.utils.fixture_schemas import TestUser
|
||||
|
||||
|
||||
@dataclass
class TestIngredient:
    """Plain record pairing a raw ingredient string with the components expected from parsing it."""

    # The "Test" prefix makes pytest try to collect this class, which only
    # produces a collection warning because dataclasses define __init__.
    # Opt out explicitly; being unannotated, this is a plain class attribute
    # and not a dataclass field.
    __test__ = False

    input: str
    quantity: float
    unit: str
    food: str
    comments: str
|
||||
|
||||
|
||||
def build_parsed_ing(food: str | None, unit: str | None) -> ParsedIngredient:
    """Build a ``ParsedIngredient`` (no input text) carrying the given food and unit names.

    A falsy ``food`` or ``unit`` leaves the corresponding attribute as ``None``.
    """
    ingredient = RecipeIngredient(unit=None, food=None)

    # Components are assigned after construction, one at a time.
    if food:
        ingredient.food = CreateIngredientFood(name=food)
    if unit:
        ingredient.unit = CreateIngredientUnit(name=unit)

    parsed = ParsedIngredient(input=None, ingredient=ingredient)
    return parsed
|
||||
|
||||
|
||||
@pytest.fixture()
def unique_local_group_id(unfiltered_database: AllRepositories) -> UUID4:
    """Create a group with a random name and return its id converted to ``str``."""
    new_group = unfiltered_database.groups.create(GroupBase(name=random_string()))
    return str(new_group.id)
|
||||
|
||||
|
||||
@pytest.fixture()
def unique_db(session: Session, unique_local_group_id: str):
    """Return the repositories for ``session``, scoped to the group created for this test."""
    group_repos = get_repositories(session, group_id=unique_local_group_id)
    return group_repos
|
||||
|
||||
|
||||
@pytest.fixture()
def parsed_ingredient_data(
    unique_db: AllRepositories, unique_local_group_id: UUID4
) -> tuple[list[IngredientFood], list[IngredientUnit]]:
    """Populate the test group's food and unit tables and return what was created.

    A fixed set of named foods/units (including plural names, abbreviations and
    aliases) is created first, followed by 10-15 randomly named extras of each kind.

    Returns:
        ``(foods, units)`` as returned by the repositories' ``create_many``.
    """
    foods = unique_db.ingredient_foods.create_many(
        [
            SaveIngredientFood(name="potatoes", group_id=unique_local_group_id),
            SaveIngredientFood(name="onion", group_id=unique_local_group_id),
            SaveIngredientFood(name="green onion", group_id=unique_local_group_id),
            SaveIngredientFood(name="frozen pearl onions", group_id=unique_local_group_id),
            SaveIngredientFood(name="bell peppers", group_id=unique_local_group_id),
            SaveIngredientFood(name="red pepper flakes", group_id=unique_local_group_id),
            SaveIngredientFood(name="fresh ginger", group_id=unique_local_group_id),
            SaveIngredientFood(name="ground ginger", group_id=unique_local_group_id),
            SaveIngredientFood(name="ñör̃m̈ãl̈ĩz̈ẽm̈ẽ", group_id=unique_local_group_id),
            SaveIngredientFood(name="PluralFoodTest", plural_name="myfoodisplural", group_id=unique_local_group_id),
            SaveIngredientFood(
                name="IHaveAnAlias",
                group_id=unique_local_group_id,
                aliases=[CreateIngredientFoodAlias(name="thisismyalias")],
            ),
        ]
    )

    foods.extend(
        unique_db.ingredient_foods.create_many(
            [
                SaveIngredientFood(name=f"{random_string()} food", group_id=unique_local_group_id)
                for _ in range(random_int(10, 15))
            ]
        )
    )

    units = unique_db.ingredient_units.create_many(
        [
            SaveIngredientUnit(name="Cups", group_id=unique_local_group_id),
            SaveIngredientUnit(name="Tablespoon", group_id=unique_local_group_id),
            SaveIngredientUnit(name="Teaspoon", group_id=unique_local_group_id),
            SaveIngredientUnit(name="Stalk", group_id=unique_local_group_id),
            SaveIngredientUnit(name="My Very Long Unit Name", abbreviation="mvlun", group_id=unique_local_group_id),
            SaveIngredientUnit(
                name="PluralUnitName",
                plural_name="abc123",
                abbreviation="doremiabc",
                plural_abbreviation="doremi123",
                group_id=unique_local_group_id,
            ),
            SaveIngredientUnit(
                name="IHaveAnAliasToo",
                group_id=unique_local_group_id,
                aliases=[CreateIngredientUnitAlias(name="thisismyalias")],
            ),
        ]
    )

    # The random extra units must go through the units repository. They were
    # previously sent to ``ingredient_foods``, which stored them as foods and
    # put food records into the returned ``units`` list.
    units.extend(
        unique_db.ingredient_units.create_many(
            [
                SaveIngredientUnit(name=f"{random_string()} unit", group_id=unique_local_group_id)
                for _ in range(random_int(10, 15))
            ]
        )
    )

    return foods, units
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "input, quantity, unit, food, comment",
    [
        pytest.param("1 theelepel koffie", 1, "theelepel", "koffie", "", id="1 theelepel koffie"),
        pytest.param("3 theelepels koffie", 3, "theelepels", "koffie", "", id="3 theelepels koffie"),
        pytest.param("1 eetlepel tarwe", 1, "eetlepel", "tarwe", "", id="1 eetlepel tarwe"),
        pytest.param("20 eetlepels bloem", 20, "eetlepels", "bloem", "", id="20 eetlepels bloem"),
        pytest.param("1 mespunt kaneel", 1, "mespunt", "kaneel", "", id="1 mespunt kaneel"),
        pytest.param("1 snuf(je) zout", 1, "snuf(je)", "zout", "", id="1 snuf(je) zout"),
        pytest.param(
            "2 tbsp minced cilantro, leaves and stems",
            2,
            "tbsp",
            "minced cilantro",
            "leaves and stems",
            id="2 tbsp minced cilantro, leaves and stems",
        ),
        pytest.param(
            "1 large yellow onion, coarsely chopped",
            1,
            "large",
            "yellow onion",
            "coarsely chopped",
            id="1 large yellow onion, coarsely chopped",
        ),
        pytest.param("1 1/2 tsp garam masala", 1.5, "tsp", "garam masala", "", id="1 1/2 tsp garam masala"),
        pytest.param(
            "2 cups mango chunks, (2 large mangoes) (fresh or frozen)",
            2,
            "Cups",
            "mango chunks, (2 large mangoes)",
            "fresh or frozen",
            id="2 cups mango chunks, (2 large mangoes) (fresh or frozen)",
        ),
        pytest.param("stalk onion", 0, "Stalk", "onion", "", id="stalk onion"),
        pytest.param("a stalk bell peppers", 0, "Stalk", "bell peppers", "", id="a stalk bell peppers"),
        pytest.param("a tablespoon unknownFood", 0, "Tablespoon", "unknownFood", "", id="a tablespoon unknownFood"),
        pytest.param(
            "stalk bell peppers, cut in pieces",
            0,
            "Stalk",
            "bell peppers",
            "cut in pieces",
            id="stalk bell peppers, cut in pieces",
        ),
        pytest.param(
            "a stalk bell peppers, cut in pieces",
            0,
            "Stalk",
            "bell peppers",
            "cut in pieces",
            # id previously duplicated the case above; it now mirrors this input
            id="a stalk bell peppers, cut in pieces",
        ),
        pytest.param("red pepper flakes", 0, "", "red pepper flakes", "", id="red pepper flakes"),
        pytest.param("1 bell peppers", 1, "", "bell peppers", "", id="1 bell peppers"),
        # id previously read "1 big stalk bell peppers", which did not match the input
        pytest.param("1 stalk bell peppers", 1, "Stalk", "bell peppers", "", id="1 stalk bell peppers"),
        pytest.param("a big stalk bell peppers", 0, "Stalk", "bell peppers", "", id="a big stalk bell peppers"),
        pytest.param(
            "1 bell peppers, cut in pieces", 1, "", "bell peppers", "cut in pieces", id="1 bell peppers, cut in pieces"
        ),
        pytest.param(
            "bell peppers, cut in pieces", 0, "", "bell peppers", "cut in pieces", id="bell peppers, cut in pieces"
        ),
    ],
)
def test_brute_parser(
    unique_local_group_id: UUID4,
    parsed_ingredient_data: tuple[list[IngredientFood], list[IngredientUnit]],  # required so database is populated
    input: str,
    quantity: int | float,
    unit: str,
    food: str,
    comment: str,
):
    """Parse ``input`` with the brute-force parser and compare each component.

    For every component, a falsy parsed value is accepted only when the
    expectation is falsy as well; otherwise the values must be equal.
    """
    with session_context() as session:
        loop = asyncio.get_event_loop()
        parser = get_parser(RegisteredParser.brute, unique_local_group_id, session)
        parsed = loop.run_until_complete(parser.parse_one(input))
        ing = parsed.ingredient

    if ing.quantity:
        assert ing.quantity == quantity
    else:
        assert not quantity
    if ing.unit:
        assert ing.unit.name == unit
    else:
        assert not unit
    if ing.food:
        assert ing.food.name == food
    else:
        assert not food
    if ing.note:
        assert ing.note == comment
    else:
        assert not comment
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "unit, food, expect_unit_match, expect_food_match, expected_avg",
    [
        pytest.param("Cups", "potatoes", True, True, 1.0, id="all matched"),
        pytest.param("Cups", "veryuniquefood", True, False, 0.75, id="unit matched only"),
        pytest.param("veryuniqueunit", "potatoes", False, True, 0.75, id="food matched only"),
        pytest.param("veryuniqueunit", "veryuniquefood", False, False, 0.5, id="neither matched"),
    ],
)
def test_brute_parser_confidence(
    unit: str,
    food: str,
    expect_unit_match: bool,
    expect_food_match: bool,
    expected_avg: float,
    unique_local_group_id: UUID4,
    parsed_ingredient_data: tuple[list[IngredientFood], list[IngredientUnit]],
):
    """Check the confidence scores the brute-force parser reports for ``"1 <unit> <food>"``.

    Quantity and comment confidence are expected to be 1; unit and food
    confidence are 1 when a match is expected (or the component is empty) and 0
    otherwise; ``average`` must equal ``expected_avg``.
    """
    input_str = f"1 {unit} {food}"

    with session_context() as session:
        original_loop = asyncio.get_event_loop()
        # Create the loop before entering ``try`` so that ``finally`` never
        # references an unbound name if loop creation itself fails.
        loop = asyncio.new_event_loop()
        try:
            asyncio.set_event_loop(loop)
            parser = get_parser(RegisteredParser.brute, unique_local_group_id, session)
            parsed = loop.run_until_complete(parser.parse_one(input_str))
        finally:
            # Always close the temporary loop and restore the previous one
            loop.close()
            asyncio.set_event_loop(original_loop)

    conf = parsed.confidence

    assert conf.quantity == 1
    assert conf.comment == 1
    assert conf.unit == (1 if expect_unit_match or not unit else 0)
    assert conf.food == (1 if expect_food_match or not food else 0)
    assert conf.average == expected_avg
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "input, expected_unit_name, expected_food_name, expect_unit_match, expect_food_match",
    (
        pytest.param(
            build_parsed_ing(unit="cup", food="potatoes"),
            "Cups",
            "potatoes",
            True,
            True,
            id="basic match",
        ),
        pytest.param(  # this should work in sqlite since "potato" is contained within "potatoes"
            build_parsed_ing(unit="cup", food="potato"),
            "Cups",
            "potatoes",
            True,
            True,
            id="basic fuzzy match",
        ),
        pytest.param(
            build_parsed_ing(unit="tablespoon", food="onion"),
            "Tablespoon",
            "onion",
            True,
            True,
            id="nested match 1",
        ),
        pytest.param(
            build_parsed_ing(unit="teaspoon", food="green onion"),
            "Teaspoon",
            "green onion",
            True,
            True,
            id="nested match 2",
        ),
        pytest.param(
            build_parsed_ing(unit="cup", food="gren onion"),
            "Cups",
            "green onion",
            True,
            True,
            id="nested match 3",
        ),
        pytest.param(
            build_parsed_ing(unit="stalk", food="very unique"),
            "Stalk",
            "very unique",
            True,
            False,
            id="no food match",
        ),
        pytest.param(
            build_parsed_ing(unit="cup", food=None),
            "Cups",
            None,
            True,
            False,
            id="no food input",
        ),
        pytest.param(
            build_parsed_ing(unit="very unique", food="fresh ginger"),
            "very unique",
            "fresh ginger",
            False,
            True,
            id="no unit match",
        ),
        pytest.param(
            build_parsed_ing(unit=None, food="potatoes"),
            None,
            "potatoes",
            False,
            True,
            id="no unit input",
        ),
        pytest.param(
            build_parsed_ing(unit="very unique", food="very unique"),
            "very unique",
            "very unique",
            False,
            False,
            id="no matches",
        ),
        pytest.param(
            build_parsed_ing(unit=None, food=None),
            None,
            None,
            False,
            False,
            id="no input",
        ),
        pytest.param(
            build_parsed_ing(unit="mvlun", food="potatoes"),
            "My Very Long Unit Name",
            "potatoes",
            True,
            True,
            id="unit abbreviation",
        ),
        pytest.param(
            build_parsed_ing(unit=None, food="n̅ōr̅m̄a̅l̄i̅z̄e̅m̄e̅"),
            None,
            "ñör̃m̈ãl̈ĩz̈ẽm̈ẽ",
            False,
            True,
            id="normalization",
        ),
        pytest.param(
            build_parsed_ing(unit=None, food="myfoodisplural"),
            None,
            "PluralFoodTest",
            False,
            True,
            id="plural food name",
        ),
        pytest.param(
            build_parsed_ing(unit="abc123", food=None),
            "PluralUnitName",
            None,
            True,
            False,
            id="plural unit name",
        ),
        pytest.param(
            build_parsed_ing(unit="doremi123", food=None),
            "PluralUnitName",
            None,
            True,
            False,
            id="plural unit abbreviation",
        ),
        pytest.param(
            build_parsed_ing(unit=None, food="thisismyalias"),
            None,
            "IHaveAnAlias",
            False,
            True,
            id="food alias",
        ),
        pytest.param(
            build_parsed_ing(unit="thisismyalias", food=None),
            "IHaveAnAliasToo",
            None,
            True,
            False,
            id="unit alias",
        ),
    ),
)
def test_parser_ingredient_match(
    expected_food_name: str | None,
    expected_unit_name: str | None,
    expect_food_match: bool,
    expect_unit_match: bool,
    input: ParsedIngredient,
    parsed_ingredient_data: tuple[list[IngredientFood], list[IngredientUnit]],  # required so database is populated
    unique_local_group_id: UUID4,
):
    """Run ``find_ingredient_match`` on a pre-built ingredient and check the resulting food and unit.

    A matched component must be a stored record (``IngredientFood`` /
    ``IngredientUnit``); an unmatched but named one must remain a ``Create*``
    schema; a component without a name must be ``None``.
    """
    with session_context() as session:
        brute_parser = get_parser(RegisteredParser.brute, unique_local_group_id, session)
        parsed_ingredient = brute_parser.find_ingredient_match(input)

    # --- food: name, then type ---
    if not expected_food_name:
        assert parsed_ingredient.ingredient.food is None
    else:
        assert parsed_ingredient.ingredient.food and parsed_ingredient.ingredient.food.name == expected_food_name

    if expect_food_match:
        assert isinstance(parsed_ingredient.ingredient.food, IngredientFood)
    elif parsed_ingredient.ingredient.food and parsed_ingredient.ingredient.food.name:
        assert isinstance(parsed_ingredient.ingredient.food, CreateIngredientFood)
    else:
        assert parsed_ingredient.ingredient.food is None

    # --- unit: name, then type ---
    if not expected_unit_name:
        assert parsed_ingredient.ingredient.unit is None
    else:
        assert parsed_ingredient.ingredient.unit and parsed_ingredient.ingredient.unit.name == expected_unit_name

    if expect_unit_match:
        assert isinstance(parsed_ingredient.ingredient.unit, IngredientUnit)
    elif parsed_ingredient.ingredient.unit and parsed_ingredient.ingredient.unit.name:
        assert isinstance(parsed_ingredient.ingredient.unit, CreateIngredientUnit)
    else:
        assert parsed_ingredient.ingredient.unit is None
|
||||
|
||||
|
||||
def test_openai_parser(
    unique_local_group_id: UUID4,
    parsed_ingredient_data: tuple[list[IngredientFood], list[IngredientUnit]],  # required so database is populated
    monkeypatch: pytest.MonkeyPatch,
):
    """Run the OpenAI parser against a mocked service and check count and ordering of results."""
    ingredient_count = random_int(10, 20)

    async def mock_get_response(self, prompt: str, message: str, *args, **kwargs) -> OpenAIIngredients | None:
        # Answer with one randomly filled ingredient per entry of the JSON-encoded request.
        fake_ingredients = []
        for _ in json.loads(message):
            fake_ingredients.append(
                OpenAIIngredient(
                    quantity=random_int(0, 10),
                    unit=random_string(),
                    food=random_string(),
                    note=random_string(),
                )
            )
        return OpenAIIngredients(ingredients=fake_ingredients)

    monkeypatch.setattr(OpenAIService, "get_response", mock_get_response)

    with session_context() as session:
        event_loop = asyncio.get_event_loop()
        openai_parser = get_parser(RegisteredParser.openai, unique_local_group_id, session)

        inputs = [random_string() for _ in range(ingredient_count)]
        parsed = event_loop.run_until_complete(openai_parser.parse(inputs))

        # The service is mocked, so the parsed values themselves are meaningless;
        # only verify that parsing succeeds and that input order is preserved.
        assert len(parsed) == ingredient_count
        for raw_text, result in zip(inputs, parsed, strict=True):
            assert result.input == raw_text
|
||||
|
||||
|
||||
def test_openai_parser_sanitize_output(
    unique_local_group_id: UUID4,
    unique_user: TestUser,
    parsed_ingredient_data: tuple[list[IngredientFood], list[IngredientUnit]],  # required so database is populated
    monkeypatch: pytest.MonkeyPatch,
):
    """Check that a null character in the raw OpenAI response is stripped and the result can be saved."""

    async def mock_get_raw_response(self, prompt: str, content: list[dict], response_schema) -> MagicMock:
        # Embed a null character in the JSON payload to exercise the preprocessing step
        tainted = OpenAIIngredient(
            quantity=random_int(0, 10),
            unit="",
            food="there is a null character here: \x00",
            note="",
        )
        payload = OpenAIIngredients(ingredients=[tainted])

        # Shape the mock like an OpenAI chat response: choices[0].message.content
        choice = MagicMock()
        choice.message.content = payload.model_dump_json()
        raw_response = MagicMock()
        raw_response.choices = [choice]
        return raw_response

    # Patch the raw response (not the parsed one) so the service's own processing runs before the model is loaded
    monkeypatch.setattr(OpenAIService, "_get_raw_response", mock_get_raw_response)

    with session_context() as session:
        event_loop = asyncio.get_event_loop()
        openai_parser = get_parser(RegisteredParser.openai, unique_local_group_id, session)

        parsed = event_loop.run_until_complete(openai_parser.parse([""]))
        assert len(parsed) == 1
        parsed_ing = cast(ParsedIngredient, parsed[0])
        assert parsed_ing.ingredient.food
        assert parsed_ing.ingredient.food.name == "there is a null character here: "

    # The sanitized ingredient must be usable to create a recipe
    assert isinstance(parsed_ing.ingredient.food, CreateIngredientFood)
    saved_food = unique_user.repos.ingredient_foods.create(
        parsed_ing.ingredient.food.cast(SaveIngredientFood, group_id=unique_user.group_id)
    )
    parsed_ing.ingredient.food = saved_food
    recipe = Recipe(
        user_id=unique_user.user_id,
        group_id=unique_user.group_id,
        name=random_string(),
        recipe_ingredient=[parsed_ing.ingredient],
    )
    unique_user.repos.recipes.create(recipe)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "original_text,quantity,unit,food,note,qty_range,unit_range,food_range,note_range",
    [
        # Every parsed component appears in the text: all scores expected to be 1.0.
        pytest.param(
            "2 cups flour",
            2.0,
            "Cups",
            "flour",
            "",
            (1.0, 1.0),
            (1.0, 1.0),
            (1.0, 1.0),
            (1.0, 1.0),
            id="perfect_match_all_components",
        ),
        # Parsed quantity (3.0) disagrees with the text: quantity score expected to be 0.0.
        pytest.param(
            "2 cups flour",
            3.0,
            "Cups",
            "flour",
            "",
            (0.0, 0.0),
            (1.0, 1.0),
            (1.0, 1.0),
            (1.0, 1.0),
            id="quantity_mismatch",
        ),
        # No unit was parsed: unit score expected in a middle band instead of 0.0 or 1.0.
        pytest.param(
            "2 cups flour",
            2.0,
            None,
            "flour",
            "",
            (1.0, 1.0),
            (0.4, 0.9),
            (1.0, 1.0),
            (1.0, 1.0),
            id="missing_unit_fallback",
        ),
        # No food was parsed: food score expected in a middle band instead of 0.0 or 1.0.
        pytest.param(
            "2 cups flour",
            2.0,
            "Cups",
            None,
            "",
            (1.0, 1.0),
            (1.0, 1.0),
            (0.4, 0.9),
            (1.0, 1.0),
            id="missing_food_fallback",
        ),
        # Both note words occur in the text: note score expected to be high.
        pytest.param(
            "2 cups flour sifted fresh",
            2.0,
            "Cups",
            "flour",
            "sifted fresh",
            (1.0, 1.0),
            (1.0, 1.0),
            (1.0, 1.0),
            (0.8, 1.0),
            id="note_full_match",
        ),
        # Only one of the two note words occurs in the text: note score expected around 0.5.
        pytest.param(
            "2 cups flour sifted",
            2.0,
            "Cups",
            "flour",
            "sifted chopped",
            (1.0, 1.0),
            (1.0, 1.0),
            (1.0, 1.0),
            (0.4, 0.6),
            id="note_partial_match",
        ),
        # Neither note word occurs in the text: note score expected to be 0.0.
        pytest.param(
            "2 cups flour",
            2.0,
            "Cups",
            "flour",
            "chopped minced",
            (1.0, 1.0),
            (1.0, 1.0),
            (1.0, 1.0),
            (0.0, 0.0),
            id="note_no_match",
        ),
        # Wrong quantity, no unit, no food and a half-matching note, all at once.
        pytest.param(
            "1.5 tsp salt kosher",
            1.0,
            None,
            None,
            "kosher fine",
            (0.0, 0.0),
            (0.3, 0.7),
            (0.3, 0.7),
            (0.4, 0.6),
            id="multiple_issues",
        ),
        # Empty source text: quantity and note are expected to score 0.0, unit and food 1.0.
        pytest.param(
            "",
            1.0,
            "Cups",
            "flour",
            "fresh",
            (0.0, 0.0),
            (1.0, 1.0),
            (1.0, 1.0),
            (0.0, 0.0),
            id="empty_original_text",
        ),
        # Zero quantity, no unit, text is just the food: every score expected to be 1.0.
        pytest.param(
            "salt",
            0.0,
            None,
            "salt",
            "",
            (1.0, 1.0),
            (1.0, 1.0),
            (1.0, 1.0),
            (1.0, 1.0),
            id="zero_quantity_match",
        ),
    ],
)
def test_openai_parser_confidence(
    original_text: str,
    quantity: float | None,
    unit: str | None,
    food: str | None,
    note: str,
    qty_range: tuple[float, float],
    unit_range: tuple[float, float],
    food_range: tuple[float, float],
    note_range: tuple[float, float],
    unique_local_group_id: UUID4,
    parsed_ingredient_data: tuple[list[IngredientFood], list[IngredientUnit]],  # required so database is populated
):
    """Check OpenAIParser._calculate_confidence against expected per-field score ranges.

    Each case builds a RecipeIngredient from the parametrized quantity, unit, food
    and note, scores it against ``original_text``, and then asserts that:

    * every confidence field (quantity, unit, food, comment, average) is populated,
    * each field score lies inside its inclusive ``(min, max)`` range, and
    * ``average`` equals the mean of the four field scores to within 0.001.
    """
    from mealie.services.parser_services.openai.parser import OpenAIParser

    with session_context() as session:
        openai_parser = cast(OpenAIParser, get_parser(RegisteredParser.openai, unique_local_group_id, session))

        # Only wrap the unit/food names in schema objects when the case supplies them;
        # a missing (or empty) value is passed through as None.
        parsed_unit = None
        if unit:
            parsed_unit = CreateIngredientUnit(name=unit)

        parsed_food = None
        if food:
            parsed_food = CreateIngredientFood(name=food)

        candidate = RecipeIngredient(
            original_text=original_text,
            quantity=quantity,
            unit=parsed_unit,
            food=parsed_food,
            note=note or None,
        )

        scores = openai_parser._calculate_confidence(original_text, candidate)

        # The method is expected to fill in every field, never leave one as None
        assert scores.quantity is not None, "Quantity confidence should not be None"
        assert scores.unit is not None, "Unit confidence should not be None"
        assert scores.food is not None, "Food confidence should not be None"
        assert scores.comment is not None, "Comment confidence should not be None"
        assert scores.average is not None, "Average confidence should not be None"

        # Ranges (rather than exact values) absorb fuzzy-matching variability
        qty_low, qty_high = qty_range
        assert qty_low <= scores.quantity <= qty_high, (
            f"Quantity confidence out of range: expected {qty_range}, got {scores.quantity}"
        )

        unit_low, unit_high = unit_range
        assert unit_low <= scores.unit <= unit_high, (
            f"Unit confidence out of range: expected {unit_range}, got {scores.unit}"
        )

        food_low, food_high = food_range
        assert food_low <= scores.food <= food_high, (
            f"Food confidence out of range: expected {food_range}, got {scores.food}"
        )

        note_low, note_high = note_range
        assert note_low <= scores.comment <= note_high, (
            f"Note confidence out of range: expected {note_range}, got {scores.comment}"
        )

        # The reported average must be the plain mean of the four field scores
        field_total = scores.quantity + scores.unit + scores.food + scores.comment
        expected_avg = field_total / 4
        assert abs(scores.average - expected_avg) < 0.001, (
            f"Average confidence mismatch: expected {expected_avg}, got {scores.average}"
        )
|
||||
Reference in New Issue
Block a user