From 77081d048242f94c3637f34da06a310d689c24a2 Mon Sep 17 00:00:00 2001 From: Michael Genson <71845777+michael-genson@users.noreply.github.com> Date: Thu, 5 Mar 2026 16:28:14 -0600 Subject: [PATCH] feat: include extra ingredients + amounts in NLP parser output (#7191) --- mealie/lang/messages/en-US.json | 4 +- mealie/routes/parser/ingredient_parser.py | 4 +- mealie/services/parser_services/_base.py | 6 +- .../parser_services/ingredient_parser.py | 155 +++- .../test_recipe_ingredient_parser.py | 67 -- .../ingredient_parser/conftest.py | 93 +++ .../ingredient_parser/test_brute_parser.py | 351 +++++++++ .../ingredient_parser/test_nlp_parser.py | 113 +++ .../ingredient_parser/test_openai_parser.py | 311 ++++++++ tests/unit_tests/test_ingredient_parser.py | 737 ------------------ 10 files changed, 991 insertions(+), 850 deletions(-) delete mode 100644 tests/integration_tests/user_recipe_tests/test_recipe_ingredient_parser.py create mode 100644 tests/unit_tests/services_tests/ingredient_parser/conftest.py create mode 100644 tests/unit_tests/services_tests/ingredient_parser/test_brute_parser.py create mode 100644 tests/unit_tests/services_tests/ingredient_parser/test_nlp_parser.py create mode 100644 tests/unit_tests/services_tests/ingredient_parser/test_openai_parser.py delete mode 100644 tests/unit_tests/test_ingredient_parser.py diff --git a/mealie/lang/messages/en-US.json b/mealie/lang/messages/en-US.json index 9a815c35b..e6d1a46eb 100644 --- a/mealie/lang/messages/en-US.json +++ b/mealie/lang/messages/en-US.json @@ -17,7 +17,9 @@ "servings": "Servings", "yield": "Yield", "yields": "Yields" - } + }, + "and-amount": "and {amount}", + "or-ingredient": "or {ingredient}" }, "mealplan": { "no-recipes-match-your-rules": "No recipes match your rules" diff --git a/mealie/routes/parser/ingredient_parser.py b/mealie/routes/parser/ingredient_parser.py index a4a36d2aa..69fb765e1 100644 --- a/mealie/routes/parser/ingredient_parser.py +++ b/mealie/routes/parser/ingredient_parser.py 
@@ -12,11 +12,11 @@ router = APIRouter(prefix="/parser") class IngredientParserController(BaseUserController): @router.post("/ingredient", response_model=ParsedIngredient) async def parse_ingredient(self, ingredient: IngredientRequest): - parser = get_parser(ingredient.parser, self.group_id, self.session) + parser = get_parser(ingredient.parser, self.group_id, self.session, self.translator) response = await parser.parse([ingredient.ingredient]) return response[0] @router.post("/ingredients", response_model=list[ParsedIngredient]) async def parse_ingredients(self, ingredients: IngredientsRequest): - parser = get_parser(ingredients.parser, self.group_id, self.session) + parser = get_parser(ingredients.parser, self.group_id, self.session, self.translator) return await parser.parse(ingredients.ingredients) diff --git a/mealie/services/parser_services/_base.py b/mealie/services/parser_services/_base.py index e7aee5b7d..89fc8f290 100644 --- a/mealie/services/parser_services/_base.py +++ b/mealie/services/parser_services/_base.py @@ -5,6 +5,7 @@ from rapidfuzz import fuzz, process from sqlalchemy.orm import Session from mealie.db.models.recipe.ingredient import IngredientFoodModel, IngredientUnitModel +from mealie.lang.providers import Translator from mealie.repos.all_repositories import get_repositories from mealie.repos.repository_factory import AllRepositories from mealie.schema.recipe.recipe_ingredient import ( @@ -126,11 +127,14 @@ class ABCIngredientParser(ABC): Abstract class for ingredient parsers. 
""" - def __init__(self, group_id: UUID4, session: Session) -> None: + def __init__(self, group_id: UUID4, session: Session, translator: Translator) -> None: self.group_id = group_id self.session = session self.data_matcher = DataMatcher(self._repos, self.food_fuzzy_match_threshold, self.unit_fuzzy_match_threshold) + self.translator = translator + self.t = self.translator.t + @property def _repos(self) -> AllRepositories: return get_repositories(self.session, group_id=self.group_id) diff --git a/mealie/services/parser_services/ingredient_parser.py b/mealie/services/parser_services/ingredient_parser.py index 2f26a1ae5..6f011e501 100644 --- a/mealie/services/parser_services/ingredient_parser.py +++ b/mealie/services/parser_services/ingredient_parser.py @@ -1,4 +1,6 @@ +from dataclasses import dataclass, field from fractions import Fraction +from itertools import zip_longest from ingredient_parser import parse_ingredient from ingredient_parser.dataclasses import CompositeIngredientAmount, IngredientAmount @@ -7,6 +9,7 @@ from pydantic import UUID4 from sqlalchemy.orm import Session from mealie.core.root_logger import get_logger +from mealie.lang.providers import Translator from mealie.schema.recipe import RecipeIngredient from mealie.schema.recipe.recipe_ingredient import ( CreateIngredientFood, @@ -70,13 +73,29 @@ class BruteForceParser(ABCIngredientParser): return [await self.parse_one(ingredient) for ingredient in ingredients] +@dataclass +class _IngredientPart: + qty: float = 0 + unit: str = "" + food: str = "" + extra_amounts: list[IngredientAmount] = field(default_factory=list) + qty_conf: float = 0 + unit_conf: float = 0 + food_conf: float = 0 + + @property + def avg_conf(self) -> float: + confs = [self.qty_conf, self.unit_conf, self.food_conf] + return sum(confs) / len(confs) + + class NLPParser(ABCIngredientParser): """ Class for Ingredient Parser library """ - @staticmethod - def _extract_amount(ingredient: IngredientParserParsedIngredient) -> 
IngredientAmount: + @classmethod + def _extract_amount(cls, ingredient: IngredientParserParsedIngredient) -> IngredientAmount: if not (ingredient_amounts := ingredient.amount): return IngredientAmount( quantity=Fraction(0), quantity_max=Fraction(0), unit="", text="", confidence=0, starting_index=-1 @@ -88,8 +107,8 @@ class NLPParser(ABCIngredientParser): return ingredient_amount - @staticmethod - def _extract_quantity(ingredient_amount: IngredientAmount) -> tuple[float, float]: + @classmethod + def _extract_quantity(cls, ingredient_amount: IngredientAmount) -> tuple[float, float]: confidence = ingredient_amount.confidence if isinstance(ingredient_amount.quantity, str): @@ -103,27 +122,19 @@ class NLPParser(ABCIngredientParser): return qty, confidence - @staticmethod - def _extract_unit(ingredient_amount: IngredientAmount) -> tuple[str, float]: + @classmethod + def _extract_unit(cls, ingredient_amount: IngredientAmount) -> tuple[str, float]: confidence = ingredient_amount.confidence unit = str(ingredient_amount.unit) if ingredient_amount.unit else "" return unit, confidence - @staticmethod - def _extract_food(ingredient: IngredientParserParsedIngredient) -> tuple[str, float]: - if not ingredient.name: - return "", 0 - - ingredient_name = ingredient.name[0] - confidence = ingredient_name.confidence - food = ingredient_name.text - - return food, confidence - - @staticmethod - def _extract_note(ingredient: IngredientParserParsedIngredient) -> tuple[str, float]: + @classmethod + def _extract_note( + cls, ingredient: IngredientParserParsedIngredient, extra_amounts: list[IngredientAmount] | None = None + ) -> tuple[str, float]: confidences: list[float] = [] note_parts: list[str] = [] + if ingredient.size: note_parts.append(ingredient.size.text) confidences.append(ingredient.size.confidence) @@ -139,45 +150,103 @@ class NLPParser(ABCIngredientParser): # average confidence among all note parts confidence = sum(confidences) / len(confidences) if confidences else 0 + note = 
", ".join(note_parts) note = note.replace("(", "").replace(")", "") + # insert extra amounts to the front of the notes with parenthesis + if extra_amounts: + amt_part = "(" + ", ".join([amount.text for amount in extra_amounts]) + ")" + note = " ".join(filter(None, [amt_part, note])) + return note, confidence def _convert_ingredient(self, ingredient: IngredientParserParsedIngredient) -> ParsedIngredient: - ingredient_amount = self._extract_amount(ingredient) - qty, qty_conf = self._extract_quantity(ingredient_amount) - unit, unit_conf = self._extract_unit(ingredient_amount) - food, food_conf = self._extract_food(ingredient) - note, note_conf = self._extract_note(ingredient) + ing_parts: list[_IngredientPart] = [] + + for amount, ing_name in zip_longest(ingredient.amount, ingredient.name, fillvalue=None): + part = _IngredientPart() + + if amount: + if isinstance(amount, CompositeIngredientAmount): + part.extra_amounts = list(amount.amounts[1:]) + amount = amount.amounts[0] + + part.qty, part.qty_conf = self._extract_quantity(amount) + part.unit, part.unit_conf = self._extract_unit(amount) + + if ing_name: + part.food = ing_name.text + part.food_conf = ing_name.confidence + + ing_parts.append(part) + + note, note_conf = self._extract_note(ingredient, ing_parts[0].extra_amounts if ing_parts else None) + + # Safeguard in case the parser outputs nothing + if not ing_parts: + ing_parts.append(_IngredientPart()) # average confidence for components which were parsed + # uses ing_parts[0] since this is the primary ingredient + primary = ing_parts[0] confidences: list[float] = [] - if qty: - confidences.append(qty_conf) - if unit: - confidences.append(unit_conf) - if food: - confidences.append(food_conf) + + if primary.qty: + confidences.append(primary.qty_conf) + if primary.unit: + confidences.append(primary.unit_conf) + if primary.food: + confidences.append(primary.food_conf) if note: confidences.append(note_conf) + if len(ing_parts) > 1: + confidences.extend([part.avg_conf 
for part in ing_parts[1:]]) + + recipe_ingredients: list[RecipeIngredient] = [] + for i, part in enumerate(ing_parts): + if not i: + ing_note = note + elif part.extra_amounts: + # TODO: handle extra amounts when we add support for them + # For now, just add them as a note ("and amt_1, and amt_2, and ...") + ing_note = ", ".join(self.t("recipe.and-amount", amount=a.text) for a in part.extra_amounts) + else: + ing_note = None + recipe_ingredients.append( + RecipeIngredient( + quantity=part.qty, + unit=CreateIngredientUnit(name=part.unit) if part.unit else None, + food=CreateIngredientFood(name=part.food) if part.food else None, + note=ing_note, + ) + ) + + primary_ingredient = recipe_ingredients[0] # there will always be at least one recipe ingredient + extra_ingredients = recipe_ingredients[1:] if len(recipe_ingredients) > 1 else [] + + # TODO: handle extra ingredients when we support them + # For now, just add them to the note ("or ing_1, or ing_2, or ...") + if extra_ingredients: + extras_note_parts = [ + self.t("recipe.or-ingredient", ingredient=extra_ing.display) for extra_ing in extra_ingredients + ] + extras_note = ", ".join(extras_note_parts) + primary_ingredient.note = " ".join(filter(None, [extras_note, primary_ingredient.note])) + + # re-calculate display property since we modified the note + primary_ingredient.display = primary_ingredient._format_display() parsed_ingredient = ParsedIngredient( input=ingredient.sentence, confidence=IngredientConfidence( average=(sum(confidences) / len(confidences)) if confidences else 0, - quantity=qty_conf, - unit=unit_conf, - food=food_conf, + quantity=primary.qty_conf, + unit=primary.unit_conf, + food=primary.food_conf, comment=note_conf, ), - ingredient=RecipeIngredient( - title="", - quantity=qty, - unit=CreateIngredientUnit(name=unit) if unit else None, - food=CreateIngredientFood(name=food) if food else None, - note=note, - ), + ingredient=primary_ingredient, ) return self.find_ingredient_match(parsed_ingredient) @@ 
-197,9 +266,11 @@ __registrar: dict[RegisteredParser, type[ABCIngredientParser]] = { } -def get_parser(parser: RegisteredParser, group_id: UUID4, session: Session) -> ABCIngredientParser: +def get_parser( + parser: RegisteredParser, group_id: UUID4, session: Session, translator: Translator +) -> ABCIngredientParser: """ get_parser returns an ingrdeint parser based on the string enum value passed in. """ - return __registrar.get(parser, NLPParser)(group_id, session) + return __registrar.get(parser, NLPParser)(group_id, session, translator) diff --git a/tests/integration_tests/user_recipe_tests/test_recipe_ingredient_parser.py b/tests/integration_tests/user_recipe_tests/test_recipe_ingredient_parser.py deleted file mode 100644 index bd7a74ebe..000000000 --- a/tests/integration_tests/user_recipe_tests/test_recipe_ingredient_parser.py +++ /dev/null @@ -1,67 +0,0 @@ -import pytest -from fastapi.testclient import TestClient - -from mealie.schema.recipe.recipe_ingredient import RegisteredParser -from tests.unit_tests.test_ingredient_parser import TestIngredient -from tests.utils import api_routes -from tests.utils.fixture_schemas import TestUser - -nlp_test_ingredients = [ - TestIngredient("½ cup all-purpose flour", 0.5, "cup", "all-purpose flour", ""), - TestIngredient("1 ½ teaspoons ground black pepper", 1.5, "teaspoon", "black pepper", "ground"), - TestIngredient("⅔ cup unsweetened flaked coconut", 0.667, "cup", "unsweetened flaked coconut", ""), - TestIngredient("⅓ cup panko bread crumbs", 0.333, "cup", "panko bread crumbs", ""), - TestIngredient("1/8 cup all-purpose flour", 0.125, "cup", "all-purpose flour", ""), - TestIngredient("1/32 cup all-purpose flour", 0.031, "cup", "all-purpose flour", ""), - TestIngredient("1 1/2 cups chopped onion ", 1.5, "cup", "onion", "chopped"), - TestIngredient( - "2 pounds russet potatoes, peeled, and cut into 3/4-inch cubes ", - 2, - "pound", - "russet potatoes", - "peeled, and cut into 3/4 inch cubes", - ), - TestIngredient("2 
tablespoons (30ml) vegetable oil ", 2, "tablespoon", "vegetable oil", ""), - TestIngredient("2 teaspoons salt (to taste) ", 2, "teaspoon", "salt", "to taste"), - TestIngredient("2 cups chicken broth or beef broth ", 2, "cup", "chicken broth", ""), - TestIngredient("1/2 cup", 0.5, "cup", "", ""), -] - - -def assert_ingredient(api_response: dict, test_ingredient: TestIngredient): - response_quantity = api_response["ingredient"]["quantity"] - response_unit = api_response["ingredient"]["unit"]["name"] if api_response["ingredient"]["unit"] else "" - response_food = api_response["ingredient"]["food"]["name"] if api_response["ingredient"]["food"] else "" - response_note = api_response["ingredient"]["note"] - - assert response_quantity == pytest.approx(test_ingredient.quantity) - assert response_unit == test_ingredient.unit - assert response_food == test_ingredient.food - assert response_note == test_ingredient.comments - - -@pytest.mark.parametrize("test_ingredient", nlp_test_ingredients) -def test_recipe_ingredient_parser_nlp(api_client: TestClient, test_ingredient: TestIngredient, unique_user: TestUser): - payload = {"parser": RegisteredParser.nlp, "ingredient": test_ingredient.input} - response = api_client.post(api_routes.parser_ingredient, json=payload, headers=unique_user.token) - assert response.status_code == 200 - assert_ingredient(response.json(), test_ingredient) - - -def test_recipe_ingredients_parser_nlp(api_client: TestClient, unique_user: TestUser): - payload = {"parser": RegisteredParser.nlp, "ingredients": [x.input for x in nlp_test_ingredients]} - response = api_client.post(api_routes.parser_ingredients, json=payload, headers=unique_user.token) - assert response.status_code == 200 - - for api_ingredient, test_ingredient in zip(response.json(), nlp_test_ingredients, strict=False): - assert_ingredient(api_ingredient, test_ingredient) - - -@pytest.mark.skip("TODO: Implement") -def test_recipe_ingredient_parser_brute(api_client: TestClient): - pass - - 
-@pytest.mark.skip("TODO: Implement") -def test_recipe_ingredients_parser_brute(api_client: TestClient): - pass diff --git a/tests/unit_tests/services_tests/ingredient_parser/conftest.py b/tests/unit_tests/services_tests/ingredient_parser/conftest.py new file mode 100644 index 000000000..f40008935 --- /dev/null +++ b/tests/unit_tests/services_tests/ingredient_parser/conftest.py @@ -0,0 +1,93 @@ +import pytest +from pydantic import UUID4 +from sqlalchemy.orm import Session + +from mealie.repos.all_repositories import get_repositories +from mealie.repos.repository_factory import AllRepositories +from mealie.schema.recipe.recipe_ingredient import ( + CreateIngredientFoodAlias, + CreateIngredientUnitAlias, + IngredientFood, + IngredientUnit, + SaveIngredientFood, + SaveIngredientUnit, +) +from mealie.schema.user.user import GroupBase +from tests.utils.factories import random_int, random_string + + +@pytest.fixture() +def unique_local_group_id(unfiltered_database: AllRepositories) -> UUID4: + return str(unfiltered_database.groups.create(GroupBase(name=random_string())).id) + + +@pytest.fixture() +def unique_db(session: Session, unique_local_group_id: str): + return get_repositories(session, group_id=unique_local_group_id) + + +@pytest.fixture() +def parsed_ingredient_data( + unique_db: AllRepositories, unique_local_group_id: UUID4 +) -> tuple[list[IngredientFood], list[IngredientUnit]]: + foods = unique_db.ingredient_foods.create_many( + [ + SaveIngredientFood(name="potatoes", group_id=unique_local_group_id), + SaveIngredientFood(name="onion", group_id=unique_local_group_id), + SaveIngredientFood(name="green onion", group_id=unique_local_group_id), + SaveIngredientFood(name="frozen pearl onions", group_id=unique_local_group_id), + SaveIngredientFood(name="bell peppers", group_id=unique_local_group_id), + SaveIngredientFood(name="red pepper flakes", group_id=unique_local_group_id), + SaveIngredientFood(name="fresh ginger", group_id=unique_local_group_id), + 
+ SaveIngredientFood(name="ground ginger", group_id=unique_local_group_id), + SaveIngredientFood(name="ñör̃m̈ãl̈ĩz̈ẽm̈ẽ", group_id=unique_local_group_id), + SaveIngredientFood(name="PluralFoodTest", plural_name="myfoodisplural", group_id=unique_local_group_id), + SaveIngredientFood( + name="IHaveAnAlias", + group_id=unique_local_group_id, + aliases=[CreateIngredientFoodAlias(name="thisismyalias")], + ), + ] + ) + + foods.extend( + unique_db.ingredient_foods.create_many( + [ + SaveIngredientFood(name=f"{random_string()} food", group_id=unique_local_group_id) + for _ in range(random_int(10, 15)) + ] + ) + ) + + units = unique_db.ingredient_units.create_many( + [ + SaveIngredientUnit(name="Cups", group_id=unique_local_group_id), + SaveIngredientUnit(name="Tablespoon", group_id=unique_local_group_id), + SaveIngredientUnit(name="Teaspoon", group_id=unique_local_group_id), + SaveIngredientUnit(name="Stalk", group_id=unique_local_group_id), + SaveIngredientUnit(name="My Very Long Unit Name", abbreviation="mvlun", group_id=unique_local_group_id), + SaveIngredientUnit( + name="PluralUnitName", + plural_name="abc123", + abbreviation="doremiabc", + plural_abbreviation="doremi123", + group_id=unique_local_group_id, + ), + SaveIngredientUnit( + name="IHaveAnAliasToo", + group_id=unique_local_group_id, + aliases=[CreateIngredientUnitAlias(name="thisismyalias")], + ), + ] + ) + + units.extend( + unique_db.ingredient_units.create_many( + [ + SaveIngredientUnit(name=f"{random_string()} unit", group_id=unique_local_group_id) + for _ in range(random_int(10, 15)) + ] + ) + ) + + return foods, units diff --git a/tests/unit_tests/services_tests/ingredient_parser/test_brute_parser.py b/tests/unit_tests/services_tests/ingredient_parser/test_brute_parser.py new file mode 100644 index 000000000..e50206715 --- /dev/null +++ b/tests/unit_tests/services_tests/ingredient_parser/test_brute_parser.py @@ -0,0 +1,351 @@ +import asyncio + +import pytest +from pydantic import UUID4 + +from 
mealie.db.db_setup import session_context +from mealie.lang.providers import get_locale_provider +from mealie.schema.recipe.recipe_ingredient import ( + CreateIngredientFood, + CreateIngredientUnit, + IngredientFood, + IngredientUnit, + ParsedIngredient, + RecipeIngredient, +) +from mealie.services.parser_services import RegisteredParser, get_parser + + +def build_parsed_ing(food: str | None, unit: str | None) -> ParsedIngredient: + ing = RecipeIngredient(unit=None, food=None) + if food: + ing.food = CreateIngredientFood(name=food) + if unit: + ing.unit = CreateIngredientUnit(name=unit) + + return ParsedIngredient(input=None, ingredient=ing) + + +@pytest.mark.parametrize( + "input, quantity, unit, food, comment", + [ + pytest.param("1 theelepel koffie", 1, "theelepel", "koffie", "", id="1 theelepel koffie"), + pytest.param("3 theelepels koffie", 3, "theelepels", "koffie", "", id="3 theelepels koffie"), + pytest.param("1 eetlepel tarwe", 1, "eetlepel", "tarwe", "", id="1 eetlepel tarwe"), + pytest.param("20 eetlepels bloem", 20, "eetlepels", "bloem", "", id="20 eetlepels bloem"), + pytest.param("1 mespunt kaneel", 1, "mespunt", "kaneel", "", id="1 mespunt kaneel"), + pytest.param("1 snuf(je) zout", 1, "snuf(je)", "zout", "", id="1 snuf(je) zout"), + pytest.param( + "2 tbsp minced cilantro, leaves and stems", + 2, + "tbsp", + "minced cilantro", + "leaves and stems", + id="2 tbsp minced cilantro, leaves and stems", + ), + pytest.param( + "1 large yellow onion, coarsely chopped", + 1, + "large", + "yellow onion", + "coarsely chopped", + id="1 large yellow onion, coarsely chopped", + ), + pytest.param("1 1/2 tsp garam masala", 1.5, "tsp", "garam masala", "", id="1 1/2 tsp garam masala"), + pytest.param( + "2 cups mango chunks, (2 large mangoes) (fresh or frozen)", + 2, + "Cups", + "mango chunks, (2 large mangoes)", + "fresh or frozen", + id="2 cups mango chunks, (2 large mangoes) (fresh or frozen)", + ), + pytest.param("stalk onion", 0, "Stalk", "onion", "", id="stalk 
onion"), + pytest.param("a stalk bell peppers", 0, "Stalk", "bell peppers", "", id="a stalk bell peppers"), + pytest.param("a tablespoon unknownFood", 0, "Tablespoon", "unknownFood", "", id="a tablespoon unknownFood"), + pytest.param( + "stalk bell peppers, cut in pieces", + 0, + "Stalk", + "bell peppers", + "cut in pieces", + id="stalk bell peppers, cut in pieces", + ), + pytest.param( + "a stalk bell peppers, cut in pieces", + 0, + "Stalk", + "bell peppers", + "cut in pieces", + id="a stalk bell peppers, cut in pieces", + ), + pytest.param("red pepper flakes", 0, "", "red pepper flakes", "", id="red pepper flakes"), + pytest.param("1 bell peppers", 1, "", "bell peppers", "", id="1 bell peppers"), + pytest.param("1 stalk bell peppers", 1, "Stalk", "bell peppers", "", id="1 big stalk bell peppers"), + pytest.param("a big stalk bell peppers", 0, "Stalk", "bell peppers", "", id="a big stalk bell peppers"), + pytest.param( + "1 bell peppers, cut in pieces", 1, "", "bell peppers", "cut in pieces", id="1 bell peppers, cut in pieces" + ), + pytest.param( + "bell peppers, cut in pieces", 0, "", "bell peppers", "cut in pieces", id="bell peppers, cut in pieces" + ), + ], +) +def test_brute_parser( + unique_local_group_id: UUID4, + parsed_ingredient_data: tuple[list[IngredientFood], list[IngredientUnit]], # required so database is populated + input: str, + quantity: int | float, + unit: str, + food: str, + comment: str, +): + with session_context() as session: + loop = asyncio.get_event_loop() + parser = get_parser(RegisteredParser.brute, unique_local_group_id, session, get_locale_provider()) + parsed = loop.run_until_complete(parser.parse_one(input)) + ing = parsed.ingredient + + if ing.quantity: + assert ing.quantity == quantity + else: + assert not quantity + if ing.unit: + assert ing.unit.name == unit + else: + assert not unit + if ing.food: + assert ing.food.name == food + else: + assert not food + if ing.note: + assert ing.note == comment + else: + assert not comment + 
+ +@pytest.mark.parametrize( + "unit, food, expect_unit_match, expect_food_match, expected_avg", + [ + pytest.param("Cups", "potatoes", True, True, 1.0, id="all matched"), + pytest.param("Cups", "veryuniquefood", True, False, 0.75, id="unit matched only"), + pytest.param("veryuniqueunit", "potatoes", False, True, 0.75, id="food matched only"), + pytest.param("veryuniqueunit", "veryuniquefood", False, False, 0.5, id="neither matched"), + ], +) +def test_brute_parser_confidence( + unit: str, + food: str, + expect_unit_match: bool, + expect_food_match: bool, + expected_avg: float, + unique_local_group_id: UUID4, + parsed_ingredient_data: tuple[list[IngredientFood], list[IngredientUnit]], +): + input_str = f"1 {unit} {food}" + + with session_context() as session: + original_loop = asyncio.get_event_loop() + try: + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + parser = get_parser(RegisteredParser.brute, unique_local_group_id, session, get_locale_provider()) + parsed = loop.run_until_complete(parser.parse_one(input_str)) + finally: + loop.close() + asyncio.set_event_loop(original_loop) + + conf = parsed.confidence + + assert conf.quantity == 1 + assert conf.comment == 1 + assert conf.unit == (1 if expect_unit_match or not unit else 0) + assert conf.food == (1 if expect_food_match or not food else 0) + assert conf.average == expected_avg + + +@pytest.mark.parametrize( + "input, expected_unit_name, expected_food_name, expect_unit_match, expect_food_match", + ( + pytest.param( + build_parsed_ing(unit="cup", food="potatoes"), + "Cups", + "potatoes", + True, + True, + id="basic match", + ), + pytest.param( # this should work in sqlite since "potato" is contained within "potatoes" + build_parsed_ing(unit="cup", food="potato"), + "Cups", + "potatoes", + True, + True, + id="basic fuzzy match", + ), + pytest.param( + build_parsed_ing(unit="tablespoon", food="onion"), + "Tablespoon", + "onion", + True, + True, + id="nested match 1", + ), + pytest.param( + 
build_parsed_ing(unit="teaspoon", food="green onion"), + "Teaspoon", + "green onion", + True, + True, + id="nested match 2", + ), + pytest.param( + build_parsed_ing(unit="cup", food="gren onion"), + "Cups", + "green onion", + True, + True, + id="nested match 3", + ), + pytest.param( + build_parsed_ing(unit="stalk", food="very unique"), + "Stalk", + "very unique", + True, + False, + id="no food match", + ), + pytest.param( + build_parsed_ing(unit="cup", food=None), + "Cups", + None, + True, + False, + id="no food input", + ), + pytest.param( + build_parsed_ing(unit="very unique", food="fresh ginger"), + "very unique", + "fresh ginger", + False, + True, + id="no unit match", + ), + pytest.param( + build_parsed_ing(unit=None, food="potatoes"), + None, + "potatoes", + False, + True, + id="no unit input", + ), + pytest.param( + build_parsed_ing(unit="very unique", food="very unique"), + "very unique", + "very unique", + False, + False, + id="no matches", + ), + pytest.param( + build_parsed_ing(unit=None, food=None), + None, + None, + False, + False, + id="no input", + ), + pytest.param( + build_parsed_ing(unit="mvlun", food="potatoes"), + "My Very Long Unit Name", + "potatoes", + True, + True, + id="unit abbreviation", + ), + pytest.param( + build_parsed_ing(unit=None, food="n̅ōr̅m̄a̅l̄i̅z̄e̅m̄e̅"), + None, + "ñör̃m̈ãl̈ĩz̈ẽm̈ẽ", + False, + True, + id="normalization", + ), + pytest.param( + build_parsed_ing(unit=None, food="myfoodisplural"), + None, + "PluralFoodTest", + False, + True, + id="plural food name", + ), + pytest.param( + build_parsed_ing(unit="abc123", food=None), + "PluralUnitName", + None, + True, + False, + id="plural unit name", + ), + pytest.param( + build_parsed_ing(unit="doremi123", food=None), + "PluralUnitName", + None, + True, + False, + id="plural unit abbreviation", + ), + pytest.param( + build_parsed_ing(unit=None, food="thisismyalias"), + None, + "IHaveAnAlias", + False, + True, + id="food alias", + ), + pytest.param( + 
build_parsed_ing(unit="thisismyalias", food=None), + "IHaveAnAliasToo", + None, + True, + False, + id="unit alias", + ), + ), +) +def test_parser_ingredient_match( + expected_food_name: str | None, + expected_unit_name: str | None, + expect_food_match: bool, + expect_unit_match: bool, + input: ParsedIngredient, + parsed_ingredient_data: tuple[list[IngredientFood], list[IngredientUnit]], # required so database is populated + unique_local_group_id: UUID4, +): + with session_context() as session: + parser = get_parser(RegisteredParser.brute, unique_local_group_id, session, get_locale_provider()) + parsed_ingredient = parser.find_ingredient_match(input) + + if expected_food_name: + assert parsed_ingredient.ingredient.food and parsed_ingredient.ingredient.food.name == expected_food_name + else: + assert parsed_ingredient.ingredient.food is None + + if expect_food_match: + assert isinstance(parsed_ingredient.ingredient.food, IngredientFood) + elif parsed_ingredient.ingredient.food and parsed_ingredient.ingredient.food.name: + assert isinstance(parsed_ingredient.ingredient.food, CreateIngredientFood) + else: + assert parsed_ingredient.ingredient.food is None + + if expected_unit_name: + assert parsed_ingredient.ingredient.unit and parsed_ingredient.ingredient.unit.name == expected_unit_name + else: + assert parsed_ingredient.ingredient.unit is None + + if expect_unit_match: + assert isinstance(parsed_ingredient.ingredient.unit, IngredientUnit) + elif parsed_ingredient.ingredient.unit and parsed_ingredient.ingredient.unit.name: + assert isinstance(parsed_ingredient.ingredient.unit, CreateIngredientUnit) + else: + assert parsed_ingredient.ingredient.unit is None diff --git a/tests/unit_tests/services_tests/ingredient_parser/test_nlp_parser.py b/tests/unit_tests/services_tests/ingredient_parser/test_nlp_parser.py new file mode 100644 index 000000000..e701ca071 --- /dev/null +++ b/tests/unit_tests/services_tests/ingredient_parser/test_nlp_parser.py @@ -0,0 +1,113 @@ +import 
asyncio +import re +from dataclasses import dataclass + +import pytest +from pydantic import UUID4 +from rapidfuzz import fuzz +from text_unidecode import unidecode + +from mealie.db.db_setup import session_context +from mealie.lang.providers import get_locale_provider +from mealie.services.parser_services import RegisteredParser, get_parser + + +@dataclass +class TestIngredient: + input: str + quantity: float + unit: str + food: str + comments: str + + +def normalize(val: str) -> str: + val = unidecode(val).lower().strip() + val = re.sub(r"[^a-z0-9\s]", "", val) + return val + + +@pytest.mark.parametrize( + "test_ingredient", + [ + TestIngredient("½ cup all-purpose flour", 0.5, "cup", "all-purpose flour", ""), + TestIngredient("1 ½ teaspoons ground black pepper", 1.5, "teaspoon", "black pepper", "ground"), + TestIngredient("⅔ cup unsweetened flaked coconut", 0.667, "cup", "unsweetened flaked coconut", ""), + TestIngredient("⅓ cup panko bread crumbs", 0.333, "cup", "panko bread crumbs", ""), + TestIngredient("1/8 cup all-purpose flour", 0.125, "cup", "all-purpose flour", ""), + TestIngredient("1/32 cup all-purpose flour", 0.031, "cup", "all-purpose flour", ""), + TestIngredient("1 1/2 cups chopped onion ", 1.5, "cup", "onion", "chopped"), + TestIngredient( + "2 pounds russet potatoes, peeled, and cut into 3/4-inch cubes ", + 2, + "pound", + "russet potatoes", + "peeled, and cut into 3/4 inch cubes", + ), + TestIngredient("2 teaspoons salt (to taste) ", 2, "teaspoon", "salt", "to taste"), + TestIngredient("1/2 cup", 0.5, "cup", "", ""), + ], +) +def test_nlp_parser(unique_local_group_id: UUID4, test_ingredient: TestIngredient): + with session_context() as session: + loop = asyncio.get_event_loop() + parser = get_parser(RegisteredParser.nlp, unique_local_group_id, session, get_locale_provider()) + parsed = loop.run_until_complete(parser.parse_one(test_ingredient.input)) + ing = parsed.ingredient + + assert ing.quantity == pytest.approx(test_ingredient.quantity) + if 
ing.unit: + assert ing.unit.name == test_ingredient.unit + else: + assert not test_ingredient.unit + if ing.food: + assert ing.food.name == test_ingredient.food + else: + assert not test_ingredient.food + if ing.note: + assert ing.note == test_ingredient.comments + else: + assert not test_ingredient.comments + + +@pytest.mark.parametrize( + ("source_str", "expected_str"), + [ + ( + "2 teaspoon chopped fresh or dried rosemary", + "2 teaspoon fresh rosemary or dried rosemary chopped", + ), + ( + "153 grams 00 flour (1 cup plus 1 tablespoon)", + "153 gram 00 flour or 1 cup and 1 tablespoon", + ), + ( + "153 grams all-purpose flour (1 cup plus 1 tablespoon and 2 teaspoons)", + "153 gram all-purpose flour or 1 cup plus 1 tablespoon and 2 teaspoons", + ), + ( + "2 cups chicken broth or beef broth", + "2 cup chicken broth or beef broth", + ), + ( + "2 tablespoons (30ml) vegetable oil", + "2 tablespoon vegetable oil or 30 milliliter", + ), + ( + "1 cup fresh basil or 2 tablespoons dried basil", + "1 cup fresh basil or 2 tablespoons dried basil", + ), + ], +) +@pytest.mark.asyncio +async def test_nlp_parser_keeps_all_text(unique_local_group_id: UUID4, source_str: str, expected_str: str): + with session_context() as session: + parser = get_parser(RegisteredParser.nlp, unique_local_group_id, session, get_locale_provider()) + parsed = await parser.parse_one(source_str) + + ing = parsed.ingredient + + # The parser behavior may change slightly, so we check that it's pretty close rather than exact + # fuzz.ratio returns a score from 0 - 100 where 100 is an exact match + score = fuzz.ratio(ing.display, expected_str) + assert score >= 90, f"'{ing.display}' does not sufficiently match expected '{expected_str}'" diff --git a/tests/unit_tests/services_tests/ingredient_parser/test_openai_parser.py b/tests/unit_tests/services_tests/ingredient_parser/test_openai_parser.py new file mode 100644 index 000000000..fe7ad05dc --- /dev/null +++ 
b/tests/unit_tests/services_tests/ingredient_parser/test_openai_parser.py @@ -0,0 +1,311 @@ +import asyncio +import json +from typing import cast +from unittest.mock import MagicMock + +import pytest +from pydantic import UUID4 + +from mealie.db.db_setup import session_context +from mealie.lang.providers import get_locale_provider +from mealie.schema.openai.recipe_ingredient import OpenAIIngredient, OpenAIIngredients +from mealie.schema.recipe.recipe import Recipe +from mealie.schema.recipe.recipe_ingredient import ( + CreateIngredientFood, + CreateIngredientUnit, + IngredientFood, + IngredientUnit, + ParsedIngredient, + RecipeIngredient, + SaveIngredientFood, +) +from mealie.services.openai import OpenAIService +from mealie.services.parser_services import RegisteredParser, get_parser +from tests.utils.factories import random_int, random_string +from tests.utils.fixture_schemas import TestUser + + +def test_openai_parser( + unique_local_group_id: UUID4, + parsed_ingredient_data: tuple[list[IngredientFood], list[IngredientUnit]], # required so database is populated + monkeypatch: pytest.MonkeyPatch, +): + ingredient_count = random_int(10, 20) + + async def mock_get_response(self, prompt: str, message: str, *args, **kwargs) -> OpenAIIngredients | None: + inputs = json.loads(message) + data = OpenAIIngredients( + ingredients=[ + OpenAIIngredient( + quantity=random_int(0, 10), + unit=random_string(), + food=random_string(), + note=random_string(), + ) + for _ in inputs + ] + ) + return data + + monkeypatch.setattr(OpenAIService, "get_response", mock_get_response) + + with session_context() as session: + loop = asyncio.get_event_loop() + parser = get_parser(RegisteredParser.openai, unique_local_group_id, session, get_locale_provider()) + + inputs = [random_string() for _ in range(ingredient_count)] + parsed = loop.run_until_complete(parser.parse(inputs)) + + # since OpenAI is mocked, we don't need to validate the data, we just need to make sure parsing works + # and 
that it preserves order + assert len(parsed) == ingredient_count + for input, output in zip(inputs, parsed, strict=True): + assert output.input == input + + +def test_openai_parser_sanitize_output( + unique_local_group_id: UUID4, + unique_user: TestUser, + parsed_ingredient_data: tuple[list[IngredientFood], list[IngredientUnit]], # required so database is populated + monkeypatch: pytest.MonkeyPatch, +): + async def mock_get_raw_response(self, prompt: str, content: list[dict], response_schema) -> MagicMock: + # Create data with null character in JSON to test preprocessing + data = OpenAIIngredients( + ingredients=[ + OpenAIIngredient( + quantity=random_int(0, 10), + unit="", + food="there is a null character here: \x00", + note="", + ) + ] + ) + + # Create a mock raw response which matches the OpenAI chat response format + mock_response = MagicMock() + mock_response.choices = [MagicMock()] + mock_response.choices[0].message.content = data.model_dump_json() + return mock_response + + # Mock the raw response here since we want to make sure our service executes processing before loading the model + monkeypatch.setattr(OpenAIService, "_get_raw_response", mock_get_raw_response) + + with session_context() as session: + loop = asyncio.get_event_loop() + parser = get_parser(RegisteredParser.openai, unique_local_group_id, session, get_locale_provider()) + + parsed = loop.run_until_complete(parser.parse([""])) + assert len(parsed) == 1 + parsed_ing = cast(ParsedIngredient, parsed[0]) + assert parsed_ing.ingredient.food + assert parsed_ing.ingredient.food.name == "there is a null character here: " + + # Make sure we can create a recipe with this ingredient + assert isinstance(parsed_ing.ingredient.food, CreateIngredientFood) + food = unique_user.repos.ingredient_foods.create( + parsed_ing.ingredient.food.cast(SaveIngredientFood, group_id=unique_user.group_id) + ) + parsed_ing.ingredient.food = food + unique_user.repos.recipes.create( + Recipe( + user_id=unique_user.user_id, + 
group_id=unique_user.group_id, + name=random_string(), + recipe_ingredient=[parsed_ing.ingredient], + ) + ) + + +@pytest.mark.parametrize( + "original_text,quantity,unit,food,note,qty_range,unit_range,food_range,note_range", + [ + pytest.param( + "2 cups flour", + 2.0, + "Cups", + "flour", + "", + (1.0, 1.0), + (1.0, 1.0), + (1.0, 1.0), + (1.0, 1.0), + id="perfect_match_all_components", + ), + pytest.param( + "2 cups flour", + 3.0, + "Cups", + "flour", + "", + (0.0, 0.0), + (1.0, 1.0), + (1.0, 1.0), + (1.0, 1.0), + id="quantity_mismatch", + ), + pytest.param( + "2 cups flour", + 2.0, + None, + "flour", + "", + (1.0, 1.0), + (0.4, 0.9), + (1.0, 1.0), + (1.0, 1.0), + id="missing_unit_fallback", + ), + pytest.param( + "2 cups flour", + 2.0, + "Cups", + None, + "", + (1.0, 1.0), + (1.0, 1.0), + (0.4, 0.9), + (1.0, 1.0), + id="missing_food_fallback", + ), + pytest.param( + "2 cups flour sifted fresh", + 2.0, + "Cups", + "flour", + "sifted fresh", + (1.0, 1.0), + (1.0, 1.0), + (1.0, 1.0), + (0.8, 1.0), + id="note_full_match", + ), + pytest.param( + "2 cups flour sifted", + 2.0, + "Cups", + "flour", + "sifted chopped", + (1.0, 1.0), + (1.0, 1.0), + (1.0, 1.0), + (0.4, 0.6), + id="note_partial_match", + ), + pytest.param( + "2 cups flour", + 2.0, + "Cups", + "flour", + "chopped minced", + (1.0, 1.0), + (1.0, 1.0), + (1.0, 1.0), + (0.0, 0.0), + id="note_no_match", + ), + pytest.param( + "1.5 tsp salt kosher", + 1.0, + None, + None, + "kosher fine", + (0.0, 0.0), + (0.3, 0.7), + (0.3, 0.7), + (0.4, 0.6), + id="multiple_issues", + ), + pytest.param( + "", + 1.0, + "Cups", + "flour", + "fresh", + (0.0, 0.0), + (1.0, 1.0), + (1.0, 1.0), + (0.0, 0.0), + id="empty_original_text", + ), + pytest.param( + "salt", + 0.0, + None, + "salt", + "", + (1.0, 1.0), + (1.0, 1.0), + (1.0, 1.0), + (1.0, 1.0), + id="zero_quantity_match", + ), + ], +) +def test_openai_parser_confidence( + original_text: str, + quantity: float | None, + unit: str | None, + food: str | None, + note: str, + 
qty_range: tuple[float, float], + unit_range: tuple[float, float], + food_range: tuple[float, float], + note_range: tuple[float, float], + unique_local_group_id: UUID4, + parsed_ingredient_data: tuple[list[IngredientFood], list[IngredientUnit]], # required so database is populated +): + """Test the _calculate_confidence method of OpenAIParser with various input scenarios.""" + + with session_context() as session: + from mealie.services.parser_services.openai.parser import OpenAIParser + + parser = cast( + OpenAIParser, get_parser(RegisteredParser.openai, unique_local_group_id, session, get_locale_provider()) + ) + + # Create test ingredient + ingredient = RecipeIngredient( + original_text=original_text, + quantity=quantity, + unit=CreateIngredientUnit(name=unit) if unit else None, + food=CreateIngredientFood(name=food) if food else None, + note=note if note else None, + ) + + # Calculate confidence + confidence = parser._calculate_confidence(original_text, ingredient) + + # All confidence values should be populated (not None) by the method + assert confidence.quantity is not None, "Quantity confidence should not be None" + assert confidence.unit is not None, "Unit confidence should not be None" + assert confidence.food is not None, "Food confidence should not be None" + assert confidence.comment is not None, "Comment confidence should not be None" + assert confidence.average is not None, "Average confidence should not be None" + + # Range-based assertions to handle fuzzy matching variability + qty_min, qty_max = qty_range + assert qty_min <= confidence.quantity <= qty_max, ( + f"Quantity confidence out of range: expected {qty_range}, got {confidence.quantity}" + ) + + unit_min, unit_max = unit_range + assert unit_min <= confidence.unit <= unit_max, ( + f"Unit confidence out of range: expected {unit_range}, got {confidence.unit}" + ) + + food_min, food_max = food_range + assert food_min <= confidence.food <= food_max, ( + f"Food confidence out of range: expected 
{food_range}, got {confidence.food}" + ) + + note_min, note_max = note_range + assert note_min <= confidence.comment <= note_max, ( + f"Note confidence out of range: expected {note_range}, got {confidence.comment}" + ) + + # Check that average is calculated correctly + expected_avg = (confidence.quantity + confidence.unit + confidence.food + confidence.comment) / 4 + assert abs(confidence.average - expected_avg) < 0.001, ( + f"Average confidence mismatch: expected {expected_avg}, got {confidence.average}" + ) diff --git a/tests/unit_tests/test_ingredient_parser.py b/tests/unit_tests/test_ingredient_parser.py deleted file mode 100644 index 5dfdd5940..000000000 --- a/tests/unit_tests/test_ingredient_parser.py +++ /dev/null @@ -1,737 +0,0 @@ -import asyncio -import json -from dataclasses import dataclass -from typing import cast -from unittest.mock import MagicMock - -import pytest -from pydantic import UUID4 -from sqlalchemy.orm import Session - -from mealie.db.db_setup import session_context -from mealie.repos.all_repositories import get_repositories -from mealie.repos.repository_factory import AllRepositories -from mealie.schema.openai.recipe_ingredient import OpenAIIngredient, OpenAIIngredients -from mealie.schema.recipe.recipe import Recipe -from mealie.schema.recipe.recipe_ingredient import ( - CreateIngredientFood, - CreateIngredientFoodAlias, - CreateIngredientUnit, - CreateIngredientUnitAlias, - IngredientFood, - IngredientUnit, - ParsedIngredient, - RecipeIngredient, - SaveIngredientFood, - SaveIngredientUnit, -) -from mealie.schema.user.user import GroupBase -from mealie.services.openai import OpenAIService -from mealie.services.parser_services import RegisteredParser, get_parser -from tests.utils.factories import random_int, random_string -from tests.utils.fixture_schemas import TestUser - - -@dataclass -class TestIngredient: - input: str - quantity: float - unit: str - food: str - comments: str - - -def build_parsed_ing(food: str | None, unit: str | None) 
-> ParsedIngredient: - ing = RecipeIngredient(unit=None, food=None) - if food: - ing.food = CreateIngredientFood(name=food) - if unit: - ing.unit = CreateIngredientUnit(name=unit) - - return ParsedIngredient(input=None, ingredient=ing) - - -@pytest.fixture() -def unique_local_group_id(unfiltered_database: AllRepositories) -> UUID4: - return str(unfiltered_database.groups.create(GroupBase(name=random_string())).id) - - -@pytest.fixture() -def unique_db(session: Session, unique_local_group_id: str): - return get_repositories(session, group_id=unique_local_group_id) - - -@pytest.fixture() -def parsed_ingredient_data( - unique_db: AllRepositories, unique_local_group_id: UUID4 -) -> tuple[list[IngredientFood], list[IngredientUnit]]: - foods = unique_db.ingredient_foods.create_many( - [ - SaveIngredientFood(name="potatoes", group_id=unique_local_group_id), - SaveIngredientFood(name="onion", group_id=unique_local_group_id), - SaveIngredientFood(name="green onion", group_id=unique_local_group_id), - SaveIngredientFood(name="frozen pearl onions", group_id=unique_local_group_id), - SaveIngredientFood(name="bell peppers", group_id=unique_local_group_id), - SaveIngredientFood(name="red pepper flakes", group_id=unique_local_group_id), - SaveIngredientFood(name="fresh ginger", group_id=unique_local_group_id), - SaveIngredientFood(name="ground ginger", group_id=unique_local_group_id), - SaveIngredientFood(name="ñör̃m̈ãl̈ĩz̈ẽm̈ẽ", group_id=unique_local_group_id), - SaveIngredientFood(name="PluralFoodTest", plural_name="myfoodisplural", group_id=unique_local_group_id), - SaveIngredientFood( - name="IHaveAnAlias", - group_id=unique_local_group_id, - aliases=[CreateIngredientFoodAlias(name="thisismyalias")], - ), - ] - ) - - foods.extend( - unique_db.ingredient_foods.create_many( - [ - SaveIngredientFood(name=f"{random_string()} food", group_id=unique_local_group_id) - for _ in range(random_int(10, 15)) - ] - ) - ) - - units = unique_db.ingredient_units.create_many( - [ - 
SaveIngredientUnit(name="Cups", group_id=unique_local_group_id), - SaveIngredientUnit(name="Tablespoon", group_id=unique_local_group_id), - SaveIngredientUnit(name="Teaspoon", group_id=unique_local_group_id), - SaveIngredientUnit(name="Stalk", group_id=unique_local_group_id), - SaveIngredientUnit(name="My Very Long Unit Name", abbreviation="mvlun", group_id=unique_local_group_id), - SaveIngredientUnit( - name="PluralUnitName", - plural_name="abc123", - abbreviation="doremiabc", - plural_abbreviation="doremi123", - group_id=unique_local_group_id, - ), - SaveIngredientUnit( - name="IHaveAnAliasToo", - group_id=unique_local_group_id, - aliases=[CreateIngredientUnitAlias(name="thisismyalias")], - ), - ] - ) - - units.extend( - unique_db.ingredient_foods.create_many( - [ - SaveIngredientUnit(name=f"{random_string()} unit", group_id=unique_local_group_id) - for _ in range(random_int(10, 15)) - ] - ) - ) - - return foods, units - - -@pytest.mark.parametrize( - "input, quantity, unit, food, comment", - [ - pytest.param("1 theelepel koffie", 1, "theelepel", "koffie", "", id="1 theelepel koffie"), - pytest.param("3 theelepels koffie", 3, "theelepels", "koffie", "", id="3 theelepels koffie"), - pytest.param("1 eetlepel tarwe", 1, "eetlepel", "tarwe", "", id="1 eetlepel tarwe"), - pytest.param("20 eetlepels bloem", 20, "eetlepels", "bloem", "", id="20 eetlepels bloem"), - pytest.param("1 mespunt kaneel", 1, "mespunt", "kaneel", "", id="1 mespunt kaneel"), - pytest.param("1 snuf(je) zout", 1, "snuf(je)", "zout", "", id="1 snuf(je) zout"), - pytest.param( - "2 tbsp minced cilantro, leaves and stems", - 2, - "tbsp", - "minced cilantro", - "leaves and stems", - id="2 tbsp minced cilantro, leaves and stems", - ), - pytest.param( - "1 large yellow onion, coarsely chopped", - 1, - "large", - "yellow onion", - "coarsely chopped", - id="1 large yellow onion, coarsely chopped", - ), - pytest.param("1 1/2 tsp garam masala", 1.5, "tsp", "garam masala", "", id="1 1/2 tsp garam masala"), - 
pytest.param( - "2 cups mango chunks, (2 large mangoes) (fresh or frozen)", - 2, - "Cups", - "mango chunks, (2 large mangoes)", - "fresh or frozen", - id="2 cups mango chunks, (2 large mangoes) (fresh or frozen)", - ), - pytest.param("stalk onion", 0, "Stalk", "onion", "", id="stalk onion"), - pytest.param("a stalk bell peppers", 0, "Stalk", "bell peppers", "", id="a stalk bell peppers"), - pytest.param("a tablespoon unknownFood", 0, "Tablespoon", "unknownFood", "", id="a tablespoon unknownFood"), - pytest.param( - "stalk bell peppers, cut in pieces", - 0, - "Stalk", - "bell peppers", - "cut in pieces", - id="stalk bell peppers, cut in pieces", - ), - pytest.param( - "a stalk bell peppers, cut in pieces", - 0, - "Stalk", - "bell peppers", - "cut in pieces", - id="stalk bell peppers, cut in pieces", - ), - pytest.param("red pepper flakes", 0, "", "red pepper flakes", "", id="red pepper flakes"), - pytest.param("1 bell peppers", 1, "", "bell peppers", "", id="1 bell peppers"), - pytest.param("1 stalk bell peppers", 1, "Stalk", "bell peppers", "", id="1 big stalk bell peppers"), - pytest.param("a big stalk bell peppers", 0, "Stalk", "bell peppers", "", id="a big stalk bell peppers"), - pytest.param( - "1 bell peppers, cut in pieces", 1, "", "bell peppers", "cut in pieces", id="1 bell peppers, cut in pieces" - ), - pytest.param( - "bell peppers, cut in pieces", 0, "", "bell peppers", "cut in pieces", id="bell peppers, cut in pieces" - ), - ], -) -def test_brute_parser( - unique_local_group_id: UUID4, - parsed_ingredient_data: tuple[list[IngredientFood], list[IngredientUnit]], # required so database is populated - input: str, - quantity: int | float, - unit: str, - food: str, - comment: str, -): - with session_context() as session: - loop = asyncio.get_event_loop() - parser = get_parser(RegisteredParser.brute, unique_local_group_id, session) - parsed = loop.run_until_complete(parser.parse_one(input)) - ing = parsed.ingredient - - if ing.quantity: - assert ing.quantity 
== quantity - else: - assert not quantity - if ing.unit: - assert ing.unit.name == unit - else: - assert not unit - if ing.food: - assert ing.food.name == food - else: - assert not food - if ing.note: - assert ing.note == comment - else: - assert not comment - - -@pytest.mark.parametrize( - "unit, food, expect_unit_match, expect_food_match, expected_avg", - [ - pytest.param("Cups", "potatoes", True, True, 1.0, id="all matched"), - pytest.param("Cups", "veryuniquefood", True, False, 0.75, id="unit matched only"), - pytest.param("veryuniqueunit", "potatoes", False, True, 0.75, id="food matched only"), - pytest.param("veryuniqueunit", "veryuniquefood", False, False, 0.5, id="neither matched"), - ], -) -def test_brute_parser_confidence( - unit: str, - food: str, - expect_unit_match: bool, - expect_food_match: bool, - expected_avg: float, - unique_local_group_id: UUID4, - parsed_ingredient_data: tuple[list[IngredientFood], list[IngredientUnit]], -): - input_str = f"1 {unit} {food}" - - with session_context() as session: - original_loop = asyncio.get_event_loop() - try: - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - parser = get_parser(RegisteredParser.brute, unique_local_group_id, session) - parsed = loop.run_until_complete(parser.parse_one(input_str)) - finally: - loop.close() - asyncio.set_event_loop(original_loop) - - conf = parsed.confidence - - assert conf.quantity == 1 - assert conf.comment == 1 - assert conf.unit == (1 if expect_unit_match or not unit else 0) - assert conf.food == (1 if expect_food_match or not food else 0) - assert conf.average == expected_avg - - -@pytest.mark.parametrize( - "input, expected_unit_name, expected_food_name, expect_unit_match, expect_food_match", - ( - pytest.param( - build_parsed_ing(unit="cup", food="potatoes"), - "Cups", - "potatoes", - True, - True, - id="basic match", - ), - pytest.param( # this should work in sqlite since "potato" is contained within "potatoes" - build_parsed_ing(unit="cup", 
food="potato"), - "Cups", - "potatoes", - True, - True, - id="basic fuzzy match", - ), - pytest.param( - build_parsed_ing(unit="tablespoon", food="onion"), - "Tablespoon", - "onion", - True, - True, - id="nested match 1", - ), - pytest.param( - build_parsed_ing(unit="teaspoon", food="green onion"), - "Teaspoon", - "green onion", - True, - True, - id="nested match 2", - ), - pytest.param( - build_parsed_ing(unit="cup", food="gren onion"), - "Cups", - "green onion", - True, - True, - id="nested match 3", - ), - pytest.param( - build_parsed_ing(unit="stalk", food="very unique"), - "Stalk", - "very unique", - True, - False, - id="no food match", - ), - pytest.param( - build_parsed_ing(unit="cup", food=None), - "Cups", - None, - True, - False, - id="no food input", - ), - pytest.param( - build_parsed_ing(unit="very unique", food="fresh ginger"), - "very unique", - "fresh ginger", - False, - True, - id="no unit match", - ), - pytest.param( - build_parsed_ing(unit=None, food="potatoes"), - None, - "potatoes", - False, - True, - id="no unit input", - ), - pytest.param( - build_parsed_ing(unit="very unique", food="very unique"), - "very unique", - "very unique", - False, - False, - id="no matches", - ), - pytest.param( - build_parsed_ing(unit=None, food=None), - None, - None, - False, - False, - id="no input", - ), - pytest.param( - build_parsed_ing(unit="mvlun", food="potatoes"), - "My Very Long Unit Name", - "potatoes", - True, - True, - id="unit abbreviation", - ), - pytest.param( - build_parsed_ing(unit=None, food="n̅ōr̅m̄a̅l̄i̅z̄e̅m̄e̅"), - None, - "ñör̃m̈ãl̈ĩz̈ẽm̈ẽ", - False, - True, - id="normalization", - ), - pytest.param( - build_parsed_ing(unit=None, food="myfoodisplural"), - None, - "PluralFoodTest", - False, - True, - id="plural food name", - ), - pytest.param( - build_parsed_ing(unit="abc123", food=None), - "PluralUnitName", - None, - True, - False, - id="plural unit name", - ), - pytest.param( - build_parsed_ing(unit="doremi123", food=None), - 
"PluralUnitName", - None, - True, - False, - id="plural unit abbreviation", - ), - pytest.param( - build_parsed_ing(unit=None, food="thisismyalias"), - None, - "IHaveAnAlias", - False, - True, - id="food alias", - ), - pytest.param( - build_parsed_ing(unit="thisismyalias", food=None), - "IHaveAnAliasToo", - None, - True, - False, - id="unit alias", - ), - ), -) -def test_parser_ingredient_match( - expected_food_name: str | None, - expected_unit_name: str | None, - expect_food_match: bool, - expect_unit_match: bool, - input: ParsedIngredient, - parsed_ingredient_data: tuple[list[IngredientFood], list[IngredientUnit]], # required so database is populated - unique_local_group_id: UUID4, -): - with session_context() as session: - parser = get_parser(RegisteredParser.brute, unique_local_group_id, session) - parsed_ingredient = parser.find_ingredient_match(input) - - if expected_food_name: - assert parsed_ingredient.ingredient.food and parsed_ingredient.ingredient.food.name == expected_food_name - else: - assert parsed_ingredient.ingredient.food is None - - if expect_food_match: - assert isinstance(parsed_ingredient.ingredient.food, IngredientFood) - elif parsed_ingredient.ingredient.food and parsed_ingredient.ingredient.food.name: - assert isinstance(parsed_ingredient.ingredient.food, CreateIngredientFood) - else: - assert parsed_ingredient.ingredient.food is None - - if expected_unit_name: - assert parsed_ingredient.ingredient.unit and parsed_ingredient.ingredient.unit.name == expected_unit_name - else: - assert parsed_ingredient.ingredient.unit is None - - if expect_unit_match: - assert isinstance(parsed_ingredient.ingredient.unit, IngredientUnit) - elif parsed_ingredient.ingredient.unit and parsed_ingredient.ingredient.unit.name: - assert isinstance(parsed_ingredient.ingredient.unit, CreateIngredientUnit) - else: - assert parsed_ingredient.ingredient.unit is None - - -def test_openai_parser( - unique_local_group_id: UUID4, - parsed_ingredient_data: 
tuple[list[IngredientFood], list[IngredientUnit]], # required so database is populated - monkeypatch: pytest.MonkeyPatch, -): - ingredient_count = random_int(10, 20) - - async def mock_get_response(self, prompt: str, message: str, *args, **kwargs) -> OpenAIIngredients | None: - inputs = json.loads(message) - data = OpenAIIngredients( - ingredients=[ - OpenAIIngredient( - quantity=random_int(0, 10), - unit=random_string(), - food=random_string(), - note=random_string(), - ) - for _ in inputs - ] - ) - return data - - monkeypatch.setattr(OpenAIService, "get_response", mock_get_response) - - with session_context() as session: - loop = asyncio.get_event_loop() - parser = get_parser(RegisteredParser.openai, unique_local_group_id, session) - - inputs = [random_string() for _ in range(ingredient_count)] - parsed = loop.run_until_complete(parser.parse(inputs)) - - # since OpenAI is mocked, we don't need to validate the data, we just need to make sure parsing works - # and that it preserves order - assert len(parsed) == ingredient_count - for input, output in zip(inputs, parsed, strict=True): - assert output.input == input - - -def test_openai_parser_sanitize_output( - unique_local_group_id: UUID4, - unique_user: TestUser, - parsed_ingredient_data: tuple[list[IngredientFood], list[IngredientUnit]], # required so database is populated - monkeypatch: pytest.MonkeyPatch, -): - async def mock_get_raw_response(self, prompt: str, content: list[dict], response_schema) -> MagicMock: - # Create data with null character in JSON to test preprocessing - data = OpenAIIngredients( - ingredients=[ - OpenAIIngredient( - quantity=random_int(0, 10), - unit="", - food="there is a null character here: \x00", - note="", - ) - ] - ) - - # Create a mock raw response which matches the OpenAI chat response format - mock_response = MagicMock() - mock_response.choices = [MagicMock()] - mock_response.choices[0].message.content = data.model_dump_json() - return mock_response - - # Mock the raw response 
here since we want to make sure our service executes processing before loading the model - monkeypatch.setattr(OpenAIService, "_get_raw_response", mock_get_raw_response) - - with session_context() as session: - loop = asyncio.get_event_loop() - parser = get_parser(RegisteredParser.openai, unique_local_group_id, session) - - parsed = loop.run_until_complete(parser.parse([""])) - assert len(parsed) == 1 - parsed_ing = cast(ParsedIngredient, parsed[0]) - assert parsed_ing.ingredient.food - assert parsed_ing.ingredient.food.name == "there is a null character here: " - - # Make sure we can create a recipe with this ingredient - assert isinstance(parsed_ing.ingredient.food, CreateIngredientFood) - food = unique_user.repos.ingredient_foods.create( - parsed_ing.ingredient.food.cast(SaveIngredientFood, group_id=unique_user.group_id) - ) - parsed_ing.ingredient.food = food - unique_user.repos.recipes.create( - Recipe( - user_id=unique_user.user_id, - group_id=unique_user.group_id, - name=random_string(), - recipe_ingredient=[parsed_ing.ingredient], - ) - ) - - -@pytest.mark.parametrize( - "original_text,quantity,unit,food,note,qty_range,unit_range,food_range,note_range", - [ - pytest.param( - "2 cups flour", - 2.0, - "Cups", - "flour", - "", - (1.0, 1.0), - (1.0, 1.0), - (1.0, 1.0), - (1.0, 1.0), - id="perfect_match_all_components", - ), - pytest.param( - "2 cups flour", - 3.0, - "Cups", - "flour", - "", - (0.0, 0.0), - (1.0, 1.0), - (1.0, 1.0), - (1.0, 1.0), - id="quantity_mismatch", - ), - pytest.param( - "2 cups flour", - 2.0, - None, - "flour", - "", - (1.0, 1.0), - (0.4, 0.9), - (1.0, 1.0), - (1.0, 1.0), - id="missing_unit_fallback", - ), - pytest.param( - "2 cups flour", - 2.0, - "Cups", - None, - "", - (1.0, 1.0), - (1.0, 1.0), - (0.4, 0.9), - (1.0, 1.0), - id="missing_food_fallback", - ), - pytest.param( - "2 cups flour sifted fresh", - 2.0, - "Cups", - "flour", - "sifted fresh", - (1.0, 1.0), - (1.0, 1.0), - (1.0, 1.0), - (0.8, 1.0), - id="note_full_match", - ), - 
pytest.param( - "2 cups flour sifted", - 2.0, - "Cups", - "flour", - "sifted chopped", - (1.0, 1.0), - (1.0, 1.0), - (1.0, 1.0), - (0.4, 0.6), - id="note_partial_match", - ), - pytest.param( - "2 cups flour", - 2.0, - "Cups", - "flour", - "chopped minced", - (1.0, 1.0), - (1.0, 1.0), - (1.0, 1.0), - (0.0, 0.0), - id="note_no_match", - ), - pytest.param( - "1.5 tsp salt kosher", - 1.0, - None, - None, - "kosher fine", - (0.0, 0.0), - (0.3, 0.7), - (0.3, 0.7), - (0.4, 0.6), - id="multiple_issues", - ), - pytest.param( - "", - 1.0, - "Cups", - "flour", - "fresh", - (0.0, 0.0), - (1.0, 1.0), - (1.0, 1.0), - (0.0, 0.0), - id="empty_original_text", - ), - pytest.param( - "salt", - 0.0, - None, - "salt", - "", - (1.0, 1.0), - (1.0, 1.0), - (1.0, 1.0), - (1.0, 1.0), - id="zero_quantity_match", - ), - ], -) -def test_openai_parser_confidence( - original_text: str, - quantity: float | None, - unit: str | None, - food: str | None, - note: str, - qty_range: tuple[float, float], - unit_range: tuple[float, float], - food_range: tuple[float, float], - note_range: tuple[float, float], - unique_local_group_id: UUID4, - parsed_ingredient_data: tuple[list[IngredientFood], list[IngredientUnit]], # required so database is populated -): - """Test the _calculate_confidence method of OpenAIParser with various input scenarios.""" - - with session_context() as session: - from mealie.services.parser_services.openai.parser import OpenAIParser - - parser = cast(OpenAIParser, get_parser(RegisteredParser.openai, unique_local_group_id, session)) - - # Create test ingredient - ingredient = RecipeIngredient( - original_text=original_text, - quantity=quantity, - unit=CreateIngredientUnit(name=unit) if unit else None, - food=CreateIngredientFood(name=food) if food else None, - note=note if note else None, - ) - - # Calculate confidence - confidence = parser._calculate_confidence(original_text, ingredient) - - # All confidence values should be populated (not None) by the method - assert 
confidence.quantity is not None, "Quantity confidence should not be None" - assert confidence.unit is not None, "Unit confidence should not be None" - assert confidence.food is not None, "Food confidence should not be None" - assert confidence.comment is not None, "Comment confidence should not be None" - assert confidence.average is not None, "Average confidence should not be None" - - # Range-based assertions to handle fuzzy matching variability - qty_min, qty_max = qty_range - assert qty_min <= confidence.quantity <= qty_max, ( - f"Quantity confidence out of range: expected {qty_range}, got {confidence.quantity}" - ) - - unit_min, unit_max = unit_range - assert unit_min <= confidence.unit <= unit_max, ( - f"Unit confidence out of range: expected {unit_range}, got {confidence.unit}" - ) - - food_min, food_max = food_range - assert food_min <= confidence.food <= food_max, ( - f"Food confidence out of range: expected {food_range}, got {confidence.food}" - ) - - note_min, note_max = note_range - assert note_min <= confidence.comment <= note_max, ( - f"Note confidence out of range: expected {note_range}, got {confidence.comment}" - ) - - # Check that average is calculated correctly - expected_avg = (confidence.quantity + confidence.unit + confidence.food + confidence.comment) / 4 - assert abs(confidence.average - expected_avg) < 0.001, ( - f"Average confidence mismatch: expected {expected_avg}, got {confidence.average}" - )