feat: include extra ingredients + amounts in NLP parser output (#7191)

This commit is contained in:
Michael Genson
2026-03-05 16:28:14 -06:00
committed by GitHub
parent bf11729a23
commit 77081d0482
10 changed files with 991 additions and 850 deletions

View File

@@ -17,7 +17,9 @@
"servings": "Servings",
"yield": "Yield",
"yields": "Yields"
}
},
"and-amount": "and {amount}",
"or-ingredient": "or {ingredient}"
},
"mealplan": {
"no-recipes-match-your-rules": "No recipes match your rules"

View File

@@ -12,11 +12,11 @@ router = APIRouter(prefix="/parser")
class IngredientParserController(BaseUserController):
@router.post("/ingredient", response_model=ParsedIngredient)
async def parse_ingredient(self, ingredient: IngredientRequest):
parser = get_parser(ingredient.parser, self.group_id, self.session)
parser = get_parser(ingredient.parser, self.group_id, self.session, self.translator)
response = await parser.parse([ingredient.ingredient])
return response[0]
@router.post("/ingredients", response_model=list[ParsedIngredient])
async def parse_ingredients(self, ingredients: IngredientsRequest):
parser = get_parser(ingredients.parser, self.group_id, self.session)
parser = get_parser(ingredients.parser, self.group_id, self.session, self.translator)
return await parser.parse(ingredients.ingredients)

View File

@@ -5,6 +5,7 @@ from rapidfuzz import fuzz, process
from sqlalchemy.orm import Session
from mealie.db.models.recipe.ingredient import IngredientFoodModel, IngredientUnitModel
from mealie.lang.providers import Translator
from mealie.repos.all_repositories import get_repositories
from mealie.repos.repository_factory import AllRepositories
from mealie.schema.recipe.recipe_ingredient import (
@@ -126,11 +127,14 @@ class ABCIngredientParser(ABC):
Abstract class for ingredient parsers.
"""
def __init__(self, group_id: UUID4, session: Session) -> None:
def __init__(self, group_id: UUID4, session: Session, translator: Translator) -> None:
self.group_id = group_id
self.session = session
self.data_matcher = DataMatcher(self._repos, self.food_fuzzy_match_threshold, self.unit_fuzzy_match_threshold)
self.translator = translator
self.t = self.translator.t
@property
def _repos(self) -> AllRepositories:
return get_repositories(self.session, group_id=self.group_id)

View File

@@ -1,4 +1,6 @@
from dataclasses import dataclass, field
from fractions import Fraction
from itertools import zip_longest
from ingredient_parser import parse_ingredient
from ingredient_parser.dataclasses import CompositeIngredientAmount, IngredientAmount
@@ -7,6 +9,7 @@ from pydantic import UUID4
from sqlalchemy.orm import Session
from mealie.core.root_logger import get_logger
from mealie.lang.providers import Translator
from mealie.schema.recipe import RecipeIngredient
from mealie.schema.recipe.recipe_ingredient import (
CreateIngredientFood,
@@ -70,13 +73,29 @@ class BruteForceParser(ABCIngredientParser):
return [await self.parse_one(ingredient) for ingredient in ingredients]
@dataclass
class _IngredientPart:
    """
    One amount/name pairing taken from the ingredient parser output.

    Every field defaults to an "empty" value so a part can be built first
    and filled in only with the pieces the parser actually produced.
    """

    # parsed values
    qty: float = 0
    unit: str = ""
    food: str = ""
    # remaining amounts of a composite amount, after the first one
    extra_amounts: list[IngredientAmount] = field(default_factory=list)

    # parser confidence for each parsed value
    qty_conf: float = 0
    unit_conf: float = 0
    food_conf: float = 0

    @property
    def avg_conf(self) -> float:
        """Mean of the quantity, unit and food confidences (unset values count as 0)."""
        scores = (self.qty_conf, self.unit_conf, self.food_conf)
        return sum(scores) / len(scores)
class NLPParser(ABCIngredientParser):
"""
Class for Ingredient Parser library
"""
@staticmethod
def _extract_amount(ingredient: IngredientParserParsedIngredient) -> IngredientAmount:
@classmethod
def _extract_amount(cls, ingredient: IngredientParserParsedIngredient) -> IngredientAmount:
if not (ingredient_amounts := ingredient.amount):
return IngredientAmount(
quantity=Fraction(0), quantity_max=Fraction(0), unit="", text="", confidence=0, starting_index=-1
@@ -88,8 +107,8 @@ class NLPParser(ABCIngredientParser):
return ingredient_amount
@staticmethod
def _extract_quantity(ingredient_amount: IngredientAmount) -> tuple[float, float]:
@classmethod
def _extract_quantity(cls, ingredient_amount: IngredientAmount) -> tuple[float, float]:
confidence = ingredient_amount.confidence
if isinstance(ingredient_amount.quantity, str):
@@ -103,27 +122,19 @@ class NLPParser(ABCIngredientParser):
return qty, confidence
@staticmethod
def _extract_unit(ingredient_amount: IngredientAmount) -> tuple[str, float]:
@classmethod
def _extract_unit(cls, ingredient_amount: IngredientAmount) -> tuple[str, float]:
confidence = ingredient_amount.confidence
unit = str(ingredient_amount.unit) if ingredient_amount.unit else ""
return unit, confidence
@staticmethod
def _extract_food(ingredient: IngredientParserParsedIngredient) -> tuple[str, float]:
if not ingredient.name:
return "", 0
ingredient_name = ingredient.name[0]
confidence = ingredient_name.confidence
food = ingredient_name.text
return food, confidence
@staticmethod
def _extract_note(ingredient: IngredientParserParsedIngredient) -> tuple[str, float]:
@classmethod
def _extract_note(
cls, ingredient: IngredientParserParsedIngredient, extra_amounts: list[IngredientAmount] | None = None
) -> tuple[str, float]:
confidences: list[float] = []
note_parts: list[str] = []
if ingredient.size:
note_parts.append(ingredient.size.text)
confidences.append(ingredient.size.confidence)
@@ -139,45 +150,103 @@ class NLPParser(ABCIngredientParser):
# average confidence among all note parts
confidence = sum(confidences) / len(confidences) if confidences else 0
note = ", ".join(note_parts)
note = note.replace("(", "").replace(")", "")
# insert extra amounts to the front of the notes with parenthesis
if extra_amounts:
amt_part = "(" + ", ".join([amount.text for amount in extra_amounts]) + ")"
note = " ".join(filter(None, [amt_part, note]))
return note, confidence
def _convert_ingredient(self, ingredient: IngredientParserParsedIngredient) -> ParsedIngredient:
ingredient_amount = self._extract_amount(ingredient)
qty, qty_conf = self._extract_quantity(ingredient_amount)
unit, unit_conf = self._extract_unit(ingredient_amount)
food, food_conf = self._extract_food(ingredient)
note, note_conf = self._extract_note(ingredient)
ing_parts: list[_IngredientPart] = []
for amount, ing_name in zip_longest(ingredient.amount, ingredient.name, fillvalue=None):
part = _IngredientPart()
if amount:
if isinstance(amount, CompositeIngredientAmount):
part.extra_amounts = list(amount.amounts[1:])
amount = amount.amounts[0]
part.qty, part.qty_conf = self._extract_quantity(amount)
part.unit, part.unit_conf = self._extract_unit(amount)
if ing_name:
part.food = ing_name.text
part.food_conf = ing_name.confidence
ing_parts.append(part)
note, note_conf = self._extract_note(ingredient, ing_parts[0].extra_amounts if ing_parts else None)
# Safeguard in case the parser outputs nothing
if not ing_parts:
ing_parts.append(_IngredientPart())
# average confidence for components which were parsed
# uses ing_parts[0] since this is the primary ingredient
primary = ing_parts[0]
confidences: list[float] = []
if qty:
confidences.append(qty_conf)
if unit:
confidences.append(unit_conf)
if food:
confidences.append(food_conf)
if primary.qty:
confidences.append(primary.qty_conf)
if primary.unit:
confidences.append(primary.unit_conf)
if primary.food:
confidences.append(primary.food_conf)
if note:
confidences.append(note_conf)
if len(ing_parts) > 1:
confidences.extend([part.avg_conf for part in ing_parts[1:]])
recipe_ingredients: list[RecipeIngredient] = []
for i, part in enumerate(ing_parts):
if not i:
ing_note = note
elif part.extra_amounts:
# TODO: handle extra amounts when we add support for them
# For now, just add them as a note ("and amt_1, and amt_2, and ...")
ing_note = ", ".join(self.t("recipe.and-amount", amount=a.text) for a in part.extra_amounts)
else:
ing_note = None
recipe_ingredients.append(
RecipeIngredient(
quantity=part.qty,
unit=CreateIngredientUnit(name=part.unit) if part.unit else None,
food=CreateIngredientFood(name=part.food) if part.food else None,
note=ing_note,
)
)
primary_ingredient = recipe_ingredients[0] # there will always be at least one recipe ingredient
extra_ingredients = recipe_ingredients[1:] if len(recipe_ingredients) > 1 else []
# TODO: handle extra ingredients when we support them
# For now, just add them to the note ("or ing_1, or ing_2, or ...")
if extra_ingredients:
extras_note_parts = [
self.t("recipe.or-ingredient", ingredient=extra_ing.display) for extra_ing in extra_ingredients
]
extras_note = ", ".join(extras_note_parts)
primary_ingredient.note = " ".join(filter(None, [extras_note, primary_ingredient.note]))
# re-calculate display property since we modified the note
primary_ingredient.display = primary_ingredient._format_display()
parsed_ingredient = ParsedIngredient(
input=ingredient.sentence,
confidence=IngredientConfidence(
average=(sum(confidences) / len(confidences)) if confidences else 0,
quantity=qty_conf,
unit=unit_conf,
food=food_conf,
quantity=primary.qty_conf,
unit=primary.unit_conf,
food=primary.food_conf,
comment=note_conf,
),
ingredient=RecipeIngredient(
title="",
quantity=qty,
unit=CreateIngredientUnit(name=unit) if unit else None,
food=CreateIngredientFood(name=food) if food else None,
note=note,
),
ingredient=primary_ingredient,
)
return self.find_ingredient_match(parsed_ingredient)
@@ -197,9 +266,11 @@ __registrar: dict[RegisteredParser, type[ABCIngredientParser]] = {
}
def get_parser(parser: RegisteredParser, group_id: UUID4, session: Session) -> ABCIngredientParser:
def get_parser(
parser: RegisteredParser, group_id: UUID4, session: Session, translator: Translator
) -> ABCIngredientParser:
"""
get_parser returns an ingredient parser based on the string enum value
passed in.
"""
return __registrar.get(parser, NLPParser)(group_id, session)
return __registrar.get(parser, NLPParser)(group_id, session, translator)

View File

@@ -1,67 +0,0 @@
import pytest
from fastapi.testclient import TestClient
from mealie.schema.recipe.recipe_ingredient import RegisteredParser
from tests.unit_tests.test_ingredient_parser import TestIngredient
from tests.utils import api_routes
from tests.utils.fixture_schemas import TestUser
# Expected NLP parser results shared by the single- and multi-ingredient tests in this module.
# Each entry is passed positionally; the tests read its input, quantity, unit, food and comments attributes.
nlp_test_ingredients = [
TestIngredient("½ cup all-purpose flour", 0.5, "cup", "all-purpose flour", ""),
TestIngredient("1½ teaspoons ground black pepper", 1.5, "teaspoon", "black pepper", "ground"),
TestIngredient("⅔ cup unsweetened flaked coconut", 0.667, "cup", "unsweetened flaked coconut", ""),
TestIngredient("⅓ cup panko bread crumbs", 0.333, "cup", "panko bread crumbs", ""),
TestIngredient("1/8 cup all-purpose flour", 0.125, "cup", "all-purpose flour", ""),
TestIngredient("1/32 cup all-purpose flour", 0.031, "cup", "all-purpose flour", ""),
TestIngredient("1 1/2 cups chopped onion ", 1.5, "cup", "onion", "chopped"),
TestIngredient(
"2 pounds russet potatoes, peeled, and cut into 3/4-inch cubes ",
2,
"pound",
"russet potatoes",
"peeled, and cut into 3/4 inch cubes",
),
TestIngredient("2 tablespoons (30ml) vegetable oil ", 2, "tablespoon", "vegetable oil", ""),
TestIngredient("2 teaspoons salt (to taste) ", 2, "teaspoon", "salt", "to taste"),
TestIngredient("2 cups chicken broth or beef broth ", 2, "cup", "chicken broth", ""),
TestIngredient("1/2 cup", 0.5, "cup", "", ""),
]
def assert_ingredient(api_response: dict, test_ingredient: TestIngredient):
    """
    Assert that a parsed-ingredient API payload matches the expected test case.

    A missing unit or food in the payload is compared as an empty string.
    """
    ingredient = api_response["ingredient"]

    actual_quantity = ingredient["quantity"]
    actual_unit = ingredient["unit"]["name"] if ingredient["unit"] else ""
    actual_food = ingredient["food"]["name"] if ingredient["food"] else ""
    actual_note = ingredient["note"]

    assert actual_quantity == pytest.approx(test_ingredient.quantity)
    assert actual_unit == test_ingredient.unit
    assert actual_food == test_ingredient.food
    assert actual_note == test_ingredient.comments
@pytest.mark.parametrize("test_ingredient", nlp_test_ingredients)
def test_recipe_ingredient_parser_nlp(api_client: TestClient, test_ingredient: TestIngredient, unique_user: TestUser):
    """Post one ingredient string to the parser route and compare the result with the expected case."""
    request_body = {"parser": RegisteredParser.nlp, "ingredient": test_ingredient.input}
    response = api_client.post(
        api_routes.parser_ingredient,
        json=request_body,
        headers=unique_user.token,
    )

    assert response.status_code == 200
    assert_ingredient(response.json(), test_ingredient)
def test_recipe_ingredients_parser_nlp(api_client: TestClient, unique_user: TestUser):
    """
    Post every NLP test input in one request and check each parsed result, in order,
    against its expected case.
    """
    payload = {"parser": RegisteredParser.nlp, "ingredients": [x.input for x in nlp_test_ingredients]}
    response = api_client.post(api_routes.parser_ingredients, json=payload, headers=unique_user.token)
    assert response.status_code == 200

    # strict=True so a response with too few (or too many) results fails the test
    # instead of being silently truncated by zip
    for api_ingredient, test_ingredient in zip(response.json(), nlp_test_ingredients, strict=True):
        assert_ingredient(api_ingredient, test_ingredient)
@pytest.mark.skip("TODO: Implement")
def test_recipe_ingredient_parser_brute(api_client: TestClient):
    """Skipped placeholder; no assertions are implemented yet."""
@pytest.mark.skip("TODO: Implement")
def test_recipe_ingredients_parser_brute(api_client: TestClient):
    """Skipped placeholder; no assertions are implemented yet."""

View File

@@ -0,0 +1,93 @@
import pytest
from pydantic import UUID4
from sqlalchemy.orm import Session
from mealie.repos.all_repositories import get_repositories
from mealie.repos.repository_factory import AllRepositories
from mealie.schema.recipe.recipe_ingredient import (
CreateIngredientFoodAlias,
CreateIngredientUnitAlias,
IngredientFood,
IngredientUnit,
SaveIngredientFood,
SaveIngredientUnit,
)
from mealie.schema.user.user import GroupBase
from tests.utils.factories import random_int, random_string
@pytest.fixture()
def unique_local_group_id(unfiltered_database: AllRepositories) -> UUID4:
    """Create a group with a random name and return its id converted with ``str()``."""
    new_group = unfiltered_database.groups.create(GroupBase(name=random_string()))
    return str(new_group.id)
@pytest.fixture()
def unique_db(session: Session, unique_local_group_id: str):
    """Return the repositories for ``session``, scoped to the group created for this test."""
    group_repos = get_repositories(session, group_id=unique_local_group_id)
    return group_repos
@pytest.fixture()
def parsed_ingredient_data(
    unique_db: AllRepositories, unique_local_group_id: UUID4
) -> tuple[list[IngredientFood], list[IngredientUnit]]:
    """
    Populate the test group with foods and units for the parser matching tests.

    Creates a fixed set of named foods and units (including plural names,
    abbreviations and aliases) plus 10-15 randomly named entries of each kind.

    Returns:
        ``(foods, units)`` - everything that was created, fixed entries first.
    """
    foods = unique_db.ingredient_foods.create_many(
        [
            SaveIngredientFood(name="potatoes", group_id=unique_local_group_id),
            SaveIngredientFood(name="onion", group_id=unique_local_group_id),
            SaveIngredientFood(name="green onion", group_id=unique_local_group_id),
            SaveIngredientFood(name="frozen pearl onions", group_id=unique_local_group_id),
            SaveIngredientFood(name="bell peppers", group_id=unique_local_group_id),
            SaveIngredientFood(name="red pepper flakes", group_id=unique_local_group_id),
            SaveIngredientFood(name="fresh ginger", group_id=unique_local_group_id),
            SaveIngredientFood(name="ground ginger", group_id=unique_local_group_id),
            SaveIngredientFood(name="ñör̃m̈ãl̈ĩz̈ẽm̈ẽ", group_id=unique_local_group_id),
            SaveIngredientFood(name="PluralFoodTest", plural_name="myfoodisplural", group_id=unique_local_group_id),
            SaveIngredientFood(
                name="IHaveAnAlias",
                group_id=unique_local_group_id,
                aliases=[CreateIngredientFoodAlias(name="thisismyalias")],
            ),
        ]
    )

    # random filler foods
    foods.extend(
        unique_db.ingredient_foods.create_many(
            [
                SaveIngredientFood(name=f"{random_string()} food", group_id=unique_local_group_id)
                for _ in range(random_int(10, 15))
            ]
        )
    )

    units = unique_db.ingredient_units.create_many(
        [
            SaveIngredientUnit(name="Cups", group_id=unique_local_group_id),
            SaveIngredientUnit(name="Tablespoon", group_id=unique_local_group_id),
            SaveIngredientUnit(name="Teaspoon", group_id=unique_local_group_id),
            SaveIngredientUnit(name="Stalk", group_id=unique_local_group_id),
            SaveIngredientUnit(name="My Very Long Unit Name", abbreviation="mvlun", group_id=unique_local_group_id),
            SaveIngredientUnit(
                name="PluralUnitName",
                plural_name="abc123",
                abbreviation="doremiabc",
                plural_abbreviation="doremi123",
                group_id=unique_local_group_id,
            ),
            SaveIngredientUnit(
                name="IHaveAnAliasToo",
                group_id=unique_local_group_id,
                aliases=[CreateIngredientUnitAlias(name="thisismyalias")],
            ),
        ]
    )

    # random filler units; these must go through the unit repository so that the
    # returned list really contains units rather than rows created by the food repository
    units.extend(
        unique_db.ingredient_units.create_many(
            [
                SaveIngredientUnit(name=f"{random_string()} unit", group_id=unique_local_group_id)
                for _ in range(random_int(10, 15))
            ]
        )
    )

    return foods, units

View File

@@ -0,0 +1,351 @@
import asyncio
import pytest
from pydantic import UUID4
from mealie.db.db_setup import session_context
from mealie.lang.providers import get_locale_provider
from mealie.schema.recipe.recipe_ingredient import (
CreateIngredientFood,
CreateIngredientUnit,
IngredientFood,
IngredientUnit,
ParsedIngredient,
RecipeIngredient,
)
from mealie.services.parser_services import RegisteredParser, get_parser
def build_parsed_ing(food: str | None, unit: str | None) -> ParsedIngredient:
    """
    Build a ParsedIngredient with no input text for the matching tests.

    The food and unit are only attached (as "create" schemas) when a
    non-empty name is given; otherwise they stay ``None``.
    """
    recipe_ingredient = RecipeIngredient(unit=None, food=None)

    # assigned after construction, in the same order as before
    if food:
        recipe_ingredient.food = CreateIngredientFood(name=food)
    if unit:
        recipe_ingredient.unit = CreateIngredientUnit(name=unit)

    return ParsedIngredient(input=None, ingredient=recipe_ingredient)
@pytest.mark.parametrize(
    "input, quantity, unit, food, comment",
    [
        pytest.param("1 theelepel koffie", 1, "theelepel", "koffie", "", id="1 theelepel koffie"),
        pytest.param("3 theelepels koffie", 3, "theelepels", "koffie", "", id="3 theelepels koffie"),
        pytest.param("1 eetlepel tarwe", 1, "eetlepel", "tarwe", "", id="1 eetlepel tarwe"),
        pytest.param("20 eetlepels bloem", 20, "eetlepels", "bloem", "", id="20 eetlepels bloem"),
        pytest.param("1 mespunt kaneel", 1, "mespunt", "kaneel", "", id="1 mespunt kaneel"),
        pytest.param("1 snuf(je) zout", 1, "snuf(je)", "zout", "", id="1 snuf(je) zout"),
        pytest.param(
            "2 tbsp minced cilantro, leaves and stems",
            2,
            "tbsp",
            "minced cilantro",
            "leaves and stems",
            id="2 tbsp minced cilantro, leaves and stems",
        ),
        pytest.param(
            "1 large yellow onion, coarsely chopped",
            1,
            "large",
            "yellow onion",
            "coarsely chopped",
            id="1 large yellow onion, coarsely chopped",
        ),
        pytest.param("1 1/2 tsp garam masala", 1.5, "tsp", "garam masala", "", id="1 1/2 tsp garam masala"),
        pytest.param(
            "2 cups mango chunks, (2 large mangoes) (fresh or frozen)",
            2,
            "Cups",
            "mango chunks, (2 large mangoes)",
            "fresh or frozen",
            id="2 cups mango chunks, (2 large mangoes) (fresh or frozen)",
        ),
        pytest.param("stalk onion", 0, "Stalk", "onion", "", id="stalk onion"),
        pytest.param("a stalk bell peppers", 0, "Stalk", "bell peppers", "", id="a stalk bell peppers"),
        pytest.param("a tablespoon unknownFood", 0, "Tablespoon", "unknownFood", "", id="a tablespoon unknownFood"),
        pytest.param(
            "stalk bell peppers, cut in pieces",
            0,
            "Stalk",
            "bell peppers",
            "cut in pieces",
            id="stalk bell peppers, cut in pieces",
        ),
        pytest.param(
            "a stalk bell peppers, cut in pieces",
            0,
            "Stalk",
            "bell peppers",
            "cut in pieces",
            id="a stalk bell peppers, cut in pieces",
        ),
        pytest.param("red pepper flakes", 0, "", "red pepper flakes", "", id="red pepper flakes"),
        pytest.param("1 bell peppers", 1, "", "bell peppers", "", id="1 bell peppers"),
        # the id mirrors the input string, like every other case in this table
        pytest.param("1 stalk bell peppers", 1, "Stalk", "bell peppers", "", id="1 stalk bell peppers"),
        pytest.param("a big stalk bell peppers", 0, "Stalk", "bell peppers", "", id="a big stalk bell peppers"),
        pytest.param(
            "1 bell peppers, cut in pieces", 1, "", "bell peppers", "cut in pieces", id="1 bell peppers, cut in pieces"
        ),
        pytest.param(
            "bell peppers, cut in pieces", 0, "", "bell peppers", "cut in pieces", id="bell peppers, cut in pieces"
        ),
    ],
)
def test_brute_parser(
    unique_local_group_id: UUID4,
    parsed_ingredient_data: tuple[list[IngredientFood], list[IngredientUnit]],  # required so database is populated
    input: str,
    quantity: int | float,
    unit: str,
    food: str,
    comment: str,
):
    """
    Parse ``input`` with the brute-force parser and compare quantity, unit,
    food and note with the expected values.

    For each component, a falsy parsed value is accepted only when the
    expected value is falsy too (``0`` or ``""``).
    """
    with session_context() as session:
        loop = asyncio.get_event_loop()
        parser = get_parser(RegisteredParser.brute, unique_local_group_id, session, get_locale_provider())
        parsed = loop.run_until_complete(parser.parse_one(input))
        ing = parsed.ingredient

        if ing.quantity:
            assert ing.quantity == quantity
        else:
            assert not quantity
        if ing.unit:
            assert ing.unit.name == unit
        else:
            assert not unit
        if ing.food:
            assert ing.food.name == food
        else:
            assert not food
        if ing.note:
            assert ing.note == comment
        else:
            assert not comment
@pytest.mark.parametrize(
    "unit, food, expect_unit_match, expect_food_match, expected_avg",
    [
        pytest.param("Cups", "potatoes", True, True, 1.0, id="all matched"),
        pytest.param("Cups", "veryuniquefood", True, False, 0.75, id="unit matched only"),
        pytest.param("veryuniqueunit", "potatoes", False, True, 0.75, id="food matched only"),
        pytest.param("veryuniqueunit", "veryuniquefood", False, False, 0.5, id="neither matched"),
    ],
)
def test_brute_parser_confidence(
    unit: str,
    food: str,
    expect_unit_match: bool,
    expect_food_match: bool,
    expected_avg: float,
    unique_local_group_id: UUID4,
    parsed_ingredient_data: tuple[list[IngredientFood], list[IngredientUnit]],
):
    """
    Parse ``"1 <unit> <food>"`` with the brute-force parser and check the
    per-component and average confidence values.

    The parse runs on a dedicated event loop; the previously current loop is
    restored afterwards.
    """
    input_str = f"1 {unit} {food}"

    with session_context() as session:
        original_loop = asyncio.get_event_loop()
        # create the loop before entering ``try`` so ``finally`` never touches an
        # unbound name (which would mask the real error) if creation fails
        loop = asyncio.new_event_loop()
        try:
            asyncio.set_event_loop(loop)
            parser = get_parser(RegisteredParser.brute, unique_local_group_id, session, get_locale_provider())
            parsed = loop.run_until_complete(parser.parse_one(input_str))
        finally:
            loop.close()
            asyncio.set_event_loop(original_loop)

        conf = parsed.confidence

        assert conf.quantity == 1
        assert conf.comment == 1
        assert conf.unit == (1 if expect_unit_match or not unit else 0)
        assert conf.food == (1 if expect_food_match or not food else 0)
        assert conf.average == expected_avg
# Each case: a ParsedIngredient built from raw unit/food names, the unit and food names expected
# after matching, and whether each is expected to resolve to a stored unit/food.
# Values are bound to the test function by parameter name, so the argument order in the
# signature below does not need to follow the order of this name list.
@pytest.mark.parametrize(
"input, expected_unit_name, expected_food_name, expect_unit_match, expect_food_match",
(
pytest.param(
build_parsed_ing(unit="cup", food="potatoes"),
"Cups",
"potatoes",
True,
True,
id="basic match",
),
pytest.param( # this should work in sqlite since "potato" is contained within "potatoes"
build_parsed_ing(unit="cup", food="potato"),
"Cups",
"potatoes",
True,
True,
id="basic fuzzy match",
),
pytest.param(
build_parsed_ing(unit="tablespoon", food="onion"),
"Tablespoon",
"onion",
True,
True,
id="nested match 1",
),
pytest.param(
build_parsed_ing(unit="teaspoon", food="green onion"),
"Teaspoon",
"green onion",
True,
True,
id="nested match 2",
),
pytest.param(
build_parsed_ing(unit="cup", food="gren onion"),
"Cups",
"green onion",
True,
True,
id="nested match 3",
),
pytest.param(
build_parsed_ing(unit="stalk", food="very unique"),
"Stalk",
"very unique",
True,
False,
id="no food match",
),
pytest.param(
build_parsed_ing(unit="cup", food=None),
"Cups",
None,
True,
False,
id="no food input",
),
pytest.param(
build_parsed_ing(unit="very unique", food="fresh ginger"),
"very unique",
"fresh ginger",
False,
True,
id="no unit match",
),
pytest.param(
build_parsed_ing(unit=None, food="potatoes"),
None,
"potatoes",
False,
True,
id="no unit input",
),
pytest.param(
build_parsed_ing(unit="very unique", food="very unique"),
"very unique",
"very unique",
False,
False,
id="no matches",
),
pytest.param(
build_parsed_ing(unit=None, food=None),
None,
None,
False,
False,
id="no input",
),
pytest.param(
build_parsed_ing(unit="mvlun", food="potatoes"),
"My Very Long Unit Name",
"potatoes",
True,
True,
id="unit abbreviation",
),
pytest.param(
build_parsed_ing(unit=None, food="n̅ōr̅m̄a̅l̄i̅z̄e̅m̄e̅"),
None,
"ñör̃m̈ãl̈ĩz̈ẽm̈ẽ",
False,
True,
id="normalization",
),
pytest.param(
build_parsed_ing(unit=None, food="myfoodisplural"),
None,
"PluralFoodTest",
False,
True,
id="plural food name",
),
pytest.param(
build_parsed_ing(unit="abc123", food=None),
"PluralUnitName",
None,
True,
False,
id="plural unit name",
),
pytest.param(
build_parsed_ing(unit="doremi123", food=None),
"PluralUnitName",
None,
True,
False,
id="plural unit abbreviation",
),
pytest.param(
build_parsed_ing(unit=None, food="thisismyalias"),
None,
"IHaveAnAlias",
False,
True,
id="food alias",
),
pytest.param(
build_parsed_ing(unit="thisismyalias", food=None),
"IHaveAnAliasToo",
None,
True,
False,
id="unit alias",
),
),
)
def test_parser_ingredient_match(
expected_food_name: str | None,
expected_unit_name: str | None,
expect_food_match: bool,
expect_unit_match: bool,
input: ParsedIngredient,
parsed_ingredient_data: tuple[list[IngredientFood], list[IngredientUnit]], # required so database is populated
unique_local_group_id: UUID4,
):
with session_context() as session:
# find_ingredient_match is called on a brute parser instance; no text parsing happens in this test
parser = get_parser(RegisteredParser.brute, unique_local_group_id, session, get_locale_provider())
parsed_ingredient = parser.find_ingredient_match(input)
# food name: must equal the expected name, or there must be no food at all
if expected_food_name:
assert parsed_ingredient.ingredient.food and parsed_ingredient.ingredient.food.name == expected_food_name
else:
assert parsed_ingredient.ingredient.food is None
# food type: IngredientFood when a match is expected, otherwise still a CreateIngredientFood (or None)
if expect_food_match:
assert isinstance(parsed_ingredient.ingredient.food, IngredientFood)
elif parsed_ingredient.ingredient.food and parsed_ingredient.ingredient.food.name:
assert isinstance(parsed_ingredient.ingredient.food, CreateIngredientFood)
else:
assert parsed_ingredient.ingredient.food is None
# unit name: must equal the expected name, or there must be no unit at all
if expected_unit_name:
assert parsed_ingredient.ingredient.unit and parsed_ingredient.ingredient.unit.name == expected_unit_name
else:
assert parsed_ingredient.ingredient.unit is None
# unit type: IngredientUnit when a match is expected, otherwise still a CreateIngredientUnit (or None)
if expect_unit_match:
assert isinstance(parsed_ingredient.ingredient.unit, IngredientUnit)
elif parsed_ingredient.ingredient.unit and parsed_ingredient.ingredient.unit.name:
assert isinstance(parsed_ingredient.ingredient.unit, CreateIngredientUnit)
else:
assert parsed_ingredient.ingredient.unit is None

View File

@@ -0,0 +1,113 @@
import asyncio
import re
from dataclasses import dataclass
import pytest
from pydantic import UUID4
from rapidfuzz import fuzz
from text_unidecode import unidecode
from mealie.db.db_setup import session_context
from mealie.lang.providers import get_locale_provider
from mealie.services.parser_services import RegisteredParser, get_parser
@dataclass
class TestIngredient:
    """Expected parser result for one raw ingredient string."""

    # The name starts with "Test", so pytest would otherwise try to collect this
    # data holder as a test class and warn about its generated __init__.
    __test__ = False

    input: str  # raw ingredient text handed to the parser
    quantity: float  # expected quantity
    unit: str  # expected unit name ("" when none is expected)
    food: str  # expected food name ("" when none is expected)
    comments: str  # expected note ("" when none is expected)
def normalize(val: str) -> str:
    """
    Return ``val`` passed through ``unidecode``, lower-cased and stripped, with every
    character other than a-z, 0-9 and whitespace removed.
    """
    simplified = unidecode(val).lower().strip()
    return re.sub(r"[^a-z0-9\s]", "", simplified)
@pytest.mark.parametrize(
    "test_ingredient",
    [
        TestIngredient("½ cup all-purpose flour", 0.5, "cup", "all-purpose flour", ""),
        TestIngredient("1 ½ teaspoons ground black pepper", 1.5, "teaspoon", "black pepper", "ground"),
        TestIngredient("⅔ cup unsweetened flaked coconut", 0.667, "cup", "unsweetened flaked coconut", ""),
        TestIngredient("⅓ cup panko bread crumbs", 0.333, "cup", "panko bread crumbs", ""),
        TestIngredient("1/8 cup all-purpose flour", 0.125, "cup", "all-purpose flour", ""),
        TestIngredient("1/32 cup all-purpose flour", 0.031, "cup", "all-purpose flour", ""),
        TestIngredient("1 1/2 cups chopped onion ", 1.5, "cup", "onion", "chopped"),
        TestIngredient(
            "2 pounds russet potatoes, peeled, and cut into 3/4-inch cubes ",
            2,
            "pound",
            "russet potatoes",
            "peeled, and cut into 3/4 inch cubes",
        ),
        TestIngredient("2 teaspoons salt (to taste) ", 2, "teaspoon", "salt", "to taste"),
        TestIngredient("1/2 cup", 0.5, "cup", "", ""),
    ],
)
def test_nlp_parser(unique_local_group_id: UUID4, test_ingredient: TestIngredient):
    """
    Parse the input with the NLP parser and compare quantity, unit, food and note
    with the expected case.

    A missing unit, food or note is accepted only when the expected value is empty.
    """
    with session_context() as session:
        event_loop = asyncio.get_event_loop()
        nlp_parser = get_parser(RegisteredParser.nlp, unique_local_group_id, session, get_locale_provider())
        result = event_loop.run_until_complete(nlp_parser.parse_one(test_ingredient.input))
        parsed_ing = result.ingredient

        assert parsed_ing.quantity == pytest.approx(test_ingredient.quantity)

        if not parsed_ing.unit:
            assert not test_ingredient.unit
        else:
            assert parsed_ing.unit.name == test_ingredient.unit

        if not parsed_ing.food:
            assert not test_ingredient.food
        else:
            assert parsed_ing.food.name == test_ingredient.food

        if not parsed_ing.note:
            assert not test_ingredient.comments
        else:
            assert parsed_ing.note == test_ingredient.comments
# Each case pairs a raw ingredient string with the display text expected from the parsed ingredient.
# The inputs contain alternative foods ("or ...") and additional or alternative amounts.
@pytest.mark.parametrize(
("source_str", "expected_str"),
[
(
"2 teaspoon chopped fresh or dried rosemary",
"2 teaspoon fresh rosemary or dried rosemary chopped",
),
(
"153 grams 00 flour (1 cup plus 1 tablespoon)",
"153 gram 00 flour or 1 cup and 1 tablespoon",
),
(
"153 grams all-purpose flour (1 cup plus 1 tablespoon and 2 teaspoons)",
"153 gram all-purpose flour or 1 cup plus 1 tablespoon and 2 teaspoons",
),
(
"2 cups chicken broth or beef broth",
"2 cup chicken broth or beef broth",
),
(
"2 tablespoons (30ml) vegetable oil",
"2 tablespoon vegetable oil or 30 milliliter",
),
(
"1 cup fresh basil or 2 tablespoons dried basil",
"1 cup fresh basil or 2 tablespoons dried basil",
),
],
)
@pytest.mark.asyncio
async def test_nlp_parser_keeps_all_text(unique_local_group_id: UUID4, source_str: str, expected_str: str):
with session_context() as session:
parser = get_parser(RegisteredParser.nlp, unique_local_group_id, session, get_locale_provider())
parsed = await parser.parse_one(source_str)
ing = parsed.ingredient
# The parser behavior may change slightly, so we check that it's pretty close rather than exact.
# fuzz.ratio returns a similarity score from 0 - 100 where 100 is an exact match
score = fuzz.ratio(ing.display, expected_str)
assert score >= 90, f"'{ing.display}' does not sufficiently match expected '{expected_str}'"

View File

@@ -0,0 +1,311 @@
import asyncio
import json
from typing import cast
from unittest.mock import MagicMock
import pytest
from pydantic import UUID4
from mealie.db.db_setup import session_context
from mealie.lang.providers import get_locale_provider
from mealie.schema.openai.recipe_ingredient import OpenAIIngredient, OpenAIIngredients
from mealie.schema.recipe.recipe import Recipe
from mealie.schema.recipe.recipe_ingredient import (
CreateIngredientFood,
CreateIngredientUnit,
IngredientFood,
IngredientUnit,
ParsedIngredient,
RecipeIngredient,
SaveIngredientFood,
)
from mealie.services.openai import OpenAIService
from mealie.services.parser_services import RegisteredParser, get_parser
from tests.utils.factories import random_int, random_string
from tests.utils.fixture_schemas import TestUser
def test_openai_parser(
    unique_local_group_id: UUID4,
    parsed_ingredient_data: tuple[list[IngredientFood], list[IngredientUnit]],  # required so database is populated
    monkeypatch: pytest.MonkeyPatch,
):
    """
    Run the OpenAI parser against a mocked ``OpenAIService.get_response`` and check
    that one result comes back per input, in the original order.
    """
    ingredient_count = random_int(10, 20)

    async def mock_get_response(self, prompt: str, message: str, *args, **kwargs) -> OpenAIIngredients | None:
        # answer with one randomly filled ingredient per entry of the JSON message
        fake_ingredients = []
        for _ in json.loads(message):
            fake_ingredients.append(
                OpenAIIngredient(
                    quantity=random_int(0, 10),
                    unit=random_string(),
                    food=random_string(),
                    note=random_string(),
                )
            )
        return OpenAIIngredients(ingredients=fake_ingredients)

    monkeypatch.setattr(OpenAIService, "get_response", mock_get_response)

    with session_context() as session:
        event_loop = asyncio.get_event_loop()
        openai_parser = get_parser(RegisteredParser.openai, unique_local_group_id, session, get_locale_provider())
        raw_inputs = [random_string() for _ in range(ingredient_count)]
        results = event_loop.run_until_complete(openai_parser.parse(raw_inputs))

        # since OpenAI is mocked, we don't need to validate the data, we just need to make sure parsing works
        # and that it preserves order
        assert len(results) == ingredient_count
        for raw, result in zip(raw_inputs, results, strict=True):
            assert result.input == raw
# Checks that a null character in the mocked raw OpenAI response is removed from the parsed food name,
# and that the resulting food and a recipe using it can be created through the repositories.
def test_openai_parser_sanitize_output(
unique_local_group_id: UUID4,
unique_user: TestUser,
parsed_ingredient_data: tuple[list[IngredientFood], list[IngredientUnit]], # required so database is populated
monkeypatch: pytest.MonkeyPatch,
):
async def mock_get_raw_response(self, prompt: str, content: list[dict], response_schema) -> MagicMock:
# Create data with null character in JSON to test preprocessing
data = OpenAIIngredients(
ingredients=[
OpenAIIngredient(
quantity=random_int(0, 10),
unit="",
food="there is a null character here: \x00",
note="",
)
]
)
# Create a mock raw response which matches the OpenAI chat response format
mock_response = MagicMock()
mock_response.choices = [MagicMock()]
mock_response.choices[0].message.content = data.model_dump_json()
return mock_response
# Mock the raw response here since we want to make sure our service executes processing before loading the model
monkeypatch.setattr(OpenAIService, "_get_raw_response", mock_get_raw_response)
with session_context() as session:
loop = asyncio.get_event_loop()
parser = get_parser(RegisteredParser.openai, unique_local_group_id, session, get_locale_provider())
# a single empty input string; the mocked response always contains exactly one ingredient
parsed = loop.run_until_complete(parser.parse([""]))
assert len(parsed) == 1
parsed_ing = cast(ParsedIngredient, parsed[0])
assert parsed_ing.ingredient.food
# same text as the mocked food, minus the trailing \x00
assert parsed_ing.ingredient.food.name == "there is a null character here: "
# Make sure we can create a recipe with this ingredient
assert isinstance(parsed_ing.ingredient.food, CreateIngredientFood)
food = unique_user.repos.ingredient_foods.create(
parsed_ing.ingredient.food.cast(SaveIngredientFood, group_id=unique_user.group_id)
)
parsed_ing.ingredient.food = food
unique_user.repos.recipes.create(
Recipe(
user_id=unique_user.user_id,
group_id=unique_user.group_id,
name=random_string(),
recipe_ingredient=[parsed_ing.ingredient],
)
)
# Bounds shared by most confidence cases below
_EXACT = (1.0, 1.0)  # the score must be exactly 1.0
_ZERO = (0.0, 0.0)  # the score must be exactly 0.0


@pytest.mark.parametrize(
    "original_text,quantity,unit,food,note,qty_range,unit_range,food_range,note_range",
    [
        pytest.param(
            "2 cups flour", 2.0, "Cups", "flour", "",
            _EXACT, _EXACT, _EXACT, _EXACT,
            id="perfect_match_all_components",
        ),
        pytest.param(
            "2 cups flour", 3.0, "Cups", "flour", "",
            _ZERO, _EXACT, _EXACT, _EXACT,
            id="quantity_mismatch",
        ),
        pytest.param(
            "2 cups flour", 2.0, None, "flour", "",
            _EXACT, (0.4, 0.9), _EXACT, _EXACT,
            id="missing_unit_fallback",
        ),
        pytest.param(
            "2 cups flour", 2.0, "Cups", None, "",
            _EXACT, _EXACT, (0.4, 0.9), _EXACT,
            id="missing_food_fallback",
        ),
        pytest.param(
            "2 cups flour sifted fresh", 2.0, "Cups", "flour", "sifted fresh",
            _EXACT, _EXACT, _EXACT, (0.8, 1.0),
            id="note_full_match",
        ),
        pytest.param(
            "2 cups flour sifted", 2.0, "Cups", "flour", "sifted chopped",
            _EXACT, _EXACT, _EXACT, (0.4, 0.6),
            id="note_partial_match",
        ),
        pytest.param(
            "2 cups flour", 2.0, "Cups", "flour", "chopped minced",
            _EXACT, _EXACT, _EXACT, _ZERO,
            id="note_no_match",
        ),
        pytest.param(
            "1.5 tsp salt kosher", 1.0, None, None, "kosher fine",
            _ZERO, (0.3, 0.7), (0.3, 0.7), (0.4, 0.6),
            id="multiple_issues",
        ),
        pytest.param(
            "", 1.0, "Cups", "flour", "fresh",
            _ZERO, _EXACT, _EXACT, _ZERO,
            id="empty_original_text",
        ),
        pytest.param(
            "salt", 0.0, None, "salt", "",
            _EXACT, _EXACT, _EXACT, _EXACT,
            id="zero_quantity_match",
        ),
    ],
)
def test_openai_parser_confidence(
    original_text: str,
    quantity: float | None,
    unit: str | None,
    food: str | None,
    note: str,
    qty_range: tuple[float, float],
    unit_range: tuple[float, float],
    food_range: tuple[float, float],
    note_range: tuple[float, float],
    unique_local_group_id: UUID4,
    parsed_ingredient_data: tuple[list[IngredientFood], list[IngredientUnit]],  # required so database is populated
):
    """Exercise ``OpenAIParser._calculate_confidence`` against the scenarios in the table above.

    Each ``*_range`` argument is an inclusive ``(min, max)`` pair that the matching
    confidence score must fall into. The reported average must equal the mean of
    the four individual scores.
    """
    with session_context() as session:
        from mealie.services.parser_services.openai.parser import OpenAIParser

        openai_parser = cast(
            OpenAIParser, get_parser(RegisteredParser.openai, unique_local_group_id, session, get_locale_provider())
        )

        # Build the ingredient under test; falsy unit/food/note values become None
        test_ingredient = RecipeIngredient(
            original_text=original_text,
            quantity=quantity,
            unit=CreateIngredientUnit(name=unit) if unit else None,
            food=CreateIngredientFood(name=food) if food else None,
            note=note or None,
        )

        confidence = openai_parser._calculate_confidence(original_text, test_ingredient)

        # Every score must be populated by the method
        assert confidence.quantity is not None, "Quantity confidence should not be None"
        assert confidence.unit is not None, "Unit confidence should not be None"
        assert confidence.food is not None, "Food confidence should not be None"
        assert confidence.comment is not None, "Comment confidence should not be None"
        assert confidence.average is not None, "Average confidence should not be None"

        # Inclusive range checks, in the same order as the scores are declared
        range_checks = (
            ("Quantity", confidence.quantity, qty_range),
            ("Unit", confidence.unit, unit_range),
            ("Food", confidence.food, food_range),
            ("Note", confidence.comment, note_range),
        )
        for label, score, bounds in range_checks:
            lower, upper = bounds
            assert lower <= score <= upper, f"{label} confidence out of range: expected {bounds}, got {score}"

        # The average must be the plain mean of the four scores
        expected_avg = (confidence.quantity + confidence.unit + confidence.food + confidence.comment) / 4
        assert abs(confidence.average - expected_avg) < 0.001, (
            f"Average confidence mismatch: expected {expected_avg}, got {confidence.average}"
        )

View File

@@ -1,737 +0,0 @@
import asyncio
import json
from dataclasses import dataclass
from typing import cast
from unittest.mock import MagicMock
import pytest
from pydantic import UUID4
from sqlalchemy.orm import Session
from mealie.db.db_setup import session_context
from mealie.repos.all_repositories import get_repositories
from mealie.repos.repository_factory import AllRepositories
from mealie.schema.openai.recipe_ingredient import OpenAIIngredient, OpenAIIngredients
from mealie.schema.recipe.recipe import Recipe
from mealie.schema.recipe.recipe_ingredient import (
CreateIngredientFood,
CreateIngredientFoodAlias,
CreateIngredientUnit,
CreateIngredientUnitAlias,
IngredientFood,
IngredientUnit,
ParsedIngredient,
RecipeIngredient,
SaveIngredientFood,
SaveIngredientUnit,
)
from mealie.schema.user.user import GroupBase
from mealie.services.openai import OpenAIService
from mealie.services.parser_services import RegisteredParser, get_parser
from tests.utils.factories import random_int, random_string
from tests.utils.fixture_schemas import TestUser
@dataclass
class TestIngredient:
    """Plain record describing one ingredient string and the parts expected from it."""

    # The class name starts with "Test", so pytest would otherwise try to collect it
    # as a test class and warn about its generated __init__. Unannotated, so it is
    # a class attribute and not a dataclass field.
    __test__ = False

    input: str  # raw ingredient text
    quantity: float  # expected quantity
    unit: str  # expected unit name
    food: str  # expected food name
    comments: str  # expected comment / note text
def build_parsed_ing(food: str | None, unit: str | None) -> ParsedIngredient:
    """Wrap optional food and unit names in a ``ParsedIngredient`` with no input text.

    A falsy ``food`` or ``unit`` (``None`` or an empty string) leaves the
    corresponding attribute of the ingredient as ``None``.
    """
    ingredient = RecipeIngredient(unit=None, food=None)

    # Attach create-models only for the names that were actually supplied
    if food:
        ingredient.food = CreateIngredientFood(name=food)
    if unit:
        ingredient.unit = CreateIngredientUnit(name=unit)

    parsed = ParsedIngredient(input=None, ingredient=ingredient)
    return parsed
@pytest.fixture()
def unique_local_group_id(unfiltered_database: AllRepositories) -> UUID4:
    """Create a group with a random name and return its id converted to a string."""
    new_group = unfiltered_database.groups.create(GroupBase(name=random_string()))
    return str(new_group.id)
@pytest.fixture()
def unique_db(session: Session, unique_local_group_id: str):
    """Return the repositories obtained for ``session`` with the unique group's id."""
    group_repos = get_repositories(session, group_id=unique_local_group_id)
    return group_repos
@pytest.fixture()
def parsed_ingredient_data(
    unique_db: AllRepositories, unique_local_group_id: UUID4
) -> tuple[list[IngredientFood], list[IngredientUnit]]:
    """Populate the unique group with foods and units for the parser tests.

    A fixed set of named foods and units (including plural names, abbreviations
    and aliases) is created first, followed by 10-15 randomly named extras of
    each kind.

    Returns:
        A ``(foods, units)`` tuple holding everything that was created.
    """
    foods = unique_db.ingredient_foods.create_many(
        [
            SaveIngredientFood(name="potatoes", group_id=unique_local_group_id),
            SaveIngredientFood(name="onion", group_id=unique_local_group_id),
            SaveIngredientFood(name="green onion", group_id=unique_local_group_id),
            SaveIngredientFood(name="frozen pearl onions", group_id=unique_local_group_id),
            SaveIngredientFood(name="bell peppers", group_id=unique_local_group_id),
            SaveIngredientFood(name="red pepper flakes", group_id=unique_local_group_id),
            SaveIngredientFood(name="fresh ginger", group_id=unique_local_group_id),
            SaveIngredientFood(name="ground ginger", group_id=unique_local_group_id),
            SaveIngredientFood(name="ñör̃m̈ãl̈ĩz̈ẽm̈ẽ", group_id=unique_local_group_id),
            SaveIngredientFood(name="PluralFoodTest", plural_name="myfoodisplural", group_id=unique_local_group_id),
            SaveIngredientFood(
                name="IHaveAnAlias",
                group_id=unique_local_group_id,
                aliases=[CreateIngredientFoodAlias(name="thisismyalias")],
            ),
        ]
    )

    # Randomly named extra foods
    foods.extend(
        unique_db.ingredient_foods.create_many(
            [
                SaveIngredientFood(name=f"{random_string()} food", group_id=unique_local_group_id)
                for _ in range(random_int(10, 15))
            ]
        )
    )

    units = unique_db.ingredient_units.create_many(
        [
            SaveIngredientUnit(name="Cups", group_id=unique_local_group_id),
            SaveIngredientUnit(name="Tablespoon", group_id=unique_local_group_id),
            SaveIngredientUnit(name="Teaspoon", group_id=unique_local_group_id),
            SaveIngredientUnit(name="Stalk", group_id=unique_local_group_id),
            SaveIngredientUnit(name="My Very Long Unit Name", abbreviation="mvlun", group_id=unique_local_group_id),
            SaveIngredientUnit(
                name="PluralUnitName",
                plural_name="abc123",
                abbreviation="doremiabc",
                plural_abbreviation="doremi123",
                group_id=unique_local_group_id,
            ),
            SaveIngredientUnit(
                name="IHaveAnAliasToo",
                group_id=unique_local_group_id,
                aliases=[CreateIngredientUnitAlias(name="thisismyalias")],
            ),
        ]
    )

    # Randomly named extra units. These go through the units repository; the foods
    # repository was used here before, which put food records into the units list.
    units.extend(
        unique_db.ingredient_units.create_many(
            [
                SaveIngredientUnit(name=f"{random_string()} unit", group_id=unique_local_group_id)
                for _ in range(random_int(10, 15))
            ]
        )
    )

    return foods, units
@pytest.mark.parametrize(
    "input, quantity, unit, food, comment",
    [
        pytest.param("1 theelepel koffie", 1, "theelepel", "koffie", "", id="1 theelepel koffie"),
        pytest.param("3 theelepels koffie", 3, "theelepels", "koffie", "", id="3 theelepels koffie"),
        pytest.param("1 eetlepel tarwe", 1, "eetlepel", "tarwe", "", id="1 eetlepel tarwe"),
        pytest.param("20 eetlepels bloem", 20, "eetlepels", "bloem", "", id="20 eetlepels bloem"),
        pytest.param("1 mespunt kaneel", 1, "mespunt", "kaneel", "", id="1 mespunt kaneel"),
        pytest.param("1 snuf(je) zout", 1, "snuf(je)", "zout", "", id="1 snuf(je) zout"),
        pytest.param(
            "2 tbsp minced cilantro, leaves and stems",
            2,
            "tbsp",
            "minced cilantro",
            "leaves and stems",
            id="2 tbsp minced cilantro, leaves and stems",
        ),
        pytest.param(
            "1 large yellow onion, coarsely chopped",
            1,
            "large",
            "yellow onion",
            "coarsely chopped",
            id="1 large yellow onion, coarsely chopped",
        ),
        pytest.param("1 1/2 tsp garam masala", 1.5, "tsp", "garam masala", "", id="1 1/2 tsp garam masala"),
        pytest.param(
            "2 cups mango chunks, (2 large mangoes) (fresh or frozen)",
            2,
            "Cups",
            "mango chunks, (2 large mangoes)",
            "fresh or frozen",
            id="2 cups mango chunks, (2 large mangoes) (fresh or frozen)",
        ),
        pytest.param("stalk onion", 0, "Stalk", "onion", "", id="stalk onion"),
        pytest.param("a stalk bell peppers", 0, "Stalk", "bell peppers", "", id="a stalk bell peppers"),
        pytest.param("a tablespoon unknownFood", 0, "Tablespoon", "unknownFood", "", id="a tablespoon unknownFood"),
        pytest.param(
            "stalk bell peppers, cut in pieces",
            0,
            "Stalk",
            "bell peppers",
            "cut in pieces",
            id="stalk bell peppers, cut in pieces",
        ),
        pytest.param(
            "a stalk bell peppers, cut in pieces",
            0,
            "Stalk",
            "bell peppers",
            "cut in pieces",
            # id now matches the input; it used to duplicate the previous case's id
            id="a stalk bell peppers, cut in pieces",
        ),
        pytest.param("red pepper flakes", 0, "", "red pepper flakes", "", id="red pepper flakes"),
        pytest.param("1 bell peppers", 1, "", "bell peppers", "", id="1 bell peppers"),
        # id now matches the input; it used to read "1 big stalk bell peppers"
        pytest.param("1 stalk bell peppers", 1, "Stalk", "bell peppers", "", id="1 stalk bell peppers"),
        pytest.param("a big stalk bell peppers", 0, "Stalk", "bell peppers", "", id="a big stalk bell peppers"),
        pytest.param(
            "1 bell peppers, cut in pieces", 1, "", "bell peppers", "cut in pieces", id="1 bell peppers, cut in pieces"
        ),
        pytest.param(
            "bell peppers, cut in pieces", 0, "", "bell peppers", "cut in pieces", id="bell peppers, cut in pieces"
        ),
    ],
)
def test_brute_parser(
    unique_local_group_id: UUID4,
    parsed_ingredient_data: tuple[list[IngredientFood], list[IngredientUnit]],  # required so database is populated
    input: str,
    quantity: int | float,
    unit: str,
    food: str,
    comment: str,
):
    """Parse ``input`` with the brute parser and compare each parsed part with the expectation.

    For every part (quantity, unit name, food name, note): when the parser
    produced a truthy value it must equal the expected one; otherwise the
    expected value must itself be falsy (``0`` or ``""``).
    """
    with session_context() as session:
        loop = asyncio.get_event_loop()
        parser = get_parser(RegisteredParser.brute, unique_local_group_id, session)
        parsed = loop.run_until_complete(parser.parse_one(input))
        ing = parsed.ingredient

        if ing.quantity:
            assert ing.quantity == quantity
        else:
            assert not quantity

        if ing.unit:
            assert ing.unit.name == unit
        else:
            assert not unit

        if ing.food:
            assert ing.food.name == food
        else:
            assert not food

        if ing.note:
            assert ing.note == comment
        else:
            assert not comment
@pytest.mark.parametrize(
    "unit, food, expect_unit_match, expect_food_match, expected_avg",
    [
        pytest.param("Cups", "potatoes", True, True, 1.0, id="all matched"),
        pytest.param("Cups", "veryuniquefood", True, False, 0.75, id="unit matched only"),
        pytest.param("veryuniqueunit", "potatoes", False, True, 0.75, id="food matched only"),
        pytest.param("veryuniqueunit", "veryuniquefood", False, False, 0.5, id="neither matched"),
    ],
)
def test_brute_parser_confidence(
    unit: str,
    food: str,
    expect_unit_match: bool,
    expect_food_match: bool,
    expected_avg: float,
    unique_local_group_id: UUID4,
    parsed_ingredient_data: tuple[list[IngredientFood], list[IngredientUnit]],
):
    """Check the confidence scores the brute parser reports for ``"1 {unit} {food}"``.

    Quantity and comment confidence are expected to be 1. Unit and food
    confidence are expected to be 1 when a match is expected (or the name is
    empty) and 0 otherwise. ``expected_avg`` is the expected overall average.
    """
    input_str = f"1 {unit} {food}"

    with session_context() as session:
        original_loop = asyncio.get_event_loop()

        # Create the loop before entering ``try`` so that the ``finally`` clause never
        # references an unbound name (which would mask the real error) if creation fails.
        loop = asyncio.new_event_loop()
        try:
            asyncio.set_event_loop(loop)
            parser = get_parser(RegisteredParser.brute, unique_local_group_id, session)
            parsed = loop.run_until_complete(parser.parse_one(input_str))
        finally:
            # Always close the temporary loop and restore the one that was current before
            loop.close()
            asyncio.set_event_loop(original_loop)

        conf = parsed.confidence

        assert conf.quantity == 1
        assert conf.comment == 1
        assert conf.unit == (1 if expect_unit_match or not unit else 0)
        assert conf.food == (1 if expect_food_match or not food else 0)
        assert conf.average == expected_avg
@pytest.mark.parametrize(
    "input, expected_unit_name, expected_food_name, expect_unit_match, expect_food_match",
    [
        pytest.param(
            build_parsed_ing(unit="cup", food="potatoes"), "Cups", "potatoes", True, True, id="basic match"
        ),
        # this should work in sqlite since "potato" is contained within "potatoes"
        pytest.param(
            build_parsed_ing(unit="cup", food="potato"), "Cups", "potatoes", True, True, id="basic fuzzy match"
        ),
        pytest.param(
            build_parsed_ing(unit="tablespoon", food="onion"), "Tablespoon", "onion", True, True, id="nested match 1"
        ),
        pytest.param(
            build_parsed_ing(unit="teaspoon", food="green onion"),
            "Teaspoon",
            "green onion",
            True,
            True,
            id="nested match 2",
        ),
        pytest.param(
            build_parsed_ing(unit="cup", food="gren onion"), "Cups", "green onion", True, True, id="nested match 3"
        ),
        pytest.param(
            build_parsed_ing(unit="stalk", food="very unique"), "Stalk", "very unique", True, False, id="no food match"
        ),
        pytest.param(build_parsed_ing(unit="cup", food=None), "Cups", None, True, False, id="no food input"),
        pytest.param(
            build_parsed_ing(unit="very unique", food="fresh ginger"),
            "very unique",
            "fresh ginger",
            False,
            True,
            id="no unit match",
        ),
        pytest.param(build_parsed_ing(unit=None, food="potatoes"), None, "potatoes", False, True, id="no unit input"),
        pytest.param(
            build_parsed_ing(unit="very unique", food="very unique"),
            "very unique",
            "very unique",
            False,
            False,
            id="no matches",
        ),
        pytest.param(build_parsed_ing(unit=None, food=None), None, None, False, False, id="no input"),
        pytest.param(
            build_parsed_ing(unit="mvlun", food="potatoes"),
            "My Very Long Unit Name",
            "potatoes",
            True,
            True,
            id="unit abbreviation",
        ),
        pytest.param(
            build_parsed_ing(unit=None, food="n̅ōr̅m̄a̅l̄i̅z̄e̅m̄e̅"), None, "ñör̃m̈ãl̈ĩz̈ẽm̈ẽ", False, True, id="normalization"
        ),
        pytest.param(
            build_parsed_ing(unit=None, food="myfoodisplural"), None, "PluralFoodTest", False, True, id="plural food name"
        ),
        pytest.param(
            build_parsed_ing(unit="abc123", food=None), "PluralUnitName", None, True, False, id="plural unit name"
        ),
        pytest.param(
            build_parsed_ing(unit="doremi123", food=None),
            "PluralUnitName",
            None,
            True,
            False,
            id="plural unit abbreviation",
        ),
        pytest.param(
            build_parsed_ing(unit=None, food="thisismyalias"), None, "IHaveAnAlias", False, True, id="food alias"
        ),
        pytest.param(
            build_parsed_ing(unit="thisismyalias", food=None), "IHaveAnAliasToo", None, True, False, id="unit alias"
        ),
    ],
)
def test_parser_ingredient_match(
    expected_food_name: str | None,
    expected_unit_name: str | None,
    expect_food_match: bool,
    expect_unit_match: bool,
    input: ParsedIngredient,
    parsed_ingredient_data: tuple[list[IngredientFood], list[IngredientUnit]],  # required so database is populated
    unique_local_group_id: UUID4,
):
    """Run ``find_ingredient_match`` on ``input`` and check the resulting food and unit.

    For both food and unit the test checks the resulting name (or ``None`` when no
    name is expected) and the resulting type: a stored model when a match is
    expected, a create-model when a name is present without a match, ``None``
    otherwise.
    """
    with session_context() as session:
        brute_parser = get_parser(RegisteredParser.brute, unique_local_group_id, session)
        matched = brute_parser.find_ingredient_match(input)

        found_food = matched.ingredient.food
        found_unit = matched.ingredient.unit

        # --- food ---
        if expected_food_name:
            assert found_food and found_food.name == expected_food_name
        else:
            assert found_food is None

        if expect_food_match:
            assert isinstance(found_food, IngredientFood)
        elif found_food and found_food.name:
            assert isinstance(found_food, CreateIngredientFood)
        else:
            assert found_food is None

        # --- unit ---
        if expected_unit_name:
            assert found_unit and found_unit.name == expected_unit_name
        else:
            assert found_unit is None

        if expect_unit_match:
            assert isinstance(found_unit, IngredientUnit)
        elif found_unit and found_unit.name:
            assert isinstance(found_unit, CreateIngredientUnit)
        else:
            assert found_unit is None
def test_openai_parser(
    unique_local_group_id: UUID4,
    parsed_ingredient_data: tuple[list[IngredientFood], list[IngredientUnit]],  # required so database is populated
    monkeypatch: pytest.MonkeyPatch,
):
    """Check that the OpenAI parser returns one result per input and keeps the input order.

    ``OpenAIService.get_response`` is replaced by a mock that returns one random
    ingredient per entry of the JSON-encoded message, so only the count and
    ordering of the results are verified.
    """
    ingredient_count = random_int(10, 20)

    async def mock_get_response(self, prompt: str, message: str, *args, **kwargs) -> OpenAIIngredients | None:
        requested = json.loads(message)
        fake_ingredients = [
            OpenAIIngredient(
                quantity=random_int(0, 10),
                unit=random_string(),
                food=random_string(),
                note=random_string(),
            )
            for _ in requested
        ]
        return OpenAIIngredients(ingredients=fake_ingredients)

    monkeypatch.setattr(OpenAIService, "get_response", mock_get_response)

    with session_context() as session:
        event_loop = asyncio.get_event_loop()
        openai_parser = get_parser(RegisteredParser.openai, unique_local_group_id, session)

        inputs = [random_string() for _ in range(ingredient_count)]
        parsed = event_loop.run_until_complete(openai_parser.parse(inputs))

        # since OpenAI is mocked, we don't need to validate the data, we just need to make sure parsing works
        # and that it preserves order
        assert len(parsed) == ingredient_count
        for raw_text, result in zip(inputs, parsed, strict=True):
            assert result.input == raw_text
def test_openai_parser_sanitize_output(
    unique_local_group_id: UUID4,
    unique_user: TestUser,
    parsed_ingredient_data: tuple[list[IngredientFood], list[IngredientUnit]],  # required so database is populated
    monkeypatch: pytest.MonkeyPatch,
):
    """Check that a null character in the raw OpenAI payload does not survive parsing.

    The raw-response hook of ``OpenAIService`` is replaced with a mock whose food
    name ends in ``\\x00``. The parsed food name must come back without it, and the
    resulting ingredient must be storable as part of a new recipe.
    """

    async def mock_get_raw_response(self, prompt: str, content: list[dict], response_schema) -> MagicMock:
        # The food name deliberately ends with a null character
        payload = OpenAIIngredients(
            ingredients=[
                OpenAIIngredient(
                    quantity=random_int(0, 10),
                    unit="",
                    food="there is a null character here: \x00",
                    note="",
                )
            ]
        )

        # Shape the mock like an OpenAI chat response: choices[0].message.content holds the JSON
        first_choice = MagicMock()
        first_choice.message.content = payload.model_dump_json()
        raw_response = MagicMock()
        raw_response.choices = [first_choice]
        return raw_response

    # Patch the raw response (rather than the parsed one) so the service still runs its processing
    # before loading the model
    monkeypatch.setattr(OpenAIService, "_get_raw_response", mock_get_raw_response)

    with session_context() as session:
        event_loop = asyncio.get_event_loop()
        openai_parser = get_parser(RegisteredParser.openai, unique_local_group_id, session)
        results = event_loop.run_until_complete(openai_parser.parse([""]))

        assert len(results) == 1
        parsed_ing = cast(ParsedIngredient, results[0])
        parsed_food = parsed_ing.ingredient.food
        assert parsed_food
        assert parsed_food.name == "there is a null character here: "

        # Make sure we can create a recipe with this ingredient
        assert isinstance(parsed_food, CreateIngredientFood)
        food_to_save = parsed_food.cast(SaveIngredientFood, group_id=unique_user.group_id)
        parsed_ing.ingredient.food = unique_user.repos.ingredient_foods.create(food_to_save)

        new_recipe = Recipe(
            user_id=unique_user.user_id,
            group_id=unique_user.group_id,
            name=random_string(),
            recipe_ingredient=[parsed_ing.ingredient],
        )
        unique_user.repos.recipes.create(new_recipe)
# Bounds shared by most confidence cases below
_EXACT = (1.0, 1.0)  # the score must be exactly 1.0
_ZERO = (0.0, 0.0)  # the score must be exactly 0.0


@pytest.mark.parametrize(
    "original_text,quantity,unit,food,note,qty_range,unit_range,food_range,note_range",
    [
        pytest.param(
            "2 cups flour", 2.0, "Cups", "flour", "",
            _EXACT, _EXACT, _EXACT, _EXACT,
            id="perfect_match_all_components",
        ),
        pytest.param(
            "2 cups flour", 3.0, "Cups", "flour", "",
            _ZERO, _EXACT, _EXACT, _EXACT,
            id="quantity_mismatch",
        ),
        pytest.param(
            "2 cups flour", 2.0, None, "flour", "",
            _EXACT, (0.4, 0.9), _EXACT, _EXACT,
            id="missing_unit_fallback",
        ),
        pytest.param(
            "2 cups flour", 2.0, "Cups", None, "",
            _EXACT, _EXACT, (0.4, 0.9), _EXACT,
            id="missing_food_fallback",
        ),
        pytest.param(
            "2 cups flour sifted fresh", 2.0, "Cups", "flour", "sifted fresh",
            _EXACT, _EXACT, _EXACT, (0.8, 1.0),
            id="note_full_match",
        ),
        pytest.param(
            "2 cups flour sifted", 2.0, "Cups", "flour", "sifted chopped",
            _EXACT, _EXACT, _EXACT, (0.4, 0.6),
            id="note_partial_match",
        ),
        pytest.param(
            "2 cups flour", 2.0, "Cups", "flour", "chopped minced",
            _EXACT, _EXACT, _EXACT, _ZERO,
            id="note_no_match",
        ),
        pytest.param(
            "1.5 tsp salt kosher", 1.0, None, None, "kosher fine",
            _ZERO, (0.3, 0.7), (0.3, 0.7), (0.4, 0.6),
            id="multiple_issues",
        ),
        pytest.param(
            "", 1.0, "Cups", "flour", "fresh",
            _ZERO, _EXACT, _EXACT, _ZERO,
            id="empty_original_text",
        ),
        pytest.param(
            "salt", 0.0, None, "salt", "",
            _EXACT, _EXACT, _EXACT, _EXACT,
            id="zero_quantity_match",
        ),
    ],
)
def test_openai_parser_confidence(
    original_text: str,
    quantity: float | None,
    unit: str | None,
    food: str | None,
    note: str,
    qty_range: tuple[float, float],
    unit_range: tuple[float, float],
    food_range: tuple[float, float],
    note_range: tuple[float, float],
    unique_local_group_id: UUID4,
    parsed_ingredient_data: tuple[list[IngredientFood], list[IngredientUnit]],  # required so database is populated
):
    """Exercise ``OpenAIParser._calculate_confidence`` against the scenarios in the table above.

    Each ``*_range`` argument is an inclusive ``(min, max)`` pair that the matching
    confidence score must fall into. The reported average must equal the mean of
    the four individual scores.
    """
    with session_context() as session:
        from mealie.services.parser_services.openai.parser import OpenAIParser

        openai_parser = cast(OpenAIParser, get_parser(RegisteredParser.openai, unique_local_group_id, session))

        # Build the ingredient under test; falsy unit/food/note values become None
        test_ingredient = RecipeIngredient(
            original_text=original_text,
            quantity=quantity,
            unit=CreateIngredientUnit(name=unit) if unit else None,
            food=CreateIngredientFood(name=food) if food else None,
            note=note or None,
        )

        confidence = openai_parser._calculate_confidence(original_text, test_ingredient)

        # Every score must be populated by the method
        assert confidence.quantity is not None, "Quantity confidence should not be None"
        assert confidence.unit is not None, "Unit confidence should not be None"
        assert confidence.food is not None, "Food confidence should not be None"
        assert confidence.comment is not None, "Comment confidence should not be None"
        assert confidence.average is not None, "Average confidence should not be None"

        # Inclusive range checks, in the same order as the scores are declared
        range_checks = (
            ("Quantity", confidence.quantity, qty_range),
            ("Unit", confidence.unit, unit_range),
            ("Food", confidence.food, food_range),
            ("Note", confidence.comment, note_range),
        )
        for label, score, bounds in range_checks:
            lower, upper = bounds
            assert lower <= score <= upper, f"{label} confidence out of range: expected {bounds}, got {score}"

        # The average must be the plain mean of the four scores
        expected_avg = (confidence.quantity + confidence.unit + confidence.food + confidence.comment) / 4
        assert abs(confidence.average - expected_avg) < 0.001, (
            f"Average confidence mismatch: expected {expected_avg}, got {confidence.average}"
        )