mirror of
https://github.com/mealie-recipes/mealie.git
synced 2025-10-27 16:24:31 -04:00
feat: Manually calculate OpenAI Parsing Confidence (#6141)
This commit is contained in:
@@ -7,30 +7,6 @@ from ._base import OpenAIBase
|
||||
|
||||
|
||||
class OpenAIIngredient(OpenAIBase):
|
||||
input: str = Field(
|
||||
...,
|
||||
description=dedent(
|
||||
"""
|
||||
The input is simply the ingredient string you are processing as-is. It is forbidden to
|
||||
modify this at all, you must provide the input exactly as you received it.
|
||||
"""
|
||||
),
|
||||
)
|
||||
confidence: float | None = Field(
|
||||
None,
|
||||
description=dedent(
|
||||
"""
|
||||
This value is a float between 0 - 100, where 100 is full confidence that the result is correct,
|
||||
and 0 is no confidence that the result is correct. If you're unable to parse anything,
|
||||
and you put the entire string in the notes, you should return 0 confidence. If you can easily
|
||||
parse the string into each component, then you should return a confidence of 100. If you have to
|
||||
guess which part is the unit and which part is the food, your confidence should be lower, such as 60.
|
||||
Even if there is no unit or note, if you're able to determine the food, you may use a higher confidence.
|
||||
If the entire ingredient consists of only a food, you can use a confidence of 100.
|
||||
"""
|
||||
),
|
||||
)
|
||||
|
||||
quantity: float | None = Field(
|
||||
0,
|
||||
description=dedent(
|
||||
@@ -73,21 +49,10 @@ class OpenAIIngredient(OpenAIBase):
|
||||
),
|
||||
)
|
||||
|
||||
@field_validator("confidence", "quantity", mode="before")
|
||||
@field_validator("quantity", mode="before")
|
||||
def coerce_none_float(cls, v: Any) -> Any:
|
||||
return v or 0
|
||||
|
||||
@field_validator("confidence")
def validate_confidence(cls, v: float | None) -> float:
    """Scale a 0-100 confidence score down to the 0-1 range.

    A missing (or otherwise falsy) value is treated as 0, and values outside
    0-100 are clamped to the nearest bound before dividing by 100.
    """
    percent = v if v else 0
    # Clamp into [0, 100] before scaling
    percent = min(max(percent, 0), 100)
    return percent / 100
|
||||
|
||||
|
||||
class OpenAIIngredients(OpenAIBase):
|
||||
ingredients: list[OpenAIIngredient] = []
|
||||
|
||||
@@ -2,6 +2,8 @@ import asyncio
|
||||
import json
|
||||
from collections.abc import Awaitable
|
||||
|
||||
from rapidfuzz import fuzz
|
||||
|
||||
from mealie.schema.openai.recipe_ingredient import OpenAIIngredient, OpenAIIngredients
|
||||
from mealie.schema.recipe.recipe_ingredient import (
|
||||
CreateIngredientFood,
|
||||
@@ -13,12 +15,84 @@ from mealie.schema.recipe.recipe_ingredient import (
|
||||
from mealie.services.openai import OpenAIDataInjection, OpenAIService
|
||||
|
||||
from .._base import ABCIngredientParser
|
||||
from ..parser_utils import extract_quantity_from_string
|
||||
|
||||
|
||||
class OpenAIParser(ABCIngredientParser):
|
||||
def _convert_ingredient(self, openai_ing: OpenAIIngredient) -> ParsedIngredient:
|
||||
def _calculate_qty_conf(self, original_text: str, parsed_qty: float | None) -> float:
    """Score the parsed quantity against the one extracted directly from the raw text.

    Yields 1 when `parsed_qty` (a missing value counts as 0) equals the quantity that
    `extract_quantity_from_string` pulls out of `original_text`, and 0 otherwise.
    """
    brute_force_qty, _ = extract_quantity_from_string(original_text)
    return 1 if (parsed_qty or 0) == brute_force_qty else 0
|
||||
|
||||
def _calculate_note_conf(self, original_text: str, note: str | None) -> float:
|
||||
"""
|
||||
Calculate confidence based on how many words in the note are found in the original text.
|
||||
Uses alphanumeric filtering and lowercasing to improve matching.
|
||||
"""
|
||||
|
||||
if not note:
|
||||
return 1
|
||||
|
||||
note_words: list[str] = []
|
||||
for word in note.strip().lower().split():
|
||||
clean_word = "".join(filter(str.isalnum, word))
|
||||
if clean_word:
|
||||
note_words.append(clean_word)
|
||||
|
||||
if not note_words:
|
||||
return 1
|
||||
|
||||
original_words: list[str] = []
|
||||
for word in original_text.strip().lower().split():
|
||||
clean_word = "".join(filter(str.isalnum, word))
|
||||
if clean_word:
|
||||
original_words.append(clean_word)
|
||||
|
||||
note_conf_sum = sum(1 for word in note_words if word in original_words)
|
||||
return note_conf_sum / len(note_words)
|
||||
|
||||
def _calculate_overall_confidence(self, original_text: str, ing_text: str) -> float:
    """
    Fuzzy-compare the original text with the ingredient text and return the score divided by 100.

    The comparison is rapidfuzz's token sort ratio, chosen so that differences
    in word order between the two strings are tolerated.
    """

    return fuzz.token_sort_ratio(original_text, ing_text) / 100.0
|
||||
|
||||
def _calculate_confidence(self, original_text: str, ing: RecipeIngredient) -> IngredientConfidence:
    """Assemble the per-component confidence breakdown for a parsed ingredient.

    Quantity and note are scored by their dedicated helpers. A food or unit that is
    present scores 1.0; a missing one takes the fuzzy similarity between the original
    text and the ingredient's display text instead. `average` is the plain mean of
    the four component scores.
    """
    qty_conf = self._calculate_qty_conf(original_text, ing.quantity)
    note_conf = self._calculate_note_conf(original_text, ing.note)

    # Many ingredients legitimately lack a food and/or a unit, so a missing
    # component is scored by how well the whole parse matches the input.
    fallback_conf = self._calculate_overall_confidence(original_text, ing.display)
    food_conf = 1.0 if ing.food else fallback_conf
    unit_conf = 1.0 if ing.unit else fallback_conf

    mean_conf = (qty_conf + unit_conf + food_conf + note_conf) / 4
    return IngredientConfidence(
        average=mean_conf,
        quantity=qty_conf,
        unit=unit_conf,
        food=food_conf,
        comment=note_conf,
    )
|
||||
|
||||
def _convert_ingredient(self, original_text: str, openai_ing: OpenAIIngredient) -> ParsedIngredient:
|
||||
ingredient = RecipeIngredient(
|
||||
original_text=openai_ing.input,
|
||||
original_text=original_text,
|
||||
quantity=openai_ing.quantity,
|
||||
unit=CreateIngredientUnit(name=openai_ing.unit) if openai_ing.unit else None,
|
||||
food=CreateIngredientFood(name=openai_ing.food) if openai_ing.food else None,
|
||||
@@ -26,8 +100,8 @@ class OpenAIParser(ABCIngredientParser):
|
||||
)
|
||||
|
||||
parsed_ingredient = ParsedIngredient(
|
||||
input=openai_ing.input,
|
||||
confidence=IngredientConfidence(average=openai_ing.confidence),
|
||||
input=original_text,
|
||||
confidence=self._calculate_confidence(original_text, ingredient),
|
||||
ingredient=ingredient,
|
||||
)
|
||||
|
||||
@@ -53,7 +127,7 @@ class OpenAIParser(ABCIngredientParser):
|
||||
"Below is a list of units found in the units database. While parsing, you should "
|
||||
"reference this list when determining which part of the input is the unit. You may "
|
||||
"find a unit in the input that does not exist in this list. This should not prevent "
|
||||
"you from parsing that text as a unit, however it may lower your confidence level."
|
||||
"you from parsing that text as a unit."
|
||||
),
|
||||
value=list(set(self.data_matcher.units_by_alias)),
|
||||
),
|
||||
@@ -107,4 +181,13 @@ class OpenAIParser(ABCIngredientParser):
|
||||
|
||||
async def parse(self, ingredients: list[str]) -> list[ParsedIngredient]:
|
||||
response = await self._parse(ingredients)
|
||||
return [self._convert_ingredient(ing) for ing in response.ingredients]
|
||||
if len(response.ingredients) != len(ingredients):
|
||||
raise ValueError(
|
||||
"OpenAI returned an unexpected number of ingredients. "
|
||||
f"Expected {len(ingredients)}, got {len(response.ingredients)}"
|
||||
)
|
||||
|
||||
return [
|
||||
self._convert_ingredient(original_text, ing)
|
||||
for original_text, ing in zip(ingredients, response.ingredients, strict=True)
|
||||
]
|
||||
|
||||
@@ -464,8 +464,6 @@ def test_openai_parser(
|
||||
data = OpenAIIngredients(
|
||||
ingredients=[
|
||||
OpenAIIngredient(
|
||||
input=input,
|
||||
confidence=1,
|
||||
quantity=random_int(0, 10),
|
||||
unit=random_string(),
|
||||
food=random_string(),
|
||||
@@ -502,8 +500,6 @@ def test_openai_parser_sanitize_output(
|
||||
data = OpenAIIngredients(
|
||||
ingredients=[
|
||||
OpenAIIngredient(
|
||||
input="there is a null character here: \x00",
|
||||
confidence=1,
|
||||
quantity=random_int(0, 10),
|
||||
unit="",
|
||||
food="there is a null character here: \x00",
|
||||
@@ -522,8 +518,8 @@ def test_openai_parser_sanitize_output(
|
||||
parsed = loop.run_until_complete(parser.parse([""]))
|
||||
assert len(parsed) == 1
|
||||
parsed_ing = cast(ParsedIngredient, parsed[0])
|
||||
assert parsed_ing.input
|
||||
assert "\x00" not in parsed_ing.input
|
||||
assert parsed_ing.ingredient.food
|
||||
assert parsed_ing.ingredient.food.name == "there is a null character here: "
|
||||
|
||||
# Make sure we can create a recipe with this ingredient
|
||||
assert isinstance(parsed_ing.ingredient.food, CreateIngredientFood)
|
||||
@@ -539,3 +535,195 @@ def test_openai_parser_sanitize_output(
|
||||
recipe_ingredient=[parsed_ing.ingredient],
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"original_text,quantity,unit,food,note,qty_range,unit_range,food_range,note_range",
|
||||
[
|
||||
pytest.param(
|
||||
"2 cups flour",
|
||||
2.0,
|
||||
"Cups",
|
||||
"flour",
|
||||
"",
|
||||
(1.0, 1.0),
|
||||
(1.0, 1.0),
|
||||
(1.0, 1.0),
|
||||
(1.0, 1.0),
|
||||
id="perfect_match_all_components",
|
||||
),
|
||||
pytest.param(
|
||||
"2 cups flour",
|
||||
3.0,
|
||||
"Cups",
|
||||
"flour",
|
||||
"",
|
||||
(0.0, 0.0),
|
||||
(1.0, 1.0),
|
||||
(1.0, 1.0),
|
||||
(1.0, 1.0),
|
||||
id="quantity_mismatch",
|
||||
),
|
||||
pytest.param(
|
||||
"2 cups flour",
|
||||
2.0,
|
||||
None,
|
||||
"flour",
|
||||
"",
|
||||
(1.0, 1.0),
|
||||
(0.4, 0.9),
|
||||
(1.0, 1.0),
|
||||
(1.0, 1.0),
|
||||
id="missing_unit_fallback",
|
||||
),
|
||||
pytest.param(
|
||||
"2 cups flour",
|
||||
2.0,
|
||||
"Cups",
|
||||
None,
|
||||
"",
|
||||
(1.0, 1.0),
|
||||
(1.0, 1.0),
|
||||
(0.4, 0.9),
|
||||
(1.0, 1.0),
|
||||
id="missing_food_fallback",
|
||||
),
|
||||
pytest.param(
|
||||
"2 cups flour sifted fresh",
|
||||
2.0,
|
||||
"Cups",
|
||||
"flour",
|
||||
"sifted fresh",
|
||||
(1.0, 1.0),
|
||||
(1.0, 1.0),
|
||||
(1.0, 1.0),
|
||||
(0.8, 1.0),
|
||||
id="note_full_match",
|
||||
),
|
||||
pytest.param(
|
||||
"2 cups flour sifted",
|
||||
2.0,
|
||||
"Cups",
|
||||
"flour",
|
||||
"sifted chopped",
|
||||
(1.0, 1.0),
|
||||
(1.0, 1.0),
|
||||
(1.0, 1.0),
|
||||
(0.4, 0.6),
|
||||
id="note_partial_match",
|
||||
),
|
||||
pytest.param(
|
||||
"2 cups flour",
|
||||
2.0,
|
||||
"Cups",
|
||||
"flour",
|
||||
"chopped minced",
|
||||
(1.0, 1.0),
|
||||
(1.0, 1.0),
|
||||
(1.0, 1.0),
|
||||
(0.0, 0.0),
|
||||
id="note_no_match",
|
||||
),
|
||||
pytest.param(
|
||||
"1.5 tsp salt kosher",
|
||||
1.0,
|
||||
None,
|
||||
None,
|
||||
"kosher fine",
|
||||
(0.0, 0.0),
|
||||
(0.3, 0.7),
|
||||
(0.3, 0.7),
|
||||
(0.4, 0.6),
|
||||
id="multiple_issues",
|
||||
),
|
||||
pytest.param(
|
||||
"",
|
||||
1.0,
|
||||
"Cups",
|
||||
"flour",
|
||||
"fresh",
|
||||
(0.0, 0.0),
|
||||
(1.0, 1.0),
|
||||
(1.0, 1.0),
|
||||
(0.0, 0.0),
|
||||
id="empty_original_text",
|
||||
),
|
||||
pytest.param(
|
||||
"salt",
|
||||
0.0,
|
||||
None,
|
||||
"salt",
|
||||
"",
|
||||
(1.0, 1.0),
|
||||
(1.0, 1.0),
|
||||
(1.0, 1.0),
|
||||
(1.0, 1.0),
|
||||
id="zero_quantity_match",
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_openai_parser_confidence(
    original_text: str,
    quantity: float | None,
    unit: str | None,
    food: str | None,
    note: str,
    qty_range: tuple[float, float],
    unit_range: tuple[float, float],
    food_range: tuple[float, float],
    note_range: tuple[float, float],
    unique_local_group_id: UUID4,
    parsed_ingredient_data: tuple[list[IngredientFood], list[IngredientUnit]],  # required so database is populated
):
    """Check every score produced by OpenAIParser._calculate_confidence against its expected (min, max) range."""

    with session_context() as session:
        from mealie.services.parser_services.openai.parser import OpenAIParser

        parser = cast(OpenAIParser, get_parser(RegisteredParser.openai, unique_local_group_id, session))

        # Build the ingredient as the parser would; empty unit/food/note become None
        ingredient = RecipeIngredient(
            original_text=original_text,
            quantity=quantity,
            unit=CreateIngredientUnit(name=unit) if unit else None,
            food=CreateIngredientFood(name=food) if food else None,
            note=note if note else None,
        )

        confidence = parser._calculate_confidence(original_text, ingredient)

        # The method is expected to fill in every field, never leave one as None
        assert confidence.quantity is not None, "Quantity confidence should not be None"
        assert confidence.unit is not None, "Unit confidence should not be None"
        assert confidence.food is not None, "Food confidence should not be None"
        assert confidence.comment is not None, "Comment confidence should not be None"
        assert confidence.average is not None, "Average confidence should not be None"

        # Ranges (inclusive) rather than exact values, since fuzzy matching scores can vary
        assert qty_range[0] <= confidence.quantity <= qty_range[1], (
            f"Quantity confidence out of range: expected {qty_range}, got {confidence.quantity}"
        )
        assert unit_range[0] <= confidence.unit <= unit_range[1], (
            f"Unit confidence out of range: expected {unit_range}, got {confidence.unit}"
        )
        assert food_range[0] <= confidence.food <= food_range[1], (
            f"Food confidence out of range: expected {food_range}, got {confidence.food}"
        )
        assert note_range[0] <= confidence.comment <= note_range[1], (
            f"Note confidence out of range: expected {note_range}, got {confidence.comment}"
        )

        # The average must be the plain mean of the four component scores
        component_mean = (confidence.quantity + confidence.unit + confidence.food + confidence.comment) / 4
        assert abs(confidence.average - component_mean) < 0.001, (
            f"Average confidence mismatch: expected {component_mean}, got {confidence.average}"
        )
|
||||
|
||||
Reference in New Issue
Block a user