feat: Manually calculate OpenAI Parsing Confidence (#6141)

This commit is contained in:
Michael Genson
2025-09-19 23:09:34 -05:00
committed by GitHub
parent cb8c1423c5
commit a9090bc2bd
3 changed files with 284 additions and 48 deletions

View File

@@ -7,30 +7,6 @@ from ._base import OpenAIBase
class OpenAIIngredient(OpenAIBase):
input: str = Field(
...,
description=dedent(
"""
The input is simply the ingredient string you are processing as-is. It is forbidden to
modify this at all, you must provide the input exactly as you received it.
"""
),
)
confidence: float | None = Field(
None,
description=dedent(
"""
This value is a float between 0 - 100, where 100 is full confidence that the result is correct,
and 0 is no confidence that the result is correct. If you're unable to parse anything,
and you put the entire string in the notes, you should return 0 confidence. If you can easily
parse the string into each component, then you should return a confidence of 100. If you have to
guess which part is the unit and which part is the food, your confidence should be lower, such as 60.
Even if there is no unit or note, if you're able to determine the food, you may use a higher confidence.
If the entire ingredient consists of only a food, you can use a confidence of 100.
"""
),
)
quantity: float | None = Field(
0,
description=dedent(
@@ -73,21 +49,10 @@ class OpenAIIngredient(OpenAIBase):
),
)
@field_validator("confidence", "quantity", mode="before")
@field_validator("quantity", mode="before")
def coerce_none_float(cls, v: Any) -> Any:
    """Substitute ``0`` for a missing or otherwise falsy incoming value; pass anything else through untouched."""
    if not v:
        return 0
    return v
@field_validator("confidence")
def validate_confidence(cls, v: float | None) -> float:
v = v or 0
if v < 0:
v = 0
elif v > 100:
v = 100
return v / 100
# Wrapper model carrying a list of OpenAIIngredient entries; the list is empty by default.
# NOTE(review): kept as a comment rather than a class docstring — presumably this model's schema
# is shared with the OpenAI request, and a docstring could end up in that schema. Confirm.
class OpenAIIngredients(OpenAIBase):
ingredients: list[OpenAIIngredient] = []

View File

@@ -2,6 +2,8 @@ import asyncio
import json
from collections.abc import Awaitable
from rapidfuzz import fuzz
from mealie.schema.openai.recipe_ingredient import OpenAIIngredient, OpenAIIngredients
from mealie.schema.recipe.recipe_ingredient import (
CreateIngredientFood,
@@ -13,12 +15,84 @@ from mealie.schema.recipe.recipe_ingredient import (
from mealie.services.openai import OpenAIDataInjection, OpenAIService
from .._base import ABCIngredientParser
from ..parser_utils import extract_quantity_from_string
class OpenAIParser(ABCIngredientParser):
def _convert_ingredient(self, openai_ing: OpenAIIngredient) -> ParsedIngredient:
def _calculate_qty_conf(self, original_text: str, parsed_qty: float | None) -> float:
    """
    Score the parsed quantity against the quantity extracted directly from the original text.

    Returns 1 when both quantities are equal and 0 otherwise. A missing parsed quantity counts as 0.
    """
    brute_force_qty, _remainder = extract_quantity_from_string(original_text)
    return 1 if (parsed_qty or 0) == brute_force_qty else 0
def _calculate_note_conf(self, original_text: str, note: str | None) -> float:
"""
Calculate confidence based on how many words in the note are found in the original text.
Uses alphanumeric filtering and lowercasing to improve matching.
"""
if not note:
return 1
note_words: list[str] = []
for word in note.strip().lower().split():
clean_word = "".join(filter(str.isalnum, word))
if clean_word:
note_words.append(clean_word)
if not note_words:
return 1
original_words: list[str] = []
for word in original_text.strip().lower().split():
clean_word = "".join(filter(str.isalnum, word))
if clean_word:
original_words.append(clean_word)
note_conf_sum = sum(1 for word in note_words if word in original_words)
return note_conf_sum / len(note_words)
def _calculate_overall_confidence(self, original_text: str, ing_text: str) -> float:
    """
    Rate how closely the ingredient text resembles the original text.

    The score is the fuzzy token sort ratio of the two strings divided by 100,
    so differences in word order alone are not penalised.
    """
    return fuzz.token_sort_ratio(original_text, ing_text) / 100.0
def _calculate_confidence(self, original_text: str, ing: RecipeIngredient) -> IngredientConfidence:
    """
    Build the per-component confidence for a parsed ingredient.

    Quantity and note are scored by their dedicated helpers. A food or unit that is present
    scores 1.0; a missing one takes the fuzzy score of the ingredient's display text against
    the original text. The average is the plain mean of the four component scores.
    """
    quantity_score = self._calculate_qty_conf(original_text, ing.quantity)
    note_score = self._calculate_note_conf(original_text, ing.note)

    # Many ingredients legitimately have no food and/or unit, so a missing
    # component is scored by overall similarity rather than treated as a failure.
    fallback_score = self._calculate_overall_confidence(original_text, ing.display)
    food_score = 1.0 if ing.food else fallback_score
    unit_score = 1.0 if ing.unit else fallback_score

    mean_score = (quantity_score + unit_score + food_score + note_score) / 4
    return IngredientConfidence(
        average=mean_score,
        quantity=quantity_score,
        unit=unit_score,
        food=food_score,
        comment=note_score,
    )
def _convert_ingredient(self, original_text: str, openai_ing: OpenAIIngredient) -> ParsedIngredient:
ingredient = RecipeIngredient(
original_text=openai_ing.input,
original_text=original_text,
quantity=openai_ing.quantity,
unit=CreateIngredientUnit(name=openai_ing.unit) if openai_ing.unit else None,
food=CreateIngredientFood(name=openai_ing.food) if openai_ing.food else None,
@@ -26,8 +100,8 @@ class OpenAIParser(ABCIngredientParser):
)
parsed_ingredient = ParsedIngredient(
input=openai_ing.input,
confidence=IngredientConfidence(average=openai_ing.confidence),
input=original_text,
confidence=self._calculate_confidence(original_text, ingredient),
ingredient=ingredient,
)
@@ -53,7 +127,7 @@ class OpenAIParser(ABCIngredientParser):
"Below is a list of units found in the units database. While parsing, you should "
"reference this list when determining which part of the input is the unit. You may "
"find a unit in the input that does not exist in this list. This should not prevent "
"you from parsing that text as a unit, however it may lower your confidence level."
"you from parsing that text as a unit."
),
value=list(set(self.data_matcher.units_by_alias)),
),
@@ -107,4 +181,13 @@ class OpenAIParser(ABCIngredientParser):
async def parse(self, ingredients: list[str]) -> list[ParsedIngredient]:
    """
    Parse the ingredient strings with OpenAI and convert each result into a ParsedIngredient.

    Each response entry is paired with the input string at the same position, and that
    input string is passed to the converter as the ingredient's original text.

    Raises:
        ValueError: if the response does not contain exactly one entry per input string.
    """
    response = await self._parse(ingredients)

    # Pairing is positional, so a count mismatch would attach results to the wrong input.
    if len(response.ingredients) != len(ingredients):
        raise ValueError(
            "OpenAI returned an unexpected number of ingredients. "
            f"Expected {len(ingredients)}, got {len(response.ingredients)}"
        )

    return [
        self._convert_ingredient(original_text, ing)
        for original_text, ing in zip(ingredients, response.ingredients, strict=True)
    ]

View File

@@ -464,8 +464,6 @@ def test_openai_parser(
data = OpenAIIngredients(
ingredients=[
OpenAIIngredient(
input=input,
confidence=1,
quantity=random_int(0, 10),
unit=random_string(),
food=random_string(),
@@ -502,8 +500,6 @@ def test_openai_parser_sanitize_output(
data = OpenAIIngredients(
ingredients=[
OpenAIIngredient(
input="there is a null character here: \x00",
confidence=1,
quantity=random_int(0, 10),
unit="",
food="there is a null character here: \x00",
@@ -522,8 +518,8 @@ def test_openai_parser_sanitize_output(
parsed = loop.run_until_complete(parser.parse([""]))
assert len(parsed) == 1
parsed_ing = cast(ParsedIngredient, parsed[0])
assert parsed_ing.input
assert "\x00" not in parsed_ing.input
assert parsed_ing.ingredient.food
assert parsed_ing.ingredient.food.name == "there is a null character here: "
# Make sure we can create a recipe with this ingredient
assert isinstance(parsed_ing.ingredient.food, CreateIngredientFood)
@@ -539,3 +535,195 @@ def test_openai_parser_sanitize_output(
recipe_ingredient=[parsed_ing.ingredient],
)
)
@pytest.mark.parametrize(
    "original_text,quantity,unit,food,note,qty_range,unit_range,food_range,note_range",
    [
        pytest.param(
            "2 cups flour", 2.0, "Cups", "flour", "",
            (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0),
            id="perfect_match_all_components",
        ),
        pytest.param(
            "2 cups flour", 3.0, "Cups", "flour", "",
            (0.0, 0.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0),
            id="quantity_mismatch",
        ),
        pytest.param(
            "2 cups flour", 2.0, None, "flour", "",
            (1.0, 1.0), (0.4, 0.9), (1.0, 1.0), (1.0, 1.0),
            id="missing_unit_fallback",
        ),
        pytest.param(
            "2 cups flour", 2.0, "Cups", None, "",
            (1.0, 1.0), (1.0, 1.0), (0.4, 0.9), (1.0, 1.0),
            id="missing_food_fallback",
        ),
        pytest.param(
            "2 cups flour sifted fresh", 2.0, "Cups", "flour", "sifted fresh",
            (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (0.8, 1.0),
            id="note_full_match",
        ),
        pytest.param(
            "2 cups flour sifted", 2.0, "Cups", "flour", "sifted chopped",
            (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (0.4, 0.6),
            id="note_partial_match",
        ),
        pytest.param(
            "2 cups flour", 2.0, "Cups", "flour", "chopped minced",
            (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (0.0, 0.0),
            id="note_no_match",
        ),
        pytest.param(
            "1.5 tsp salt kosher", 1.0, None, None, "kosher fine",
            (0.0, 0.0), (0.3, 0.7), (0.3, 0.7), (0.4, 0.6),
            id="multiple_issues",
        ),
        pytest.param(
            "", 1.0, "Cups", "flour", "fresh",
            (0.0, 0.0), (1.0, 1.0), (1.0, 1.0), (0.0, 0.0),
            id="empty_original_text",
        ),
        pytest.param(
            "salt", 0.0, None, "salt", "",
            (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0),
            id="zero_quantity_match",
        ),
    ],
)
def test_openai_parser_confidence(
    original_text: str,
    quantity: float | None,
    unit: str | None,
    food: str | None,
    note: str,
    qty_range: tuple[float, float],
    unit_range: tuple[float, float],
    food_range: tuple[float, float],
    note_range: tuple[float, float],
    unique_local_group_id: UUID4,
    parsed_ingredient_data: tuple[list[IngredientFood], list[IngredientUnit]],  # required so database is populated
):
    """
    Exercise OpenAIParser._calculate_confidence across matching, mismatching and missing components.

    Each case supplies an inclusive (min, max) range per component, because the fuzzy fallback
    score is not pinned to an exact value.
    """
    with session_context() as session:
        from mealie.services.parser_services.openai.parser import OpenAIParser

        parser = cast(OpenAIParser, get_parser(RegisteredParser.openai, unique_local_group_id, session))

        # Build the ingredient as the parser would have produced it; empty values become None
        unit_model = CreateIngredientUnit(name=unit) if unit else None
        food_model = CreateIngredientFood(name=food) if food else None
        ingredient = RecipeIngredient(
            original_text=original_text,
            quantity=quantity,
            unit=unit_model,
            food=food_model,
            note=note or None,
        )

        confidence = parser._calculate_confidence(original_text, ingredient)

        # The method is expected to populate every component
        assert confidence.quantity is not None, "Quantity confidence should not be None"
        assert confidence.unit is not None, "Unit confidence should not be None"
        assert confidence.food is not None, "Food confidence should not be None"
        assert confidence.comment is not None, "Comment confidence should not be None"
        assert confidence.average is not None, "Average confidence should not be None"

        # Inclusive range checks, one per component
        qty_low, qty_high = qty_range
        assert qty_low <= confidence.quantity <= qty_high, (
            f"Quantity confidence out of range: expected {qty_range}, got {confidence.quantity}"
        )

        unit_low, unit_high = unit_range
        assert unit_low <= confidence.unit <= unit_high, (
            f"Unit confidence out of range: expected {unit_range}, got {confidence.unit}"
        )

        food_low, food_high = food_range
        assert food_low <= confidence.food <= food_high, (
            f"Food confidence out of range: expected {food_range}, got {confidence.food}"
        )

        note_low, note_high = note_range
        assert note_low <= confidence.comment <= note_high, (
            f"Note confidence out of range: expected {note_range}, got {confidence.comment}"
        )

        # The reported average must be the mean of the four components
        expected_avg = (confidence.quantity + confidence.unit + confidence.food + confidence.comment) / 4
        assert abs(confidence.average - expected_avg) < 0.001, (
            f"Average confidence mismatch: expected {expected_avg}, got {confidence.average}"
        )