feat: Manually calculate OpenAI Parsing Confidence (#6141)

2026-02-04 23:13:12 -05:00 · 2025-09-19 23:09:34 -05:00
parent cb8c1423c5
commit a9090bc2bd
3 changed files with 284 additions and 48 deletions
--- a/mealie/schema/openai/recipe_ingredient.py
+++ b/mealie/schema/openai/recipe_ingredient.py
@@ -7,30 +7,6 @@ from ._base import OpenAIBase
 class OpenAIIngredient(OpenAIBase):
    input: str = Field(
        ...,
        description=dedent(
            """
            The input is simply the ingredient string you are processing as-is. It is forbidden to
            modify this at all, you must provide the input exactly as you received it.
            """
        ),
    )
    confidence: float | None = Field(
        None,
        description=dedent(
            """
            This value is a float between 0 - 100, where 100 is full confidence that the result is correct,
            and 0 is no confidence that the result is correct. If you're unable to parse anything,
            and you put the entire string in the notes, you should return 0 confidence. If you can easily
            parse the string into each component, then you should return a confidence of 100. If you have to
            guess which part is the unit and which part is the food, your confidence should be lower, such as 60.
            Even if there is no unit or note, if you're able to determine the food, you may use a higher confidence.
            If the entire ingredient consists of only a food, you can use a confidence of 100.
            """
        ),
    )
    quantity: float | None = Field(
        0,
        description=dedent(
@@ -73,21 +49,10 @@ class OpenAIIngredient(OpenAIBase):
        ),
    )
-    @field_validator("confidence", "quantity", mode="before")
+    @field_validator("quantity", mode="before")
    def coerce_none_float(cls, v: Any) -> Any:
        """Pass truthy values through unchanged; replace any falsy value (``None``, ``0``, ``""``) with ``0``."""
        if v:
            return v
        return 0
    @field_validator("confidence")
    def validate_confidence(cls, v: float | None) -> float:
        v = v or 0
        if v < 0:
            v = 0
        elif v > 100:
            v = 100
        return v / 100
 class OpenAIIngredients(OpenAIBase):
    ingredients: list[OpenAIIngredient] = []
--- a/mealie/services/parser_services/openai/parser.py
+++ b/mealie/services/parser_services/openai/parser.py
@@ -2,6 +2,8 @@ import asyncio
 import json
 from collections.abc import Awaitable
 from rapidfuzz import fuzz
 from mealie.schema.openai.recipe_ingredient import OpenAIIngredient, OpenAIIngredients
 from mealie.schema.recipe.recipe_ingredient import (
    CreateIngredientFood,
@@ -13,12 +15,84 @@ from mealie.schema.recipe.recipe_ingredient import (
 from mealie.services.openai import OpenAIDataInjection, OpenAIService
 from .._base import ABCIngredientParser
 from ..parser_utils import extract_quantity_from_string
 class OpenAIParser(ABCIngredientParser):
-    def _convert_ingredient(self, openai_ing: OpenAIIngredient) -> ParsedIngredient:
+    def _calculate_qty_conf(self, original_text: str, parsed_qty: float | None) -> float:
        """Compares the extracted quantity to a brute-force parsed quantity."""
        expected_qty, _ = extract_quantity_from_string(original_text)
        parsed_qty = parsed_qty or 0
        if parsed_qty == expected_qty:
            return 1
        else:
            return 0
    def _calculate_note_conf(self, original_text: str, note: str | None) -> float:
        """
        Calculate confidence based on how many words in the note are found in the original text.
        Uses alphanumeric filtering and lowercasing to improve matching.
        """
        if not note:
            return 1
        note_words: list[str] = []
        for word in note.strip().lower().split():
            clean_word = "".join(filter(str.isalnum, word))
            if clean_word:
                note_words.append(clean_word)
        if not note_words:
            return 1
        original_words: list[str] = []
        for word in original_text.strip().lower().split():
            clean_word = "".join(filter(str.isalnum, word))
            if clean_word:
                original_words.append(clean_word)
        note_conf_sum = sum(1 for word in note_words if word in original_words)
        return note_conf_sum / len(note_words)
    def _calculate_overall_confidence(self, original_text: str, ing_text: str) -> float:
        """
        Score how closely the ingredient text resembles the original text.

        The fuzzy token sort ratio is used so that differences in word order are accounted for;
        the ratio is divided by 100 before it is returned.
        """
        return fuzz.token_sort_ratio(original_text, ing_text) / 100.0
    def _calculate_confidence(self, original_text: str, ing: RecipeIngredient) -> IngredientConfidence:
        """
        Build the per-component confidence scores for a parsed ingredient.

        Quantity and note are scored by their dedicated helpers. Food and unit each score 1.0
        when present on the ingredient; when absent they take the fuzzy-match score between the
        original text and the ingredient's display text. The average is the mean of the four scores.
        """
        qty_score = self._calculate_qty_conf(original_text, ing.quantity)
        note_score = self._calculate_note_conf(original_text, ing.note)

        # Not every ingredient has a food and/or a unit, so a missing one
        # is scored with the overall text similarity instead.
        fallback_score = self._calculate_overall_confidence(original_text, ing.display)
        food_score = 1.0 if ing.food else fallback_score
        unit_score = 1.0 if ing.unit else fallback_score

        mean_score = (qty_score + unit_score + food_score + note_score) / 4
        return IngredientConfidence(
            average=mean_score,
            quantity=qty_score,
            unit=unit_score,
            food=food_score,
            comment=note_score,
        )
    def _convert_ingredient(self, original_text: str, openai_ing: OpenAIIngredient) -> ParsedIngredient:
        ingredient = RecipeIngredient(
-            original_text=openai_ing.input,
+            original_text=original_text,
            quantity=openai_ing.quantity,
            unit=CreateIngredientUnit(name=openai_ing.unit) if openai_ing.unit else None,
            food=CreateIngredientFood(name=openai_ing.food) if openai_ing.food else None,
@@ -26,8 +100,8 @@ class OpenAIParser(ABCIngredientParser):
        )
        parsed_ingredient = ParsedIngredient(
-            input=openai_ing.input,
+            input=original_text,
-            confidence=IngredientConfidence(average=openai_ing.confidence),
+            confidence=self._calculate_confidence(original_text, ingredient),
            ingredient=ingredient,
        )
@@ -53,7 +127,7 @@ class OpenAIParser(ABCIngredientParser):
                            "Below is a list of units found in the units database. While parsing, you should "
                            "reference this list when determining which part of the input is the unit. You may "
                            "find a unit in the input that does not exist in this list. This should not prevent "
-                            "you from parsing that text as a unit, however it may lower your confidence level."
+                            "you from parsing that text as a unit."
                        ),
                        value=list(set(self.data_matcher.units_by_alias)),
                    ),
@@ -107,4 +181,13 @@ class OpenAIParser(ABCIngredientParser):
    async def parse(self, ingredients: list[str]) -> list[ParsedIngredient]:
        """
        Parse the given ingredient strings and convert each result.

        Each returned ingredient is paired by position with the input string it came from.

        Raises:
            ValueError: If the number of returned ingredients differs from the number of inputs.
        """
        response = await self._parse(ingredients)

        # Results are matched to inputs by position, so a count mismatch cannot be paired up.
        if len(response.ingredients) != len(ingredients):
            raise ValueError(
                "OpenAI returned an unexpected number of ingredients. "
                f"Expected {len(ingredients)}, got {len(response.ingredients)}"
            )

        converted: list[ParsedIngredient] = []
        for original_text, openai_ing in zip(ingredients, response.ingredients, strict=True):
            converted.append(self._convert_ingredient(original_text, openai_ing))
        return converted
--- a/tests/unit_tests/test_ingredient_parser.py
+++ b/tests/unit_tests/test_ingredient_parser.py
@@ -464,8 +464,6 @@ def test_openai_parser(
        data = OpenAIIngredients(
            ingredients=[
                OpenAIIngredient(
                    input=input,
                    confidence=1,
                    quantity=random_int(0, 10),
                    unit=random_string(),
                    food=random_string(),
@@ -502,8 +500,6 @@ def test_openai_parser_sanitize_output(
        data = OpenAIIngredients(
            ingredients=[
                OpenAIIngredient(
                    input="there is a null character here: \x00",
                    confidence=1,
                    quantity=random_int(0, 10),
                    unit="",
                    food="there is a null character here: \x00",
@@ -522,8 +518,8 @@ def test_openai_parser_sanitize_output(
        parsed = loop.run_until_complete(parser.parse([""]))
        assert len(parsed) == 1
        parsed_ing = cast(ParsedIngredient, parsed[0])
-        assert parsed_ing.input
+        assert parsed_ing.ingredient.food
-        assert "\x00" not in parsed_ing.input
+        assert parsed_ing.ingredient.food.name == "there is a null character here: "
        # Make sure we can create a recipe with this ingredient
        assert isinstance(parsed_ing.ingredient.food, CreateIngredientFood)
@@ -539,3 +535,195 @@ def test_openai_parser_sanitize_output(
                recipe_ingredient=[parsed_ing.ingredient],
            )
        )
@pytest.mark.parametrize(
    "original_text,quantity,unit,food,note,qty_range,unit_range,food_range,note_range",
    [
        # Each case: parser inputs, then the (min, max) range expected for the
        # quantity, unit, food and note confidence, in that order.
        pytest.param(
            "2 cups flour", 2.0, "Cups", "flour", "",
            (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0),
            id="perfect_match_all_components",
        ),
        pytest.param(
            "2 cups flour", 3.0, "Cups", "flour", "",
            (0.0, 0.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0),
            id="quantity_mismatch",
        ),
        pytest.param(
            "2 cups flour", 2.0, None, "flour", "",
            (1.0, 1.0), (0.4, 0.9), (1.0, 1.0), (1.0, 1.0),
            id="missing_unit_fallback",
        ),
        pytest.param(
            "2 cups flour", 2.0, "Cups", None, "",
            (1.0, 1.0), (1.0, 1.0), (0.4, 0.9), (1.0, 1.0),
            id="missing_food_fallback",
        ),
        pytest.param(
            "2 cups flour sifted fresh", 2.0, "Cups", "flour", "sifted fresh",
            (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (0.8, 1.0),
            id="note_full_match",
        ),
        pytest.param(
            "2 cups flour sifted", 2.0, "Cups", "flour", "sifted chopped",
            (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (0.4, 0.6),
            id="note_partial_match",
        ),
        pytest.param(
            "2 cups flour", 2.0, "Cups", "flour", "chopped minced",
            (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (0.0, 0.0),
            id="note_no_match",
        ),
        pytest.param(
            "1.5 tsp salt kosher", 1.0, None, None, "kosher fine",
            (0.0, 0.0), (0.3, 0.7), (0.3, 0.7), (0.4, 0.6),
            id="multiple_issues",
        ),
        pytest.param(
            "", 1.0, "Cups", "flour", "fresh",
            (0.0, 0.0), (1.0, 1.0), (1.0, 1.0), (0.0, 0.0),
            id="empty_original_text",
        ),
        pytest.param(
            "salt", 0.0, None, "salt", "",
            (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0),
            id="zero_quantity_match",
        ),
    ],
)
def test_openai_parser_confidence(
    original_text: str,
    quantity: float | None,
    unit: str | None,
    food: str | None,
    note: str,
    qty_range: tuple[float, float],
    unit_range: tuple[float, float],
    food_range: tuple[float, float],
    note_range: tuple[float, float],
    unique_local_group_id: UUID4,
    parsed_ingredient_data: tuple[list[IngredientFood], list[IngredientUnit]],  # required so database is populated
):
    """Check each confidence component from OpenAIParser._calculate_confidence against its expected range."""
    with session_context() as session:
        from mealie.services.parser_services.openai.parser import OpenAIParser

        parser = cast(OpenAIParser, get_parser(RegisteredParser.openai, unique_local_group_id, session))

        # Build the ingredient as the parser would have produced it
        parsed_unit = CreateIngredientUnit(name=unit) if unit else None
        parsed_food = CreateIngredientFood(name=food) if food else None
        ingredient = RecipeIngredient(
            original_text=original_text,
            quantity=quantity,
            unit=parsed_unit,
            food=parsed_food,
            note=note or None,
        )

        confidence = parser._calculate_confidence(original_text, ingredient)

        # Every component must be filled in before any range is checked
        assert confidence.quantity is not None, "Quantity confidence should not be None"
        assert confidence.unit is not None, "Unit confidence should not be None"
        assert confidence.food is not None, "Food confidence should not be None"
        assert confidence.comment is not None, "Comment confidence should not be None"
        assert confidence.average is not None, "Average confidence should not be None"

        # Ranges (rather than exact values) absorb fuzzy matching variability
        assert qty_range[0] <= confidence.quantity <= qty_range[1], (
            f"Quantity confidence out of range: expected {qty_range}, got {confidence.quantity}"
        )
        assert unit_range[0] <= confidence.unit <= unit_range[1], (
            f"Unit confidence out of range: expected {unit_range}, got {confidence.unit}"
        )
        assert food_range[0] <= confidence.food <= food_range[1], (
            f"Food confidence out of range: expected {food_range}, got {confidence.food}"
        )
        assert note_range[0] <= confidence.comment <= note_range[1], (
            f"Note confidence out of range: expected {note_range}, got {confidence.comment}"
        )

        # The average must be the mean of the four component scores
        expected_avg = (confidence.quantity + confidence.unit + confidence.food + confidence.comment) / 4
        deviation = abs(confidence.average - expected_avg)
        assert deviation < 0.001, (
            f"Average confidence mismatch: expected {expected_avg}, got {confidence.average}"
        )