feat: Manually calculate OpenAI Parsing Confidence (#6141)

2025-10-27 16:24:31 -04:00 · 2025-09-19 23:09:34 -05:00
parent cb8c1423c5
commit a9090bc2bd
3 changed files with 284 additions and 48 deletions
--- a/mealie/schema/openai/recipe_ingredient.py
+++ b/mealie/schema/openai/recipe_ingredient.py
@@ -7,30 +7,6 @@ from ._base import OpenAIBase


 class OpenAIIngredient(OpenAIBase):
-    input: str = Field(
-        ...,
-        description=dedent(
-            """
-            The input is simply the ingredient string you are processing as-is. It is forbidden to
-            modify this at all, you must provide the input exactly as you received it.
-            """
-        ),
-    )
-    confidence: float | None = Field(
-        None,
-        description=dedent(
-            """
-            This value is a float between 0 - 100, where 100 is full confidence that the result is correct,
-            and 0 is no confidence that the result is correct. If you're unable to parse anything,
-            and you put the entire string in the notes, you should return 0 confidence. If you can easily
-            parse the string into each component, then you should return a confidence of 100. If you have to
-            guess which part is the unit and which part is the food, your confidence should be lower, such as 60.
-            Even if there is no unit or note, if you're able to determine the food, you may use a higher confidence.
-            If the entire ingredient consists of only a food, you can use a confidence of 100.
-            """
-        ),
-    )
-
    quantity: float | None = Field(
        0,
        description=dedent(
@@ -73,21 +49,10 @@ class OpenAIIngredient(OpenAIBase):
        ),
    )

-    @field_validator("confidence", "quantity", mode="before")
+    @field_validator("quantity", mode="before")
    def coerce_none_float(cls, v: Any) -> Any:
        return v or 0

-    @field_validator("confidence")
-    def validate_confidence(cls, v: float | None) -> float:
-        v = v or 0
-
-        if v < 0:
-            v = 0
-        elif v > 100:
-            v = 100
-
-        return v / 100
-

 class OpenAIIngredients(OpenAIBase):
    ingredients: list[OpenAIIngredient] = []
--- a/mealie/services/parser_services/openai/parser.py
+++ b/mealie/services/parser_services/openai/parser.py
@@ -2,6 +2,8 @@ import asyncio
 import json
 from collections.abc import Awaitable

+from rapidfuzz import fuzz
+
 from mealie.schema.openai.recipe_ingredient import OpenAIIngredient, OpenAIIngredients
 from mealie.schema.recipe.recipe_ingredient import (
    CreateIngredientFood,
@@ -13,12 +15,84 @@ from mealie.schema.recipe.recipe_ingredient import (
 from mealie.services.openai import OpenAIDataInjection, OpenAIService

 from .._base import ABCIngredientParser
+from ..parser_utils import extract_quantity_from_string


 class OpenAIParser(ABCIngredientParser):
-    def _convert_ingredient(self, openai_ing: OpenAIIngredient) -> ParsedIngredient:
+    def _calculate_qty_conf(self, original_text: str, parsed_qty: float | None) -> float:
+        """Return 1 if the parsed quantity equals the quantity extracted from the original text, else 0."""
+
+        expected_qty, _ = extract_quantity_from_string(original_text)
+        parsed_qty = parsed_qty or 0
+        if parsed_qty == expected_qty:
+            return 1
+        else:
+            return 0
+
+    def _calculate_note_conf(self, original_text: str, note: str | None) -> float:
+        """
+        Calculate confidence as the fraction of words in the note that are also found in the original text.
+        Words are lowercased and stripped of non-alphanumeric characters before matching.
+        """
+
+        if not note:
+            return 1
+
+        note_words: list[str] = []
+        for word in note.strip().lower().split():
+            clean_word = "".join(filter(str.isalnum, word))
+            if clean_word:
+                note_words.append(clean_word)
+
+        if not note_words:
+            return 1
+
+        original_words: list[str] = []
+        for word in original_text.strip().lower().split():
+            clean_word = "".join(filter(str.isalnum, word))
+            if clean_word:
+                original_words.append(clean_word)
+
+        note_conf_sum = sum(1 for word in note_words if word in original_words)
+        return note_conf_sum / len(note_words)
+
+    def _calculate_overall_confidence(self, original_text: str, ing_text: str) -> float:
+        """
+        Calculate overall confidence based on fuzzy matching between the original text and the ingredient text.
+        Uses token sort ratio to account for word order variations.
+        """
+
+        ratio = fuzz.token_sort_ratio(original_text, ing_text)
+        return ratio / 100.0
+
+    def _calculate_confidence(self, original_text: str, ing: RecipeIngredient) -> IngredientConfidence:
+        qty_conf = self._calculate_qty_conf(original_text, ing.quantity)
+        note_conf = self._calculate_note_conf(original_text, ing.note)
+
+        # Not all ingredients will have a food and/or unit,
+        # so if either is missing we fall back to overall confidence.
+        overall_confidence = self._calculate_overall_confidence(original_text, ing.display)
+        if ing.food:
+            food_conf = 1.0
+        else:
+            food_conf = overall_confidence
+
+        if ing.unit:
+            unit_conf = 1.0
+        else:
+            unit_conf = overall_confidence
+
+        return IngredientConfidence(
+            average=(qty_conf + unit_conf + food_conf + note_conf) / 4,
+            quantity=qty_conf,
+            unit=unit_conf,
+            food=food_conf,
+            comment=note_conf,
+        )
+
+    def _convert_ingredient(self, original_text: str, openai_ing: OpenAIIngredient) -> ParsedIngredient:
        ingredient = RecipeIngredient(
-            original_text=openai_ing.input,
+            original_text=original_text,
            quantity=openai_ing.quantity,
            unit=CreateIngredientUnit(name=openai_ing.unit) if openai_ing.unit else None,
            food=CreateIngredientFood(name=openai_ing.food) if openai_ing.food else None,
@@ -26,8 +100,8 @@ class OpenAIParser(ABCIngredientParser):
        )

        parsed_ingredient = ParsedIngredient(
-            input=openai_ing.input,
-            confidence=IngredientConfidence(average=openai_ing.confidence),
+            input=original_text,
+            confidence=self._calculate_confidence(original_text, ingredient),
            ingredient=ingredient,
        )

@@ -53,7 +127,7 @@ class OpenAIParser(ABCIngredientParser):
                            "Below is a list of units found in the units database. While parsing, you should "
                            "reference this list when determining which part of the input is the unit. You may "
                            "find a unit in the input that does not exist in this list. This should not prevent "
-                            "you from parsing that text as a unit, however it may lower your confidence level."
+                            "you from parsing that text as a unit."
                        ),
                        value=list(set(self.data_matcher.units_by_alias)),
                    ),
@@ -107,4 +181,13 @@ class OpenAIParser(ABCIngredientParser):

    async def parse(self, ingredients: list[str]) -> list[ParsedIngredient]:
        response = await self._parse(ingredients)
-        return [self._convert_ingredient(ing) for ing in response.ingredients]
+        if len(response.ingredients) != len(ingredients):
+            raise ValueError(
+                "OpenAI returned an unexpected number of ingredients. "
+                f"Expected {len(ingredients)}, got {len(response.ingredients)}"
+            )
+
+        return [
+            self._convert_ingredient(original_text, ing)
+            for original_text, ing in zip(ingredients, response.ingredients, strict=True)
+        ]
--- a/tests/unit_tests/test_ingredient_parser.py
+++ b/tests/unit_tests/test_ingredient_parser.py
@@ -464,8 +464,6 @@ def test_openai_parser(
        data = OpenAIIngredients(
            ingredients=[
                OpenAIIngredient(
-                    input=input,
-                    confidence=1,
                    quantity=random_int(0, 10),
                    unit=random_string(),
                    food=random_string(),
@@ -502,8 +500,6 @@ def test_openai_parser_sanitize_output(
        data = OpenAIIngredients(
            ingredients=[
                OpenAIIngredient(
-                    input="there is a null character here: \x00",
-                    confidence=1,
                    quantity=random_int(0, 10),
                    unit="",
                    food="there is a null character here: \x00",
@@ -522,8 +518,8 @@ def test_openai_parser_sanitize_output(
        parsed = loop.run_until_complete(parser.parse([""]))
        assert len(parsed) == 1
        parsed_ing = cast(ParsedIngredient, parsed[0])
-        assert parsed_ing.input
-        assert "\x00" not in parsed_ing.input
+        assert parsed_ing.ingredient.food
+        assert parsed_ing.ingredient.food.name == "there is a null character here: "

        # Make sure we can create a recipe with this ingredient
        assert isinstance(parsed_ing.ingredient.food, CreateIngredientFood)
@@ -539,3 +535,195 @@ def test_openai_parser_sanitize_output(
                recipe_ingredient=[parsed_ing.ingredient],
            )
        )
+
+
+@pytest.mark.parametrize(
+    "original_text,quantity,unit,food,note,qty_range,unit_range,food_range,note_range",
+    [
+        pytest.param(
+            "2 cups flour",
+            2.0,
+            "Cups",
+            "flour",
+            "",
+            (1.0, 1.0),
+            (1.0, 1.0),
+            (1.0, 1.0),
+            (1.0, 1.0),
+            id="perfect_match_all_components",
+        ),
+        pytest.param(
+            "2 cups flour",
+            3.0,
+            "Cups",
+            "flour",
+            "",
+            (0.0, 0.0),
+            (1.0, 1.0),
+            (1.0, 1.0),
+            (1.0, 1.0),
+            id="quantity_mismatch",
+        ),
+        pytest.param(
+            "2 cups flour",
+            2.0,
+            None,
+            "flour",
+            "",
+            (1.0, 1.0),
+            (0.4, 0.9),
+            (1.0, 1.0),
+            (1.0, 1.0),
+            id="missing_unit_fallback",
+        ),
+        pytest.param(
+            "2 cups flour",
+            2.0,
+            "Cups",
+            None,
+            "",
+            (1.0, 1.0),
+            (1.0, 1.0),
+            (0.4, 0.9),
+            (1.0, 1.0),
+            id="missing_food_fallback",
+        ),
+        pytest.param(
+            "2 cups flour sifted fresh",
+            2.0,
+            "Cups",
+            "flour",
+            "sifted fresh",
+            (1.0, 1.0),
+            (1.0, 1.0),
+            (1.0, 1.0),
+            (0.8, 1.0),
+            id="note_full_match",
+        ),
+        pytest.param(
+            "2 cups flour sifted",
+            2.0,
+            "Cups",
+            "flour",
+            "sifted chopped",
+            (1.0, 1.0),
+            (1.0, 1.0),
+            (1.0, 1.0),
+            (0.4, 0.6),
+            id="note_partial_match",
+        ),
+        pytest.param(
+            "2 cups flour",
+            2.0,
+            "Cups",
+            "flour",
+            "chopped minced",
+            (1.0, 1.0),
+            (1.0, 1.0),
+            (1.0, 1.0),
+            (0.0, 0.0),
+            id="note_no_match",
+        ),
+        pytest.param(
+            "1.5 tsp salt kosher",
+            1.0,
+            None,
+            None,
+            "kosher fine",
+            (0.0, 0.0),
+            (0.3, 0.7),
+            (0.3, 0.7),
+            (0.4, 0.6),
+            id="multiple_issues",
+        ),
+        pytest.param(
+            "",
+            1.0,
+            "Cups",
+            "flour",
+            "fresh",
+            (0.0, 0.0),
+            (1.0, 1.0),
+            (1.0, 1.0),
+            (0.0, 0.0),
+            id="empty_original_text",
+        ),
+        pytest.param(
+            "salt",
+            0.0,
+            None,
+            "salt",
+            "",
+            (1.0, 1.0),
+            (1.0, 1.0),
+            (1.0, 1.0),
+            (1.0, 1.0),
+            id="zero_quantity_match",
+        ),
+    ],
+)
+def test_openai_parser_confidence(
+    original_text: str,
+    quantity: float | None,
+    unit: str | None,
+    food: str | None,
+    note: str,
+    qty_range: tuple[float, float],
+    unit_range: tuple[float, float],
+    food_range: tuple[float, float],
+    note_range: tuple[float, float],
+    unique_local_group_id: UUID4,
+    parsed_ingredient_data: tuple[list[IngredientFood], list[IngredientUnit]],  # required so database is populated
+):
+    """Test the _calculate_confidence method of OpenAIParser with various input scenarios."""
+
+    with session_context() as session:
+        from mealie.services.parser_services.openai.parser import OpenAIParser
+
+        parser = cast(OpenAIParser, get_parser(RegisteredParser.openai, unique_local_group_id, session))
+
+        # Create test ingredient
+        ingredient = RecipeIngredient(
+            original_text=original_text,
+            quantity=quantity,
+            unit=CreateIngredientUnit(name=unit) if unit else None,
+            food=CreateIngredientFood(name=food) if food else None,
+            note=note if note else None,
+        )
+
+        # Calculate confidence
+        confidence = parser._calculate_confidence(original_text, ingredient)
+
+        # All confidence values should be populated (not None) by the method
+        assert confidence.quantity is not None, "Quantity confidence should not be None"
+        assert confidence.unit is not None, "Unit confidence should not be None"
+        assert confidence.food is not None, "Food confidence should not be None"
+        assert confidence.comment is not None, "Comment confidence should not be None"
+        assert confidence.average is not None, "Average confidence should not be None"
+
+        # Range-based assertions to handle fuzzy matching variability
+        qty_min, qty_max = qty_range
+        assert qty_min <= confidence.quantity <= qty_max, (
+            f"Quantity confidence out of range: expected {qty_range}, got {confidence.quantity}"
+        )
+
+        unit_min, unit_max = unit_range
+        assert unit_min <= confidence.unit <= unit_max, (
+            f"Unit confidence out of range: expected {unit_range}, got {confidence.unit}"
+        )
+
+        food_min, food_max = food_range
+        assert food_min <= confidence.food <= food_max, (
+            f"Food confidence out of range: expected {food_range}, got {confidence.food}"
+        )
+
+        note_min, note_max = note_range
+        assert note_min <= confidence.comment <= note_max, (
+            f"Note confidence out of range: expected {note_range}, got {confidence.comment}"
+        )
+
+        # Check that average is calculated correctly
+        expected_avg = (confidence.quantity + confidence.unit + confidence.food + confidence.comment) / 4
+        assert abs(confidence.average - expected_avg) < 0.001, (
+            f"Average confidence mismatch: expected {expected_avg}, got {confidence.average}"
+        )