feat: Manually calculate OpenAI Parsing Confidence (#6141)

2025-10-27 08:14:30 -04:00 · 2025-09-19 23:09:34 -05:00
parent cb8c1423c5
commit a9090bc2bd
3 changed files with 284 additions and 48 deletions
--- a/tests/unit_tests/test_ingredient_parser.py
+++ b/tests/unit_tests/test_ingredient_parser.py
@@ -464,8 +464,6 @@ def test_openai_parser(
        data = OpenAIIngredients(
            ingredients=[
                OpenAIIngredient(
-                    input=input,
-                    confidence=1,
                    quantity=random_int(0, 10),
                    unit=random_string(),
                    food=random_string(),
@@ -502,8 +500,6 @@ def test_openai_parser_sanitize_output(
        data = OpenAIIngredients(
            ingredients=[
                OpenAIIngredient(
-                    input="there is a null character here: \x00",
-                    confidence=1,
                    quantity=random_int(0, 10),
                    unit="",
                    food="there is a null character here: \x00",
@@ -522,8 +518,8 @@ def test_openai_parser_sanitize_output(
        parsed = loop.run_until_complete(parser.parse([""]))
        assert len(parsed) == 1
        parsed_ing = cast(ParsedIngredient, parsed[0])
-        assert parsed_ing.input
-        assert "\x00" not in parsed_ing.input
+        assert parsed_ing.ingredient.food
+        assert parsed_ing.ingredient.food.name == "there is a null character here: "

        # Make sure we can create a recipe with this ingredient
        assert isinstance(parsed_ing.ingredient.food, CreateIngredientFood)
@@ -539,3 +535,195 @@ def test_openai_parser_sanitize_output(
                recipe_ingredient=[parsed_ing.ingredient],
            )
        )
+
+
+@pytest.mark.parametrize(
+    "original_text,quantity,unit,food,note,qty_range,unit_range,food_range,note_range",
+    [  # each *_range is an inclusive (min, max) bound on the matching confidence component
+        pytest.param(
+            "2 cups flour",
+            2.0,
+            "Cups",
+            "flour",
+            "",
+            (1.0, 1.0),
+            (1.0, 1.0),
+            (1.0, 1.0),
+            (1.0, 1.0),
+            id="perfect_match_all_components",
+        ),
+        pytest.param(
+            "2 cups flour",
+            3.0,
+            "Cups",
+            "flour",
+            "",
+            (0.0, 0.0),
+            (1.0, 1.0),
+            (1.0, 1.0),
+            (1.0, 1.0),
+            id="quantity_mismatch",
+        ),
+        pytest.param(
+            "2 cups flour",
+            2.0,
+            None,
+            "flour",
+            "",
+            (1.0, 1.0),
+            (0.4, 0.9),
+            (1.0, 1.0),
+            (1.0, 1.0),
+            id="missing_unit_fallback",
+        ),
+        pytest.param(
+            "2 cups flour",
+            2.0,
+            "Cups",
+            None,
+            "",
+            (1.0, 1.0),
+            (1.0, 1.0),
+            (0.4, 0.9),
+            (1.0, 1.0),
+            id="missing_food_fallback",
+        ),
+        pytest.param(
+            "2 cups flour sifted fresh",
+            2.0,
+            "Cups",
+            "flour",
+            "sifted fresh",
+            (1.0, 1.0),
+            (1.0, 1.0),
+            (1.0, 1.0),
+            (0.8, 1.0),
+            id="note_full_match",
+        ),
+        pytest.param(
+            "2 cups flour sifted",
+            2.0,
+            "Cups",
+            "flour",
+            "sifted chopped",
+            (1.0, 1.0),
+            (1.0, 1.0),
+            (1.0, 1.0),
+            (0.4, 0.6),
+            id="note_partial_match",
+        ),
+        pytest.param(
+            "2 cups flour",
+            2.0,
+            "Cups",
+            "flour",
+            "chopped minced",
+            (1.0, 1.0),
+            (1.0, 1.0),
+            (1.0, 1.0),
+            (0.0, 0.0),
+            id="note_no_match",
+        ),
+        pytest.param(
+            "1.5 tsp salt kosher",
+            1.0,
+            None,
+            None,
+            "kosher fine",
+            (0.0, 0.0),
+            (0.3, 0.7),
+            (0.3, 0.7),
+            (0.4, 0.6),
+            id="multiple_issues",
+        ),
+        pytest.param(
+            "",
+            1.0,
+            "Cups",
+            "flour",
+            "fresh",
+            (0.0, 0.0),
+            (1.0, 1.0),
+            (1.0, 1.0),
+            (0.0, 0.0),
+            id="empty_original_text",
+        ),
+        pytest.param(
+            "salt",
+            0.0,
+            None,
+            "salt",
+            "",
+            (1.0, 1.0),
+            (1.0, 1.0),
+            (1.0, 1.0),
+            (1.0, 1.0),
+            id="zero_quantity_match",
+        ),
+    ],
+)
+def test_openai_parser_confidence(
+    original_text: str,
+    quantity: float | None,
+    unit: str | None,
+    food: str | None,
+    note: str,
+    qty_range: tuple[float, float],
+    unit_range: tuple[float, float],
+    food_range: tuple[float, float],
+    note_range: tuple[float, float],  # checked against confidence.comment below
+    unique_local_group_id: UUID4,
+    parsed_ingredient_data: tuple[list[IngredientFood], list[IngredientUnit]],  # required so database is populated
+):
+    """Check OpenAIParser._calculate_confidence: each component score must fall in its (min, max) range."""
+
+    with session_context() as session:
+        from mealie.services.parser_services.openai.parser import OpenAIParser
+
+        parser = cast(OpenAIParser, get_parser(RegisteredParser.openai, unique_local_group_id, session))
+
+        # Build the parsed ingredient to score; falsy unit/food/note parameters are passed as None
+        ingredient = RecipeIngredient(
+            original_text=original_text,
+            quantity=quantity,
+            unit=CreateIngredientUnit(name=unit) if unit else None,
+            food=CreateIngredientFood(name=food) if food else None,
+            note=note if note else None,
+        )
+
+        # Compute the confidence of the parsed ingredient against the original text
+        confidence = parser._calculate_confidence(original_text, ingredient)
+
+        # All confidence values should be populated (not None) by the method
+        assert confidence.quantity is not None, "Quantity confidence should not be None"
+        assert confidence.unit is not None, "Unit confidence should not be None"
+        assert confidence.food is not None, "Food confidence should not be None"
+        assert confidence.comment is not None, "Comment confidence should not be None"
+        assert confidence.average is not None, "Average confidence should not be None"
+
+        # Inclusive range assertions to handle fuzzy matching variability; exact cases use min == max
+        qty_min, qty_max = qty_range
+        assert qty_min <= confidence.quantity <= qty_max, (
+            f"Quantity confidence out of range: expected {qty_range}, got {confidence.quantity}"
+        )
+
+        unit_min, unit_max = unit_range
+        assert unit_min <= confidence.unit <= unit_max, (
+            f"Unit confidence out of range: expected {unit_range}, got {confidence.unit}"
+        )
+
+        food_min, food_max = food_range
+        assert food_min <= confidence.food <= food_max, (
+            f"Food confidence out of range: expected {food_range}, got {confidence.food}"
+        )
+
+        note_min, note_max = note_range
+        assert note_min <= confidence.comment <= note_max, (
+            f"Note confidence out of range: expected {note_range}, got {confidence.comment}"
+        )
+
+        # The average must equal the unweighted mean of the four component scores (tolerance 0.001)
+        expected_avg = (confidence.quantity + confidence.unit + confidence.food + confidence.comment) / 4
+        assert abs(confidence.average - expected_avg) < 0.001, (
+            f"Average confidence mismatch: expected {expected_avg}, got {confidence.average}"
+        )