mirror of
				https://github.com/mealie-recipes/mealie.git
				synced 2025-10-31 02:03:35 -04:00 
			
		
		
		
	feat: Manually calculate OpenAI Parsing Confidence (#6141)
This commit is contained in:
		| @@ -7,30 +7,6 @@ from ._base import OpenAIBase | |||||||
|  |  | ||||||
|  |  | ||||||
| class OpenAIIngredient(OpenAIBase): | class OpenAIIngredient(OpenAIBase): | ||||||
|     input: str = Field( |  | ||||||
|         ..., |  | ||||||
|         description=dedent( |  | ||||||
|             """ |  | ||||||
|             The input is simply the ingredient string you are processing as-is. It is forbidden to |  | ||||||
|             modify this at all, you must provide the input exactly as you received it. |  | ||||||
|             """ |  | ||||||
|         ), |  | ||||||
|     ) |  | ||||||
|     confidence: float | None = Field( |  | ||||||
|         None, |  | ||||||
|         description=dedent( |  | ||||||
|             """ |  | ||||||
|             This value is a float between 0 - 100, where 100 is full confidence that the result is correct, |  | ||||||
|             and 0 is no confidence that the result is correct. If you're unable to parse anything, |  | ||||||
|             and you put the entire string in the notes, you should return 0 confidence. If you can easily |  | ||||||
|             parse the string into each component, then you should return a confidence of 100. If you have to |  | ||||||
|             guess which part is the unit and which part is the food, your confidence should be lower, such as 60. |  | ||||||
|             Even if there is no unit or note, if you're able to determine the food, you may use a higher confidence. |  | ||||||
|             If the entire ingredient consists of only a food, you can use a confidence of 100. |  | ||||||
|             """ |  | ||||||
|         ), |  | ||||||
|     ) |  | ||||||
|  |  | ||||||
|     quantity: float | None = Field( |     quantity: float | None = Field( | ||||||
|         0, |         0, | ||||||
|         description=dedent( |         description=dedent( | ||||||
| @@ -73,21 +49,10 @@ class OpenAIIngredient(OpenAIBase): | |||||||
|         ), |         ), | ||||||
|     ) |     ) | ||||||
|  |  | ||||||
|     @field_validator("confidence", "quantity", mode="before") |     @field_validator("quantity", mode="before") | ||||||
|     def coerce_none_float(cls, v: Any) -> Any: |     def coerce_none_float(cls, v: Any) -> Any: | ||||||
|         return v or 0 |         return v or 0 | ||||||
|  |  | ||||||
|     @field_validator("confidence") |  | ||||||
|     def validate_confidence(cls, v: float | None) -> float: |  | ||||||
|         v = v or 0 |  | ||||||
|  |  | ||||||
|         if v < 0: |  | ||||||
|             v = 0 |  | ||||||
|         elif v > 100: |  | ||||||
|             v = 100 |  | ||||||
|  |  | ||||||
|         return v / 100 |  | ||||||
|  |  | ||||||
|  |  | ||||||
| class OpenAIIngredients(OpenAIBase): | class OpenAIIngredients(OpenAIBase): | ||||||
|     ingredients: list[OpenAIIngredient] = [] |     ingredients: list[OpenAIIngredient] = [] | ||||||
|   | |||||||
| @@ -2,6 +2,8 @@ import asyncio | |||||||
| import json | import json | ||||||
| from collections.abc import Awaitable | from collections.abc import Awaitable | ||||||
|  |  | ||||||
|  | from rapidfuzz import fuzz | ||||||
|  |  | ||||||
| from mealie.schema.openai.recipe_ingredient import OpenAIIngredient, OpenAIIngredients | from mealie.schema.openai.recipe_ingredient import OpenAIIngredient, OpenAIIngredients | ||||||
| from mealie.schema.recipe.recipe_ingredient import ( | from mealie.schema.recipe.recipe_ingredient import ( | ||||||
|     CreateIngredientFood, |     CreateIngredientFood, | ||||||
| @@ -13,12 +15,84 @@ from mealie.schema.recipe.recipe_ingredient import ( | |||||||
| from mealie.services.openai import OpenAIDataInjection, OpenAIService | from mealie.services.openai import OpenAIDataInjection, OpenAIService | ||||||
|  |  | ||||||
| from .._base import ABCIngredientParser | from .._base import ABCIngredientParser | ||||||
|  | from ..parser_utils import extract_quantity_from_string | ||||||
|  |  | ||||||
|  |  | ||||||
| class OpenAIParser(ABCIngredientParser): | class OpenAIParser(ABCIngredientParser): | ||||||
|     def _convert_ingredient(self, openai_ing: OpenAIIngredient) -> ParsedIngredient: |     def _calculate_qty_conf(self, original_text: str, parsed_qty: float | None) -> float: | ||||||
|  |         """Compares the extracted quantity to a brute-force parsed quantity.""" | ||||||
|  |  | ||||||
|  |         expected_qty, _ = extract_quantity_from_string(original_text) | ||||||
|  |         parsed_qty = parsed_qty or 0 | ||||||
|  |         if parsed_qty == expected_qty: | ||||||
|  |             return 1 | ||||||
|  |         else: | ||||||
|  |             return 0 | ||||||
|  |  | ||||||
|  |     def _calculate_note_conf(self, original_text: str, note: str | None) -> float: | ||||||
|  |         """ | ||||||
|  |         Calculate confidence based on how many words in the note are found in the original text. | ||||||
|  |         Uses alphanumeric filtering and lowercasing to improve matching. | ||||||
|  |         """ | ||||||
|  |  | ||||||
|  |         if not note: | ||||||
|  |             return 1 | ||||||
|  |  | ||||||
|  |         note_words: list[str] = [] | ||||||
|  |         for word in note.strip().lower().split(): | ||||||
|  |             clean_word = "".join(filter(str.isalnum, word)) | ||||||
|  |             if clean_word: | ||||||
|  |                 note_words.append(clean_word) | ||||||
|  |  | ||||||
|  |         if not note_words: | ||||||
|  |             return 1 | ||||||
|  |  | ||||||
|  |         original_words: list[str] = [] | ||||||
|  |         for word in original_text.strip().lower().split(): | ||||||
|  |             clean_word = "".join(filter(str.isalnum, word)) | ||||||
|  |             if clean_word: | ||||||
|  |                 original_words.append(clean_word) | ||||||
|  |  | ||||||
|  |         note_conf_sum = sum(1 for word in note_words if word in original_words) | ||||||
|  |         return note_conf_sum / len(note_words) | ||||||
|  |  | ||||||
|  |     def _calculate_overall_confidence(self, original_text: str, ing_text: str) -> float: | ||||||
|  |         """ | ||||||
|  |         Calculate overall confidence based on fuzzy matching between the original text and the ingredient text. | ||||||
|  |         Uses token sort ratio to account for word order variations. | ||||||
|  |         """ | ||||||
|  |  | ||||||
|  |         ratio = fuzz.token_sort_ratio(original_text, ing_text) | ||||||
|  |         return ratio / 100.0 | ||||||
|  |  | ||||||
|  |     def _calculate_confidence(self, original_text: str, ing: RecipeIngredient) -> IngredientConfidence: | ||||||
|  |         qty_conf = self._calculate_qty_conf(original_text, ing.quantity) | ||||||
|  |         note_conf = self._calculate_note_conf(original_text, ing.note) | ||||||
|  |  | ||||||
|  |         # Not all ingredients will have a food and/or unit, | ||||||
|  |         # so if either is missing we fall back to overall confidence. | ||||||
|  |         overall_confidence = self._calculate_overall_confidence(original_text, ing.display) | ||||||
|  |         if ing.food: | ||||||
|  |             food_conf = 1.0 | ||||||
|  |         else: | ||||||
|  |             food_conf = overall_confidence | ||||||
|  |  | ||||||
|  |         if ing.unit: | ||||||
|  |             unit_conf = 1.0 | ||||||
|  |         else: | ||||||
|  |             unit_conf = overall_confidence | ||||||
|  |  | ||||||
|  |         return IngredientConfidence( | ||||||
|  |             average=(qty_conf + unit_conf + food_conf + note_conf) / 4, | ||||||
|  |             quantity=qty_conf, | ||||||
|  |             unit=unit_conf, | ||||||
|  |             food=food_conf, | ||||||
|  |             comment=note_conf, | ||||||
|  |         ) | ||||||
|  |  | ||||||
|  |     def _convert_ingredient(self, original_text: str, openai_ing: OpenAIIngredient) -> ParsedIngredient: | ||||||
|         ingredient = RecipeIngredient( |         ingredient = RecipeIngredient( | ||||||
|             original_text=openai_ing.input, |             original_text=original_text, | ||||||
|             quantity=openai_ing.quantity, |             quantity=openai_ing.quantity, | ||||||
|             unit=CreateIngredientUnit(name=openai_ing.unit) if openai_ing.unit else None, |             unit=CreateIngredientUnit(name=openai_ing.unit) if openai_ing.unit else None, | ||||||
|             food=CreateIngredientFood(name=openai_ing.food) if openai_ing.food else None, |             food=CreateIngredientFood(name=openai_ing.food) if openai_ing.food else None, | ||||||
| @@ -26,8 +100,8 @@ class OpenAIParser(ABCIngredientParser): | |||||||
|         ) |         ) | ||||||
|  |  | ||||||
|         parsed_ingredient = ParsedIngredient( |         parsed_ingredient = ParsedIngredient( | ||||||
|             input=openai_ing.input, |             input=original_text, | ||||||
|             confidence=IngredientConfidence(average=openai_ing.confidence), |             confidence=self._calculate_confidence(original_text, ingredient), | ||||||
|             ingredient=ingredient, |             ingredient=ingredient, | ||||||
|         ) |         ) | ||||||
|  |  | ||||||
| @@ -53,7 +127,7 @@ class OpenAIParser(ABCIngredientParser): | |||||||
|                             "Below is a list of units found in the units database. While parsing, you should " |                             "Below is a list of units found in the units database. While parsing, you should " | ||||||
|                             "reference this list when determining which part of the input is the unit. You may " |                             "reference this list when determining which part of the input is the unit. You may " | ||||||
|                             "find a unit in the input that does not exist in this list. This should not prevent " |                             "find a unit in the input that does not exist in this list. This should not prevent " | ||||||
|                             "you from parsing that text as a unit, however it may lower your confidence level." |                             "you from parsing that text as a unit." | ||||||
|                         ), |                         ), | ||||||
|                         value=list(set(self.data_matcher.units_by_alias)), |                         value=list(set(self.data_matcher.units_by_alias)), | ||||||
|                     ), |                     ), | ||||||
| @@ -107,4 +181,13 @@ class OpenAIParser(ABCIngredientParser): | |||||||
|  |  | ||||||
|     async def parse(self, ingredients: list[str]) -> list[ParsedIngredient]: |     async def parse(self, ingredients: list[str]) -> list[ParsedIngredient]: | ||||||
|         response = await self._parse(ingredients) |         response = await self._parse(ingredients) | ||||||
|         return [self._convert_ingredient(ing) for ing in response.ingredients] |         if len(response.ingredients) != len(ingredients): | ||||||
|  |             raise ValueError( | ||||||
|  |                 "OpenAI returned an unexpected number of ingredients. " | ||||||
|  |                 f"Expected {len(ingredients)}, got {len(response.ingredients)}" | ||||||
|  |             ) | ||||||
|  |  | ||||||
|  |         return [ | ||||||
|  |             self._convert_ingredient(original_text, ing) | ||||||
|  |             for original_text, ing in zip(ingredients, response.ingredients, strict=True) | ||||||
|  |         ] | ||||||
|   | |||||||
| @@ -464,8 +464,6 @@ def test_openai_parser( | |||||||
|         data = OpenAIIngredients( |         data = OpenAIIngredients( | ||||||
|             ingredients=[ |             ingredients=[ | ||||||
|                 OpenAIIngredient( |                 OpenAIIngredient( | ||||||
|                     input=input, |  | ||||||
|                     confidence=1, |  | ||||||
|                     quantity=random_int(0, 10), |                     quantity=random_int(0, 10), | ||||||
|                     unit=random_string(), |                     unit=random_string(), | ||||||
|                     food=random_string(), |                     food=random_string(), | ||||||
| @@ -502,8 +500,6 @@ def test_openai_parser_sanitize_output( | |||||||
|         data = OpenAIIngredients( |         data = OpenAIIngredients( | ||||||
|             ingredients=[ |             ingredients=[ | ||||||
|                 OpenAIIngredient( |                 OpenAIIngredient( | ||||||
|                     input="there is a null character here: \x00", |  | ||||||
|                     confidence=1, |  | ||||||
|                     quantity=random_int(0, 10), |                     quantity=random_int(0, 10), | ||||||
|                     unit="", |                     unit="", | ||||||
|                     food="there is a null character here: \x00", |                     food="there is a null character here: \x00", | ||||||
| @@ -522,8 +518,8 @@ def test_openai_parser_sanitize_output( | |||||||
|         parsed = loop.run_until_complete(parser.parse([""])) |         parsed = loop.run_until_complete(parser.parse([""])) | ||||||
|         assert len(parsed) == 1 |         assert len(parsed) == 1 | ||||||
|         parsed_ing = cast(ParsedIngredient, parsed[0]) |         parsed_ing = cast(ParsedIngredient, parsed[0]) | ||||||
|         assert parsed_ing.input |         assert parsed_ing.ingredient.food | ||||||
|         assert "\x00" not in parsed_ing.input |         assert parsed_ing.ingredient.food.name == "there is a null character here: " | ||||||
|  |  | ||||||
|         # Make sure we can create a recipe with this ingredient |         # Make sure we can create a recipe with this ingredient | ||||||
|         assert isinstance(parsed_ing.ingredient.food, CreateIngredientFood) |         assert isinstance(parsed_ing.ingredient.food, CreateIngredientFood) | ||||||
| @@ -539,3 +535,195 @@ def test_openai_parser_sanitize_output( | |||||||
|                 recipe_ingredient=[parsed_ing.ingredient], |                 recipe_ingredient=[parsed_ing.ingredient], | ||||||
|             ) |             ) | ||||||
|         ) |         ) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | @pytest.mark.parametrize( | ||||||
|  |     "original_text,quantity,unit,food,note,qty_range,unit_range,food_range,note_range", | ||||||
|  |     [ | ||||||
|  |         pytest.param( | ||||||
|  |             "2 cups flour", | ||||||
|  |             2.0, | ||||||
|  |             "Cups", | ||||||
|  |             "flour", | ||||||
|  |             "", | ||||||
|  |             (1.0, 1.0), | ||||||
|  |             (1.0, 1.0), | ||||||
|  |             (1.0, 1.0), | ||||||
|  |             (1.0, 1.0), | ||||||
|  |             id="perfect_match_all_components", | ||||||
|  |         ), | ||||||
|  |         pytest.param( | ||||||
|  |             "2 cups flour", | ||||||
|  |             3.0, | ||||||
|  |             "Cups", | ||||||
|  |             "flour", | ||||||
|  |             "", | ||||||
|  |             (0.0, 0.0), | ||||||
|  |             (1.0, 1.0), | ||||||
|  |             (1.0, 1.0), | ||||||
|  |             (1.0, 1.0), | ||||||
|  |             id="quantity_mismatch", | ||||||
|  |         ), | ||||||
|  |         pytest.param( | ||||||
|  |             "2 cups flour", | ||||||
|  |             2.0, | ||||||
|  |             None, | ||||||
|  |             "flour", | ||||||
|  |             "", | ||||||
|  |             (1.0, 1.0), | ||||||
|  |             (0.4, 0.9), | ||||||
|  |             (1.0, 1.0), | ||||||
|  |             (1.0, 1.0), | ||||||
|  |             id="missing_unit_fallback", | ||||||
|  |         ), | ||||||
|  |         pytest.param( | ||||||
|  |             "2 cups flour", | ||||||
|  |             2.0, | ||||||
|  |             "Cups", | ||||||
|  |             None, | ||||||
|  |             "", | ||||||
|  |             (1.0, 1.0), | ||||||
|  |             (1.0, 1.0), | ||||||
|  |             (0.4, 0.9), | ||||||
|  |             (1.0, 1.0), | ||||||
|  |             id="missing_food_fallback", | ||||||
|  |         ), | ||||||
|  |         pytest.param( | ||||||
|  |             "2 cups flour sifted fresh", | ||||||
|  |             2.0, | ||||||
|  |             "Cups", | ||||||
|  |             "flour", | ||||||
|  |             "sifted fresh", | ||||||
|  |             (1.0, 1.0), | ||||||
|  |             (1.0, 1.0), | ||||||
|  |             (1.0, 1.0), | ||||||
|  |             (0.8, 1.0), | ||||||
|  |             id="note_full_match", | ||||||
|  |         ), | ||||||
|  |         pytest.param( | ||||||
|  |             "2 cups flour sifted", | ||||||
|  |             2.0, | ||||||
|  |             "Cups", | ||||||
|  |             "flour", | ||||||
|  |             "sifted chopped", | ||||||
|  |             (1.0, 1.0), | ||||||
|  |             (1.0, 1.0), | ||||||
|  |             (1.0, 1.0), | ||||||
|  |             (0.4, 0.6), | ||||||
|  |             id="note_partial_match", | ||||||
|  |         ), | ||||||
|  |         pytest.param( | ||||||
|  |             "2 cups flour", | ||||||
|  |             2.0, | ||||||
|  |             "Cups", | ||||||
|  |             "flour", | ||||||
|  |             "chopped minced", | ||||||
|  |             (1.0, 1.0), | ||||||
|  |             (1.0, 1.0), | ||||||
|  |             (1.0, 1.0), | ||||||
|  |             (0.0, 0.0), | ||||||
|  |             id="note_no_match", | ||||||
|  |         ), | ||||||
|  |         pytest.param( | ||||||
|  |             "1.5 tsp salt kosher", | ||||||
|  |             1.0, | ||||||
|  |             None, | ||||||
|  |             None, | ||||||
|  |             "kosher fine", | ||||||
|  |             (0.0, 0.0), | ||||||
|  |             (0.3, 0.7), | ||||||
|  |             (0.3, 0.7), | ||||||
|  |             (0.4, 0.6), | ||||||
|  |             id="multiple_issues", | ||||||
|  |         ), | ||||||
|  |         pytest.param( | ||||||
|  |             "", | ||||||
|  |             1.0, | ||||||
|  |             "Cups", | ||||||
|  |             "flour", | ||||||
|  |             "fresh", | ||||||
|  |             (0.0, 0.0), | ||||||
|  |             (1.0, 1.0), | ||||||
|  |             (1.0, 1.0), | ||||||
|  |             (0.0, 0.0), | ||||||
|  |             id="empty_original_text", | ||||||
|  |         ), | ||||||
|  |         pytest.param( | ||||||
|  |             "salt", | ||||||
|  |             0.0, | ||||||
|  |             None, | ||||||
|  |             "salt", | ||||||
|  |             "", | ||||||
|  |             (1.0, 1.0), | ||||||
|  |             (1.0, 1.0), | ||||||
|  |             (1.0, 1.0), | ||||||
|  |             (1.0, 1.0), | ||||||
|  |             id="zero_quantity_match", | ||||||
|  |         ), | ||||||
|  |     ], | ||||||
|  | ) | ||||||
|  | def test_openai_parser_confidence( | ||||||
|  |     original_text: str, | ||||||
|  |     quantity: float | None, | ||||||
|  |     unit: str | None, | ||||||
|  |     food: str | None, | ||||||
|  |     note: str, | ||||||
|  |     qty_range: tuple[float, float], | ||||||
|  |     unit_range: tuple[float, float], | ||||||
|  |     food_range: tuple[float, float], | ||||||
|  |     note_range: tuple[float, float], | ||||||
|  |     unique_local_group_id: UUID4, | ||||||
|  |     parsed_ingredient_data: tuple[list[IngredientFood], list[IngredientUnit]],  # required so database is populated | ||||||
|  | ): | ||||||
|  |     """Test the _calculate_confidence method of OpenAIParser with various input scenarios.""" | ||||||
|  |  | ||||||
|  |     with session_context() as session: | ||||||
|  |         from mealie.services.parser_services.openai.parser import OpenAIParser | ||||||
|  |  | ||||||
|  |         parser = cast(OpenAIParser, get_parser(RegisteredParser.openai, unique_local_group_id, session)) | ||||||
|  |  | ||||||
|  |         # Create test ingredient | ||||||
|  |         ingredient = RecipeIngredient( | ||||||
|  |             original_text=original_text, | ||||||
|  |             quantity=quantity, | ||||||
|  |             unit=CreateIngredientUnit(name=unit) if unit else None, | ||||||
|  |             food=CreateIngredientFood(name=food) if food else None, | ||||||
|  |             note=note if note else None, | ||||||
|  |         ) | ||||||
|  |  | ||||||
|  |         # Calculate confidence | ||||||
|  |         confidence = parser._calculate_confidence(original_text, ingredient) | ||||||
|  |  | ||||||
|  |         # All confidence values should be populated (not None) by the method | ||||||
|  |         assert confidence.quantity is not None, "Quantity confidence should not be None" | ||||||
|  |         assert confidence.unit is not None, "Unit confidence should not be None" | ||||||
|  |         assert confidence.food is not None, "Food confidence should not be None" | ||||||
|  |         assert confidence.comment is not None, "Comment confidence should not be None" | ||||||
|  |         assert confidence.average is not None, "Average confidence should not be None" | ||||||
|  |  | ||||||
|  |         # Range-based assertions to handle fuzzy matching variability | ||||||
|  |         qty_min, qty_max = qty_range | ||||||
|  |         assert qty_min <= confidence.quantity <= qty_max, ( | ||||||
|  |             f"Quantity confidence out of range: expected {qty_range}, got {confidence.quantity}" | ||||||
|  |         ) | ||||||
|  |  | ||||||
|  |         unit_min, unit_max = unit_range | ||||||
|  |         assert unit_min <= confidence.unit <= unit_max, ( | ||||||
|  |             f"Unit confidence out of range: expected {unit_range}, got {confidence.unit}" | ||||||
|  |         ) | ||||||
|  |  | ||||||
|  |         food_min, food_max = food_range | ||||||
|  |         assert food_min <= confidence.food <= food_max, ( | ||||||
|  |             f"Food confidence out of range: expected {food_range}, got {confidence.food}" | ||||||
|  |         ) | ||||||
|  |  | ||||||
|  |         note_min, note_max = note_range | ||||||
|  |         assert note_min <= confidence.comment <= note_max, ( | ||||||
|  |             f"Note confidence out of range: expected {note_range}, got {confidence.comment}" | ||||||
|  |         ) | ||||||
|  |  | ||||||
|  |         # Check that average is calculated correctly | ||||||
|  |         expected_avg = (confidence.quantity + confidence.unit + confidence.food + confidence.comment) / 4 | ||||||
|  |         assert abs(confidence.average - expected_avg) < 0.001, ( | ||||||
|  |             f"Average confidence mismatch: expected {expected_avg}, got {confidence.average}" | ||||||
|  |         ) | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user