mirror of
				https://github.com/mealie-recipes/mealie.git
				synced 2025-10-30 17:53:31 -04:00 
			
		
		
		
	feat: Manually calculate OpenAI Parsing Confidence (#6141)
This commit is contained in:
		| @@ -7,30 +7,6 @@ from ._base import OpenAIBase | ||||
|  | ||||
|  | ||||
| class OpenAIIngredient(OpenAIBase): | ||||
|     input: str = Field( | ||||
|         ..., | ||||
|         description=dedent( | ||||
|             """ | ||||
|             The input is simply the ingredient string you are processing as-is. It is forbidden to | ||||
|             modify this at all, you must provide the input exactly as you received it. | ||||
|             """ | ||||
|         ), | ||||
|     ) | ||||
|     confidence: float | None = Field( | ||||
|         None, | ||||
|         description=dedent( | ||||
|             """ | ||||
|             This value is a float between 0 - 100, where 100 is full confidence that the result is correct, | ||||
|             and 0 is no confidence that the result is correct. If you're unable to parse anything, | ||||
|             and you put the entire string in the notes, you should return 0 confidence. If you can easily | ||||
|             parse the string into each component, then you should return a confidence of 100. If you have to | ||||
|             guess which part is the unit and which part is the food, your confidence should be lower, such as 60. | ||||
|             Even if there is no unit or note, if you're able to determine the food, you may use a higher confidence. | ||||
|             If the entire ingredient consists of only a food, you can use a confidence of 100. | ||||
|             """ | ||||
|         ), | ||||
|     ) | ||||
|  | ||||
|     quantity: float | None = Field( | ||||
|         0, | ||||
|         description=dedent( | ||||
| @@ -73,21 +49,10 @@ class OpenAIIngredient(OpenAIBase): | ||||
|         ), | ||||
|     ) | ||||
|  | ||||
|     @field_validator("confidence", "quantity", mode="before") | ||||
|     @field_validator("quantity", mode="before") | ||||
|     def coerce_none_float(cls, v: Any) -> Any: | ||||
|         return v or 0 | ||||
|  | ||||
|     @field_validator("confidence") | ||||
|     def validate_confidence(cls, v: float | None) -> float: | ||||
|         v = v or 0 | ||||
|  | ||||
|         if v < 0: | ||||
|             v = 0 | ||||
|         elif v > 100: | ||||
|             v = 100 | ||||
|  | ||||
|         return v / 100 | ||||
|  | ||||
|  | ||||
| class OpenAIIngredients(OpenAIBase): | ||||
|     ingredients: list[OpenAIIngredient] = [] | ||||
|   | ||||
| @@ -2,6 +2,8 @@ import asyncio | ||||
| import json | ||||
| from collections.abc import Awaitable | ||||
|  | ||||
| from rapidfuzz import fuzz | ||||
|  | ||||
| from mealie.schema.openai.recipe_ingredient import OpenAIIngredient, OpenAIIngredients | ||||
| from mealie.schema.recipe.recipe_ingredient import ( | ||||
|     CreateIngredientFood, | ||||
| @@ -13,12 +15,84 @@ from mealie.schema.recipe.recipe_ingredient import ( | ||||
| from mealie.services.openai import OpenAIDataInjection, OpenAIService | ||||
|  | ||||
| from .._base import ABCIngredientParser | ||||
| from ..parser_utils import extract_quantity_from_string | ||||
|  | ||||
|  | ||||
| class OpenAIParser(ABCIngredientParser): | ||||
|     def _convert_ingredient(self, openai_ing: OpenAIIngredient) -> ParsedIngredient: | ||||
|     def _calculate_qty_conf(self, original_text: str, parsed_qty: float | None) -> float: | ||||
|         """Compares the extracted quantity to a brute-force parsed quantity.""" | ||||
|  | ||||
|         expected_qty, _ = extract_quantity_from_string(original_text) | ||||
|         parsed_qty = parsed_qty or 0 | ||||
|         if parsed_qty == expected_qty: | ||||
|             return 1 | ||||
|         else: | ||||
|             return 0 | ||||
|  | ||||
|     def _calculate_note_conf(self, original_text: str, note: str | None) -> float: | ||||
|         """ | ||||
|         Calculate confidence based on how many words in the note are found in the original text. | ||||
|         Uses alphanumeric filtering and lowercasing to improve matching. | ||||
|         """ | ||||
|  | ||||
|         if not note: | ||||
|             return 1 | ||||
|  | ||||
|         note_words: list[str] = [] | ||||
|         for word in note.strip().lower().split(): | ||||
|             clean_word = "".join(filter(str.isalnum, word)) | ||||
|             if clean_word: | ||||
|                 note_words.append(clean_word) | ||||
|  | ||||
|         if not note_words: | ||||
|             return 1 | ||||
|  | ||||
|         original_words: list[str] = [] | ||||
|         for word in original_text.strip().lower().split(): | ||||
|             clean_word = "".join(filter(str.isalnum, word)) | ||||
|             if clean_word: | ||||
|                 original_words.append(clean_word) | ||||
|  | ||||
|         note_conf_sum = sum(1 for word in note_words if word in original_words) | ||||
|         return note_conf_sum / len(note_words) | ||||
|  | ||||
|     def _calculate_overall_confidence(self, original_text: str, ing_text: str) -> float: | ||||
|         """ | ||||
|         Calculate overall confidence based on fuzzy matching between the original text and the ingredient text. | ||||
|         Uses token sort ratio to account for word order variations. | ||||
|         """ | ||||
|  | ||||
|         ratio = fuzz.token_sort_ratio(original_text, ing_text) | ||||
|         return ratio / 100.0 | ||||
|  | ||||
|     def _calculate_confidence(self, original_text: str, ing: RecipeIngredient) -> IngredientConfidence: | ||||
|         qty_conf = self._calculate_qty_conf(original_text, ing.quantity) | ||||
|         note_conf = self._calculate_note_conf(original_text, ing.note) | ||||
|  | ||||
|         # Not all ingredients will have a food and/or unit, | ||||
|         # so if either is missing we fall back to overall confidence. | ||||
|         overall_confidence = self._calculate_overall_confidence(original_text, ing.display) | ||||
|         if ing.food: | ||||
|             food_conf = 1.0 | ||||
|         else: | ||||
|             food_conf = overall_confidence | ||||
|  | ||||
|         if ing.unit: | ||||
|             unit_conf = 1.0 | ||||
|         else: | ||||
|             unit_conf = overall_confidence | ||||
|  | ||||
|         return IngredientConfidence( | ||||
|             average=(qty_conf + unit_conf + food_conf + note_conf) / 4, | ||||
|             quantity=qty_conf, | ||||
|             unit=unit_conf, | ||||
|             food=food_conf, | ||||
|             comment=note_conf, | ||||
|         ) | ||||
|  | ||||
|     def _convert_ingredient(self, original_text: str, openai_ing: OpenAIIngredient) -> ParsedIngredient: | ||||
|         ingredient = RecipeIngredient( | ||||
|             original_text=openai_ing.input, | ||||
|             original_text=original_text, | ||||
|             quantity=openai_ing.quantity, | ||||
|             unit=CreateIngredientUnit(name=openai_ing.unit) if openai_ing.unit else None, | ||||
|             food=CreateIngredientFood(name=openai_ing.food) if openai_ing.food else None, | ||||
| @@ -26,8 +100,8 @@ class OpenAIParser(ABCIngredientParser): | ||||
|         ) | ||||
|  | ||||
|         parsed_ingredient = ParsedIngredient( | ||||
|             input=openai_ing.input, | ||||
|             confidence=IngredientConfidence(average=openai_ing.confidence), | ||||
|             input=original_text, | ||||
|             confidence=self._calculate_confidence(original_text, ingredient), | ||||
|             ingredient=ingredient, | ||||
|         ) | ||||
|  | ||||
| @@ -53,7 +127,7 @@ class OpenAIParser(ABCIngredientParser): | ||||
|                             "Below is a list of units found in the units database. While parsing, you should " | ||||
|                             "reference this list when determining which part of the input is the unit. You may " | ||||
|                             "find a unit in the input that does not exist in this list. This should not prevent " | ||||
|                             "you from parsing that text as a unit, however it may lower your confidence level." | ||||
|                             "you from parsing that text as a unit." | ||||
|                         ), | ||||
|                         value=list(set(self.data_matcher.units_by_alias)), | ||||
|                     ), | ||||
| @@ -107,4 +181,13 @@ class OpenAIParser(ABCIngredientParser): | ||||
|  | ||||
|     async def parse(self, ingredients: list[str]) -> list[ParsedIngredient]: | ||||
|         response = await self._parse(ingredients) | ||||
|         return [self._convert_ingredient(ing) for ing in response.ingredients] | ||||
|         if len(response.ingredients) != len(ingredients): | ||||
|             raise ValueError( | ||||
|                 "OpenAI returned an unexpected number of ingredients. " | ||||
|                 f"Expected {len(ingredients)}, got {len(response.ingredients)}" | ||||
|             ) | ||||
|  | ||||
|         return [ | ||||
|             self._convert_ingredient(original_text, ing) | ||||
|             for original_text, ing in zip(ingredients, response.ingredients, strict=True) | ||||
|         ] | ||||
|   | ||||
| @@ -464,8 +464,6 @@ def test_openai_parser( | ||||
|         data = OpenAIIngredients( | ||||
|             ingredients=[ | ||||
|                 OpenAIIngredient( | ||||
|                     input=input, | ||||
|                     confidence=1, | ||||
|                     quantity=random_int(0, 10), | ||||
|                     unit=random_string(), | ||||
|                     food=random_string(), | ||||
| @@ -502,8 +500,6 @@ def test_openai_parser_sanitize_output( | ||||
|         data = OpenAIIngredients( | ||||
|             ingredients=[ | ||||
|                 OpenAIIngredient( | ||||
|                     input="there is a null character here: \x00", | ||||
|                     confidence=1, | ||||
|                     quantity=random_int(0, 10), | ||||
|                     unit="", | ||||
|                     food="there is a null character here: \x00", | ||||
| @@ -522,8 +518,8 @@ def test_openai_parser_sanitize_output( | ||||
|         parsed = loop.run_until_complete(parser.parse([""])) | ||||
|         assert len(parsed) == 1 | ||||
|         parsed_ing = cast(ParsedIngredient, parsed[0]) | ||||
|         assert parsed_ing.input | ||||
|         assert "\x00" not in parsed_ing.input | ||||
|         assert parsed_ing.ingredient.food | ||||
|         assert parsed_ing.ingredient.food.name == "there is a null character here: " | ||||
|  | ||||
|         # Make sure we can create a recipe with this ingredient | ||||
|         assert isinstance(parsed_ing.ingredient.food, CreateIngredientFood) | ||||
| @@ -539,3 +535,195 @@ def test_openai_parser_sanitize_output( | ||||
|                 recipe_ingredient=[parsed_ing.ingredient], | ||||
|             ) | ||||
|         ) | ||||
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "original_text,quantity,unit,food,note,qty_range,unit_range,food_range,note_range",
    [
        # Each row: original text, parsed quantity/unit/food/note, then the inclusive
        # (min, max) range expected for the quantity, unit, food and note confidences.
        pytest.param(
            "2 cups flour", 2.0, "Cups", "flour", "",
            (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0),
            id="perfect_match_all_components",
        ),
        pytest.param(
            "2 cups flour", 3.0, "Cups", "flour", "",
            (0.0, 0.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0),
            id="quantity_mismatch",
        ),
        pytest.param(
            "2 cups flour", 2.0, None, "flour", "",
            (1.0, 1.0), (0.4, 0.9), (1.0, 1.0), (1.0, 1.0),
            id="missing_unit_fallback",
        ),
        pytest.param(
            "2 cups flour", 2.0, "Cups", None, "",
            (1.0, 1.0), (1.0, 1.0), (0.4, 0.9), (1.0, 1.0),
            id="missing_food_fallback",
        ),
        pytest.param(
            "2 cups flour sifted fresh", 2.0, "Cups", "flour", "sifted fresh",
            (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (0.8, 1.0),
            id="note_full_match",
        ),
        pytest.param(
            "2 cups flour sifted", 2.0, "Cups", "flour", "sifted chopped",
            (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (0.4, 0.6),
            id="note_partial_match",
        ),
        pytest.param(
            "2 cups flour", 2.0, "Cups", "flour", "chopped minced",
            (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (0.0, 0.0),
            id="note_no_match",
        ),
        pytest.param(
            "1.5 tsp salt kosher", 1.0, None, None, "kosher fine",
            (0.0, 0.0), (0.3, 0.7), (0.3, 0.7), (0.4, 0.6),
            id="multiple_issues",
        ),
        pytest.param(
            "", 1.0, "Cups", "flour", "fresh",
            (0.0, 0.0), (1.0, 1.0), (1.0, 1.0), (0.0, 0.0),
            id="empty_original_text",
        ),
        pytest.param(
            "salt", 0.0, None, "salt", "",
            (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0),
            id="zero_quantity_match",
        ),
    ],
)
def test_openai_parser_confidence(
    original_text: str,
    quantity: float | None,
    unit: str | None,
    food: str | None,
    note: str,
    qty_range: tuple[float, float],
    unit_range: tuple[float, float],
    food_range: tuple[float, float],
    note_range: tuple[float, float],
    unique_local_group_id: UUID4,
    parsed_ingredient_data: tuple[list[IngredientFood], list[IngredientUnit]],  # required so database is populated
):
    """Exercise OpenAIParser._calculate_confidence across a range of parsed-ingredient scenarios."""

    with session_context() as session:
        from mealie.services.parser_services.openai.parser import OpenAIParser

        parser = cast(OpenAIParser, get_parser(RegisteredParser.openai, unique_local_group_id, session))

        # Build the ingredient as the parser would have produced it;
        # falsy unit/food/note values are passed through as None.
        ingredient = RecipeIngredient(
            original_text=original_text,
            quantity=quantity,
            unit=CreateIngredientUnit(name=unit) if unit else None,
            food=CreateIngredientFood(name=food) if food else None,
            note=note or None,
        )

        confidence = parser._calculate_confidence(original_text, ingredient)

        # Every component must be populated by the method.
        assert confidence.quantity is not None, "Quantity confidence should not be None"
        assert confidence.unit is not None, "Unit confidence should not be None"
        assert confidence.food is not None, "Food confidence should not be None"
        assert confidence.comment is not None, "Comment confidence should not be None"
        assert confidence.average is not None, "Average confidence should not be None"

        # Ranges rather than exact values, because the fuzzy-matching fallback is not pinned exactly.
        range_checks = (
            ("Quantity", qty_range, confidence.quantity),
            ("Unit", unit_range, confidence.unit),
            ("Food", food_range, confidence.food),
            ("Note", note_range, confidence.comment),
        )
        for label, bounds, actual in range_checks:
            lower, upper = bounds
            assert lower <= actual <= upper, f"{label} confidence out of range: expected {bounds}, got {actual}"

        # The average must be the plain mean of the four component scores.
        expected_avg = (confidence.quantity + confidence.unit + confidence.food + confidence.comment) / 4
        assert abs(confidence.average - expected_avg) < 0.001, (
            f"Average confidence mismatch: expected {expected_avg}, got {confidence.average}"
        )
|   | ||||
		Reference in New Issue
	
	Block a user