mirror of
				https://github.com/mealie-recipes/mealie.git
				synced 2025-10-30 17:53:31 -04:00 
			
		
		
		
	Better bruteforce parsing for units (#3066)
* try to match units when brute parsing and no amount is matched
* brute parser: better handle multiple-word food items

  Also checks the case when a food might have been split into a unit + ingredient
* fix formatting
* add test cases for ingredient parsing that don't start with an amount
* parametrized tests and added ingredient data fixture
* fixed group_id ref in tests
* fixed test inputs
* add extra tests for units as third token

---------

Co-authored-by: Michael Genson <71845777+michael-genson@users.noreply.github.com>
This commit is contained in:
		| @@ -132,7 +132,7 @@ def parse_ingredient(tokens) -> tuple[str, str]: | |||||||
|     return ingredient, note |     return ingredient, note | ||||||
|  |  | ||||||
|  |  | ||||||
| def parse(ing_str) -> BruteParsedIngredient: | def parse(ing_str, parser) -> BruteParsedIngredient: | ||||||
|     amount = 0.0 |     amount = 0.0 | ||||||
|     unit = "" |     unit = "" | ||||||
|     ingredient = "" |     ingredient = "" | ||||||
| @@ -192,12 +192,20 @@ def parse(ing_str) -> BruteParsedIngredient: | |||||||
|             # which means this is the ingredient |             # which means this is the ingredient | ||||||
|             ingredient = tokens[1] |             ingredient = tokens[1] | ||||||
|     except ValueError: |     except ValueError: | ||||||
|         try: |         # can't parse first argument as amount | ||||||
|             # can't parse first argument as amount |         # try to parse as unit and ingredient (e.g. "a tblsp salt"), with unit in first three tokens | ||||||
|             # -> no unit -> parse everything as ingredient |         # won't work for units that have spaces | ||||||
|             ingredient, note = parse_ingredient(tokens) |         for index, token in enumerate(tokens[:3]): | ||||||
|         except ValueError: |             if parser.find_unit_match(token): | ||||||
|             ingredient = " ".join(tokens[1:]) |                 unit = token | ||||||
|  |                 ingredient, note = parse_ingredient(tokens[index + 1 :]) | ||||||
|  |                 break | ||||||
|  |         if not unit: | ||||||
|  |             try: | ||||||
|  |                 # no unit -> parse everything as ingredient | ||||||
|  |                 ingredient, note = parse_ingredient(tokens) | ||||||
|  |             except ValueError: | ||||||
|  |                 ingredient = " ".join(tokens[1:]) | ||||||
|  |  | ||||||
|     if unit_note not in note: |     if unit_note not in note: | ||||||
|         note += " " + unit_note |         note += " " + unit_note | ||||||
|   | |||||||
| @@ -126,22 +126,24 @@ class ABCIngredientParser(ABC): | |||||||
|  |  | ||||||
|         return store_map[fuzz_result[0]] |         return store_map[fuzz_result[0]] | ||||||
|  |  | ||||||
|     def find_food_match(self, food: IngredientFood | CreateIngredientFood) -> IngredientFood | None: |     def find_food_match(self, food: IngredientFood | CreateIngredientFood | str) -> IngredientFood | None: | ||||||
|         if isinstance(food, IngredientFood): |         if isinstance(food, IngredientFood): | ||||||
|             return food |             return food | ||||||
|  |  | ||||||
|         match_value = IngredientFoodModel.normalize(food.name) |         food_name = food if isinstance(food, str) else food.name | ||||||
|  |         match_value = IngredientFoodModel.normalize(food_name) | ||||||
|         return self.find_match( |         return self.find_match( | ||||||
|             match_value, |             match_value, | ||||||
|             store_map=self.foods_by_alias, |             store_map=self.foods_by_alias, | ||||||
|             fuzzy_match_threshold=self.food_fuzzy_match_threshold, |             fuzzy_match_threshold=self.food_fuzzy_match_threshold, | ||||||
|         ) |         ) | ||||||
|  |  | ||||||
|     def find_unit_match(self, unit: IngredientUnit | CreateIngredientUnit) -> IngredientUnit | None: |     def find_unit_match(self, unit: IngredientUnit | CreateIngredientUnit | str) -> IngredientUnit | None: | ||||||
|         if isinstance(unit, IngredientUnit): |         if isinstance(unit, IngredientUnit): | ||||||
|             return unit |             return unit | ||||||
|  |  | ||||||
|         match_value = IngredientUnitModel.normalize(unit.name) |         unit_name = unit if isinstance(unit, str) else unit.name | ||||||
|  |         match_value = IngredientUnitModel.normalize(unit_name) | ||||||
|         return self.find_match( |         return self.find_match( | ||||||
|             match_value, |             match_value, | ||||||
|             store_map=self.units_by_alias, |             store_map=self.units_by_alias, | ||||||
| @@ -155,6 +157,16 @@ class ABCIngredientParser(ABC): | |||||||
|         if ingredient.ingredient.unit and (unit_match := self.find_unit_match(ingredient.ingredient.unit)): |         if ingredient.ingredient.unit and (unit_match := self.find_unit_match(ingredient.ingredient.unit)): | ||||||
|             ingredient.ingredient.unit = unit_match |             ingredient.ingredient.unit = unit_match | ||||||
|  |  | ||||||
|  |         # Parser might have wrongly split a food into a unit and food. | ||||||
|  |         if isinstance(ingredient.ingredient.food, CreateIngredientFood) and isinstance( | ||||||
|  |             ingredient.ingredient.unit, CreateIngredientUnit | ||||||
|  |         ): | ||||||
|  |             if food_match := self.find_food_match( | ||||||
|  |                 f"{ingredient.ingredient.unit.name} {ingredient.ingredient.food.name}" | ||||||
|  |             ): | ||||||
|  |                 ingredient.ingredient.food = food_match | ||||||
|  |                 ingredient.ingredient.unit = None | ||||||
|  |  | ||||||
|         return ingredient |         return ingredient | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -164,7 +176,7 @@ class BruteForceParser(ABCIngredientParser): | |||||||
|     """ |     """ | ||||||
|  |  | ||||||
|     def parse_one(self, ingredient: str) -> ParsedIngredient: |     def parse_one(self, ingredient: str) -> ParsedIngredient: | ||||||
|         bfi = brute.parse(ingredient) |         bfi = brute.parse(ingredient, self) | ||||||
|  |  | ||||||
|         parsed_ingredient = ParsedIngredient( |         parsed_ingredient = ParsedIngredient( | ||||||
|             input=ingredient, |             input=ingredient, | ||||||
|   | |||||||
| @@ -135,7 +135,7 @@ test_ingredients = [ | |||||||
|  |  | ||||||
|  |  | ||||||
| @pytest.mark.skipif(not crf_exists(), reason="CRF++ not installed") | @pytest.mark.skipif(not crf_exists(), reason="CRF++ not installed") | ||||||
| def test_nlp_parser(): | def test_nlp_parser() -> None: | ||||||
|     models: list[CRFIngredient] = convert_list_to_crf_model([x.input for x in test_ingredients]) |     models: list[CRFIngredient] = convert_list_to_crf_model([x.input for x in test_ingredients]) | ||||||
|  |  | ||||||
|     # Iterate over models and test_ingredients to gather |     # Iterate over models and test_ingredients to gather | ||||||
| @@ -147,37 +147,102 @@ def test_nlp_parser(): | |||||||
|         assert model.unit == test_ingredient.unit |         assert model.unit == test_ingredient.unit | ||||||
|  |  | ||||||
|  |  | ||||||
| def test_brute_parser(unique_user: TestUser): | @pytest.mark.parametrize( | ||||||
|     # input: (quantity, unit, food, comments) |     "input, quantity, unit, food, comment", | ||||||
|     expectations = { |     [ | ||||||
|         # Dutch |         pytest.param("1 theelepel koffie", 1, "theelepel", "koffie", "", id="1 theelepel koffie"), | ||||||
|         "1 theelepel koffie": (1, "theelepel", "koffie", ""), |         pytest.param("3 theelepels koffie", 3, "theelepels", "koffie", "", id="3 theelepels koffie"), | ||||||
|         "3 theelepels koffie": (3, "theelepels", "koffie", ""), |         pytest.param("1 eetlepel tarwe", 1, "eetlepel", "tarwe", "", id="1 eetlepel tarwe"), | ||||||
|         "1 eetlepel tarwe": (1, "eetlepel", "tarwe", ""), |         pytest.param("20 eetlepels bloem", 20, "eetlepels", "bloem", "", id="20 eetlepels bloem"), | ||||||
|         "20 eetlepels bloem": (20, "eetlepels", "bloem", ""), |         pytest.param("1 mespunt kaneel", 1, "mespunt", "kaneel", "", id="1 mespunt kaneel"), | ||||||
|         "1 mespunt kaneel": (1, "mespunt", "kaneel", ""), |         pytest.param("1 snuf(je) zout", 1, "snuf(je)", "zout", "", id="1 snuf(je) zout"), | ||||||
|         "1 snuf(je) zout": (1, "snuf(je)", "zout", ""), |         pytest.param( | ||||||
|         "2 tbsp minced cilantro, leaves and stems": (2, "tbsp", "minced cilantro", "leaves and stems"), |             "2 tbsp minced cilantro, leaves and stems", | ||||||
|         "1 large yellow onion, coarsely chopped": (1, "large", "yellow onion", "coarsely chopped"), |  | ||||||
|         "1 1/2 tsp garam masala": (1.5, "tsp", "garam masala", ""), |  | ||||||
|         "2 cups mango chunks, (2 large mangoes) (fresh or frozen)": ( |  | ||||||
|             2, |             2, | ||||||
|             "cups", |             "tbsp", | ||||||
|  |             "minced cilantro", | ||||||
|  |             "leaves and stems", | ||||||
|  |             id="2 tbsp minced cilantro, leaves and stems", | ||||||
|  |         ), | ||||||
|  |         pytest.param( | ||||||
|  |             "1 large yellow onion, coarsely chopped", | ||||||
|  |             1, | ||||||
|  |             "large", | ||||||
|  |             "yellow onion", | ||||||
|  |             "coarsely chopped", | ||||||
|  |             id="1 large yellow onion, coarsely chopped", | ||||||
|  |         ), | ||||||
|  |         pytest.param("1 1/2 tsp garam masala", 1.5, "tsp", "garam masala", "", id="1 1/2 tsp garam masala"), | ||||||
|  |         pytest.param( | ||||||
|  |             "2 cups mango chunks, (2 large mangoes) (fresh or frozen)", | ||||||
|  |             2, | ||||||
|  |             "Cups", | ||||||
|             "mango chunks, (2 large mangoes)", |             "mango chunks, (2 large mangoes)", | ||||||
|             "fresh or frozen", |             "fresh or frozen", | ||||||
|  |             id="2 cups mango chunks, (2 large mangoes) (fresh or frozen)", | ||||||
|         ), |         ), | ||||||
|     } |         pytest.param("stalk onion", 0, "Stalk", "onion", "", id="stalk onion"), | ||||||
|  |         pytest.param("a stalk bell peppers", 0, "Stalk", "bell peppers", "", id="a stalk bell peppers"), | ||||||
|  |         pytest.param("a tablespoon unknownFood", 0, "Tablespoon", "unknownFood", "", id="a tablespoon unknownFood"), | ||||||
|  |         pytest.param( | ||||||
|  |             "stalk bell peppers, cut in pieces", | ||||||
|  |             0, | ||||||
|  |             "Stalk", | ||||||
|  |             "bell peppers", | ||||||
|  |             "cut in pieces", | ||||||
|  |             id="stalk bell peppers, cut in pieces", | ||||||
|  |         ), | ||||||
|  |         pytest.param( | ||||||
|  |             "a stalk bell peppers, cut in pieces", | ||||||
|  |             0, | ||||||
|  |             "Stalk", | ||||||
|  |             "bell peppers", | ||||||
|  |             "cut in pieces", | ||||||
|  |             id="stalk bell peppers, cut in pieces", | ||||||
|  |         ), | ||||||
|  |         pytest.param("red pepper flakes", 0, "", "red pepper flakes", "", id="red pepper flakes"), | ||||||
|  |         pytest.param("1 red pepper flakes", 1, "", "red pepper flakes", "", id="1 red pepper flakes"), | ||||||
|  |         pytest.param("1 bell peppers", 1, "", "bell peppers", "", id="1 bell peppers"), | ||||||
|  |         pytest.param("1 stalk bell peppers", 1, "Stalk", "bell peppers", "", id="1 big stalk bell peppers"), | ||||||
|  |         pytest.param("a big stalk bell peppers", 0, "Stalk", "bell peppers", "", id="a big stalk bell peppers"), | ||||||
|  |         pytest.param( | ||||||
|  |             "1 bell peppers, cut in pieces", 1, "", "bell peppers", "cut in pieces", id="1 bell peppers, cut in pieces" | ||||||
|  |         ), | ||||||
|  |         pytest.param( | ||||||
|  |             "bell peppers, cut in pieces", 0, "", "bell peppers", "cut in pieces", id="bell peppers, cut in pieces" | ||||||
|  |         ), | ||||||
|  |     ], | ||||||
|  | ) | ||||||
|  | def test_brute_parser( | ||||||
|  |     unique_local_group_id: UUID4, | ||||||
|  |     parsed_ingredient_data: tuple[list[IngredientFood], list[IngredientUnit]],  # required so database is populated | ||||||
|  |     input: str, | ||||||
|  |     quantity: int | float, | ||||||
|  |     unit: str, | ||||||
|  |     food: str, | ||||||
|  |     comment: str, | ||||||
|  | ): | ||||||
|     with session_context() as session: |     with session_context() as session: | ||||||
|         parser = get_parser(RegisteredParser.brute, unique_user.group_id, session) |         parser = get_parser(RegisteredParser.brute, unique_local_group_id, session) | ||||||
|  |         parsed = parser.parse_one(input) | ||||||
|  |         ing = parsed.ingredient | ||||||
|  |  | ||||||
|         for key, val in expectations.items(): |         if ing.quantity: | ||||||
|             parsed = parser.parse_one(key) |             assert ing.quantity == quantity | ||||||
|  |         else: | ||||||
|             assert parsed.ingredient.quantity == val[0] |             assert not quantity | ||||||
|             assert parsed.ingredient.unit.name == val[1] |         if ing.unit: | ||||||
|             assert parsed.ingredient.food.name == val[2] |             assert ing.unit.name == unit | ||||||
|             assert parsed.ingredient.note in {val[3], None} |         else: | ||||||
|  |             assert not unit | ||||||
|  |         if ing.food: | ||||||
|  |             assert ing.food.name == food | ||||||
|  |         else: | ||||||
|  |             assert not food | ||||||
|  |         if ing.note: | ||||||
|  |             assert ing.note == comment | ||||||
|  |         else: | ||||||
|  |             assert not comment | ||||||
|  |  | ||||||
|  |  | ||||||
| @pytest.mark.parametrize( | @pytest.mark.parametrize( | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user