mirror of
https://github.com/mealie-recipes/mealie.git
synced 2026-01-26 10:33:12 -05:00
Better bruteforce parsing for units (#3066)
* try to match units when brute parsing and no amount is matched
* brute parser: better handle multiple-word food items. Also checks the case when a food might have been split into a unit + ingredient
* fix formatting
* add test cases for parsing ingredients that don't start with an amount
* parametrized tests and added ingredient data fixture
* fixed group_id ref in tests
* fixed test inputs
* add extra tests for units as third token

---------

Co-authored-by: Michael Genson <71845777+michael-genson@users.noreply.github.com>
This commit is contained in:
@@ -132,7 +132,7 @@ def parse_ingredient(tokens) -> tuple[str, str]:
|
||||
return ingredient, note
|
||||
|
||||
|
||||
def parse(ing_str) -> BruteParsedIngredient:
|
||||
def parse(ing_str, parser) -> BruteParsedIngredient:
|
||||
amount = 0.0
|
||||
unit = ""
|
||||
ingredient = ""
|
||||
@@ -192,12 +192,20 @@ def parse(ing_str) -> BruteParsedIngredient:
|
||||
# which means this is the ingredient
|
||||
ingredient = tokens[1]
|
||||
except ValueError:
|
||||
try:
|
||||
# can't parse first argument as amount
|
||||
# -> no unit -> parse everything as ingredient
|
||||
ingredient, note = parse_ingredient(tokens)
|
||||
except ValueError:
|
||||
ingredient = " ".join(tokens[1:])
|
||||
# can't parse first argument as amount
|
||||
# try to parse as unit and ingredient (e.g. "a tblsp salt"), with unit in first three tokens
|
||||
# won't work for units that have spaces
|
||||
for index, token in enumerate(tokens[:3]):
|
||||
if parser.find_unit_match(token):
|
||||
unit = token
|
||||
ingredient, note = parse_ingredient(tokens[index + 1 :])
|
||||
break
|
||||
if not unit:
|
||||
try:
|
||||
# no unit -> parse everything as ingredient
|
||||
ingredient, note = parse_ingredient(tokens)
|
||||
except ValueError:
|
||||
ingredient = " ".join(tokens[1:])
|
||||
|
||||
if unit_note not in note:
|
||||
note += " " + unit_note
|
||||
|
||||
@@ -126,22 +126,24 @@ class ABCIngredientParser(ABC):
|
||||
|
||||
return store_map[fuzz_result[0]]
|
||||
|
||||
def find_food_match(self, food: IngredientFood | CreateIngredientFood) -> IngredientFood | None:
|
||||
def find_food_match(self, food: IngredientFood | CreateIngredientFood | str) -> IngredientFood | None:
|
||||
if isinstance(food, IngredientFood):
|
||||
return food
|
||||
|
||||
match_value = IngredientFoodModel.normalize(food.name)
|
||||
food_name = food if isinstance(food, str) else food.name
|
||||
match_value = IngredientFoodModel.normalize(food_name)
|
||||
return self.find_match(
|
||||
match_value,
|
||||
store_map=self.foods_by_alias,
|
||||
fuzzy_match_threshold=self.food_fuzzy_match_threshold,
|
||||
)
|
||||
|
||||
def find_unit_match(self, unit: IngredientUnit | CreateIngredientUnit) -> IngredientUnit | None:
|
||||
def find_unit_match(self, unit: IngredientUnit | CreateIngredientUnit | str) -> IngredientUnit | None:
|
||||
if isinstance(unit, IngredientUnit):
|
||||
return unit
|
||||
|
||||
match_value = IngredientUnitModel.normalize(unit.name)
|
||||
unit_name = unit if isinstance(unit, str) else unit.name
|
||||
match_value = IngredientUnitModel.normalize(unit_name)
|
||||
return self.find_match(
|
||||
match_value,
|
||||
store_map=self.units_by_alias,
|
||||
@@ -155,6 +157,16 @@ class ABCIngredientParser(ABC):
|
||||
if ingredient.ingredient.unit and (unit_match := self.find_unit_match(ingredient.ingredient.unit)):
|
||||
ingredient.ingredient.unit = unit_match
|
||||
|
||||
# Parser might have wrongly split a food into a unit and food.
|
||||
if isinstance(ingredient.ingredient.food, CreateIngredientFood) and isinstance(
|
||||
ingredient.ingredient.unit, CreateIngredientUnit
|
||||
):
|
||||
if food_match := self.find_food_match(
|
||||
f"{ingredient.ingredient.unit.name} {ingredient.ingredient.food.name}"
|
||||
):
|
||||
ingredient.ingredient.food = food_match
|
||||
ingredient.ingredient.unit = None
|
||||
|
||||
return ingredient
|
||||
|
||||
|
||||
@@ -164,7 +176,7 @@ class BruteForceParser(ABCIngredientParser):
|
||||
"""
|
||||
|
||||
def parse_one(self, ingredient: str) -> ParsedIngredient:
|
||||
bfi = brute.parse(ingredient)
|
||||
bfi = brute.parse(ingredient, self)
|
||||
|
||||
parsed_ingredient = ParsedIngredient(
|
||||
input=ingredient,
|
||||
|
||||
Reference in New Issue
Block a user