BiteWiseFinal

Sleeping

App Files Files Community

BiteWiseFinal / services /ner.py

anaygupta

Update services/ner.py

c5e7cb1 verified 14 days ago

raw

history blame contribute delete

2.04 kB

	from __future__ import annotations

	from functools import lru_cache
	from typing import List

	from .text_utils import dedupe_preserve_order, normalize_text, strip_amounts_and_preps

	try:
	from ingredient_parser import parse_ingredient
	except Exception: # pragma: no cover
	parse_ingredient = None


	def _clean_fragment(fragment: str) -> str:
	    """Tidy one raw ingredient fragment before it is parsed.

	    Steps, in order: treat a falsy input as the empty string, trim
	    surrounding whitespace, drop leading list-marker characters
	    (``-``, ``•``, ``*``), remove one leading ``"and "`` and then one leading
	    ``"or "`` (case-sensitive), and finally hand the result to
	    ``strip_amounts_and_preps``.

	    Args:
	        fragment: Raw text for a single ingredient; may be empty or ``None``.

	    Returns:
	        Whatever ``strip_amounts_and_preps`` returns for the tidied text.
	    """
	    text = fragment if fragment else ""
	    # Trim first so the bullet characters are really at the start.
	    text = text.strip()
	    text = text.lstrip("-•*")
	    text = text.strip()
	    # Order matters: "and " is removed before "or ", once each.
	    for conjunction in ("and ", "or "):
	        text = text.removeprefix(conjunction)
	    text = text.strip()
	    return strip_amounts_and_preps(text)


	def _parsed_name(parsed) -> str:
	if parsed is None:
	return ""

	name = getattr(parsed, "name", None)
	if not name:
	return ""

	try:
	first = name[0]
	text = getattr(first, "text", "") or ""
	return normalize_text(text)
	except Exception:
	pass

	text = getattr(name, "text", "") or ""
	return normalize_text(text)


	@lru_cache(maxsize=4096)
	def parse_single_ingredient(fragment: str) -> str:
	    """Reduce one ingredient fragment to a normalized ingredient name.

	    The fragment is cleaned with ``_clean_fragment``. When the optional
	    ``parse_ingredient`` function is available, its result is tried first;
	    if that yields no name or raises, the cleaned fragment itself is
	    normalized and returned. Results are memoized per input fragment
	    (up to 4096 entries).

	    Args:
	        fragment: Raw text for a single ingredient.

	    Returns:
	        The parsed or normalized name, or ``""`` if nothing is left after
	        cleaning.
	    """
	    cleaned = _clean_fragment(fragment)
	    if not cleaned:
	        return ""

	    parsed_name = ""
	    if parse_ingredient is not None:
	        # Best effort: any parser failure falls back to the plain
	        # normalized fragment below.
	        try:
	            parsed_name = _parsed_name(parse_ingredient(cleaned))
	        except Exception:
	            parsed_name = ""

	    if parsed_name:
	        return parsed_name
	    return normalize_text(cleaned)


	def extract_ingredients(recipe_text: str, max_items: int = 48) -> List[str]:
	    """Extract a de-duplicated list of ingredient names from recipe text.

	    The text is split on commas; each piece goes through
	    ``parse_single_ingredient`` and ``normalize_text``. Empty results,
	    results shorter than two characters, and bare filler words
	    (``and``, ``or``, ``the``, ``a``, ``an``) are skipped.

	    Args:
	        recipe_text: Comma-separated ingredient text; may be empty or
	            ``None``.
	        max_items: Maximum number of distinct ingredients to collect.
	            A value of zero or less yields an empty list.

	    Returns:
	        At most ``max_items`` ingredient names in first-seen order, passed
	        through ``dedupe_preserve_order``.
	    """
	    text = (recipe_text or "").strip()
	    if not text or max_items <= 0:
	        return []

	    out: List[str] = []
	    seen = set()
	    for part in text.split(","):
	        candidate = normalize_text(parse_single_ingredient(part.strip()))

	        if not candidate or len(candidate) < 2:
	            continue
	        if candidate in {"and", "or", "the", "a", "an"}:
	            continue
	        # Skip repeats while collecting so duplicates do not consume the
	        # max_items budget and crowd out later, distinct ingredients.
	        if candidate in seen:
	            continue

	        seen.add(candidate)
	        out.append(candidate)
	        if len(out) >= max_items:
	            break

	    # Kept as the final pass in case the shared helper applies a looser
	    # notion of equality than the exact-match check above.
	    return dedupe_preserve_order(out)