BiteWiseFinal

Sleeping

App Files Files Community

BiteWiseFinal / services /semantic.py

anaygupta

Update services/semantic.py

244053a verified 14 days ago

raw

history blame contribute delete

3.22 kB

	from __future__ import annotations

	from dataclasses import dataclass
	from pathlib import Path
	from typing import List, Tuple

	from .text_utils import normalize_text, singularize, ingredient_lookup_variants

	# gensim is an optional dependency. If importing its downloader fails for any
	# reason, ``api`` is bound to None so the rest of the module can detect that
	# and run without word vectors.
	try:
	    import gensim.downloader as api
	except Exception:  # pragma: no cover
	    api = None


	# Model name used when the caller does not supply one.
	DEFAULT_MODEL = "glove-wiki-gigaword-50"


	@dataclass
	class SemanticCandidate:
	    """A candidate term paired with a numeric score."""

	    # The candidate term text.
	    term: str
	    # Score associated with the term.
	    score: float


	class WordVectorFallback:
	    """Best-effort wrapper around a gensim word-vector model for similar-term lookup.

	    Construction does not raise when gensim is missing or the model cannot be
	    loaded; in those cases ``available`` is ``False`` and ``most_similar``
	    returns an empty list.

	    Attributes:
	        model_name: Name passed to ``api.load`` (``DEFAULT_MODEL`` when falsy).
	        model_path: Optional local path; see ``_load`` for how it is used.
	        enable_download: Whether ``api.load`` may be used when no local path exists.
	        available: ``True`` once a model object has been loaded.
	    """

	    def __init__(self, model_name: str = DEFAULT_MODEL, model_path: str | None = None, enable_download: bool = True):
	        """Store the configuration and immediately attempt to load a model."""
	        self.model_name = model_name or DEFAULT_MODEL
	        self.model_path = model_path
	        self.enable_download = enable_download
	        self.available = False
	        # One of "disabled", "unavailable" (gensim not importable) or "glove".
	        self._kind = "disabled"
	        self._model = None

	        self._load()

	    @staticmethod
	    def _load_local(path: Path):
	        """Try to read word vectors from the file at *path*.

	        Returns the loaded model, or ``None`` when *path* is not a regular file
	        or cannot be read in either of the two attempted formats.
	        """
	        if not path.is_file():
	            return None
	        try:
	            from gensim.models import KeyedVectors
	        except Exception:
	            return None

	        # NOTE(review): KeyedVectors.load reads gensim's native save format,
	        # which is pickle-based as far as I know; only point model_path at
	        # trusted files.
	        loaders = (
	            lambda: KeyedVectors.load(str(path)),
	            lambda: KeyedVectors.load_word2vec_format(
	                str(path), binary=path.suffix.lower() == ".bin"
	            ),
	        )
	        for loader in loaders:
	            try:
	                return loader()
	            except Exception:
	                # Wrong format for this loader; try the next one.
	                continue
	        return None

	    def _load(self) -> None:
	        """Populate ``_model``, ``available`` and ``_kind``; never raises.

	        Order of attempts:
	          1. If ``model_path`` exists, read the vectors from it. If that is not
	             possible, fall back to ``api.load(model_name)``.
	          2. Otherwise use ``api.load(model_name)`` only when
	             ``enable_download`` is true.
	        """
	        if api is None:
	            # The gensim downloader could not be imported.
	            self.available = False
	            self._kind = "unavailable"
	            return

	        try:
	            model = None
	            local_path = Path(self.model_path) if self.model_path else None
	            if local_path is not None and local_path.exists():
	                # Use the local file when it can be read.
	                model = self._load_local(local_path)
	                if model is None:
	                    # Permissive fallback: an existing path that is not a
	                    # readable vector file (e.g. a directory) still loads by
	                    # name, regardless of enable_download.
	                    model = api.load(self.model_name)
	            elif self.enable_download:
	                model = api.load(self.model_name)

	            self._model = model
	            self.available = model is not None
	            # The kind is reported as "glove" for any loaded model.
	            self._kind = "glove" if self.available else "disabled"
	        except Exception:
	            # Loading is best effort: any failure leaves the fallback disabled.
	            self._model = None
	            self.available = False
	            self._kind = "disabled"

	    def _normalize_candidate(self, term: str) -> str:
	        """Return *term* passed through ``normalize_text`` and then ``singularize``."""
	        term = normalize_text(term)
	        term = singularize(term)
	        return term

	    def most_similar(self, ingredient: str, topn: int = 10) -> List[Tuple[str, float]]:
	        """Return up to *topn* ``(term, score)`` pairs similar to *ingredient*.

	        Returns an empty list when *topn* is not positive, no model is loaded,
	        the normalized query is empty, or the model lookup raises. Fewer than
	        *topn* pairs may be returned because candidates are de-duplicated and
	        filtered after the lookup.
	        """
	        if topn <= 0 or not self.available or self._model is None:
	            return []

	        query = self._normalize_candidate(ingredient)
	        if not query:
	            return []

	        try:
	            # Request at least 10 neighbours so the filtering below still has
	            # candidates to choose from when topn is small.
	            raw = self._model.most_similar(query, topn=max(topn, 10))
	        except Exception:
	            # Best effort: for example, the query may be unknown to the model.
	            return []

	        # Loop-invariant, so computed once.
	        query_parts = set(query.split())

	        out: List[Tuple[str, float]] = []
	        seen = set()
	        for term, score in raw:
	            term = self._normalize_candidate(term.replace("_", " "))
	            if not term or term in seen:
	                continue
	            seen.add(term)

	            # A candidate sharing no word with the query is kept only when
	            # ingredient_lookup_variants returns something non-empty for it
	            # (presumably meaning it resembles a known ingredient; confirm in
	            # text_utils).
	            term_parts = set(term.split())
	            if query_parts and term_parts and not (query_parts & term_parts):
	                if not ingredient_lookup_variants(term):
	                    continue

	            out.append((term, float(score)))
	            if len(out) >= topn:
	                break

	        return out