Spaces:

Gianone
/

smartplate

Running

App Files Files Community

smartplate / src /ml_model.py

Gianone

feat: deploy SmartPlate full pipeline (CV + ML + NLP)

c173dc3 about 1 month ago

Raw

History Blame Contribute Delete

5.66 kB

	"""
	Block 2: ML Health Classifier — USDA nutrition lookup + Logistic Regression scoring.
	"""

	from __future__ import annotations

	import logging
	from dataclasses import dataclass
	from pathlib import Path
	from typing import Any, Dict, Optional

	import numpy as np

	# Module-level logger, named after this module's import path.
	logger = logging.getLogger(__name__)

	# Directory two levels above this file (the project root when this file
	# lives in ``src/``).
	_PROJECT_ROOT = Path(__file__).parent.parent
	# Default location of the serialized model bundle; used when no explicit
	# ``model_path`` is passed to ``MLModel``.
	_DEFAULT_MODEL_PATH = _PROJECT_ROOT / "models" / "health_classifier.pkl"


	@dataclass
	class NutritionResult:
	    """Flat record of one food's nutrition values and health verdict.

	    Retained for backward compatibility with the test suite. The nutrient
	    attributes mirror the USDA-based nutritional values per 100g.
	    """

	    # Identification
	    food_label: str

	    # Nutrient values (per 100g)
	    energy_kcal: float
	    fat_g: float
	    saturated_fat_g: float
	    sugars_g: float
	    fiber_g: float
	    proteins_g: float
	    salt_g: float

	    # Health assessment
	    health_score: float
	    health_label: str

	    # Optional; defaults to None when not supplied.
	    nutriscore: Optional[str] = None


	class MLModel:
	    """USDA nutrition lookup + Logistic Regression health classifier.

	    The model bundle (``health_classifier.pkl``) contains:
	    - ``model``: fitted LogisticRegression
	    - ``scaler``: fitted StandardScaler
	    - ``label_encoder``: fitted LabelEncoder (healthy / medium / unhealthy)
	    - ``feature_cols``: list of 16 feature column names
	    - ``usda_nutrition``: dict of curated nutrition data per food class

	    The bundle is read from disk lazily, on the first call that needs it,
	    and then cached on the instance.

	    Args:
	        model_path: Override path to the ``.pkl`` bundle.
	    """

	    def __init__(self, model_path: Optional[str] = None) -> None:
	        self.model_path = Path(model_path) if model_path else _DEFAULT_MODEL_PATH
	        # Populated by ``_load`` on first use; ``None`` means "not loaded yet".
	        self._classifier: Optional[Dict[str, Any]] = None

	    def _load(self) -> None:
	        """Lazy-load the model bundle from disk using joblib.

	        Raises:
	            ImportError: If joblib is not installed.
	            FileNotFoundError: If ``self.model_path`` does not exist.
	        """
	        try:
	            import joblib
	        except ImportError as exc:
	            raise ImportError(
	                "joblib is required. Run: pip install joblib"
	            ) from exc

	        if not self.model_path.exists():
	            raise FileNotFoundError(
	                f"Model bundle not found: {self.model_path}. "
	                "Run notebook 03_ml_health_classifier.ipynb first."
	            )

	        logger.info("Loading ML model from %s ...", self.model_path)
	        # SECURITY: joblib.load unpickles the file, which can execute
	        # arbitrary code. Only point ``model_path`` at trusted bundles.
	        bundle = joblib.load(self.model_path)
	        logger.info(
	            "ML model loaded (type: %s, accuracy: %.4f)",
	            bundle.get("model_type"),
	            bundle.get("test_accuracy", 0),
	        )
	        # Cache only after the bundle has been read and inspected, so a
	        # failure above never leaves a half-initialised object behind.
	        self._classifier = bundle

	    def _build_features(self, nutrition: Dict[str, float]) -> np.ndarray:
	        """Compute 16 model features from 8 base USDA nutrients.

	        Feature engineering mirrors Notebook 03 (cell 8 / cell 17).

	        Args:
	            nutrition: Mapping with the keys ``kcal``, ``fat``, ``sat_fat``,
	                ``carbs``, ``sugar``, ``fiber``, ``protein`` and ``salt``.

	        Returns:
	            A ``(1, n_features)`` array whose columns follow the bundle's
	            ``feature_cols`` order.

	        Raises:
	            KeyError: If a base nutrient is missing from ``nutrition`` or
	                ``feature_cols`` names an unknown feature.
	        """
	        # The column order lives in the bundle, so make sure it is loaded
	        # even when this helper is called before ``predict``.
	        if self._classifier is None:
	            self._load()

	        kcal = nutrition["kcal"]
	        fat = nutrition["fat"]
	        sat_fat = nutrition["sat_fat"]
	        carbs = nutrition["carbs"]
	        sugar = nutrition["sugar"]
	        fiber = nutrition["fiber"]
	        protein = nutrition["protein"]
	        salt = nutrition["salt"]

	        # The 1e-6 terms guard the ratios against division by zero.
	        feature_map: Dict[str, float] = {
	            "kcal": kcal,
	            "fat": fat,
	            "sat_fat": sat_fat,
	            "carbs": carbs,
	            "sugar": sugar,
	            "fiber": fiber,
	            "protein": protein,
	            "salt": salt,
	            "sugar_to_carb_ratio": sugar / (carbs + 1e-6),
	            "sat_fat_pct_of_fat": sat_fat / (fat + 1e-6),
	            "calorie_density": kcal / 100,
	            "protein_to_kcal": protein * 4 / (kcal + 1e-6),
	            "fiber_to_carb_ratio": fiber / (carbs + 1e-6),
	            "high_sugar": float(sugar > 15),
	            "high_salt": float(salt > 1.5),
	            "high_sat_fat": float(sat_fat > 5),
	        }

	        cols = self._classifier["feature_cols"]
	        return np.array([[feature_map[c] for c in cols]], dtype=float)

	    def predict(self, food_class: str) -> Dict[str, Any]:
	        """Look up USDA nutrition and predict health label.

	        Args:
	            food_class: Food class name as returned by CVModel (e.g. ``"pizza"``).
	                Matching is case-insensitive; surrounding whitespace is
	                ignored and spaces/hyphens are treated as underscores.

	        Returns:
	            {
	                "food_class": str,
	                "nutrition": {"kcal": float, "fat": float, "sat_fat": float,
	                              "carbs": float, "sugar": float, "fiber": float,
	                              "protein": float, "salt": float},
	                "health_label": str,  # "healthy" | "medium" | "unhealthy"
	                "probabilities": {"healthy": float, "medium": float, "unhealthy": float}
	            }

	            ``nutrition`` is a copy, so callers may modify it freely.

	        Raises:
	            ValueError: If ``food_class`` is not in the USDA nutrition table.
	            FileNotFoundError: If the model bundle is missing.
	        """
	        if self._classifier is None:
	            self._load()

	        bundle = self._classifier
	        usda: Dict[str, Dict[str, float]] = bundle["usda_nutrition"]

	        normalized = (
	            food_class.strip().lower().replace(" ", "_").replace("-", "_")
	        )
	        if normalized not in usda:
	            raise ValueError(
	                f"Unknown food class: '{food_class}'. "
	                f"Supported: {sorted(usda.keys())}"
	            )

	        nutrition = usda[normalized]
	        X = self._build_features(nutrition)
	        X_scaled = bundle["scaler"].transform(X)

	        model = bundle["model"]
	        pred_idx = model.predict(X_scaled)[0]
	        proba = model.predict_proba(X_scaled)[0]

	        le = bundle["label_encoder"]
	        health_label = str(le.inverse_transform([pred_idx])[0])

	        # predict_proba columns follow the model's own ``classes_``, which
	        # can be a subset of the encoder's classes; map through them so each
	        # probability gets the right name. Fall back to the encoder order
	        # when the model does not expose ``classes_``.
	        model_classes = getattr(model, "classes_", None)
	        if model_classes is None:
	            class_names = le.classes_
	        else:
	            class_names = le.inverse_transform(model_classes)
	        probabilities = {
	            str(name): round(float(p), 4)
	            for name, p in zip(class_names, proba)
	        }

	        return {
	            "food_class": normalized,
	            # Copy so callers cannot mutate the cached USDA table.
	            "nutrition": dict(nutrition),
	            "health_label": health_label,
	            "probabilities": probabilities,
	        }