|
|
|
|
|
|
|
|
import json
import os
import sys

import cv2
import gradio as gr
import numpy as np
import torch
from PIL import Image
from ultralytics import YOLO

# The depth_pro package is imported from a local source directory, so that
# directory has to be on sys.path before the import below.
sys.path.append("ml-depth-pro/src")

import depth_pro  # noqa: E402  (must follow the sys.path tweak above)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Pick the torch device once at start-up: CUDA when available, CPU otherwise.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"[INFO] Using device: {device}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Create the Depth Pro model and its preprocessing transform, move the model
# to the selected device and put it in evaluation mode.
print("[INFO] Loading Depth Pro model...")
dp_model, dp_transform = depth_pro.create_model_and_transforms()
# Module.to() and Module.eval() both return the module itself, so the chained
# call leaves dp_model bound to the same object as two separate statements.
dp_model = dp_model.to(device).eval()
print("[INFO] Depth Pro ready.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Location of the YOLO segmentation weights.  The YOLO_MODEL_PATH environment
# variable takes precedence so the demo can run on machines other than the
# one the default absolute path was written for; when the variable is unset
# or empty the original default is used unchanged.
YOLO_MODEL_PATH = os.environ.get("YOLO_MODEL_PATH") or (
    r"C:\Users\monol\Desktop\Senior_demo\ml-depth-pro\model\yolo-seg.pt"
)
print(f"[INFO] Loading YOLO model from: {YOLO_MODEL_PATH}")
yolo_model = YOLO(YOLO_MODEL_PATH)
print("[INFO] YOLO ready.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _load_class_table(path, label):
    """Load a JSON list of records and index it by each record's "class" key.

    Args:
        path: JSON file to read (resolved against the current working
            directory when relative).
        label: Word used in the success message, e.g. "preset".

    Returns:
        ``(records, by_class)`` where ``records`` is the decoded JSON value
        and ``by_class`` maps ``record["class"]`` to the record.  On any
        failure a warning is printed and ``([], {})`` is returned.
    """
    try:
        with open(path, "r", encoding="utf-8") as f:
            records = json.load(f)
        by_class = {item["class"]: item for item in records}
        print(f"[INFO] Loaded {len(records)} {label} entries.")
    except Exception as e:
        # Deliberately broad: a missing or malformed data file must not stop
        # the demo from starting; the affected table is simply left empty.
        print(f"[WARN] Could not load {path}:", e)
        return [], {}
    return records, by_class


# Reference portions per dish class (and, optionally, their ingredients).
PRESET_LIST, PRESET_BY_CLASS = _load_class_table("presetdata.json", "preset")

# Nutrition facts per class / ingredient name.
NUTRITION_LIST, NUTR_BY_CLASS = _load_class_table("nutrition_data.json", "nutrition")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def make_depth_vis(depth: np.ndarray) -> np.ndarray:
    """Render a depth map as an INFERNO-coloured RGB image.

    The finite depths are normalised between their 1st and 99th percentile
    (values outside that range are clipped) before the colour map is applied.

    Args:
        depth: HxW array of depths (meters); may contain NaN or +/-inf.

    Returns:
        HxWx3 uint8 RGB image.  All black when ``depth`` has no finite value.
        Pixels without a finite depth are drawn with the colour of the low
        end of the normalised range.
    """
    # Work on a float copy so integer inputs are accepted and the caller's
    # array is never modified.
    d = np.array(depth, dtype=np.float64)
    finite = np.isfinite(d)

    if not finite.any():
        return np.zeros((*d.shape, 3), dtype=np.uint8)

    d_min, d_max = np.percentile(d[finite], [1, 99])
    if d_max <= d_min:
        # Flat depth map: avoid a zero-width normalisation range.
        d_max = d_min + 1e-6

    # Only finite pixels are normalised.  Casting NaN to uint8 is undefined
    # (and warns), so non-finite pixels are explicitly left at 0 instead.
    d_norm = np.zeros_like(d)
    d_norm[finite] = np.clip((d[finite] - d_min) / (d_max - d_min), 0.0, 1.0)
    d_uint8 = (d_norm * 255).astype(np.uint8)

    # OpenCV colour maps produce BGR; convert to RGB for display.
    depth_color_bgr = cv2.applyColorMap(d_uint8, cv2.COLORMAP_INFERNO)
    return cv2.cvtColor(depth_color_bgr, cv2.COLOR_BGR2RGB)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def estimate_portion_for_class(cls_name, area_in_pct, z_in_m, default_z_in=None):
    """Estimate the portion (grams) of one class from its preset reference.

    The reference mass is scaled by the area ratio and by the squared depth
    ratio::

        mass = mass_ref * (area_in_pct / area_ref) * (z_in_m / z_ref) ** 2

    Args:
        cls_name: Class name used to look up the entry in ``PRESET_BY_CLASS``.
        area_in_pct: Area of the class as a percentage of the image (0-100).
        z_in_m: Median depth of the class (meters).
        default_z_in: Depth used when ``z_in_m`` is missing, non-numeric,
            non-finite or not positive.

    Returns:
        A dict with the estimate and the values it was derived from, or
        ``None`` when the inputs or the preset entry are unusable.
    """
    # Validate the measured area first; a None / NaN / negative area cannot
    # produce a meaningful estimate.
    try:
        area_in = float(area_in_pct)
    except (TypeError, ValueError):
        return None
    if not np.isfinite(area_in) or area_in < 0:
        return None

    preset = PRESET_BY_CLASS.get(cls_name)
    if not preset:
        return None

    try:
        mass_ref = float(preset["portion"])
        area_ref = float(preset["mask_region"])
        z_ref = float(preset["center_depth"])
    except (KeyError, ValueError, TypeError):
        return None

    # "not x > 0" also rejects NaN, which a plain "x <= 0" would let through.
    if not np.isfinite(mass_ref) or not area_ref > 0 or not z_ref > 0:
        return None
    if not np.isfinite(area_ref) or not np.isfinite(z_ref):
        return None

    # Use the first usable depth: the measured one, then the fallback.
    z_in = None
    for candidate in (z_in_m, default_z_in):
        try:
            z_value = float(candidate)
        except (TypeError, ValueError):
            continue
        if np.isfinite(z_value) and z_value > 0:
            z_in = z_value
            break
    if z_in is None:
        return None

    mass_in = mass_ref * (area_in / area_ref) * (z_in / z_ref) ** 2

    return {
        "class": cls_name,
        "estimated_portion_g": float(mass_in),
        "area_in_pct": area_in,
        "area_ref_pct": area_ref,
        "z_in_m": z_in,
        "z_ref_m": z_ref,
        "mass_ref_g": mass_ref,
    }
|
|
|
|
|
|
|
|
def estimate_nutrition_for_mass(class_name, mass_g):
    """Scale the nutrition facts of ``class_name`` to a given mass.

    The entry in ``NUTR_BY_CLASS`` states its values for a reference mass
    (its "amount" field, typically 100 g); every nutrient is multiplied by
    ``mass_g / amount``.

    Args:
        class_name: Key into ``NUTR_BY_CLASS``.
        mass_g: Mass to scale to, in grams.

    Returns:
        Dict with "class", "mass_g" and the scaled "calories", "protein",
        "fat", "carbohydrates" and "sodium"; ``None`` when the mass is not a
        finite non-negative number or the nutrition entry is missing or
        malformed.
    """
    # Reject unusable masses up front instead of failing (None / text) or
    # silently producing NaN nutrition values.
    try:
        mass = float(mass_g)
    except (TypeError, ValueError):
        return None
    if not np.isfinite(mass) or mass < 0:
        return None

    nutr = NUTR_BY_CLASS.get(class_name)
    if not nutr:
        return None

    nutrient_keys = ("calories", "protein", "fat", "carbohydrates", "sodium")
    try:
        ref_mass = float(nutr["amount"])
        per_ref = [float(nutr[key]) for key in nutrient_keys]
    except (KeyError, ValueError, TypeError):
        return None

    # "not x > 0" also rejects a NaN reference mass.
    if not ref_mass > 0:
        return None

    factor = mass / ref_mass

    scaled = {"class": class_name, "mass_g": mass}
    for key, value in zip(nutrient_keys, per_ref):
        scaled[key] = value * factor
    return scaled
|
|
|
|
|
|
|
|
def breakdown_ingredients(dish_class_name, dish_mass_g):
    """Split a dish into its preset ingredients and compute their nutrition.

    Each ingredient listed under the dish's "ingredients" entry in
    ``PRESET_BY_CLASS`` receives the share ``amount / portion`` of
    ``dish_mass_g``.  Nutrition per ingredient is obtained from
    ``estimate_nutrition_for_mass`` and is only included when available.

    Args:
        dish_class_name: Dish class to look up in ``PRESET_BY_CLASS``.
        dish_mass_g: Estimated mass of the whole dish in grams.

    Returns:
        ``(ingredient_masses, ingredient_nutrition)``: two lists of dicts.
        Both are empty when the dish has no usable preset, the ingredient
        list is missing or not a list, or ``dish_mass_g`` is not numeric.
        Ingredient entries that are not dicts or lack a numeric "amount"
        are skipped.
    """
    try:
        dish_mass = float(dish_mass_g)
    except (TypeError, ValueError):
        return [], []

    preset = PRESET_BY_CLASS.get(dish_class_name)
    if not preset or "ingredients" not in preset:
        return [], []

    try:
        portion_ref = float(preset["portion"])
    except (KeyError, ValueError, TypeError):
        return [], []

    # "not x > 0" also rejects a NaN reference portion.
    if not portion_ref > 0:
        return [], []

    ingredients = preset["ingredients"]
    # A JSON null (or any non-list value) would otherwise crash the loop.
    if not isinstance(ingredients, (list, tuple)):
        return [], []

    ingredient_masses = []
    ingredient_nutrition = []

    for ing in ingredients:
        if not isinstance(ing, dict):
            continue
        ing_name = ing.get("name")
        try:
            ing_ref_mass = float(ing["amount"])
        except (KeyError, ValueError, TypeError):
            continue

        ratio = ing_ref_mass / portion_ref
        ing_mass_in = dish_mass * ratio

        ingredient_masses.append({
            "dish_class": dish_class_name,
            "ingredient": ing_name,
            "mass_g": float(ing_mass_in),
        })

        nutr = estimate_nutrition_for_mass(ing_name, ing_mass_in)
        if nutr:
            # Tag the record so callers can tell which dish it belongs to.
            nutr["dish_class"] = dish_class_name
            ingredient_nutrition.append(nutr)

    return ingredient_masses, ingredient_nutrition
|
|
|
|
|
|
|
|
def postprocess_ai_results(rows, center_depth_m):
    """Turn per-class segmentation stats into portion and nutrition records.

    Args:
        rows: Iterable of ``[class_name, area_pct, median_depth_m]``.  Rows
            whose ``area_pct`` is None are skipped; a missing or non-finite
            ``median_depth_m`` is replaced by ``center_depth_m``.
        center_depth_m: Depth at the centre of the image (meters), used as
            the fallback depth.

    Returns:
        ``(portions_json, dish_nutr_json, ingredient_nutr_json)``:

        - portions_json: list of dicts like
          ``{"class": "pad kaprao", "portion": 100, "portion_label": "gram",
          "center_depth": "0.47", "mask_region": "5.07"}``
        - dish_nutr_json: list of dish-level nutrition dicts
        - ingredient_nutr_json: one dict per ingredient; the nutrient fields
          are None when no nutrition data was available for it.
    """
    nutrient_keys = ("calories", "protein", "fat", "carbohydrates", "sodium")

    portions_json = []
    dish_nutr_json = []
    ingredient_nutr_json = []

    for cls_name, area_pct, md in rows:
        if area_pct is None:
            continue

        # Prefer the class's own median depth; fall back to the centre depth.
        z_in = md if md is not None and np.isfinite(md) else center_depth_m

        portion_info = estimate_portion_for_class(
            cls_name=cls_name,
            area_in_pct=area_pct,
            z_in_m=z_in,
            default_z_in=center_depth_m,
        )
        if portion_info is None:
            continue

        dish_mass = portion_info["estimated_portion_g"]

        # Same field layout (including string-formatted depth / area) as the
        # preset entries shown in the docstring.
        portions_json.append({
            "class": portion_info["class"],
            "portion": round(dish_mass, 2),
            "portion_label": "gram",
            "center_depth": f"{portion_info['z_in_m']:.2f}",
            "mask_region": f"{portion_info['area_in_pct']:.2f}",
        })

        dish_n = estimate_nutrition_for_mass(cls_name, dish_mass)
        if dish_n:
            dish_record = {
                "class": dish_n["class"],
                "mass_g": round(dish_n["mass_g"], 2),
            }
            for key in nutrient_keys:
                dish_record[key] = round(dish_n[key], 1)
            dish_nutr_json.append(dish_record)

        ing_masses, ing_nutrition = breakdown_ingredients(
            dish_class_name=cls_name,
            dish_mass_g=dish_mass,
        )

        # Queue the nutrition records per (dish, ingredient) in the order
        # they were produced.  A plain dict would keep only the last record
        # when a dish lists the same ingredient name more than once, and
        # every duplicate row would then show that last record's values.
        pending = {}
        for n in ing_nutrition:
            key = (n.get("dish_class", cls_name), n["class"])
            pending.setdefault(key, []).append(n)

        for mass_rec in ing_masses:
            dish_cls = mass_rec["dish_class"]
            ing_name = mass_rec["ingredient"]

            queue = pending.get((dish_cls, ing_name))
            n = queue.pop(0) if queue else None

            ing_record = {
                "dish_class": dish_cls,
                "ingredient": ing_name,
                "mass_g": round(mass_rec["mass_g"], 2),
            }
            for key in nutrient_keys:
                # The mass is always reported; nutrients only when known.
                ing_record[key] = None if n is None else round(n[key], 1)
            ingredient_nutr_json.append(ing_record)

    return portions_json, dish_nutr_json, ingredient_nutr_json
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _segment_food_classes(rgb_np):
    """Run YOLO segmentation and merge the instance masks of each class.

    Returns ``(seg_vis, class_to_mask)``: the annotated RGB uint8 image (the
    unmodified input when segmentation fails) and a dict mapping class name
    to a boolean mask.  Errors are printed and otherwise ignored so that
    depth estimation can still run.
    """
    seg_vis = rgb_np.copy()
    class_to_mask = {}

    try:
        results = yolo_model.predict(
            source=cv2.cvtColor(rgb_np, cv2.COLOR_RGB2BGR),  # YOLO expects BGR arrays
            save=False,
            conf=0.25,
            iou=0.7,
            verbose=False,
            # Return masks at the original image resolution rather than the
            # inference resolution, so pixel counts relate to the source image.
            retina_masks=True,
        )
        r = results[0]

        # r.plot() returns a BGR image; convert it for display.
        seg_vis = cv2.cvtColor(r.plot(), cv2.COLOR_BGR2RGB)

        if r.masks is not None and len(r.masks.data) > 0:
            masks = r.masks.data.cpu().numpy()
            for cls_id, mask in zip(r.boxes.cls, masks):
                cls_name = yolo_model.names[int(cls_id)]
                binary = mask > 0.5
                if cls_name in class_to_mask:
                    class_to_mask[cls_name] |= binary
                else:
                    class_to_mask[cls_name] = binary
        else:
            print("[YOLO] No masks found.")
    except Exception as e:
        # Best effort: keep whatever was collected and carry on.
        print("[YOLO ERROR]", e)

    return seg_vis.astype(np.uint8), class_to_mask


def _per_class_stats(class_to_mask, depth):
    """Return unrounded ``(class, area_pct, median_depth_or_None)`` per class.

    ``area_pct`` is the share of mask pixels that are set, measured on the
    mask's own grid.  The median depth is taken over the finite depth pixels
    under the mask after resizing the mask to the depth map's size.
    """
    H_d, W_d = depth.shape
    finite = np.isfinite(depth)
    stats = []

    for cls_name, mask in class_to_mask.items():
        area_pct = 100.0 * float(mask.sum()) / mask.size if mask.size > 0 else 0.0

        mask_u8 = mask.astype(np.uint8) * 255
        mask_depth = cv2.resize(
            mask_u8, (W_d, H_d), interpolation=cv2.INTER_NEAREST
        ) > 0

        obj_depths = depth[mask_depth & finite]
        median_depth = float(np.median(obj_depths)) if obj_depths.size > 0 else None

        stats.append((cls_name, area_pct, median_depth))

    return stats


def analyze_image(pil_img: "Image.Image"):
    """Analyse one food image: segmentation, depth, portions and nutrition.

    Args:
        pil_img: The uploaded image, or None when nothing was uploaded.

    Returns:
        A 7-tuple in the order expected by the UI outputs:

        1. depth visualisation (RGB uint8, resized to the input image size)
        2. segmentation visualisation (RGB uint8)
        3. markdown text with the depth summary
        4. segmentation rows ``[class, area %, median depth or None]``,
           rounded to two decimals for display
        5. portion table rows
        6. dish nutrition table rows
        7. ingredient nutrition table rows

        When no image is given, or depth estimation fails or yields no
        usable map, the depth image is a 10x10 black placeholder, the text
        carries the reason and all tables are empty.
    """
    blank = np.zeros((10, 10, 3), dtype=np.uint8)

    if pil_img is None:
        return blank, blank, "Please upload an image first.", [], [], [], []

    pil_img = pil_img.convert("RGB")
    rgb_np = np.array(pil_img)
    H_s, W_s = rgb_np.shape[:2]

    # --- Segmentation -----------------------------------------------------
    seg_vis, class_to_mask = _segment_food_classes(rgb_np)

    # --- Depth estimation -------------------------------------------------
    try:
        dp_in = dp_transform(pil_img).to(device)
        with torch.no_grad():
            pred = dp_model.infer(dp_in, f_px=None)

        depth = pred["depth"]
        if isinstance(depth, torch.Tensor):
            depth = depth.squeeze().cpu().numpy()
    except Exception as e:
        return blank, seg_vis, f"Depth estimation error: {e}", [], [], [], []

    if depth is not None:
        depth = np.asarray(depth)
    # Everything below indexes the map as HxW, so anything else is unusable.
    if depth is None or depth.ndim != 2 or not np.isfinite(depth).any():
        return blank, seg_vis, "Depth map invalid (NaN/empty).", [], [], [], []

    H_d, W_d = depth.shape

    depth_vis = make_depth_vis(depth)
    depth_vis_big = cv2.resize(
        depth_vis, (W_s, H_s), interpolation=cv2.INTER_LINEAR
    ).astype(np.uint8)

    # --- Reference depths: centre pixel and central 20% x 20% window -------
    center_depth = float(depth[H_d // 2, W_d // 2])

    roi = depth[int(H_d * 0.4):int(H_d * 0.6), int(W_d * 0.4):int(W_d * 0.6)]
    roi = roi[np.isfinite(roi)]
    roi_depth = float(np.median(roi)) if roi.size > 0 else float("nan")

    depth_lines = [
        "### Depth Estimate",
        f"- Center depth: **{center_depth:.2f} m**",
    ]
    if np.isfinite(roi_depth):
        depth_lines.append(f"- Center ROI median depth: **{roi_depth:.2f} m**")

    # A single invalid centre pixel should not disable the fallback depth;
    # use the ROI median in that case.
    fallback_depth = center_depth if np.isfinite(center_depth) else roi_depth

    # --- Per-class statistics ----------------------------------------------
    stats = _per_class_stats(class_to_mask, depth)

    # Rounded copy for display only; the estimates use the unrounded values.
    rows = [
        [cls_name, round(area_pct, 2), None if md is None else round(md, 2)]
        for cls_name, area_pct, md in stats
    ]

    portions_json, dish_nutr_json, ingredient_nutr_json = postprocess_ai_results(
        stats, fallback_depth
    )

    if rows:
        depth_lines.append("\n### Object distances (per class)")
        for cls_name, area_pct, md in rows:
            if md is None:
                depth_lines.append(
                    f"- {cls_name}: {area_pct:.2f}% of image, depth: N/A"
                )
            else:
                depth_lines.append(
                    f"- {cls_name}: {area_pct:.2f}% of image, median depth **{md:.2f} m**"
                )
    else:
        depth_lines.append("\n_No segmentation masks detected._")

    depth_text = "\n".join(depth_lines)

    # --- Flatten the records into table rows (column order = UI headers) ---
    nutrient_keys = ("calories", "protein", "fat", "carbohydrates", "sodium")

    portions_table_rows = [
        [p["class"], p["portion"], p["portion_label"], p["center_depth"], p["mask_region"]]
        for p in portions_json
    ]

    dish_table_rows = [
        [d["class"], d["mass_g"], *(d[key] for key in nutrient_keys)]
        for d in dish_nutr_json
    ]

    ingredient_table_rows = [
        [ing["dish_class"], ing["ingredient"], ing["mass_g"],
         *(ing[key] for key in nutrient_keys)]
        for ing in ingredient_nutr_json
    ]

    return (
        depth_vis_big,
        seg_vis,
        depth_text,
        rows,
        portions_table_rows,
        dish_table_rows,
        ingredient_table_rows,
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Gradio UI: one image input, two image outputs, a markdown summary and four
# result tables, all filled by analyze_image when the button is clicked.
# NOTE(review): the nesting below (tables and button directly under Blocks,
# launch at module level) is reconstructed from the statement order — confirm
# against the intended layout.
with gr.Blocks() as demo:
    gr.Markdown(
        "<h2 style='text-align:center;'>Depth Pro + YOLO Segmentation + Nutrition Demo</h2>"
        "<p style='text-align:center;'>"
        "Upload a food image → get depth map, object distance, estimated portion, and nutrition per dish & ingredient."
        "</p>"
    )

    with gr.Row():
        input_img = gr.Image(label="Upload food image", type="pil")

    with gr.Row():
        depth_out = gr.Image(label="Depth overlay", type="numpy")
        seg_out = gr.Image(label="Segmentation result", type="numpy")

    with gr.Row():
        depth_info = gr.Markdown(label="Depth estimate")

    # Columns shared by the dish-level and ingredient-level nutrition tables.
    _nutrient_headers = ["Calories", "Protein (g)", "Fat (g)", "Carbs (g)", "Sodium (mg)"]
    _nutrient_types = ["number"] * len(_nutrient_headers)

    seg_table = gr.Dataframe(
        label="Segmentation stats",
        headers=["Class", "Area % of image", "Median depth (m)"],
        datatype=["str", "number", "number"],
    )

    portions_table = gr.Dataframe(
        label="Estimated Portions (per class)",
        headers=["Class", "Portion (g)", "Unit", "Center depth (m)", "Mask region (%)"],
        datatype=["str", "number", "str", "str", "str"],
    )

    dish_nutrition_table = gr.Dataframe(
        label="Dish Nutrition (per class)",
        headers=["Class", "Mass (g)", *_nutrient_headers],
        datatype=["str", "number", *_nutrient_types],
    )

    ingredient_nutrition_table = gr.Dataframe(
        label="Ingredient Nutrition (per ingredient)",
        headers=["Dish", "Ingredient", "Mass (g)", *_nutrient_headers],
        datatype=["str", "str", "number", *_nutrient_types],
    )

    run_btn = gr.Button("Run analysis")

    # The order of this list must match the tuple returned by analyze_image.
    _result_components = [
        depth_out,
        seg_out,
        depth_info,
        seg_table,
        portions_table,
        dish_nutrition_table,
        ingredient_nutrition_table,
    ]
    run_btn.click(fn=analyze_image, inputs=input_img, outputs=_result_components)

# Serve on all network interfaces, port 7860.
demo.launch(server_name="0.0.0.0", server_port=7860)
|
|
|