Spaces:

SmartHeal
/

SmartHeal-Agentic-AI

Running

App Files Community

SmartHeal committed on Aug 19

Commit

028ea67

verified ·

1 Parent(s): f1a9c91

Update src/ai_processor.py

Browse files

Files changed (1) hide show

src/ai_processor.py +165 -289

src/ai_processor.py CHANGED Viewed

@@ -3,12 +3,14 @@
 # Turn on deep logging: export LOGLEVEL=DEBUG SMARTHEAL_DEBUG=1
 import os
 import logging
 from datetime import datetime
 from typing import Optional, Dict, List, Tuple
-# ---- Environment defaults (do NOT globally hint CUDA here) ----
 os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")
 LOGLEVEL = os.getenv("LOGLEVEL", "INFO").upper()
 SMARTHEAL_DEBUG = os.getenv("SMARTHEAL_DEBUG", "0") == "1"
@@ -26,20 +28,22 @@ logging.basicConfig(
 def _log_kv(prefix: str, kv: Dict):
     logging.debug(prefix + " | " + " | ".join(f"{k}={v}" for k, v in kv.items()))
-# --- Spaces GPU decorator (REQUIRED) ---
-from spaces import GPU as _SPACES_GPU
-@_SPACES_GPU(enable_queue=True)
-def smartheal_gpu_stub(ping: int = 0) -> str:
-    return "ready"
-# ---- Paths / constants ----
 UPLOADS_DIR = "uploads"
 os.makedirs(UPLOADS_DIR, exist_ok=True)
 HF_TOKEN = os.getenv("HF_TOKEN", None)
 YOLO_MODEL_PATH = "src/best.pt"
-SEG_MODEL_PATH = "src/segmentation_model.h5"
 GUIDELINE_PDFS = ["src/eHealth in Wound Care.pdf", "src/IWGDF Guideline.pdf", "src/evaluation.pdf"]
 DATASET_ID = "SmartHeal/wound-image-uploads"
 DEFAULT_PX_PER_CM = 38.0
@@ -53,35 +57,17 @@ SEG_THRESH = float(os.getenv("SEG_THRESH", "0.5"))
 models_cache: Dict[str, object] = {}
 knowledge_base_cache: Dict[str, object] = {}
-# ---------- Utilities to prevent CUDA in main process ----------
-from contextlib import contextmanager
@contextmanager
def _no_cuda_env():
    """
    Temporarily set CUDA_VISIBLE_DEVICES to "-1" for the duration of the
    `with` body, then put the variable back exactly as it was (including
    removing it again when it was not set beforehand).
    """
    key = "CUDA_VISIBLE_DEVICES"
    saved = os.environ.get(key)
    os.environ[key] = "-1"
    try:
        yield
    finally:
        # Restore even when the body raised.
        if saved is not None:
            os.environ[key] = saved
        else:
            os.environ.pop(key, None)
-# ---------- Lazy imports (wrapped where needed) ----------
 def _import_ultralytics():
-    # Prevent Ultralytics from probing CUDA on import
-    with _no_cuda_env():
-        from ultralytics import YOLO
     return YOLO
 def _import_tf_loader():
     import tensorflow as tf
-    tf.config.set_visible_devices([], "GPU")
     from tensorflow.keras.models import load_model
     return load_model
@@ -105,116 +91,57 @@ def _import_hf_hub():
     from huggingface_hub import HfApi, HfFolder
     return HfApi, HfFolder
-# ---------- SmartHeal prompts (system + user prefix) ----------
-SMARTHEAL_SYSTEM_PROMPT = """\
-You are SmartHeal Clinical Assistant, a wound-care decision-support system.
-You analyze wound photographs and brief patient context to produce careful,
-specific, guideline-informed recommendations WITHOUT diagnosing. You always:
-- Use the measurements calculated by the vision pipeline as ground truth.
-- Prefer concise, actionable steps tailored to exudate level, infection risk, and pain.
-- Flag uncertainties and red flags that need escalation to a clinician.
-- Avoid contraindicated advice; do not infer unseen comorbidities.
-- Keep under 300 words and use the requested headings exactly.
-- Tone: professional, clear, and conservative; no definitive medical claims.
-- Safety: remind the user to seek clinician review for changes or red flags.
-"""
-SMARTHEAL_USER_PREFIX = """\
-Patient: {patient_info}
-Visual findings: type={wound_type}, size={length_cm}x{breadth_cm} cm, area={area_cm2} cm^2,
-detection_conf={det_conf:.2f}, calibration={px_per_cm} px/cm.
-Guideline context (snippets you can draw principles from; do not quote at length):
-{guideline_context}
-Write a structured answer with these headings exactly:
-1. Clinical Summary (max 4 bullet points)
-2. Likely Stage/Type (if uncertain, say 'uncertain')
-3. Treatment Plan (specific dressing choices and frequency based on exudate/infection risk)
-4. Red Flags (what to escalate and when)
-5. Follow-up Cadence (days)
-6. Notes (assumptions/uncertainties)
-Keep to 220–300 words. Do NOT provide diagnosis. Avoid contraindicated advice.
-"""
-# ---------- VLM (MedGemma replaced with Qwen2-VL) ----------
@_SPACES_GPU(enable_queue=True)
def _vlm_infer_gpu(messages, model_id: str, max_new_tokens: int, token: Optional[str]):
    """
    Build an image-text-to-text pipeline for `model_id` and run it on `messages`.

    transformers and torch are imported inside the function so that they are
    only loaded when this decorated function actually executes.

    Args:
        messages: chat-style message list passed to the pipeline as `text`.
        model_id: Hugging Face model identifier to load.
        max_new_tokens: generation length cap.
        token: optional Hugging Face access token.

    Returns:
        The stripped generated text, or "⚠️ Empty response" when the pipeline
        returns nothing usable.
    """
    from transformers import pipeline
    import torch

    pipe = pipeline(
        task="image-text-to-text",
        model=model_id,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        token=token,
        trust_remote_code=True,
        model_kwargs={"low_cpu_mem_usage": True},
    )
    # Greedy decoding: `temperature` is only meaningful when sampling, so it is
    # not passed alongside do_sample=False.
    out = pipe(text=messages, max_new_tokens=max_new_tokens, do_sample=False)
    if not out:
        return "⚠️ Empty response"

    first = out[0]
    try:
        # Chat-style output: list of messages, the last one holds the reply.
        txt = first["generated_text"][-1].get("content", "")
    except (AttributeError, IndexError, KeyError, TypeError):
        # Plain-string output (or an unexpected shape): use it as-is.
        txt = first.get("generated_text", "") if isinstance(first, dict) else ""
    if not isinstance(txt, str):
        txt = str(txt or "")
    return (txt or "").strip() or "⚠️ Empty response"
-def generate_medgemma_report(  # kept name so callers don't change
     patient_info: str,
     visual_results: Dict,
     guideline_context: str,
     image_pil: Image.Image,
     max_new_tokens: Optional[int] = None,
 ) -> str:
-    """
-    MedGemma replacement using Qwen/Qwen2-VL-2B-Instruct via image-text-to-text.
-    Loads & runs ONLY inside a GPU worker to satisfy Stateless GPU constraints.
-    """
-    if os.getenv("SMARTHEAL_ENABLE_VLM", "1") != "1":
         return "⚠️ VLM disabled"
-    model_id = os.getenv("SMARTHEAL_VLM_MODEL", "Qwen/Qwen2-VL-2B-Instruct")
-    max_new_tokens = max_new_tokens or int(os.getenv("SMARTHEAL_VLM_MAX_TOKENS", "600"))
-    uprompt = SMARTHEAL_USER_PREFIX.format(
-        patient_info=patient_info,
-        wound_type=visual_results.get("wound_type", "Unknown"),
-        length_cm=visual_results.get("length_cm", 0),
-        breadth_cm=visual_results.get("breadth_cm", 0),
-        area_cm2=visual_results.get("surface_area_cm2", 0),
-        det_conf=float(visual_results.get("detection_confidence", 0.0)),
-        px_per_cm=visual_results.get("px_per_cm", "?"),
-        guideline_context=(guideline_context or "")[:900],
-    )
-    messages = [
-        {"role": "system", "content": [{"type": "text", "text": SMARTHEAL_SYSTEM_PROMPT}]},
-        {"role": "user", "content": [
-            {"type": "image", "image": image_pil},
-            {"type": "text",  "text": uprompt},
-        ]},
-    ]
     try:
-        # IMPORTANT: do not import transformers or touch CUDA here. Only call the GPU worker.
-        return _vlm_infer_gpu(messages, model_id, max_new_tokens, HF_TOKEN)
     except Exception as e:
-        logging.error(f"VLM call failed: {e}")
         return "⚠️ VLM error"
 # ---------- Initialize CPU models ----------
 def load_yolo_model():
     YOLO = _import_ultralytics()
-    # Construct model with CUDA masked to avoid auto-selecting cuda:0
-    with _no_cuda_env():
-        model = YOLO(YOLO_MODEL_PATH)
-    return model
 def load_segmentation_model():
-    import tensorflow as tf
     load_model = _import_tf_loader()
-    return load_model(SEG_MODEL_PATH, compile=False, custom_objects={'InputLayer': tf.keras.layers.InputLayer})
 def load_classification_pipeline():
     pipe = _import_hf_cls()
@@ -236,7 +163,7 @@ def initialize_cpu_models() -> None:
     if "det" not in models_cache:
         try:
             models_cache["det"] = load_yolo_model()
-            logging.info("✅ YOLO loaded (CPU; CUDA masked in main)")
         except Exception as e:
             logging.error(f"YOLO load failed: {e}")
@@ -271,7 +198,6 @@ def initialize_cpu_models() -> None:
             models_cache["embedding_model"] = None
             logging.warning(f"Embeddings unavailable: {e}")
 def setup_knowledge_base() -> None:
     if "vector_store" in knowledge_base_cache:
         return
@@ -359,6 +285,7 @@ def estimate_px_per_cm_from_exif(pil_img: Image.Image, default_px_per_cm: float
 # ---------- Segmentation helpers ----------
 def _imagenet_norm(arr: np.ndarray) -> np.ndarray:
     mean = np.array([123.675, 116.28, 103.53], dtype=np.float32)
     std  = np.array([58.395, 57.12, 57.375], dtype=np.float32)
     return (arr.astype(np.float32) - mean) / std
@@ -382,166 +309,112 @@ def _to_prob(pred: np.ndarray) -> np.ndarray:
         p = 1.0 / (1.0 + np.exp(-p))
     return p.astype(np.float32)
-# ---- Adaptive threshold + GrabCut grow ----
def _adaptive_prob_threshold(p: np.ndarray) -> float:
    """
    Pick a probability threshold from two candidates: Otsu's level and the
    90th percentile of `p`, each clamped to [0.25, 0.65].

    Each candidate is scored by how far the fraction of pixels it keeps lies
    outside the 3%–10% band (undershoot weighted 3.0, overshoot 1.5); the
    lower score wins, with Otsu preferred on a tie.
    """
    probs = np.clip(p.astype(np.float32), 0, 1)
    probs_u8 = (probs * 255).astype(np.uint8)
    otsu_level, _ = cv2.threshold(probs_u8, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    candidates = (
        float(np.clip(otsu_level / 255.0, 0.25, 0.65)),       # Otsu first: wins ties
        float(np.clip(np.percentile(probs, 90), 0.25, 0.65)),
    )

    def penalty(thr: float) -> float:
        kept = float((probs >= thr).sum()) / float(probs.size)
        band_low, band_high = 0.03, 0.10
        if kept < band_low:
            return abs(kept - band_low) * 3.0
        if kept > band_high:
            return abs(kept - band_high) * 1.5
        return 0.0

    # min() returns the first minimal element, i.e. Otsu when scores are equal.
    return min(candidates, key=penalty)
def _grabcut_refine(bgr: np.ndarray, seed01: np.ndarray, iters: int = 3) -> np.ndarray:
    """
    Grow from a confident core into low-contrast margins using GrabCut.

    Args:
        bgr: image passed to cv2.grabCut (assumed 8-bit, 3-channel — TODO confirm at caller).
        seed01: seed mask; any non-zero pixel counts as confident wound core.
        iters: number of GrabCut iterations.

    Returns:
        uint8 0/1 mask of pixels GrabCut labels definite or probable foreground.
    """
    h, w = bgr.shape[:2]
    core = (seed01 > 0).astype(np.uint8)
    k = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
    halo = cv2.dilate(core, k, iterations=1)

    gc = np.full((h, w), cv2.GC_PR_BGD, np.uint8)
    # Order matters: the halo is a superset of the core, so label the halo
    # as probable foreground first, then pin the core as definite foreground.
    gc[halo.astype(bool)] = cv2.GC_PR_FGD
    gc[core.astype(bool)] = cv2.GC_FGD
    # All four image borders are definite background (last column is -1, not 1).
    gc[0, :] = gc[-1, :] = cv2.GC_BGD
    gc[:, 0] = gc[:, -1] = cv2.GC_BGD

    bgdModel = np.zeros((1, 65), np.float64)
    fgdModel = np.zeros((1, 65), np.float64)
    cv2.grabCut(bgr, gc, None, bgdModel, fgdModel, iters, cv2.GC_INIT_WITH_MASK)
    return np.where((gc == cv2.GC_FGD) | (gc == cv2.GC_PR_FGD), 1, 0).astype(np.uint8)
 def _fill_holes(mask01: np.ndarray) -> np.ndarray:
     h, w = mask01.shape[:2]
     ff = np.zeros((h + 2, w + 2), np.uint8)
     m = (mask01 * 255).astype(np.uint8).copy()
     cv2.floodFill(m, ff, (0, 0), 255)
     m_inv = cv2.bitwise_not(m)
     out = ((mask01 * 255) | m_inv) // 255
     return out.astype(np.uint8)
def _clean_mask(mask01: np.ndarray) -> np.ndarray:
    """Open → Close → Fill holes → Largest component (no dilation)."""
    binary = (mask01 > 0).astype(np.uint8)

    open_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    close_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
    binary = cv2.morphologyEx(binary, cv2.MORPH_OPEN, open_kernel, iterations=1)
    binary = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, close_kernel, iterations=1)
    binary = _fill_holes(binary)

    # Label 0 is background; keep only the biggest remaining component.
    count, label_map, stats, _ = cv2.connectedComponentsWithStats(binary, 8)
    component_areas = stats[1:, cv2.CC_STAT_AREA] if count > 1 else None
    if component_areas is not None and component_areas.size:
        winner = int(np.argmax(component_areas)) + 1
        binary = (label_map == winner).astype(np.uint8)
    return (binary > 0).astype(np.uint8)
-# Global last debug dict (per-process)
 _last_seg_debug: Dict[str, object] = {}
 def segment_wound(image_bgr: np.ndarray, ts: str, out_dir: str) -> Tuple[np.ndarray, Dict[str, object]]:
     """
-    TF model → adaptive threshold on prob → GrabCut grow → cleanup.
-    Fallback: KMeans-Lab.
     Returns (mask_uint8_0_255, debug_dict)
     """
-    debug = {"used": None, "reason": None, "positive_fraction": 0.0,
-             "thr": None, "heatmap_path": None, "roi_seen_by_model": None}
     seg_model = models_cache.get("seg", None)
-    # --- Model path ---
     if seg_model is not None:
         try:
             ishape = getattr(seg_model, "input_shape", None)
             if not ishape or len(ishape) < 4:
                 raise ValueError(f"Bad seg input_shape: {ishape}")
             th, tw = int(ishape[1]), int(ishape[2])
             x = _preprocess_for_seg(image_bgr, (th, tw))
-            roi_seen_path = None
             if SMARTHEAL_DEBUG:
-                roi_seen_path = os.path.join(out_dir, f"roi_for_seg_{ts}.png")
-                cv2.imwrite(roi_seen_path, image_bgr)
             pred = seg_model.predict(x, verbose=0)
-            if isinstance(pred, (list, tuple)): pred = pred[0]
-            p = _to_prob(pred)
-            p = cv2.resize(p, (image_bgr.shape[1], image_bgr.shape[0]), interpolation=cv2.INTER_LINEAR)
-            heatmap_path = None
             if SMARTHEAL_DEBUG:
                 hm = (np.clip(p, 0, 1) * 255).astype(np.uint8)
                 heat = cv2.applyColorMap(hm, cv2.COLORMAP_JET)
                 heatmap_path = os.path.join(out_dir, f"seg_pred_heatmap_{ts}.png")
                 cv2.imwrite(heatmap_path, heat)
-            thr = _adaptive_prob_threshold(p)
-            core01 = (p >= thr).astype(np.uint8)
-            core_frac = float(core01.sum()) / float(core01.size)
-            if core_frac < 0.005:
-                thr2 = max(thr - 0.10, 0.15)
-                core01 = (p >= thr2).astype(np.uint8)
-                thr = thr2
-                core_frac = float(core01.sum()) / float(core01.size)
-            if core01.any():
-                gc01 = _grabcut_refine(image_bgr, core01, iters=3)
-                mask01 = _clean_mask(gc01)
-            else:
-                mask01 = np.zeros(core01.shape, np.uint8)
-            pos_frac = float(mask01.sum()) / float(mask01.size)
-            logging.info(f"SegModel USED | thr={float(thr):.2f} core_frac={core_frac:.4f} final_frac={pos_frac:.4f}")
-            debug.update({
-                "used": "tf_model",
-                "reason": "ok",
-                "positive_fraction": pos_frac,
-                "thr": float(thr),
                 "heatmap_path": heatmap_path,
-                "roi_seen_by_model": roi_seen_path
-            })
-            return (mask01 * 255).astype(np.uint8), debug
         except Exception as e:
-            logging.warning(f"⚠️ Segmentation model failed → fallback. Reason: {e}")
-            debug.update({"used": "fallback_kmeans", "reason": f"model_failed: {e}"})
-    # --- Fallback: KMeans in Lab (reddest cluster as wound) ---
     Z = image_bgr.reshape((-1, 3)).astype(np.float32)
     criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
     _, labels, centers = cv2.kmeans(Z, 2, None, criteria, 5, cv2.KMEANS_PP_CENTERS)
     centers_u8 = centers.astype(np.uint8).reshape(1, 2, 3)
     centers_lab = cv2.cvtColor(centers_u8, cv2.COLOR_BGR2LAB)[0]
-    wound_idx = int(np.argmax(centers_lab[:, 1]))  # maximize a* (red)
-    mask01 = (labels.reshape(image_bgr.shape[:2]) == wound_idx).astype(np.uint8)
-    mask01 = _clean_mask(mask01)
-    pos_frac = float(mask01.sum()) / float(mask01.size)
-    logging.info(f"KMeans USED | final_frac={pos_frac:.4f}")
-    debug.update({
-        "used": "fallback_kmeans",
-        "reason": debug.get("reason") or "no_model",
-        "positive_fraction": pos_frac,
-        "thr": None
-    })
-    return (mask01 * 255).astype(np.uint8), debug
 # ---------- Measurement + overlay helpers ----------
 def largest_component_mask(binary01: np.ndarray, min_area_px: int = 50) -> np.ndarray:
@@ -554,6 +427,17 @@ def largest_component_mask(binary01: np.ndarray, min_area_px: int = 50) -> np.nd
     largest_idx = 1 + int(np.argmax(areas))
     return (labels == largest_idx).astype(np.uint8)
 def measure_min_area_rect(mask01: np.ndarray, px_per_cm: float) -> Tuple[float, float, Tuple]:
     contours, _ = cv2.findContours(mask01.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
     if not contours:
@@ -567,23 +451,9 @@ def measure_min_area_rect(mask01: np.ndarray, px_per_cm: float) -> Tuple[float,
     box = cv2.boxPoints(rect).astype(int)
     return length_cm, breadth_cm, (box, rect[0])
def area_cm2_from_contour(mask01: np.ndarray, px_per_cm: float) -> Tuple[float, Optional[np.ndarray]]:
    """
    Measure the polygon area of the largest external contour in the mask.

    Returns (area_cm2, contour), with the area rounded to 2 decimals, or
    (0.0, None) when the mask contains no contour.
    """
    binary = (mask01 > 0).astype(np.uint8)
    found, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if len(found) == 0:
        return 0.0, None
    biggest = max(found, key=cv2.contourArea)
    # Floor the calibration so a zero/negative value cannot divide by zero.
    px2_per_cm2 = max(px_per_cm, 1e-6) ** 2
    return round(float(cv2.contourArea(biggest)) / px2_per_cm2, 2), biggest
def clamp_area_with_minrect(cnt: np.ndarray, px_per_cm: float, area_cm2_poly: float) -> float:
    """
    Cap the polygon area at 105% of the contour's minimum-area bounding
    rectangle (both in cm^2) and round the result to 2 decimals.
    """
    _, (side_a, side_b), _ = cv2.minAreaRect(cnt)
    px2_per_cm2 = max(px_per_cm, 1e-6) ** 2
    rect_cm2 = float(max(side_a, 0.0) * max(side_b, 0.0)) / px2_per_cm2
    ceiling = rect_cm2 * 1.05
    return round(min(area_cm2_poly, ceiling), 2)
 def draw_measurement_overlay(
     base_bgr: np.ndarray,
@@ -594,13 +464,16 @@ def draw_measurement_overlay(
     thickness: int = 2
 ) -> np.ndarray:
     """
-    1) Strong red mask overlay + white contour
-    2) Min-area rectangle
-    3) Double-headed arrows labeled Length/Width
     """
     overlay = base_bgr.copy()
-    # Mask tint
     mask255 = (mask01 * 255).astype(np.uint8)
     mask3 = cv2.merge([mask255, mask255, mask255])
     red = np.zeros_like(overlay); red[:] = (0, 0, 255)
@@ -608,7 +481,7 @@ def draw_measurement_overlay(
     tinted = cv2.addWeighted(overlay, 1 - alpha, red, alpha, 0)
     overlay = np.where(mask3 > 0, tinted, overlay)
-    # Contour
     cnts, _ = cv2.findContours(mask255, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
     if cnts:
         cv2.drawContours(overlay, cnts, -1, (255, 255, 255), 2)
@@ -617,11 +490,19 @@ def draw_measurement_overlay(
         cv2.polylines(overlay, [rect_box], True, (255, 255, 255), thickness)
         pts = rect_box.reshape(-1, 2)
-        def midpoint(a, b): return (int((a[0] + b[0]) / 2), int((a[1] + b[1]) / 2))
         e = [np.linalg.norm(pts[i] - pts[(i + 1) % 4]) for i in range(4)]
         long_edge_idx = int(np.argmax(e))
         mids = [midpoint(pts[i], pts[(i + 1) % 4]) for i in range(4)]
         long_pair = (long_edge_idx, (long_edge_idx + 2) % 4)
         short_pair = ((long_edge_idx + 1) % 4, (long_edge_idx + 3) % 4)
         def draw_double_arrow(img, p1, p2):
@@ -635,6 +516,7 @@ def draw_measurement_overlay(
             cv2.putText(overlay, text, org, cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 4, cv2.LINE_AA)
             cv2.putText(overlay, text, org, cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2, cv2.LINE_AA)
         draw_double_arrow(overlay, mids[long_pair[0]], mids[long_pair[1]])
         draw_double_arrow(overlay, mids[short_pair[0]], mids[short_pair[1]])
         put_label(f"Length: {length_cm:.2f} cm", mids[long_pair[0]])
@@ -663,18 +545,12 @@ class AIProcessor:
         """
         try:
             px_per_cm, exif_meta = estimate_px_per_cm_from_exif(image_pil, DEFAULT_PX_PER_CM)
-            # Guardrails for calibration to avoid huge area blow-ups
-            px_per_cm = float(np.clip(px_per_cm, 20.0, 350.0))
-            if (exif_meta or {}).get("used") != "exif":
-                logging.warning(f"Calibration fallback used: px_per_cm={px_per_cm:.2f} (default). Prefer ruler/Aruco for accuracy.")
             image_cv = cv2.cvtColor(np.array(image_pil.convert("RGB")), cv2.COLOR_RGB2BGR)
             # --- Detection ---
             det_model = self.models_cache.get("det")
             if det_model is None:
                 raise RuntimeError("YOLO model not loaded")
-            # Force CPU inference and avoid CUDA touch
             results = det_model.predict(image_cv, verbose=False, device="cpu")
             if (not results) or (not getattr(results[0], "boxes", None)) or (len(results[0].boxes) == 0):
                 try:
@@ -702,23 +578,20 @@ class AIProcessor:
             mask_u8_255, seg_debug = segment_wound(roi, ts, out_dir)
             mask01 = (mask_u8_255 > 127).astype(np.uint8)
             if mask01.any():
                 mask01 = _clean_mask(mask01)
                 logging.debug(f"Mask postproc: px_after={int(mask01.sum())}")
-            # --- Measurement (accurate & conservative) ---
             if mask01.any():
                 length_cm, breadth_cm, (box_pts, _) = measure_min_area_rect(mask01, px_per_cm)
-                area_poly_cm2, largest_cnt = area_cm2_from_contour(mask01, px_per_cm)
-                if largest_cnt is not None:
-                    surface_area_cm2 = clamp_area_with_minrect(largest_cnt, px_per_cm, area_poly_cm2)
-                else:
-                    surface_area_cm2 = area_poly_cm2
                 anno_roi = draw_measurement_overlay(roi, mask01, box_pts, length_cm, breadth_cm)
                 segmentation_empty = False
             else:
-                # Fallback if seg failed: use ROI dimensions
                 h_px = max(0, y2 - y1); w_px = max(0, x2 - x1)
                 length_cm = round(max(h_px, w_px) / px_per_cm, 2)
                 breadth_cm = round(min(h_px, w_px) / px_per_cm, 2)
@@ -742,7 +615,7 @@ class AIProcessor:
             roi_mask_path = os.path.join(out_dir, f"roi_mask_{ts}.png")
             cv2.imwrite(roi_mask_path, (mask01 * 255).astype(np.uint8))
-            # ROI overlay (mask tint + contour, without arrows)
             mask255 = (mask01 * 255).astype(np.uint8)
             mask3   = cv2.merge([mask255, mask255, mask255])
             red     = np.zeros_like(roi); red[:] = (0, 0, 255)
@@ -785,7 +658,7 @@ class AIProcessor:
                 "seg_used": seg_debug.get("used"),
                 "seg_reason": seg_debug.get("reason"),
                 "positive_fraction": round(float(seg_debug.get("positive_fraction", 0.0)), 6),
-                "threshold": seg_debug.get("thr"),
                 "segmentation_empty": segmentation_empty,
                 "exif_px_per_cm": round(px_per_cm, 3),
             }
@@ -801,7 +674,7 @@ class AIProcessor:
                 "detection_confidence": float(results[0].boxes.conf[0].cpu().item())
                     if getattr(results[0].boxes, "conf", None) is not None else 0.0,
                 "detection_image_path": detection_path,
-                "segmentation_image_path": annotated_seg_path,
                 "segmentation_annotated_path": annotated_seg_path,
                 "segmentation_roi_path": segmentation_roi_path,
                 "roi_mask_path": roi_mask_path,
@@ -819,9 +692,12 @@ class AIProcessor:
             vs = self.knowledge_base_cache.get("vector_store")
             if not vs:
                 return "Knowledge base is not available."
-            retriever = vs.as_retriever(search_kwargs={"k": 5})
-            # Modern API (avoid get_relevant_documents deprecation)
-            docs = retriever.invoke(query)
             lines: List[str] = []
             for d in docs:
                 src = (d.metadata or {}).get("source", "N/A")
@@ -875,7 +751,7 @@ Automated analysis provides quantitative measurements; verify via clinical exami
             )
             if report and report.strip() and not report.startswith(("⚠️", "❌")):
                 return report
-            logging.warning("VLM unavailable/invalid; using fallback.")
             return self._generate_fallback_report(patient_info, visual_results, guideline_context)
         except Exception as e:
             logging.error(f"Report generation failed: {e}")

 # Turn on deep logging: export LOGLEVEL=DEBUG SMARTHEAL_DEBUG=1
 import os
+import time
 import logging
 from datetime import datetime
 from typing import Optional, Dict, List, Tuple
+# ---- Environment defaults ----
 os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")
+os.environ.setdefault("CUDA_VISIBLE_DEVICES", "")
 LOGLEVEL = os.getenv("LOGLEVEL", "INFO").upper()
 SMARTHEAL_DEBUG = os.getenv("SMARTHEAL_DEBUG", "0") == "1"
 def _log_kv(prefix: str, kv: Dict):
     """Emit one DEBUG record of the form '<prefix> | k1=v1 | k2=v2 ...'."""
     fields = " | ".join(f"{key}={value}" for key, value in kv.items())
     logging.debug(f"{prefix} | {fields}")
+# --- Optional Spaces GPU stub (harmless) ---
try:
    import spaces as _spaces

    @_spaces.GPU(enable_queue=False)
    def smartheal_gpu_stub(ping: int = 0) -> str:
        """Return a constant marker; exists only so a @spaces.GPU function is defined."""
        return "ready"

    logging.info("Registered @spaces.GPU stub (enable_queue=False).")
except Exception as exc:
    # Best-effort: the stub is optional, so a missing/incompatible `spaces`
    # package must not break import — but record why it was skipped.
    logging.debug("Spaces GPU stub not registered: %s", exc)
 UPLOADS_DIR = "uploads"
 os.makedirs(UPLOADS_DIR, exist_ok=True)
 HF_TOKEN = os.getenv("HF_TOKEN", None)
 YOLO_MODEL_PATH = "src/best.pt"
+SEG_MODEL_PATH = "src/segmentation_model.h5"   # optional
 GUIDELINE_PDFS = ["src/eHealth in Wound Care.pdf", "src/IWGDF Guideline.pdf", "src/evaluation.pdf"]
 DATASET_ID = "SmartHeal/wound-image-uploads"
 DEFAULT_PX_PER_CM = 38.0
 models_cache: Dict[str, object] = {}
 knowledge_base_cache: Dict[str, object] = {}
+# ---------- Lazy imports ----------
 def _import_ultralytics():
     """Import Ultralytics on first use and return its YOLO class."""
     import ultralytics

     return ultralytics.YOLO
 def _import_tf_loader():
     """
     Import TensorFlow lazily and return Keras' `load_model` function.

     Before returning, TensorFlow is asked to hide all GPU devices so models
     stay on CPU. That request is best-effort: if it fails, the failure is
     logged and the loader is still returned.
     """
     import tensorflow as tf

     try:
         tf.config.set_visible_devices([], "GPU")  # keep TF on CPU
     except Exception as exc:
         # Not fatal, but worth knowing about when debugging device placement.
         logging.debug("Could not hide GPUs from TensorFlow: %s", exc)
     from tensorflow.keras.models import load_model

     return load_model
     from huggingface_hub import HfApi, HfFolder
     return HfApi, HfFolder
+# ---------- VLM (disabled by default) ----------
def generate_medgemma_report(
    patient_info: str,
    visual_results: Dict,
    guideline_context: str,
    image_pil: Image.Image,
    max_new_tokens: Optional[int] = None,
) -> str:
    """
    Generate a structured wound report with the MedGemma image-text-to-text model.

    The feature is opt-in: unless the environment variable
    SMARTHEAL_ENABLE_VLM equals "1" the function returns "⚠️ VLM disabled"
    without importing transformers. The pipeline is built once and kept in
    `models_cache` so later calls do not reload the model.

    Args:
        patient_info: free-text patient description inserted into the prompt.
        visual_results: dict read for 'wound_type', 'length_cm', 'breadth_cm'.
        guideline_context: accepted for interface compatibility; not used in
            the prompt built here.
        image_pil: wound image sent to the model alongside the prompt.
        max_new_tokens: generation cap; falls back to 600 when None or 0.

    Returns:
        The generated report text, or a string starting with "⚠️" when the
        VLM is disabled, produced nothing, or failed.
    """
    if os.getenv("SMARTHEAL_ENABLE_VLM", "0") != "1":
        return "⚠️ VLM disabled"
    try:
        pipe = models_cache.get("medgemma_pipe")
        if pipe is None:
            from transformers import pipeline

            pipe = pipeline(
                task="image-text-to-text",
                model="google/medgemma-4b-it",
                device_map=None,
                token=HF_TOKEN,
                trust_remote_code=True,
                model_kwargs={"low_cpu_mem_usage": True},
            )
            # Building the pipeline loads the full model; do it only once.
            models_cache["medgemma_pipe"] = pipe

        prompt = (
            "You are a medical AI assistant. Analyze this wound image and patient data.\n\n"
            f"Patient: {patient_info}\n"
            f"Wound: {visual_results.get('wound_type', 'Unknown')} - "
            f"{visual_results.get('length_cm', 0)}×{visual_results.get('breadth_cm', 0)} cm\n\n"
            "Provide a structured report with:\n"
            "1. Clinical Summary\n2. Treatment Recommendations\n3. Risk Assessment\n4. Monitoring Plan\n"
        )
        messages = [{"role": "user", "content": [
            {"type": "image", "image": image_pil},
            {"type": "text",  "text": prompt},
        ]}]
        # Greedy decoding: `temperature` only applies when sampling, so it is
        # not passed together with do_sample=False.
        out = pipe(text=messages, max_new_tokens=max_new_tokens or 600, do_sample=False)
        if not out:
            return "⚠️ No output generated"

        first = out[0]
        try:
            # Chat-style output: the last message carries the model's reply.
            text = first["generated_text"][-1].get("content", "")
        except (AttributeError, IndexError, KeyError, TypeError):
            # Plain-string output.
            text = first.get("generated_text", "")
        return (text or "").strip() or "⚠️ Empty response"
    except Exception as e:
        logging.error(f"❌ MedGemma generation error: {e}")
        return "⚠️ VLM error"
 # ---------- Initialize CPU models ----------
 def load_yolo_model():
     """Construct the YOLO detector from the weights at YOLO_MODEL_PATH."""
     yolo_cls = _import_ultralytics()
     detector = yolo_cls(YOLO_MODEL_PATH)
     return detector
 def load_segmentation_model():
     """Load the Keras segmentation model at SEG_MODEL_PATH without compiling it."""
     keras_load = _import_tf_loader()
     seg_model = keras_load(SEG_MODEL_PATH, compile=False)
     return seg_model
 def load_classification_pipeline():
     pipe = _import_hf_cls()
     if "det" not in models_cache:
         try:
             models_cache["det"] = load_yolo_model()
+            logging.info("✅ YOLO loaded (CPU)")
         except Exception as e:
             logging.error(f"YOLO load failed: {e}")
             models_cache["embedding_model"] = None
             logging.warning(f"Embeddings unavailable: {e}")
 def setup_knowledge_base() -> None:
     if "vector_store" in knowledge_base_cache:
         return
 # ---------- Segmentation helpers ----------
 def _imagenet_norm(arr: np.ndarray) -> np.ndarray:
     """
     Standardise an array with fixed per-channel mean/std constants.

     The constants are on a 0..255 scale and broadcast over the last axis,
     so the input is expected to be RGB in 0..255 with 3 channels last.
     """
     channel_mean = np.array([123.675, 116.28, 103.53], dtype=np.float32)
     channel_std = np.array([58.395, 57.12, 57.375], dtype=np.float32)
     centered = arr.astype(np.float32) - channel_mean
     return centered / channel_std
         p = 1.0 / (1.0 + np.exp(-p))
     return p.astype(np.float32)
+# ---- Robust mask post-processing (for "proper" masking) ----
 def _fill_holes(mask01: np.ndarray) -> np.ndarray:
     """
     Fill enclosed background regions ("holes") of a binary mask.

     The mask is padded with a 1-pixel background frame before flood-filling
     from the corner, so the fill always starts on background and reaches
     every background region touching the image border. Seeding the unpadded
     mask at (0, 0) would, when the mask itself covers that pixel, leave the
     exterior unfilled and turn the whole image into foreground.

     Args:
         mask01: single-channel mask (assumed 2-D); non-zero means foreground.

     Returns:
         uint8 0/1 mask with the same height/width and holes set to 1.
     """
     # Normalise to 0/255 without relying on the input being exactly 0/1.
     fg = (mask01 > 0).astype(np.uint8) * 255
     h, w = fg.shape[:2]

     padded = np.pad(fg, 1, mode="constant", constant_values=0)
     # floodFill requires an operation mask 2 px larger than the image it fills.
     ff = np.zeros((h + 4, w + 4), np.uint8)
     cv2.floodFill(padded, ff, (0, 0), 255)

     # After the fill only enclosed holes are still 0; invert and drop the frame.
     holes = cv2.bitwise_not(padded)[1:-1, 1:-1]
     out = (fg | holes) // 255
     return out.astype(np.uint8)
+# Global last debug dict (per-process) to attach into results
 _last_seg_debug: Dict[str, object] = {}
 def segment_wound(image_bgr: np.ndarray, ts: str, out_dir: str) -> Tuple[np.ndarray, Dict[str, object]]:
     """
     Attempts TF segmentation first; falls back to KMeans if needed.

     Args:
         image_bgr: ROI image in BGR channel order.
         ts: timestamp string used in debug artifact file names.
         out_dir: directory where debug images are written when SMARTHEAL_DEBUG is on.

     Returns:
         (mask_uint8_0_255, debug_dict). The debug dict is also stored in the
         module-level `_last_seg_debug`. An empty ROI yields an empty mask with
         reason "empty_roi" instead of raising inside the fallback.
     """
     global _last_seg_debug
     _last_seg_debug = {}

     used = "fallback_kmeans"
     reason = "no_model"
     heatmap_path = None
     saw_roi_path = None

     # Nothing to segment: KMeans and the fraction computation need >= 1 pixel.
     if image_bgr is None or image_bgr.size == 0:
         empty_shape = (0, 0) if image_bgr is None else image_bgr.shape[:2]
         _last_seg_debug = {
             "used": "none",
             "reason": "empty_roi",
             "positive_fraction": 0.0,
             "heatmap_path": heatmap_path,
             "roi_seen_by_model": saw_roi_path,
         }
         return np.zeros(empty_shape, np.uint8), _last_seg_debug

     seg_model = models_cache.get("seg", None)
     if seg_model is not None:
         try:
             ishape = getattr(seg_model, "input_shape", None)
             if not ishape or len(ishape) < 4:
                 raise ValueError(f"Bad seg input_shape: {ishape}")
             th, tw = int(ishape[1]), int(ishape[2])
             x = _preprocess_for_seg(image_bgr, (th, tw))

             if SMARTHEAL_DEBUG:
                 # imwrite expects BGR, which is what we already hold; no
                 # RGB round trip is needed to save the ROI.
                 saw_roi_path = os.path.join(out_dir, f"roi_for_seg_{ts}.png")
                 cv2.imwrite(saw_roi_path, image_bgr)

             # Inference
             pred = seg_model.predict(x, verbose=0)
             if isinstance(pred, (list, tuple)):
                 pred = pred[0]
             p = _to_prob(pred)
             p = cv2.resize(p, (image_bgr.shape[1], image_bgr.shape[0]))  # back to ROI size

             # Debug stats
             pmin, pmax, pmean = float(p.min()), float(p.max()), float(p.mean())
             _log_kv("SEG_PROB_STATS", {"min": pmin, "max": pmax, "mean": pmean})
             if SMARTHEAL_DEBUG:
                 hm = (np.clip(p, 0, 1) * 255).astype(np.uint8)
                 heat = cv2.applyColorMap(hm, cv2.COLORMAP_JET)
                 heatmap_path = os.path.join(out_dir, f"seg_pred_heatmap_{ts}.png")
                 cv2.imwrite(heatmap_path, heat)

             # Threshold
             thr = SEG_THRESH
             mask = (p >= thr).astype(np.uint8)  # 0/1
             pos = int(mask.sum())
             frac = pos / float(mask.size)
             logging.info(f"SegModel USED | thr={thr} pos_px={pos} pos_frac={frac:.4f} ex_rgb={SEG_EXPECTS_RGB} norm={SEG_NORM}")

             used = "tf_model"
             reason = "ok"
             _last_seg_debug = {
                 "used": used,
                 "reason": reason,
                 "input_shape": ishape,
                 "prob_min": pmin, "prob_max": pmax, "prob_mean": pmean,
                 "threshold": thr,
                 "positive_fraction": frac,
                 "heatmap_path": heatmap_path,
                 "roi_seen_by_model": saw_roi_path,
             }
             return (mask * 255).astype(np.uint8), _last_seg_debug
         except Exception as e:
             # Any model-path failure drops through to the KMeans fallback.
             reason = f"model_failed: {e}"
             logging.warning(f"⚠️ Segmentation model prediction failed → fallback. Reason: {e}")

     # --- Fallback: KMeans (k=2), pick 'reddest' cluster in Lab a* ---
     Z = image_bgr.reshape((-1, 3)).astype(np.float32)
     criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
     _, labels, centers = cv2.kmeans(Z, 2, None, criteria, 5, cv2.KMEANS_PP_CENTERS)
     centers_u8 = centers.astype(np.uint8).reshape(1, 2, 3)
     centers_lab = cv2.cvtColor(centers_u8, cv2.COLOR_BGR2LAB)[0]
     wound_idx = int(np.argmax(centers_lab[:, 1]))  # maximize a* (redness)
     mask = (labels.reshape(image_bgr.shape[:2]) == wound_idx).astype(np.uint8)
     pos = int(mask.sum())
     frac = pos / float(mask.size)
     logging.info(f"KMeans USED | pos_px={pos} pos_frac={frac:.4f}")

     _last_seg_debug = {
         "used": used,
         "reason": reason,
         "kmeans_centers_bgr": centers.tolist(),
         "kmeans_centers_lab": centers_lab.astype(float).tolist(),
         "positive_fraction": frac,
         "heatmap_path": heatmap_path,
         "roi_seen_by_model": saw_roi_path,
     }
     return (mask * 255).astype(np.uint8), _last_seg_debug
 # ---------- Measurement + overlay helpers ----------
 def largest_component_mask(binary01: np.ndarray, min_area_px: int = 50) -> np.ndarray:
     largest_idx = 1 + int(np.argmax(areas))
     return (labels == largest_idx).astype(np.uint8)
+def _clean_mask(mask01: np.ndarray) -> np.ndarray:
+    """Open→Close→Fill holes→Largest component."""
+    if mask01.dtype != np.uint8:
+        mask01 = mask01.astype(np.uint8)
+    k = np.ones((3, 3), np.uint8)
+    mask01 = cv2.morphologyEx(mask01, cv2.MORPH_OPEN, k, iterations=1)
+    mask01 = cv2.morphologyEx(mask01, cv2.MORPH_CLOSE, k, iterations=2)
+    mask01 = _fill_holes(mask01)
+    mask01 = largest_component_mask(mask01, min_area_px=30)
+    return (mask01 > 0).astype(np.uint8)
 def measure_min_area_rect(mask01: np.ndarray, px_per_cm: float) -> Tuple[float, float, Tuple]:
     contours, _ = cv2.findContours(mask01.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
     if not contours:
     box = cv2.boxPoints(rect).astype(int)
     return length_cm, breadth_cm, (box, rect[0])
+def count_area_cm2(mask01: np.ndarray, px_per_cm: float) -> float:
+    px_count = float(mask01.astype(bool).sum())
+    return round(px_count / (max(px_per_cm, 1e-6) ** 2), 2)
 def draw_measurement_overlay(
     base_bgr: np.ndarray,
     thickness: int = 2
 ) -> np.ndarray:
     """
+    Draws:
+      1) Strong red mask overlay with white contour.
+      2) Min-area rectangle.
+      3) Two double-headed arrows:
+           - 'Length' along the longer side.
+           - 'Width'  along the shorter side.
     """
     overlay = base_bgr.copy()
+    # --- Strong overlay from mask (tinted red where mask==1) ---
     mask255 = (mask01 * 255).astype(np.uint8)
     mask3 = cv2.merge([mask255, mask255, mask255])
     red = np.zeros_like(overlay); red[:] = (0, 0, 255)
     tinted = cv2.addWeighted(overlay, 1 - alpha, red, alpha, 0)
     overlay = np.where(mask3 > 0, tinted, overlay)
+    # Draw wound contour
     cnts, _ = cv2.findContours(mask255, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
     if cnts:
         cv2.drawContours(overlay, cnts, -1, (255, 255, 255), 2)
         cv2.polylines(overlay, [rect_box], True, (255, 255, 255), thickness)
         pts = rect_box.reshape(-1, 2)
+        def midpoint(a, b):
+            return (int((a[0] + b[0]) / 2), int((a[1] + b[1]) / 2))
+        # Edge lengths
         e = [np.linalg.norm(pts[i] - pts[(i + 1) % 4]) for i in range(4)]
         long_edge_idx = int(np.argmax(e))
+        short_edge_idx = (long_edge_idx + 1) % 2  # 0/1 map for pairs below
+        # Midpoints of opposite edges for arrows
         mids = [midpoint(pts[i], pts[(i + 1) % 4]) for i in range(4)]
+        # Long side uses edges long_edge_idx and the opposite edge (i+2)
         long_pair = (long_edge_idx, (long_edge_idx + 2) % 4)
+        # Short side uses the other pair
         short_pair = ((long_edge_idx + 1) % 4, (long_edge_idx + 3) % 4)
         def draw_double_arrow(img, p1, p2):
             cv2.putText(overlay, text, org, cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 4, cv2.LINE_AA)
             cv2.putText(overlay, text, org, cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2, cv2.LINE_AA)
+        # Draw arrows and labels
         draw_double_arrow(overlay, mids[long_pair[0]], mids[long_pair[1]])
         draw_double_arrow(overlay, mids[short_pair[0]], mids[short_pair[1]])
         put_label(f"Length: {length_cm:.2f} cm", mids[long_pair[0]])
         """
         try:
             px_per_cm, exif_meta = estimate_px_per_cm_from_exif(image_pil, DEFAULT_PX_PER_CM)
             image_cv = cv2.cvtColor(np.array(image_pil.convert("RGB")), cv2.COLOR_RGB2BGR)
             # --- Detection ---
             det_model = self.models_cache.get("det")
             if det_model is None:
                 raise RuntimeError("YOLO model not loaded")
             results = det_model.predict(image_cv, verbose=False, device="cpu")
             if (not results) or (not getattr(results[0], "boxes", None)) or (len(results[0].boxes) == 0):
                 try:
             mask_u8_255, seg_debug = segment_wound(roi, ts, out_dir)
             mask01 = (mask_u8_255 > 127).astype(np.uint8)
+            # Robust post-processing to ensure "proper" masking
             if mask01.any():
                 mask01 = _clean_mask(mask01)
                 logging.debug(f"Mask postproc: px_after={int(mask01.sum())}")
+            # --- Measurement ---
             if mask01.any():
                 length_cm, breadth_cm, (box_pts, _) = measure_min_area_rect(mask01, px_per_cm)
+                surface_area_cm2 = count_area_cm2(mask01, px_per_cm)
+                # Final annotated ROI with mask + arrows + labels
                 anno_roi = draw_measurement_overlay(roi, mask01, box_pts, length_cm, breadth_cm)
                 segmentation_empty = False
             else:
+                # Graceful fallback if seg failed: use ROI box as bounds
                 h_px = max(0, y2 - y1); w_px = max(0, x2 - x1)
                 length_cm = round(max(h_px, w_px) / px_per_cm, 2)
                 breadth_cm = round(min(h_px, w_px) / px_per_cm, 2)
             roi_mask_path = os.path.join(out_dir, f"roi_mask_{ts}.png")
             cv2.imwrite(roi_mask_path, (mask01 * 255).astype(np.uint8))
+            # ROI overlay (clear mask w/ white contour, no arrows)
             mask255 = (mask01 * 255).astype(np.uint8)
             mask3   = cv2.merge([mask255, mask255, mask255])
             red     = np.zeros_like(roi); red[:] = (0, 0, 255)
                 "seg_used": seg_debug.get("used"),
                 "seg_reason": seg_debug.get("reason"),
                 "positive_fraction": round(float(seg_debug.get("positive_fraction", 0.0)), 6),
+                "threshold": seg_debug.get("threshold", SEG_THRESH),
                 "segmentation_empty": segmentation_empty,
                 "exif_px_per_cm": round(px_per_cm, 3),
             }
                 "detection_confidence": float(results[0].boxes.conf[0].cpu().item())
                     if getattr(results[0].boxes, "conf", None) is not None else 0.0,
                 "detection_image_path": detection_path,
+                "segmentation_image_path": segmentation_path,
                 "segmentation_annotated_path": annotated_seg_path,
                 "segmentation_roi_path": segmentation_roi_path,
                 "roi_mask_path": roi_mask_path,
             vs = self.knowledge_base_cache.get("vector_store")
             if not vs:
                 return "Knowledge base is not available."
+            try:
+                retriever = vs.as_retriever(search_kwargs={"k": 5})
+                docs = retriever.get_relevant_documents(query)
+            except Exception:
+                retriever = vs.as_retriever(search_kwargs={"k": 5})
+                docs = retriever.invoke(query)
             lines: List[str] = []
             for d in docs:
                 src = (d.metadata or {}).get("source", "N/A")
             )
             if report and report.strip() and not report.startswith(("⚠️", "❌")):
                 return report
+            logging.warning("MedGemma unavailable/invalid; using fallback.")
             return self._generate_fallback_report(patient_info, visual_results, guideline_context)
         except Exception as e:
             logging.error(f"Report generation failed: {e}")