Spaces:

Pepguy
/

ai_text_dresscode

Running

App Files Files Community

Pepguy committed on Nov 4, 2025

Commit

a02ad5f

verified ·

1 Parent(s): 0900715

Update app.py

Browse files

Files changed (1) hide show

app.py +197 -214

app.py CHANGED Viewed

@@ -6,6 +6,7 @@ import base64
 import logging
 import uuid
 import time
 from typing import List, Dict, Any, Tuple, Optional
 from flask import Flask, request, jsonify
@@ -15,12 +16,8 @@ import numpy as np
 import cv2
 # genai client
-try:
-    from google import genai
-    from google.genai import types
-except Exception:
-    genai = None
-    types = None
 # Firebase Admin (in-memory JSON init)
 try:
@@ -36,17 +33,11 @@ except Exception:
 logging.basicConfig(level=logging.INFO)
 log = logging.getLogger("wardrobe-server")
-GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "").strip()
-if GEMINI_API_KEY and genai:
-    try:
-        client = genai.Client(api_key=GEMINI_API_KEY)
-    except Exception as e:
-        log.exception("Failed to init genai client: %s", e)
-        client = None
-else:
-    client = None
-    if not GEMINI_API_KEY:
-        log.info("GEMINI_API_KEY not set; model calls disabled.")
 # Firebase config (read service account JSON from env)
 FIREBASE_ADMIN_JSON = os.getenv("FIREBASE_ADMIN_JSON", "").strip()
@@ -58,8 +49,9 @@ if FIREBASE_ADMIN_JSON and not FIREBASE_ADMIN_AVAILABLE:
 app = Flask(__name__)
 CORS(app)
-# ---------- Category options (must match frontend) ----------
-CATEGORY_OPTIONS = [
     "Heels",
     "Sneakers",
     "Loafers",
@@ -73,8 +65,89 @@ CATEGORY_OPTIONS = [
     "Coat",
     "Shorts",
 ]
-# normalized set for quick match
-_CATEGORY_RENORM = [c.lower() for c in CATEGORY_OPTIONS]
 # ---------- Firebase init helpers ----------
 _firebase_app = None
@@ -147,52 +220,19 @@ def upload_b64_to_firebase(base64_str: str, path: str, content_type="image/jpeg"
 # ---------- Image helpers (with EXIF transpose) ----------
 def read_image_bytes(file_storage) -> Tuple[np.ndarray, int, int, bytes]:
     """
-    Read uploaded bytes, apply EXIF orientation via PIL.ImageOps.exif_transpose,
-    re-encode to JPEG bytes (EXIF cleared), and return (bgr_numpy, width, height, jpeg_bytes).
     """
     data = file_storage.read()
-    if not data:
-        raise ValueError("No image data uploaded")
-    # Try opening with PIL to read EXIF and apply transpose
-    try:
-        img = Image.open(io.BytesIO(data))
-    except Exception as e:
-        log.warning("PIL failed to open image; falling back to OpenCV decode: %s", e)
-        arr_np = np.frombuffer(data, np.uint8)
-        cv_img = cv2.imdecode(arr_np, cv2.IMREAD_COLOR)
-        if cv_img is None:
-            raise RuntimeError("Could not decode uploaded image")
-        h, w = cv_img.shape[:2]
-        _, jpeg = cv2.imencode(".jpg", cv_img, [int(cv2.IMWRITE_JPEG_QUALITY), 92])
-        return cv_img, w, h, jpeg.tobytes()
-    # log original EXIF orientation when present
-    try:
-        exif = img._getexif() or {}
-        orientation = None
-        if isinstance(exif, dict):
-            orientation = exif.get(274)  # tag 274 orientation
-        log.debug("Original EXIF orientation: %s", orientation)
-    except Exception:
-        orientation = None
-    # physically apply EXIF rotation (so image pixels are upright)
     try:
         img = ImageOps.exif_transpose(img)
-    except Exception as e:
-        log.warning("exif_transpose failed: %s", e)
-    # ensure RGB, then re-encode to JPEG to remove orientation tag from bytes
     img = img.convert("RGB")
     w, h = img.size
-    buf = io.BytesIO()
-    img.save(buf, format="JPEG", quality=92, optimize=True)
-    jpeg_bytes = buf.getvalue()
-    # convert to BGR numpy for OpenCV operations
-    arr = np.array(img)[:, :, ::-1]  # RGB -> BGR
-    return arr, w, h, jpeg_bytes
 def crop_and_b64(bgr_img: np.ndarray, x: int, y: int, w: int, h: int, max_side=512) -> str:
     h_img, w_img = bgr_img.shape[:2]
@@ -201,7 +241,6 @@ def crop_and_b64(bgr_img: np.ndarray, x: int, y: int, w: int, h: int, max_side=5
     crop = bgr_img[y:y2, x:x2]
     if crop.size == 0:
         return ""
-    # resize if too large
     max_dim = max(crop.shape[0], crop.shape[1])
     if max_dim > max_side:
         scale = max_side / max_dim
@@ -263,20 +302,29 @@ def fallback_contour_crops(bgr_img, max_items=8) -> List[Dict[str, Any]]:
 def analyze_crop_with_gemini(jpeg_b64: str) -> Dict[str, Any]:
     """
     Run Gemini on the cropped image bytes to extract:
-      type, summary, brand, tags
-    Returns dict, falls back to defaults on error.
     """
-    if not client or not types:
         return {"type": "unknown", "summary": "", "brand": "", "tags": []}
     try:
         prompt = (
             "You are an assistant that identifies clothing item characteristics from an image. "
             "Return only a JSON object with keys: type (single word like 'shoe','top','jacket'), "
             "summary (a single short sentence, one line), brand (brand name if visible else empty string), "
-            "tags (an array of short single-word tags). Keep values short and concise."
         )
-        contents = [types.Content(role="user", parts=[types.Part.from_text(text=prompt)])]
         image_bytes = base64.b64decode(jpeg_b64)
         contents.append(types.Content(role="user", parts=[types.Part.from_bytes(data=image_bytes, mime_type="image/jpeg")]))
@@ -291,22 +339,24 @@ def analyze_crop_with_gemini(jpeg_b64: str) -> Dict[str, Any]:
             "required": ["type", "summary"]
         }
         cfg = types.GenerateContentConfig(response_mime_type="application/json", response_schema=schema)
         resp = client.models.generate_content(model="gemini-2.5-flash-lite", contents=contents, config=cfg)
         text = resp.text or ""
         parsed = {}
         try:
             parsed = json.loads(text)
         except Exception as e:
             log.warning("Failed parsing Gemini analysis JSON: %s — raw: %s", e, (text[:300] if text else ""))
             parsed = {"type": "unknown", "summary": "", "brand": "", "tags": []}
-        # coerce
-        parsed["type"] = str(parsed.get("type","") or "").strip()
-        parsed["summary"] = str(parsed.get("summary","") or "").strip()
-        parsed["brand"] = str(parsed.get("brand","") or "").strip()
-        tags = parsed.get("tags", [])
-        if not isinstance(tags, list):
-            tags = []
-        parsed["tags"] = [str(t).strip() for t in tags if str(t).strip()]
         return {
             "type": parsed.get("type", "unknown") or "unknown",
             "summary": parsed.get("summary", "") or "",
@@ -317,73 +367,6 @@ def analyze_crop_with_gemini(jpeg_b64: str) -> Dict[str, Any]:
         log.exception("analyze_crop_with_gemini failure: %s", e)
         return {"type": "unknown", "summary": "", "brand": "", "tags": []}
-# ---------- Title mapping helper ----------
-def choose_title_from_label_and_analysis(label: str, analysis: Dict[str, Any]) -> str:
-    """
-    Return a title that is guaranteed to be one of CATEGORY_OPTIONS.
-    Heuristics:
-      - check analysis.type
-      - check analysis.tags
-      - check label text
-      - fallback to 'T-Shirt'
-    """
-    def find_match_in_text(txt: str) -> Optional[str]:
-        if not txt:
-            return None
-        s = txt.lower()
-        # quick synonyms mapping
-        synonyms = {
-            "tshirt": "T-Shirt", "t-shirt": "T-Shirt", "tee": "T-Shirt",
-            "sneaker": "Sneakers", "trainers": "Sneakers",
-            "jeans": "Jeans", "denim": "Jeans",
-            "dress": "Dress",
-            "skirt": "Skirt",
-            "jacket": "Jacket",
-            "coat": "Coat",
-            "blazer": "Blazer",
-            "boot": "Boots",
-            "heel": "Heels",
-            "loafer": "Loafers",
-            "short": "Shorts",
-            "shoe": "Sneakers",  # generic shoe -> put under Sneakers by default
-            "sneakers": "Sneakers",
-        }
-        for k, v in synonyms.items():
-            if k in s:
-                return v
-        # check direct category words
-        for idx, cat in enumerate(CATEGORY_OPTIONS):
-            if cat.lower().replace("-", "").replace(" ", "") in s.replace("-", "").replace(" ", ""):
-                return CATEGORY_OPTIONS[idx]
-        return None
-    # try analysis.type first
-    atype = (analysis.get("type") or "").strip()
-    match = find_match_in_text(atype)
-    if match:
-        return match
-    # try analysis.tags
-    tags = analysis.get("tags") or []
-    if isinstance(tags, list):
-        for t in tags:
-            m = find_match_in_text(t)
-            if m:
-                return m
-    # try label (raw detection label from detection model)
-    m = find_match_in_text(label or "")
-    if m:
-        return m
-    # try analysis.summary casual check
-    m = find_match_in_text(analysis.get("summary", "") or "")
-    if m:
-        return m
-    # fallback: prefer 'T-Shirt' as generic top fallback (guaranteed category)
-    return "T-Shirt"
 # ---------- Main / processing ----------
 @app.route("/process", methods=["POST"])
 def process_image():
@@ -394,15 +377,14 @@ def process_image():
     uid = (request.form.get("uid") or request.args.get("uid") or "anon").strip() or "anon"
     try:
-        # read and get corrected jpeg bytes (EXIF transpose applied)
-        bgr_img, img_w, img_h, corrected_jpeg_bytes = read_image_bytes(file)
     except Exception as e:
         log.error("invalid image: %s", e)
         return jsonify({"error": "invalid image"}), 400
     session_id = str(uuid.uuid4())
-    # Detection prompt (Gemini expects the corrected image bytes)
     user_prompt = (
         "You are an assistant that extracts clothing detections from a single image. "
         "Return a JSON object with a single key 'items' which is an array. Each item must have: "
@@ -414,11 +396,9 @@ def process_image():
     try:
         contents = [
-            types.Content(role="user", parts=[types.Part.from_text(text=user_prompt)]) if types else None
         ]
-        # attach corrected jpeg bytes
-        if types:
-            contents.append(types.Content(role="user", parts=[types.Part.from_bytes(data=corrected_jpeg_bytes, mime_type="image/jpeg")]))
         schema = {
             "type": "object",
@@ -448,17 +428,12 @@ def process_image():
             "required": ["items"]
         }
-        cfg = types.GenerateContentConfig(response_mime_type="application/json", response_schema=schema) if types else None
-        if client and types:
-            log.info("Calling Gemini model for detection (gemini-2.5-flash-lite)...")
-            model_resp = client.models.generate_content(model="gemini-2.5-flash-lite", contents=contents, config=cfg)
-            raw_text = model_resp.text or ""
-        else:
-            log.info("Gemini client not configured, skipping model detection — using fallback.")
-            raw_text = ""
-        log.info("Gemini raw response length: %d", len(raw_text) if raw_text else 0)
         parsed = None
         try:
@@ -471,7 +446,7 @@ def process_image():
         if parsed and isinstance(parsed.get("items"), list) and len(parsed["items"])>0:
             for it in parsed["items"]:
                 try:
-                    raw_label = str(it.get("label","unknown"))[:64]
                     bbox = it.get("bbox",{})
                     nx = float(bbox.get("x",0))
                     ny = float(bbox.get("y",0))
@@ -483,45 +458,24 @@ def process_image():
                     pw = int(nw * img_w); ph = int(nh * img_h)
                     if pw <= 8 or ph <= 8:
                         continue
-                    crop_b64 = crop_and_b64(bgr_img, px, py, pw, ph)
-                    if not crop_b64:
                         continue
-                    # analyze crop with Gemini (optional)
-                    analysis = analyze_crop_with_gemini(crop_b64) if client else {"type":"unknown","summary":"","brand":"","tags":[]}
-                    # choose title within CATEGORY_OPTIONS
-                    title = choose_title_from_label_and_analysis(raw_label, analysis)
-                    item_id = str(uuid.uuid4())
-                    itm = {
-                        "id": item_id,
-                        "label": raw_label,
-                        "title": title,
                         "confidence": float(it.get("confidence", 0.5)),
                         "bbox": {"x": px, "y": py, "w": pw, "h": ph},
-                        "thumbnail_b64": crop_b64,
-                        "analysis": analysis,
                         "source": "gemini"
-                    }
-                    items_out.append(itm)
                 except Exception as e:
                     log.warning("skipping item due to error: %s", e)
         else:
             log.info("Gemini returned no items or parse failed — using fallback contour crops.")
             items_out = fallback_contour_crops(bgr_img, max_items=8)
-            # do analysis + title mapping for fallback crops
-            for itm in items_out:
-                try:
-                    crop_b64 = itm.get("thumbnail_b64")
-                    analysis = analyze_crop_with_gemini(crop_b64) if client else {"type":"unknown","summary":"","brand":"","tags":[]}
-                    itm["analysis"] = analysis
-                    itm["title"] = choose_title_from_label_and_analysis(itm.get("label","unknown"), analysis)
-                except Exception:
-                    itm["analysis"] = {"type":"unknown","summary":"","brand":"","tags":[]}
-                    itm["title"] = choose_title_from_label_and_analysis(itm.get("label","unknown"), itm["analysis"])
-        # Auto-upload thumbnails to Firebase Storage (temporary, marked by session_id)
         if FIREBASE_ADMIN_JSON and FIREBASE_ADMIN_AVAILABLE:
             try:
                 init_firebase_admin_if_needed()
@@ -535,6 +489,25 @@ def process_image():
                 b64 = itm.get("thumbnail_b64")
                 if not b64:
                     continue
                 item_id = itm.get("id") or str(uuid.uuid4())
                 path = f"detected/{safe_uid}/{item_id}.jpg"
                 try:
@@ -543,41 +516,44 @@ def process_image():
                         "session_id": session_id,
                         "uploaded_by": safe_uid,
                         "uploaded_at": str(int(time.time())),
-                        # AI fields
-                        "ai_type": itm.get("analysis", {}).get("type", ""),
-                        "ai_brand": itm.get("analysis", {}).get("brand", ""),
-                        "ai_summary": itm.get("analysis", {}).get("summary", ""),
-                        "ai_tags": json.dumps(itm.get("analysis", {}).get("tags", [])),
-                        "ai_title": itm.get("title", "")
                     }
                     url = upload_b64_to_firebase(b64, path, content_type="image/jpeg", metadata=metadata)
                     itm["thumbnail_url"] = url
                     itm["thumbnail_path"] = path
-                    # remove raw base64 to keep response small
                     itm.pop("thumbnail_b64", None)
                     itm["_session_id"] = session_id
-                    # annotate uploaded_at (unix)
-                    itm["uploaded_at"] = int(time.time())
-                    log.debug("Auto-uploaded thumbnail for %s -> %s (session=%s)", item_id, url, session_id)
                 except Exception as up_e:
                     log.warning("Auto-upload failed for %s: %s", item_id, up_e)
-                    # keep thumbnail_b64 as fallback
         else:
             if not FIREBASE_ADMIN_JSON:
                 log.info("FIREBASE_ADMIN_JSON not set; skipping server-side thumbnail upload.")
             else:
                 log.info("Firebase admin SDK not available; skipping server-side thumbnail upload.")
-        # Final response: items contain id,title,confidence,bbox,thumbnail_url or thumbnail_b64,analysis,uploaded_at if available,source, _session_id
         return jsonify({"ok": True, "items": items_out, "session_id": session_id, "debug": {"raw_model_text": (raw_text or "")[:1600]}}), 200
     except Exception as ex:
         log.exception("Processing error: %s", ex)
         try:
             items_out = fallback_contour_crops(bgr_img, max_items=8)
             for itm in items_out:
-                itm["analysis"] = analyze_crop_with_gemini(itm.get("thumbnail_b64")) if client else {"type":"unknown","summary":"","brand":"","tags":[]}
-                itm["title"] = choose_title_from_label_and_analysis(itm.get("label","unknown"), itm["analysis"])
             return jsonify({"ok": True, "items": items_out, "session_id": session_id, "debug": {"error": str(ex)}}), 200
         except Exception as e2:
             log.exception("Fallback also failed: %s", e2)
@@ -597,7 +573,6 @@ def finalize_detections():
     Returns:
       { ok: True, kept: [...], deleted: [...], errors: [...] }
-    kept entries include id, thumbnail_url, thumbnail_path, analysis, title, uploaded_at
     """
     try:
         body = request.get_json(force=True)
@@ -644,35 +619,43 @@ def finalize_detections():
                     continue
                 if item_id in keep_ids:
                     try:
                         blob.make_public()
                         url = blob.public_url
                     except Exception:
                         url = f"gs://{bucket.name}/{name}"
                     ai_type = md.get("ai_type") or ""
                     ai_brand = md.get("ai_brand") or ""
                     ai_summary = md.get("ai_summary") or ""
                     ai_tags_raw = md.get("ai_tags") or "[]"
                     try:
                         ai_tags = json.loads(ai_tags_raw) if isinstance(ai_tags_raw, str) else ai_tags_raw
                     except Exception:
                         ai_tags = []
-                    ai_title = md.get("ai_title") or ""
-                    uploaded_at = md.get("uploaded_at") or None
                     kept.append({
                         "id": item_id,
                         "thumbnail_url": url,
                         "thumbnail_path": name,
                         "analysis": {
-                            "type": ai_type,
-                            "brand": ai_brand,
-                            "summary": ai_summary,
-                            "tags": ai_tags
                         },
-                        "title": ai_title or choose_title_from_label_and_analysis("", {"type": ai_type, "summary": ai_summary, "brand": ai_brand, "tags": ai_tags}),
-                        "uploaded_at": int(uploaded_at) if uploaded_at and str(uploaded_at).isdigit() else uploaded_at
                     })
                 else:
                     try:

 import logging
 import uuid
 import time
+import re
 from typing import List, Dict, Any, Tuple, Optional
 from flask import Flask, request, jsonify
 import cv2
 # genai client
+from google import genai
+from google.genai import types
 # Firebase Admin (in-memory JSON init)
 try:
 logging.basicConfig(level=logging.INFO)
 log = logging.getLogger("wardrobe-server")
# Gemini client setup.
# The key is stripped so that a whitespace-only value, or a secret pasted with a
# trailing newline, is not mistaken for a usable key.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "").strip()
client = None
if not GEMINI_API_KEY:
    log.warning("GEMINI_API_KEY not set — gemini calls will fail (but fallback still works).")
else:
    # A failure while constructing the client must not abort start-up: the rest
    # of the module already treats `client is None` as "model calls disabled".
    try:
        client = genai.Client(api_key=GEMINI_API_KEY)
    except Exception as e:
        log.exception("Failed to init genai client: %s", e)
        client = None
 # Firebase config (read service account JSON from env)
 FIREBASE_ADMIN_JSON = os.getenv("FIREBASE_ADMIN_JSON", "").strip()
 app = Flask(__name__)
 CORS(app)
+# ---------- Category mapping (must match frontend) ----------
+# These values intentionally match the CATEGORY_OPTIONS array on the frontend.
+CATEGORIES = [
     "Heels",
     "Sneakers",
     "Loafers",
     "Coat",
     "Shorts",
 ]
+# simple synonyms / keyword -> category mapping (lowercase keys)
+SYNONYMS: Dict[str, str] = {
+    "heel": "Heels",
+    "heels": "Heels",
+    "sneaker": "Sneakers",
+    "sneakers": "Sneakers",
+    "trainer": "Sneakers",
+    "trainers": "Sneakers",
+    "loafer": "Loafers",
+    "loafers": "Loafers",
+    "boot": "Boots",
+    "boots": "Boots",
+    "dress": "Dress",
+    "gown": "Dress",
+    "jean": "Jeans",
+    "jeans": "Jeans",
+    "denim": "Jeans",
+    "skirt": "Skirt",
+    "jacket": "Jacket",
+    "coat": "Coat",
+    "blazer": "Blazer",
+    "t-shirt": "T-Shirt",
+    "t shirt": "T-Shirt",
+    "tee": "T-Shirt",
+    "shirt": "T-Shirt",
+    "top": "T-Shirt",
+    "short": "Shorts",
+    "shorts": "Shorts",
+    "shoe": "Sneakers",  # generic shoe -> map to Sneakers as fallback
+    "shoes": "Sneakers",
+    "sandal": "Heels",  # if ambiguous, map sandals to Heels bucket (you can adjust)
+    "sandals": "Heels",
+}
def normalize_text(s: str) -> str:
    """
    Lowercase *s* and reduce it to letters, digits, hyphens and single spaces.

    Every character other than a-z, 0-9, whitespace and '-' is replaced by a
    space; runs of whitespace (including tabs and newlines) are then collapsed
    to one space and the ends are trimmed, so multi-word keywords such as
    "t shirt" compare equal regardless of the original punctuation or spacing.

    Falsy input (None, "") yields "". Non-string input is converted with str().
    """
    if not s:
        return ""
    cleaned = re.sub(r"[^a-z0-9\s\-]", " ", str(s).lower())
    # split()/join collapses any whitespace run left behind by the substitution
    return " ".join(cleaned.split())
def choose_category_from_candidates(*candidates: Optional[str], tags: Optional[List[str]] = None) -> str:
    """
    Pick a frontend category for a detected item.

    Texts are examined in this order: each entry of *tags*, then each positional
    candidate (analysis.type, label, summary, ...) in the order given. The first
    text containing a known keyword decides the result. Keywords are the
    lowercase names in CATEGORIES plus the keys of SYNONYMS.

    A keyword only counts when it ends a word, optionally followed by a plural
    "s"/"es". This keeps compounds working ("raincoat" -> Coat,
    "high-heels" -> Heels, "denim-jacket" -> Jacket) while rejecting keywords
    that are merely a prefix or infix of another word ("short-sleeve",
    "dressy", "steel"). Within one text the left-most keyword wins, and at the
    same position the longest one wins ("t-shirt" over "shirt").

    Args:
        *candidates: free-text hints; falsy entries are ignored.
        tags: optional list of tag strings; a single string is treated as one
            tag, any other non-list value is ignored.

    Returns:
        The matched category, or "T-Shirt" when nothing matches. The result is
        a member of CATEGORIES provided every SYNONYMS value and the "T-Shirt"
        default are listed there.
    """
    default_category = "T-Shirt"

    # keyword -> category table; category names themselves act as keywords
    lookup: Dict[str, str] = {cat.lower(): cat for cat in CATEGORIES}
    for key, cat in SYNONYMS.items():
        norm_key = " ".join(str(key).lower().split())
        if norm_key:
            lookup[norm_key] = cat
    if not lookup:
        return default_category

    # Longest alternatives first so the regex prefers the most specific keyword.
    alternatives = "|".join(
        re.escape(key) for key in sorted(lookup, key=len, reverse=True)
    )
    # keyword, optional plural suffix, then a word end ('-' continues a compound)
    keyword_re = re.compile(rf"({alternatives})(?:e?s)?(?![a-z0-9\-])")

    def match(text) -> Optional[str]:
        """Return the category for the left-most keyword in *text*, if any."""
        if not text:
            return None
        cleaned = " ".join(normalize_text(str(text)).split())
        hit = keyword_re.search(cleaned)
        return lookup[hit.group(1)] if hit else None

    # A bare string would otherwise be iterated character by character.
    if isinstance(tags, str):
        tags = [tags]
    elif not isinstance(tags, (list, tuple)):
        tags = []

    for text in (*tags, *candidates):
        found = match(text)
        if found:
            return found

    # Nothing recognised: fall back to the generic default
    return default_category
 # ---------- Firebase init helpers ----------
 _firebase_app = None
 # ---------- Image helpers (with EXIF transpose) ----------
 def read_image_bytes(file_storage) -> Tuple[np.ndarray, int, int, bytes]:
     """
     Decode an uploaded image, make it upright, and return pixels plus JPEG bytes.

     The upload is opened with PIL, its EXIF orientation is applied to the
     pixels, and the result is converted to RGB. The same upright image is then
     re-encoded as JPEG, so the returned bytes and the returned array always
     describe the same picture. The caller sends the bytes to the model as
     image/jpeg and maps the model's normalised boxes back onto the array.

     Args:
         file_storage: object with a read() method returning the uploaded bytes.

     Returns:
         (bgr, width, height, jpeg_bytes) where bgr is a contiguous BGR array.

     Raises:
         ValueError: if the upload is empty.
         Exception: whatever PIL raises when the data is not a decodable image.
     """
     data = file_storage.read()
     if not data:
         raise ValueError("No image data uploaded")
     img = Image.open(io.BytesIO(data))
     # apply EXIF orientation so photos from phones are upright
     try:
         img = ImageOps.exif_transpose(img)
     except Exception as e:
         # best effort: keep the image as decoded, but leave a trace in the log
         log.warning("exif_transpose failed: %s", e)
     img = img.convert("RGB")
     w, h = img.size
     # Re-encode the corrected pixels; the original upload may be rotated via
     # EXIF or may not be a JPEG at all.
     buf = io.BytesIO()
     img.save(buf, format="JPEG", quality=92)
     # RGB -> BGR for OpenCV; copy so the array is contiguous, not a reversed view
     arr = np.ascontiguousarray(np.array(img)[:, :, ::-1])
     return arr, w, h, buf.getvalue()
 def crop_and_b64(bgr_img: np.ndarray, x: int, y: int, w: int, h: int, max_side=512) -> str:
     h_img, w_img = bgr_img.shape[:2]
     crop = bgr_img[y:y2, x:x2]
     if crop.size == 0:
         return ""
     max_dim = max(crop.shape[0], crop.shape[1])
     if max_dim > max_side:
         scale = max_side / max_dim
 def analyze_crop_with_gemini(jpeg_b64: str) -> Dict[str, Any]:
     """
     Run Gemini on the cropped image bytes to extract:
+      type (one-word category like 'shoe', 'jacket', 'dress'),
+      summary (single-line description),
+      brand (string or empty),
+      tags (array of short descriptors)
+    Returns dict, falls back to empty/defaults on error or missing key.
     """
+    if not client:
         return {"type": "unknown", "summary": "", "brand": "", "tags": []}
     try:
+        # prepare prompt
         prompt = (
             "You are an assistant that identifies clothing item characteristics from an image. "
             "Return only a JSON object with keys: type (single word like 'shoe','top','jacket'), "
             "summary (a single short sentence, one line), brand (brand name if visible else empty string), "
+            "tags (an array of short single-word tags describing visible attributes, e.g. ['striped','leather','white']). "
+            "Keep values short and concise."
         )
+        contents = [
+            types.Content(role="user", parts=[types.Part.from_text(text=prompt)])
+        ]
+        # attach the image bytes
         image_bytes = base64.b64decode(jpeg_b64)
         contents.append(types.Content(role="user", parts=[types.Part.from_bytes(data=image_bytes, mime_type="image/jpeg")]))
             "required": ["type", "summary"]
         }
         cfg = types.GenerateContentConfig(response_mime_type="application/json", response_schema=schema)
+        # call model (use the same model family you used before)
         resp = client.models.generate_content(model="gemini-2.5-flash-lite", contents=contents, config=cfg)
         text = resp.text or ""
         parsed = {}
         try:
             parsed = json.loads(text)
+            # coerce expected shapes
+            parsed["type"] = str(parsed.get("type", "")).strip()
+            parsed["summary"] = str(parsed.get("summary", "")).strip()
+            parsed["brand"] = str(parsed.get("brand", "")).strip()
+            tags = parsed.get("tags", [])
+            if not isinstance(tags, list):
+                tags = []
+            parsed["tags"] = [str(t).strip() for t in tags if str(t).strip()]
         except Exception as e:
             log.warning("Failed parsing Gemini analysis JSON: %s — raw: %s", e, (text[:300] if text else ""))
             parsed = {"type": "unknown", "summary": "", "brand": "", "tags": []}
         return {
             "type": parsed.get("type", "unknown") or "unknown",
             "summary": parsed.get("summary", "") or "",
         log.exception("analyze_crop_with_gemini failure: %s", e)
         return {"type": "unknown", "summary": "", "brand": "", "tags": []}
 # ---------- Main / processing ----------
 @app.route("/process", methods=["POST"])
 def process_image():
     uid = (request.form.get("uid") or request.args.get("uid") or "anon").strip() or "anon"
     try:
+        bgr_img, img_w, img_h, raw_bytes = read_image_bytes(file)
     except Exception as e:
         log.error("invalid image: %s", e)
         return jsonify({"error": "invalid image"}), 400
     session_id = str(uuid.uuid4())
+    # Detection prompt (same as before)
     user_prompt = (
         "You are an assistant that extracts clothing detections from a single image. "
         "Return a JSON object with a single key 'items' which is an array. Each item must have: "
     try:
         contents = [
+            types.Content(role="user", parts=[types.Part.from_text(text=user_prompt)])
         ]
+        contents.append(types.Content(role="user", parts=[types.Part.from_bytes(data=raw_bytes, mime_type="image/jpeg")]))
         schema = {
             "type": "object",
             "required": ["items"]
         }
+        cfg = types.GenerateContentConfig(response_mime_type="application/json", response_schema=schema)
+        log.info("Calling Gemini model for detection (gemini-2.5-flash-lite)...")
+        model_resp = client.models.generate_content(model="gemini-2.5-flash-lite", contents=contents, config=cfg) if client else None
+        raw_text = (model_resp.text or "") if model_resp else ""
+        log.info("Gemini raw response length: %d", len(raw_text))
         parsed = None
         try:
         if parsed and isinstance(parsed.get("items"), list) and len(parsed["items"])>0:
             for it in parsed["items"]:
                 try:
+                    label = str(it.get("label","unknown"))[:48]
                     bbox = it.get("bbox",{})
                     nx = float(bbox.get("x",0))
                     ny = float(bbox.get("y",0))
                     pw = int(nw * img_w); ph = int(nh * img_h)
                     if pw <= 8 or ph <= 8:
                         continue
+                    b64 = crop_and_b64(bgr_img, px, py, pw, ph)
+                    if not b64:
                         continue
+                    items_out.append({
+                        "id": str(uuid.uuid4()),
+                        "label": label,
                         "confidence": float(it.get("confidence", 0.5)),
                         "bbox": {"x": px, "y": py, "w": pw, "h": ph},
+                        "thumbnail_b64": b64,
                         "source": "gemini"
+                    })
                 except Exception as e:
                     log.warning("skipping item due to error: %s", e)
         else:
             log.info("Gemini returned no items or parse failed — using fallback contour crops.")
             items_out = fallback_contour_crops(bgr_img, max_items=8)
+        # Perform AI analysis per crop (if possible) and auto-upload to firebase with metadata (tmp + session)
         if FIREBASE_ADMIN_JSON and FIREBASE_ADMIN_AVAILABLE:
             try:
                 init_firebase_admin_if_needed()
                 b64 = itm.get("thumbnail_b64")
                 if not b64:
                     continue
+                # analyze
+                try:
+                    analysis = analyze_crop_with_gemini(b64) if client else {"type":"unknown","summary":"","brand":"","tags":[]}
+                except Exception as ae:
+                    log.warning("analysis failed: %s", ae)
+                    analysis = {"type":"unknown","summary":"","brand":"","tags":[]}
+                itm["analysis"] = analysis
+                # choose a frontend-category-compatible title
+                # prefer analysis.type, then label, then tags, then summary
+                title = choose_category_from_candidates(
+                    analysis.get("type", ""),
+                    itm.get("label", ""),
+                    ' '.join(analysis.get("tags", [])),
+                    tags=analysis.get("tags", [])
+                )
+                itm["title"] = title
                 item_id = itm.get("id") or str(uuid.uuid4())
                 path = f"detected/{safe_uid}/{item_id}.jpg"
                 try:
                         "session_id": session_id,
                         "uploaded_by": safe_uid,
                         "uploaded_at": str(int(time.time())),
+                        # store AI fields as JSON strings for later inspection
+                        "ai_type": analysis.get("type",""),
+                        "ai_brand": analysis.get("brand",""),
+                        "ai_summary": analysis.get("summary",""),
+                        "ai_tags": json.dumps(analysis.get("tags", [])),
+                        "title": title,
                     }
                     url = upload_b64_to_firebase(b64, path, content_type="image/jpeg", metadata=metadata)
                     itm["thumbnail_url"] = url
                     itm["thumbnail_path"] = path
                     itm.pop("thumbnail_b64", None)
                     itm["_session_id"] = session_id
+                    log.debug("Auto-uploaded thumbnail for %s -> %s (session=%s) title=%s", item_id, url, session_id, title)
                 except Exception as up_e:
                     log.warning("Auto-upload failed for %s: %s", item_id, up_e)
+                    # keep thumbnail_b64 and analysis for client fallback
         else:
             if not FIREBASE_ADMIN_JSON:
                 log.info("FIREBASE_ADMIN_JSON not set; skipping server-side thumbnail upload.")
             else:
                 log.info("Firebase admin SDK not available; skipping server-side thumbnail upload.")
+            # For non-upload path, still add a title derived from label/unknown so frontend has it
+            for itm in items_out:
+                if "title" not in itm:
+                    analysis = itm.get("analysis") or {"type":"unknown","tags":[]}
+                    title = choose_category_from_candidates(analysis.get("type",""), itm.get("label",""), tags=analysis.get("tags", []))
+                    itm["title"] = title
         return jsonify({"ok": True, "items": items_out, "session_id": session_id, "debug": {"raw_model_text": (raw_text or "")[:1600]}}), 200
     except Exception as ex:
         log.exception("Processing error: %s", ex)
         try:
             items_out = fallback_contour_crops(bgr_img, max_items=8)
+            # give fallback items a default title so frontend can filter
             for itm in items_out:
+                if "title" not in itm:
+                    itm["title"] = choose_category_from_candidates(itm.get("label","unknown"))
             return jsonify({"ok": True, "items": items_out, "session_id": session_id, "debug": {"error": str(ex)}}), 200
         except Exception as e2:
             log.exception("Fallback also failed: %s", e2)
     Returns:
       { ok: True, kept: [...], deleted: [...], errors: [...] }
     """
     try:
         body = request.get_json(force=True)
                     continue
                 if item_id in keep_ids:
+                    # ensure public URL available if possible
                     try:
                         blob.make_public()
                         url = blob.public_url
                     except Exception:
                         url = f"gs://{bucket.name}/{name}"
+                    # extract AI metadata (if present)
                     ai_type = md.get("ai_type") or ""
                     ai_brand = md.get("ai_brand") or ""
                     ai_summary = md.get("ai_summary") or ""
                     ai_tags_raw = md.get("ai_tags") or "[]"
+                    title_meta = md.get("title") or ""
                     try:
                         ai_tags = json.loads(ai_tags_raw) if isinstance(ai_tags_raw, str) else ai_tags_raw
                     except Exception:
                         ai_tags = []
+                    # derive title: prefer stored metadata title, then ai_type/tags/summary
+                    title = None
+                    if title_meta:
+                        try:
+                            title = json.loads(title_meta) if (title_meta.startswith('[') or title_meta.startswith('{')) else str(title_meta)
+                        except Exception:
+                            title = str(title_meta)
+                    if not title:
+                        title = choose_category_from_candidates(ai_type, ai_summary, tags=ai_tags)
                     kept.append({
                         "id": item_id,
                         "thumbnail_url": url,
                         "thumbnail_path": name,
                         "analysis": {
+                          "type": ai_type,
+                          "brand": ai_brand,
+                          "summary": ai_summary,
+                          "tags": ai_tags
                         },
+                        "title": title
                     })
                 else:
                     try: