Spaces:

Pepguy
/

ai_text_dresscode

Sleeping

App Files Community

Pepguy committed on Nov 4, 2025

Commit

0900715

verified ·

1 Parent(s): 6deee40

Update app.py

Browse files

Files changed (1) hide show

app.py +279 -212

app.py CHANGED Viewed

@@ -2,13 +2,10 @@
 import os
 import io
 import json
-from io import BytesIO
 import base64
 import logging
 import uuid
 import time
-import re
 from typing import List, Dict, Any, Tuple, Optional
 from flask import Flask, request, jsonify
@@ -18,8 +15,12 @@ import numpy as np
 import cv2
 # genai client
-from google import genai
-from google.genai import types
 # Firebase Admin (in-memory JSON init)
 try:
@@ -35,11 +36,17 @@ except Exception:
 logging.basicConfig(level=logging.INFO)
 log = logging.getLogger("wardrobe-server")
-GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "")
-if not GEMINI_API_KEY:
-    log.warning("GEMINI_API_KEY not set — gemini calls will fail (but fallback still works).")
-client = genai.Client(api_key=GEMINI_API_KEY) if GEMINI_API_KEY else None
 # Firebase config (read service account JSON from env)
 FIREBASE_ADMIN_JSON = os.getenv("FIREBASE_ADMIN_JSON", "").strip()
@@ -51,8 +58,8 @@ if FIREBASE_ADMIN_JSON and not FIREBASE_ADMIN_AVAILABLE:
 app = Flask(__name__)
 CORS(app)
-# ---------- Category mapping (must match frontend) ----------
-CATEGORIES = [
     "Heels",
     "Sneakers",
     "Loafers",
@@ -66,96 +73,8 @@ CATEGORIES = [
     "Coat",
     "Shorts",
 ]
-SYNONYMS: Dict[str, str] = {
-    "heel": "Heels", "heels": "Heels",
-    "sneaker": "Sneakers", "sneakers": "Sneakers", "trainer": "Sneakers", "trainers": "Sneakers",
-    "loafer": "Loafers", "loafers": "Loafers",
-    "boot": "Boots", "boots": "Boots",
-    "dress": "Dress", "gown": "Dress",
-    "jean": "Jeans", "jeans": "Jeans", "denim": "Jeans",
-    "skirt": "Skirt",
-    "jacket": "Jacket",
-    "coat": "Coat",
-    "blazer": "Blazer",
-    "t-shirt": "T-Shirt", "t shirt": "T-Shirt", "tee": "T-Shirt", "shirt": "T-Shirt", "top": "T-Shirt",
-    "short": "Shorts", "shorts": "Shorts",
-    "shoe": "Sneakers", "shoes": "Sneakers",
-    "sandal": "Heels", "sandals": "Heels",
-}
-def normalize_text(s: str) -> str:
-    return re.sub(r'[^a-z0-9\s\-]', ' ', s.lower()).strip()
-def choose_category_from_candidates(*candidates: Optional[str], tags: Optional[List[str]] = None) -> str:
-    if tags:
-        for t in tags:
-            if not t: continue
-            tok = normalize_text(str(t))
-            if tok in SYNONYMS:
-                return SYNONYMS[tok]
-            for key, cat in SYNONYMS.items():
-                if key in tok:
-                    return cat
-            for cat in CATEGORIES:
-                if tok == cat.lower() or cat.lower() in tok:
-                    return cat
-    for c in candidates:
-        if not c: continue
-        s = normalize_text(str(c))
-        for cat in CATEGORIES:
-            if s == cat.lower() or cat.lower() in s:
-                return cat
-        words = s.split()
-        for w in words:
-            if w in SYNONYMS:
-                return SYNONYMS[w]
-        for key, cat in SYNONYMS.items():
-            if key in s:
-                return cat
-    return "T-Shirt"
-# ---------- New: ask Gemini to pick EXACT allowed category ----------
-def pick_allowed_category(preferred_text: Optional[str], label_text: Optional[str], tags: Optional[List[str]] = None) -> str:
-    """
-    Try to get Gemini to return exactly one category string from CATEGORIES.
-    If client not available or call fails or the returned value isn't an exact match, fallback to local chooser.
-    """
-    candidate = preferred_text or label_text or ""
-    # build short instruction
-    if client:
-        try:
-            # prompt: return exactly one of the categories listed, nothing else (no punctuation)
-            prompt = (
-                "You are given a short description of a clothing item. "
-                "From the following list choose the single best category that matches the item. "
-                "Return ONLY the category name exactly as shown (case-sensitive match is not required):\n\n"
-                f"{', '.join(CATEGORIES)}\n\n"
-                f"Item description: {candidate}\n\n"
-                "Output exactly one of the category names above (no JSON, no explanation)."
-            )
-            contents = [types.Content(role="user", parts=[types.Part.from_text(text=prompt)])]
-            # prefer to ask model to respond with a single string; we won't rely on strict schema formatting,
-            # but we'll attempt to validate the returned string.
-            cfg = types.GenerateContentConfig(response_mime_type="text/plain")
-            resp = client.models.generate_content(model="gemini-2.5-flash-lite", contents=contents, config=cfg)
-            raw = (resp.text or "").strip()
-            # strip quotes if present
-            candidate_out = raw.strip().strip('"').strip("'").strip()
-            # check candidate_out against allowed categories (case-insensitive)
-            for cat in CATEGORIES:
-                if candidate_out.lower() == cat.lower():
-                    return cat
-            # sometimes model returns JSON or extra text; try to extract any allowed category substring
-            low = candidate_out.lower()
-            for cat in CATEGORIES:
-                if cat.lower() in low:
-                    return cat
-            # if not matched, fallback to local matching
-        except Exception as e:
-            log.warning("pick_allowed_category Gemini call failed: %s", e)
-    # Gemini not available or didn't return a valid match -> fallback
-    return choose_category_from_candidates(preferred_text, label_text, tags=tags)
 # ---------- Firebase init helpers ----------
 _firebase_app = None
@@ -187,20 +106,28 @@ def init_firebase_admin_if_needed():
         raise
 def upload_b64_to_firebase(base64_str: str, path: str, content_type="image/jpeg", metadata: dict = None) -> str:
     if not FIREBASE_ADMIN_JSON:
         raise RuntimeError("FIREBASE_ADMIN_JSON not set")
     init_firebase_admin_if_needed()
     if not FIREBASE_ADMIN_AVAILABLE:
         raise RuntimeError("firebase-admin not available")
     raw = base64_str
     if raw.startswith("data:"):
         raw = raw.split(",", 1)[1]
     raw = raw.replace("\n", "").replace("\r", "")
     data = base64.b64decode(raw)
     try:
         bucket = fb_storage.bucket()
         blob = bucket.blob(path)
         blob.upload_from_string(data, content_type=content_type)
         if metadata:
             try:
                 blob.metadata = {k: (json.dumps(v) if not isinstance(v, str) else v) for k, v in metadata.items()}
@@ -218,46 +145,48 @@ def upload_b64_to_firebase(base64_str: str, path: str, content_type="image/jpeg"
         raise
 # ---------- Image helpers (with EXIF transpose) ----------
-# Replace existing read_image_bytes and crop_and_b64 with this block
 def read_image_bytes(file_storage) -> Tuple[np.ndarray, int, int, bytes]:
     """
-    Read bytes, apply EXIF orientation, return BGR numpy, width, height and re-encoded JPEG bytes.
-    This ensures the bytes we pass to Gemini / upload to storage are physically upright
-    (EXIF orientation is applied and not left in metadata).
     """
     data = file_storage.read()
     try:
         img = Image.open(io.BytesIO(data))
     except Exception as e:
-        # fallback: try to decode raw bytes via OpenCV
-        try:
-            arr_np = np.frombuffer(data, np.uint8)
-            cv_img = cv2.imdecode(arr_np, cv2.IMREAD_COLOR)
-            if cv_img is None:
-                raise
-            h, w = cv_img.shape[:2]
-            # re-encode to jpeg bytes to have consistent format
-            _, jpeg = cv2.imencode(".jpg", cv_img, [int(cv2.IMWRITE_JPEG_QUALITY), 92])
-            return cv_img, w, h, jpeg.tobytes()
-        except Exception as ee:
-            raise
-    # physically apply EXIF rotation if present
     try:
-        img = ImageOps.exif_transpose(img)
     except Exception:
-        # ignore failures here; proceed with original image
-        pass
-    # ensure RGB and get size
     img = img.convert("RGB")
     w, h = img.size
-    # re-encode to JPEG bytes to strip EXIF orientation tag (important!)
-    buf = BytesIO()
-    # We intentionally omit any EXIF bytes when saving so orientation is cleared.
     img.save(buf, format="JPEG", quality=92, optimize=True)
     jpeg_bytes = buf.getvalue()
@@ -265,11 +194,7 @@ def read_image_bytes(file_storage) -> Tuple[np.ndarray, int, int, bytes]:
     arr = np.array(img)[:, :, ::-1]  # RGB -> BGR
     return arr, w, h, jpeg_bytes
 def crop_and_b64(bgr_img: np.ndarray, x: int, y: int, w: int, h: int, max_side=512) -> str:
-    """
-    Crop from BGR image (already upright), optionally resize, encode as JPEG and return base64 string.
-    """
     h_img, w_img = bgr_img.shape[:2]
     x = max(0, int(x)); y = max(0, int(y))
     x2 = min(w_img, int(x + w)); y2 = min(h_img, int(y + h))
@@ -281,7 +206,6 @@ def crop_and_b64(bgr_img: np.ndarray, x: int, y: int, w: int, h: int, max_side=5
     if max_dim > max_side:
         scale = max_side / max_dim
         crop = cv2.resize(crop, (int(crop.shape[1] * scale), int(crop.shape[0] * scale)), interpolation=cv2.INTER_AREA)
-    # encode to JPEG (this will be upright because bgr_img was exif_transposed)
     _, jpeg = cv2.imencode(".jpg", crop, [int(cv2.IMWRITE_JPEG_QUALITY), 82])
     return base64.b64encode(jpeg.tobytes()).decode("ascii")
@@ -335,23 +259,27 @@ def fallback_contour_crops(bgr_img, max_items=8) -> List[Dict[str, Any]]:
                 })
     return items
-# ---------- AI analysis helper (unchanged) ----------
 def analyze_crop_with_gemini(jpeg_b64: str) -> Dict[str, Any]:
-    if not client:
         return {"type": "unknown", "summary": "", "brand": "", "tags": []}
     try:
         prompt = (
             "You are an assistant that identifies clothing item characteristics from an image. "
             "Return only a JSON object with keys: type (single word like 'shoe','top','jacket'), "
             "summary (a single short sentence, one line), brand (brand name if visible else empty string), "
-            "tags (an array of short single-word tags describing visible attributes). "
-            "Keep values short and concise."
         )
-        contents = [
-            types.Content(role="user", parts=[types.Part.from_text(text=prompt)])
-        ]
         image_bytes = base64.b64decode(jpeg_b64)
         contents.append(types.Content(role="user", parts=[types.Part.from_bytes(data=image_bytes, mime_type="image/jpeg")]))
         schema = {
             "type": "object",
             "properties": {
@@ -368,16 +296,17 @@ def analyze_crop_with_gemini(jpeg_b64: str) -> Dict[str, Any]:
         parsed = {}
         try:
             parsed = json.loads(text)
-            parsed["type"] = str(parsed.get("type", "")).strip()
-            parsed["summary"] = str(parsed.get("summary", "")).strip()
-            parsed["brand"] = str(parsed.get("brand", "")).strip()
-            tags = parsed.get("tags", [])
-            if not isinstance(tags, list):
-                tags = []
-            parsed["tags"] = [str(t).strip() for t in tags if str(t).strip()]
         except Exception as e:
             log.warning("Failed parsing Gemini analysis JSON: %s — raw: %s", e, (text[:300] if text else ""))
             parsed = {"type": "unknown", "summary": "", "brand": "", "tags": []}
         return {
             "type": parsed.get("type", "unknown") or "unknown",
             "summary": parsed.get("summary", "") or "",
@@ -388,31 +317,109 @@ def analyze_crop_with_gemini(jpeg_b64: str) -> Dict[str, Any]:
         log.exception("analyze_crop_with_gemini failure: %s", e)
         return {"type": "unknown", "summary": "", "brand": "", "tags": []}
 # ---------- Main / processing ----------
 @app.route("/process", methods=["POST"])
 def process_image():
     if "photo" not in request.files:
         return jsonify({"error": "missing photo"}), 400
     file = request.files["photo"]
     uid = (request.form.get("uid") or request.args.get("uid") or "anon").strip() or "anon"
     try:
-        bgr_img, img_w, img_h, raw_bytes = read_image_bytes(file)
     except Exception as e:
         log.error("invalid image: %s", e)
         return jsonify({"error": "invalid image"}), 400
     session_id = str(uuid.uuid4())
     user_prompt = (
         "You are an assistant that extracts clothing detections from a single image. "
         "Return a JSON object with a single key 'items' which is an array. Each item must have: "
         "label (string, short like 'top','skirt','sneakers'), "
         "bbox with normalized coordinates between 0 and 1: {x, y, w, h} where x,y are top-left relative to width/height, "
-        "confidence (0-1). Output ONLY valid JSON."
     )
     try:
         contents = [
-            types.Content(role="user", parts=[types.Part.from_text(text=user_prompt)])
         ]
-        contents.append(types.Content(role="user", parts=[types.Part.from_bytes(data=raw_bytes, mime_type="image/jpeg")]))
         schema = {
             "type": "object",
             "properties": {
@@ -424,7 +431,12 @@ def process_image():
                             "label": {"type": "string"},
                             "bbox": {
                                 "type": "object",
-                                "properties": {"x": {"type": "number"}, "y": {"type": "number"}, "w": {"type": "number"}, "h": {"type": "number"}},
                                 "required": ["x","y","w","h"]
                             },
                             "confidence": {"type": "number"}
@@ -435,47 +447,81 @@ def process_image():
             },
             "required": ["items"]
         }
-        cfg = types.GenerateContentConfig(response_mime_type="application/json", response_schema=schema)
-        log.info("Calling Gemini model for detection (gemini-2.5-flash-lite)...")
-        model_resp = client.models.generate_content(model="gemini-2.5-flash-lite", contents=contents, config=cfg) if client else None
-        raw_text = (model_resp.text or "") if model_resp else ""
-        log.info("Gemini raw response length: %d", len(raw_text))
         parsed = None
         try:
             parsed = json.loads(raw_text) if raw_text else None
         except Exception as e:
             log.warning("Could not parse Gemini JSON: %s", e)
             parsed = None
         items_out: List[Dict[str, Any]] = []
         if parsed and isinstance(parsed.get("items"), list) and len(parsed["items"])>0:
             for it in parsed["items"]:
                 try:
-                    label = str(it.get("label","unknown"))[:48]
                     bbox = it.get("bbox",{})
-                    nx = float(bbox.get("x",0)); ny = float(bbox.get("y",0)); nw = float(bbox.get("w",0)); nh = float(bbox.get("h",0))
                     nx = max(0.0, min(1.0, nx)); ny = max(0.0,min(1.0,ny))
                     nw = max(0.0, min(1.0, nw)); nh = max(0.0, min(1.0, nh))
                     px = int(nx * img_w); py = int(ny * img_h)
                     pw = int(nw * img_w); ph = int(nh * img_h)
                     if pw <= 8 or ph <= 8:
                         continue
-                    b64 = crop_and_b64(bgr_img, px, py, pw, ph)
-                    if not b64:
                         continue
-                    items_out.append({
-                        "id": str(uuid.uuid4()),
-                        "label": label,
                         "confidence": float(it.get("confidence", 0.5)),
                         "bbox": {"x": px, "y": py, "w": pw, "h": ph},
-                        "thumbnail_b64": b64,
                         "source": "gemini"
-                    })
                 except Exception as e:
                     log.warning("skipping item due to error: %s", e)
         else:
             log.info("Gemini returned no items or parse failed — using fallback contour crops.")
             items_out = fallback_contour_crops(bgr_img, max_items=8)
-        # AI analysis & upload
         if FIREBASE_ADMIN_JSON and FIREBASE_ADMIN_AVAILABLE:
             try:
                 init_firebase_admin_if_needed()
@@ -483,20 +529,12 @@ def process_image():
             except Exception as e:
                 log.exception("Firebase admin init for upload failed: %s", e)
                 bucket = None
             safe_uid = "".join(ch for ch in uid if ch.isalnum() or ch in ("-", "_")) or "anon"
             for itm in items_out:
                 b64 = itm.get("thumbnail_b64")
                 if not b64:
                     continue
-                try:
-                    analysis = analyze_crop_with_gemini(b64) if client else {"type":"unknown","summary":"","brand":"","tags":[]}
-                except Exception as ae:
-                    log.warning("analysis failed: %s", ae)
-                    analysis = {"type":"unknown","summary":"","brand":"","tags":[]}
-                itm["analysis"] = analysis
-                # pick allowed category (this is the important change: we ask Gemini to pick allowed category then fallback)
-                title = pick_allowed_category(analysis.get("type",""), itm.get("label",""), tags=analysis.get("tags", []))
-                itm["title"] = title
                 item_id = itm.get("id") or str(uuid.uuid4())
                 path = f"detected/{safe_uid}/{item_id}.jpg"
                 try:
@@ -505,68 +543,91 @@ def process_image():
                         "session_id": session_id,
                         "uploaded_by": safe_uid,
                         "uploaded_at": str(int(time.time())),
-                        "ai_type": analysis.get("type",""),
-                        "ai_brand": analysis.get("brand",""),
-                        "ai_summary": analysis.get("summary",""),
-                        "ai_tags": json.dumps(analysis.get("tags", [])),
-                        "title": title,
                     }
                     url = upload_b64_to_firebase(b64, path, content_type="image/jpeg", metadata=metadata)
                     itm["thumbnail_url"] = url
                     itm["thumbnail_path"] = path
                     itm.pop("thumbnail_b64", None)
                     itm["_session_id"] = session_id
-                    log.debug("Auto-uploaded thumbnail for %s -> %s (session=%s) title=%s", item_id, url, session_id, title)
                 except Exception as up_e:
                     log.warning("Auto-upload failed for %s: %s", item_id, up_e)
         else:
             if not FIREBASE_ADMIN_JSON:
                 log.info("FIREBASE_ADMIN_JSON not set; skipping server-side thumbnail upload.")
             else:
                 log.info("Firebase admin SDK not available; skipping server-side thumbnail upload.")
-            # ensure a title exists for frontend even if no upload
-            for itm in items_out:
-                if "title" not in itm:
-                    analysis = itm.get("analysis") or {"type":"unknown","tags":[]}
-                    itm["title"] = pick_allowed_category(analysis.get("type",""), itm.get("label",""), tags=analysis.get("tags", []))
         return jsonify({"ok": True, "items": items_out, "session_id": session_id, "debug": {"raw_model_text": (raw_text or "")[:1600]}}), 200
     except Exception as ex:
         log.exception("Processing error: %s", ex)
         try:
             items_out = fallback_contour_crops(bgr_img, max_items=8)
             for itm in items_out:
-                if "title" not in itm:
-                    itm["title"] = choose_category_from_candidates(itm.get("label","unknown"))
             return jsonify({"ok": True, "items": items_out, "session_id": session_id, "debug": {"error": str(ex)}}), 200
         except Exception as e2:
             log.exception("Fallback also failed: %s", e2)
             return jsonify({"error": "internal failure", "detail": str(e2)}), 500
-# ---------- Finalize endpoint ----------
 @app.route("/finalize_detections", methods=["POST"])
 def finalize_detections():
     try:
         body = request.get_json(force=True)
     except Exception:
         return jsonify({"error": "invalid json"}), 400
     uid = (body.get("uid") or request.args.get("uid") or "anon").strip() or "anon"
     keep_ids = set(body.get("keep_ids") or [])
     session_id = (body.get("session_id") or request.args.get("session_id") or "").strip()
     if not session_id:
         return jsonify({"error": "session_id required for finalize to avoid unsafe deletes"}), 400
     if not FIREBASE_ADMIN_JSON or not FIREBASE_ADMIN_AVAILABLE:
         return jsonify({"error": "firebase admin not configured"}), 500
     try:
         init_firebase_admin_if_needed()
         bucket = fb_storage.bucket()
     except Exception as e:
         log.exception("Firebase init error in finalize: %s", e)
         return jsonify({"error": "firebase admin init failed", "detail": str(e)}), 500
     safe_uid = "".join(ch for ch in uid if ch.isalnum() or ch in ("-", "_")) or "anon"
     prefix = f"detected/{safe_uid}/"
     kept = []
     deleted = []
     errors = []
     try:
         blobs = list(bucket.list_blobs(prefix=prefix))
         for blob in blobs:
@@ -576,46 +637,42 @@ def finalize_detections():
                 if "." not in fname:
                     continue
                 item_id = fname.rsplit(".", 1)[0]
                 md = blob.metadata or {}
                 if str(md.get("session_id", "")) != session_id or str(md.get("tmp", "")).lower() not in ("true", "1", "yes"):
                     continue
                 if item_id in keep_ids:
                     try:
                         blob.make_public()
                         url = blob.public_url
                     except Exception:
                         url = f"gs://{bucket.name}/{name}"
                     ai_type = md.get("ai_type") or ""
                     ai_brand = md.get("ai_brand") or ""
                     ai_summary = md.get("ai_summary") or ""
                     ai_tags_raw = md.get("ai_tags") or "[]"
-                    title_meta = md.get("title") or ""
                     try:
                         ai_tags = json.loads(ai_tags_raw) if isinstance(ai_tags_raw, str) else ai_tags_raw
                     except Exception:
                         ai_tags = []
-                    title = None
-                    if title_meta:
-                        try:
-                            title = json.loads(title_meta) if (title_meta.startswith('[') or title_meta.startswith('{')) else str(title_meta)
-                        except Exception:
-                            title = str(title_meta)
-                    # validate title: if not in allowed set, derive from AI fields
-                    valid = False
-                    if isinstance(title, str) and title.strip():
-                        for cat in CATEGORIES:
-                            if title.strip().lower() == cat.lower():
-                                title = cat
-                                valid = True
-                                break
-                    if not valid:
-                        title = choose_category_from_candidates(ai_type, ai_summary, tags=ai_tags)
                     kept.append({
                         "id": item_id,
                         "thumbnail_url": url,
                         "thumbnail_path": name,
-                        "analysis": {"type": ai_type, "brand": ai_brand, "summary": ai_summary, "tags": ai_tags},
-                        "title": title
                     })
                 else:
                     try:
@@ -630,27 +687,37 @@ def finalize_detections():
         log.exception("finalize_detections error: %s", e)
         return jsonify({"error": "internal", "detail": str(e)}), 500
-# ---------- Clear session ----------
 @app.route("/clear_session", methods=["POST"])
 def clear_session():
     try:
         body = request.get_json(force=True)
     except Exception:
         return jsonify({"error": "invalid json"}), 400
     session_id = (body.get("session_id") or request.args.get("session_id") or "").strip()
     uid = (body.get("uid") or request.args.get("uid") or "anon").strip() or "anon"
     if not session_id:
         return jsonify({"error": "session_id required"}), 400
     if not FIREBASE_ADMIN_JSON or not FIREBASE_ADMIN_AVAILABLE:
         return jsonify({"error": "firebase admin not configured"}), 500
     try:
         init_firebase_admin_if_needed()
         bucket = fb_storage.bucket()
     except Exception as e:
         log.exception("Firebase init error in clear_session: %s", e)
         return jsonify({"error": "firebase admin init failed", "detail": str(e)}), 500
     safe_uid = "".join(ch for ch in uid if ch.isalnum() or ch in ("-", "_")) or "anon"
     prefix = f"detected/{safe_uid}/"
     deleted = []
     errors = []
     try:

 import os
 import io
 import json
 import base64
 import logging
 import uuid
 import time
 from typing import List, Dict, Any, Tuple, Optional
 from flask import Flask, request, jsonify
 import cv2
 # genai client
+try:
+    from google import genai
+    from google.genai import types
+except Exception:
+    genai = None
+    types = None
 # Firebase Admin (in-memory JSON init)
 try:
 logging.basicConfig(level=logging.INFO)
 log = logging.getLogger("wardrobe-server")
+GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "").strip()
+if GEMINI_API_KEY and genai:
+    try:
+        client = genai.Client(api_key=GEMINI_API_KEY)
+    except Exception as e:
+        log.exception("Failed to init genai client: %s", e)
+        client = None
+else:
+    client = None
+    if not GEMINI_API_KEY:
+        log.info("GEMINI_API_KEY not set; model calls disabled.")
 # Firebase config (read service account JSON from env)
 FIREBASE_ADMIN_JSON = os.getenv("FIREBASE_ADMIN_JSON", "").strip()
 app = Flask(__name__)
 CORS(app)
+# ---------- Category options (must match frontend) ----------
+CATEGORY_OPTIONS = [
     "Heels",
     "Sneakers",
     "Loafers",
     "Coat",
     "Shorts",
 ]
+# lowercased category names (a list, same order as CATEGORY_OPTIONS) for case-insensitive matching
+_CATEGORY_RENORM = [c.lower() for c in CATEGORY_OPTIONS]
 # ---------- Firebase init helpers ----------
 _firebase_app = None
         raise
 def upload_b64_to_firebase(base64_str: str, path: str, content_type="image/jpeg", metadata: dict = None) -> str:
+    """
+    Upload base64 string to Firebase Storage at `path`.
+    Optionally attach metadata dict (custom metadata).
+    Returns a public URL when possible, otherwise returns gs://<bucket>/<path>.
+    """
     if not FIREBASE_ADMIN_JSON:
         raise RuntimeError("FIREBASE_ADMIN_JSON not set")
     init_firebase_admin_if_needed()
     if not FIREBASE_ADMIN_AVAILABLE:
         raise RuntimeError("firebase-admin not available")
     raw = base64_str
     if raw.startswith("data:"):
         raw = raw.split(",", 1)[1]
     raw = raw.replace("\n", "").replace("\r", "")
     data = base64.b64decode(raw)
     try:
         bucket = fb_storage.bucket()
         blob = bucket.blob(path)
         blob.upload_from_string(data, content_type=content_type)
+        # attach metadata if provided (values must be strings)
         if metadata:
             try:
                 blob.metadata = {k: (json.dumps(v) if not isinstance(v, str) else v) for k, v in metadata.items()}
         raise
 # ---------- Image helpers (with EXIF transpose) ----------
 def read_image_bytes(file_storage) -> Tuple[np.ndarray, int, int, bytes]:
     """
+    Read uploaded bytes, apply EXIF orientation via PIL.ImageOps.exif_transpose,
+    re-encode to JPEG bytes (EXIF cleared), and return (bgr_numpy, width, height, jpeg_bytes).
     """
     data = file_storage.read()
+    if not data:
+        raise ValueError("No image data uploaded")
+    # Try opening with PIL to read EXIF and apply transpose
     try:
         img = Image.open(io.BytesIO(data))
     except Exception as e:
+        log.warning("PIL failed to open image; falling back to OpenCV decode: %s", e)
+        arr_np = np.frombuffer(data, np.uint8)
+        cv_img = cv2.imdecode(arr_np, cv2.IMREAD_COLOR)
+        if cv_img is None:
+            raise RuntimeError("Could not decode uploaded image")
+        h, w = cv_img.shape[:2]
+        _, jpeg = cv2.imencode(".jpg", cv_img, [int(cv2.IMWRITE_JPEG_QUALITY), 92])
+        return cv_img, w, h, jpeg.tobytes()
+    # log original EXIF orientation when present
     try:
+        exif = img._getexif() or {}
+        orientation = None
+        if isinstance(exif, dict):
+            orientation = exif.get(274)  # tag 274 orientation
+        log.debug("Original EXIF orientation: %s", orientation)
     except Exception:
+        orientation = None
+    # physically apply EXIF rotation (so image pixels are upright)
+    try:
+        img = ImageOps.exif_transpose(img)
+    except Exception as e:
+        log.warning("exif_transpose failed: %s", e)
+    # ensure RGB, then re-encode to JPEG to remove orientation tag from bytes
     img = img.convert("RGB")
     w, h = img.size
+    buf = io.BytesIO()
     img.save(buf, format="JPEG", quality=92, optimize=True)
     jpeg_bytes = buf.getvalue()
     arr = np.array(img)[:, :, ::-1]  # RGB -> BGR
     return arr, w, h, jpeg_bytes
 def crop_and_b64(bgr_img: np.ndarray, x: int, y: int, w: int, h: int, max_side=512) -> str:
     h_img, w_img = bgr_img.shape[:2]
     x = max(0, int(x)); y = max(0, int(y))
     x2 = min(w_img, int(x + w)); y2 = min(h_img, int(y + h))
     if max_dim > max_side:
         scale = max_side / max_dim
         crop = cv2.resize(crop, (int(crop.shape[1] * scale), int(crop.shape[0] * scale)), interpolation=cv2.INTER_AREA)
     _, jpeg = cv2.imencode(".jpg", crop, [int(cv2.IMWRITE_JPEG_QUALITY), 82])
     return base64.b64encode(jpeg.tobytes()).decode("ascii")
                 })
     return items
+# ---------- AI analysis helper ----------
 def analyze_crop_with_gemini(jpeg_b64: str) -> Dict[str, Any]:
+    """
+    Run Gemini on the cropped image bytes to extract:
+      type, summary, brand, tags
+    Returns dict, falls back to defaults on error.
+    """
+    if not client or not types:
         return {"type": "unknown", "summary": "", "brand": "", "tags": []}
     try:
         prompt = (
             "You are an assistant that identifies clothing item characteristics from an image. "
             "Return only a JSON object with keys: type (single word like 'shoe','top','jacket'), "
             "summary (a single short sentence, one line), brand (brand name if visible else empty string), "
+            "tags (an array of short single-word tags). Keep values short and concise."
         )
+        contents = [types.Content(role="user", parts=[types.Part.from_text(text=prompt)])]
         image_bytes = base64.b64decode(jpeg_b64)
         contents.append(types.Content(role="user", parts=[types.Part.from_bytes(data=image_bytes, mime_type="image/jpeg")]))
         schema = {
             "type": "object",
             "properties": {
         parsed = {}
         try:
             parsed = json.loads(text)
         except Exception as e:
             log.warning("Failed parsing Gemini analysis JSON: %s — raw: %s", e, (text[:300] if text else ""))
             parsed = {"type": "unknown", "summary": "", "brand": "", "tags": []}
+        # coerce
+        parsed["type"] = str(parsed.get("type","") or "").strip()
+        parsed["summary"] = str(parsed.get("summary","") or "").strip()
+        parsed["brand"] = str(parsed.get("brand","") or "").strip()
+        tags = parsed.get("tags", [])
+        if not isinstance(tags, list):
+            tags = []
+        parsed["tags"] = [str(t).strip() for t in tags if str(t).strip()]
         return {
             "type": parsed.get("type", "unknown") or "unknown",
             "summary": parsed.get("summary", "") or "",
         log.exception("analyze_crop_with_gemini failure: %s", e)
         return {"type": "unknown", "summary": "", "brand": "", "tags": []}
+# ---------- Title mapping helper ----------
# (stem, title) pairs in priority order: the first stem found in a text wins.
# Specific garment nouns come first; the generic "shoe" and the material hint
# "denim" come last so that e.g. "denim jacket" resolves to Jacket, not Jeans.
# "shorts" is listed in its plural form on purpose so that the adjective
# "short" (as in "short sleeve") is never read as the garment.
_TITLE_KEYWORDS: Tuple[Tuple[str, str], ...] = (
    ("tshirt", "T-Shirt"),
    ("tee", "T-Shirt"),
    ("sneaker", "Sneakers"),
    ("trainer", "Sneakers"),
    ("jean", "Jeans"),
    ("dress", "Dress"),
    ("skirt", "Skirt"),
    ("jacket", "Jacket"),
    ("coat", "Coat"),
    ("blazer", "Blazer"),
    ("boot", "Boots"),
    ("heel", "Heels"),
    ("loafer", "Loafers"),
    ("shorts", "Shorts"),
    ("shoe", "Sneakers"),  # generic shoe -> put under Sneakers by default
    ("denim", "Jeans"),
)

# Stems at least this long may also match as the tail of a compound word
# ("raincoat", "miniskirt"); shorter stems such as "tee" must be a whole word,
# otherwise they would fire inside "steel" or "velveteen".
_MIN_COMPOUND_STEM = 4


def _title_words(text: str) -> List[str]:
    """Split text into lowercase alphanumeric words, merging 't' + 'shirt' into 'tshirt'."""
    spaced = "".join(ch if ch.isalnum() else " " for ch in text.lower())
    words: List[str] = []
    for word in spaced.split():
        # "t-shirt" / "t shirt" arrive here as two tokens; rejoin them.
        if words and words[-1] == "t" and word in ("shirt", "shirts"):
            words[-1] = "tshirt"
        else:
            words.append(word)
    return words


def _match_title_in_text(text: Any) -> Optional[str]:
    """Return the title named by text, or None if text is empty, not a string, or names nothing known."""
    if not isinstance(text, str) or not text.strip():
        return None
    words = _title_words(text)
    if not words:
        return None

    # 1) keyword stems, in priority order, tolerant of simple plurals
    for stem, title in _TITLE_KEYWORDS:
        forms = (stem, stem + "s", stem + "es")
        allow_compound = len(stem) >= _MIN_COMPOUND_STEM
        if any(
            (word.endswith(forms) if allow_compound else word in forms)
            for word in words
        ):
            return title

    # 2) direct category names, matched on word boundaries so that
    #    "short sleeve" does not collapse into "shorts..." and hit "Shorts"
    padded = " " + " ".join(words) + " "
    for cat in CATEGORY_OPTIONS:
        cat_words = _title_words(str(cat))
        if not cat_words:
            continue
        if " " + " ".join(cat_words) + " " in padded or "".join(cat_words) in words:
            return cat
    return None


def choose_title_from_label_and_analysis(label: str, analysis: Dict[str, Any]) -> str:
    """
    Pick a wardrobe title for a detected item.

    Candidate texts are tried in this order and the first one that names a
    known garment decides the title:
      1. analysis["type"]
      2. each entry of analysis["tags"] (only when it is a list), in order
      3. label (raw label from the detection model)
      4. analysis["summary"]
      5. fallback to 'T-Shirt'

    Matching works on whole words (with simple plural tolerance and
    compound-word tails for longer stems) instead of raw substrings.
    Values that are not strings, and an analysis that is not a dict, are
    ignored rather than raising.

    NOTE(review): keyword targets and the 'T-Shirt' fallback are hard-coded
    and assumed to be members of CATEGORY_OPTIONS; confirm against that list.

    Args:
        label: raw detection label; may be empty.
        analysis: dict with optional keys "type", "tags", "summary".

    Returns:
        The chosen title string.
    """
    info = analysis if isinstance(analysis, dict) else {}

    candidates: List[Any] = [info.get("type")]
    tags = info.get("tags")
    if isinstance(tags, list):
        candidates.extend(tags)
    candidates.append(label)
    candidates.append(info.get("summary"))

    for candidate in candidates:
        title = _match_title_in_text(candidate)
        if title:
            return title

    # fallback: prefer 'T-Shirt' as generic top fallback
    return "T-Shirt"
 # ---------- Main / processing ----------
 @app.route("/process", methods=["POST"])
 def process_image():
     if "photo" not in request.files:
         return jsonify({"error": "missing photo"}), 400
     file = request.files["photo"]
     uid = (request.form.get("uid") or request.args.get("uid") or "anon").strip() or "anon"
     try:
+        # read and get corrected jpeg bytes (EXIF transpose applied)
+        bgr_img, img_w, img_h, corrected_jpeg_bytes = read_image_bytes(file)
     except Exception as e:
         log.error("invalid image: %s", e)
         return jsonify({"error": "invalid image"}), 400
     session_id = str(uuid.uuid4())
+    # Detection prompt (Gemini expects the corrected image bytes)
     user_prompt = (
         "You are an assistant that extracts clothing detections from a single image. "
         "Return a JSON object with a single key 'items' which is an array. Each item must have: "
         "label (string, short like 'top','skirt','sneakers'), "
         "bbox with normalized coordinates between 0 and 1: {x, y, w, h} where x,y are top-left relative to width/height, "
+        "confidence (0-1). Example output: {\"items\":[{\"label\":\"top\",\"bbox\":{\"x\":0.1,\"y\":0.2,\"w\":0.3,\"h\":0.4},\"confidence\":0.95}]} "
+        "Output ONLY valid JSON. If you cannot detect any clothing confidently, return {\"items\":[]}."
     )
     try:
         contents = [
+            types.Content(role="user", parts=[types.Part.from_text(text=user_prompt)]) if types else None
         ]
+        # attach corrected jpeg bytes
+        if types:
+            contents.append(types.Content(role="user", parts=[types.Part.from_bytes(data=corrected_jpeg_bytes, mime_type="image/jpeg")]))
         schema = {
             "type": "object",
             "properties": {
                             "label": {"type": "string"},
                             "bbox": {
                                 "type": "object",
+                                "properties": {
+                                    "x": {"type": "number"},
+                                    "y": {"type": "number"},
+                                    "w": {"type": "number"},
+                                    "h": {"type": "number"}
+                                },
                                 "required": ["x","y","w","h"]
                             },
                             "confidence": {"type": "number"}
             },
             "required": ["items"]
         }
+        cfg = types.GenerateContentConfig(response_mime_type="application/json", response_schema=schema) if types else None
+        if client and types:
+            log.info("Calling Gemini model for detection (gemini-2.5-flash-lite)...")
+            model_resp = client.models.generate_content(model="gemini-2.5-flash-lite", contents=contents, config=cfg)
+            raw_text = model_resp.text or ""
+        else:
+            log.info("Gemini client not configured, skipping model detection — using fallback.")
+            raw_text = ""
+        log.info("Gemini raw response length: %d", len(raw_text) if raw_text else 0)
         parsed = None
         try:
             parsed = json.loads(raw_text) if raw_text else None
         except Exception as e:
             log.warning("Could not parse Gemini JSON: %s", e)
             parsed = None
         items_out: List[Dict[str, Any]] = []
         if parsed and isinstance(parsed.get("items"), list) and len(parsed["items"])>0:
             for it in parsed["items"]:
                 try:
+                    raw_label = str(it.get("label","unknown"))[:64]
                     bbox = it.get("bbox",{})
+                    nx = float(bbox.get("x",0))
+                    ny = float(bbox.get("y",0))
+                    nw = float(bbox.get("w",0))
+                    nh = float(bbox.get("h",0))
                     nx = max(0.0, min(1.0, nx)); ny = max(0.0,min(1.0,ny))
                     nw = max(0.0, min(1.0, nw)); nh = max(0.0, min(1.0, nh))
                     px = int(nx * img_w); py = int(ny * img_h)
                     pw = int(nw * img_w); ph = int(nh * img_h)
                     if pw <= 8 or ph <= 8:
                         continue
+                    crop_b64 = crop_and_b64(bgr_img, px, py, pw, ph)
+                    if not crop_b64:
                         continue
+                    # analyze crop with Gemini (optional)
+                    analysis = analyze_crop_with_gemini(crop_b64) if client else {"type":"unknown","summary":"","brand":"","tags":[]}
+                    # choose title within CATEGORY_OPTIONS
+                    title = choose_title_from_label_and_analysis(raw_label, analysis)
+                    item_id = str(uuid.uuid4())
+                    itm = {
+                        "id": item_id,
+                        "label": raw_label,
+                        "title": title,
                         "confidence": float(it.get("confidence", 0.5)),
                         "bbox": {"x": px, "y": py, "w": pw, "h": ph},
+                        "thumbnail_b64": crop_b64,
+                        "analysis": analysis,
                         "source": "gemini"
+                    }
+                    items_out.append(itm)
                 except Exception as e:
                     log.warning("skipping item due to error: %s", e)
         else:
             log.info("Gemini returned no items or parse failed — using fallback contour crops.")
             items_out = fallback_contour_crops(bgr_img, max_items=8)
+            # do analysis + title mapping for fallback crops
+            for itm in items_out:
+                try:
+                    crop_b64 = itm.get("thumbnail_b64")
+                    analysis = analyze_crop_with_gemini(crop_b64) if client else {"type":"unknown","summary":"","brand":"","tags":[]}
+                    itm["analysis"] = analysis
+                    itm["title"] = choose_title_from_label_and_analysis(itm.get("label","unknown"), analysis)
+                except Exception:
+                    itm["analysis"] = {"type":"unknown","summary":"","brand":"","tags":[]}
+                    itm["title"] = choose_title_from_label_and_analysis(itm.get("label","unknown"), itm["analysis"])
+        # Auto-upload thumbnails to Firebase Storage (temporary, marked by session_id)
         if FIREBASE_ADMIN_JSON and FIREBASE_ADMIN_AVAILABLE:
             try:
                 init_firebase_admin_if_needed()
             except Exception as e:
                 log.exception("Firebase admin init for upload failed: %s", e)
                 bucket = None
             safe_uid = "".join(ch for ch in uid if ch.isalnum() or ch in ("-", "_")) or "anon"
             for itm in items_out:
                 b64 = itm.get("thumbnail_b64")
                 if not b64:
                     continue
                 item_id = itm.get("id") or str(uuid.uuid4())
                 path = f"detected/{safe_uid}/{item_id}.jpg"
                 try:
                         "session_id": session_id,
                         "uploaded_by": safe_uid,
                         "uploaded_at": str(int(time.time())),
+                        # AI fields
+                        "ai_type": itm.get("analysis", {}).get("type", ""),
+                        "ai_brand": itm.get("analysis", {}).get("brand", ""),
+                        "ai_summary": itm.get("analysis", {}).get("summary", ""),
+                        "ai_tags": json.dumps(itm.get("analysis", {}).get("tags", [])),
+                        "ai_title": itm.get("title", "")
                     }
                     url = upload_b64_to_firebase(b64, path, content_type="image/jpeg", metadata=metadata)
                     itm["thumbnail_url"] = url
                     itm["thumbnail_path"] = path
+                    # remove raw base64 to keep response small
                     itm.pop("thumbnail_b64", None)
                     itm["_session_id"] = session_id
+                    # annotate uploaded_at (unix)
+                    itm["uploaded_at"] = int(time.time())
+                    log.debug("Auto-uploaded thumbnail for %s -> %s (session=%s)", item_id, url, session_id)
                 except Exception as up_e:
                     log.warning("Auto-upload failed for %s: %s", item_id, up_e)
+                    # keep thumbnail_b64 as fallback
         else:
             if not FIREBASE_ADMIN_JSON:
                 log.info("FIREBASE_ADMIN_JSON not set; skipping server-side thumbnail upload.")
             else:
                 log.info("Firebase admin SDK not available; skipping server-side thumbnail upload.")
+        # Final response: items contain id,title,confidence,bbox,thumbnail_url or thumbnail_b64,analysis,uploaded_at if available,source, _session_id
         return jsonify({"ok": True, "items": items_out, "session_id": session_id, "debug": {"raw_model_text": (raw_text or "")[:1600]}}), 200
     except Exception as ex:
         log.exception("Processing error: %s", ex)
         try:
             items_out = fallback_contour_crops(bgr_img, max_items=8)
             for itm in items_out:
+                itm["analysis"] = analyze_crop_with_gemini(itm.get("thumbnail_b64")) if client else {"type":"unknown","summary":"","brand":"","tags":[]}
+                itm["title"] = choose_title_from_label_and_analysis(itm.get("label","unknown"), itm["analysis"])
             return jsonify({"ok": True, "items": items_out, "session_id": session_id, "debug": {"error": str(ex)}}), 200
         except Exception as e2:
             log.exception("Fallback also failed: %s", e2)
             return jsonify({"error": "internal failure", "detail": str(e2)}), 500
+# ---------- Finalize endpoint: keep selected and delete only session's temp files ----------
 @app.route("/finalize_detections", methods=["POST"])
 def finalize_detections():
+    """
+    Body JSON:
+    { "uid": "user123", "keep_ids": ["id1","id2",...], "session_id": "<session id from /process>" }
+    Server will delete only detected/<uid>/* files whose:
+      - metadata.tmp == "true"
+      - metadata.session_id == session_id
+      - item_id NOT in keep_ids
+    Returns:
+      { ok: True, kept: [...], deleted: [...], errors: [...] }
+    kept entries include id, thumbnail_url, thumbnail_path, analysis, title, uploaded_at
+    """
     try:
         body = request.get_json(force=True)
     except Exception:
         return jsonify({"error": "invalid json"}), 400
     uid = (body.get("uid") or request.args.get("uid") or "anon").strip() or "anon"
     keep_ids = set(body.get("keep_ids") or [])
     session_id = (body.get("session_id") or request.args.get("session_id") or "").strip()
     if not session_id:
         return jsonify({"error": "session_id required for finalize to avoid unsafe deletes"}), 400
     if not FIREBASE_ADMIN_JSON or not FIREBASE_ADMIN_AVAILABLE:
         return jsonify({"error": "firebase admin not configured"}), 500
     try:
         init_firebase_admin_if_needed()
         bucket = fb_storage.bucket()
     except Exception as e:
         log.exception("Firebase init error in finalize: %s", e)
         return jsonify({"error": "firebase admin init failed", "detail": str(e)}), 500
     safe_uid = "".join(ch for ch in uid if ch.isalnum() or ch in ("-", "_")) or "anon"
     prefix = f"detected/{safe_uid}/"
     kept = []
     deleted = []
     errors = []
     try:
         blobs = list(bucket.list_blobs(prefix=prefix))
         for blob in blobs:
                 if "." not in fname:
                     continue
                 item_id = fname.rsplit(".", 1)[0]
                 md = blob.metadata or {}
+                # only consider temporary files matching this session id
                 if str(md.get("session_id", "")) != session_id or str(md.get("tmp", "")).lower() not in ("true", "1", "yes"):
                     continue
                 if item_id in keep_ids:
                     try:
                         blob.make_public()
                         url = blob.public_url
                     except Exception:
                         url = f"gs://{bucket.name}/{name}"
                     ai_type = md.get("ai_type") or ""
                     ai_brand = md.get("ai_brand") or ""
                     ai_summary = md.get("ai_summary") or ""
                     ai_tags_raw = md.get("ai_tags") or "[]"
                     try:
                         ai_tags = json.loads(ai_tags_raw) if isinstance(ai_tags_raw, str) else ai_tags_raw
                     except Exception:
                         ai_tags = []
+                    ai_title = md.get("ai_title") or ""
+                    uploaded_at = md.get("uploaded_at") or None
                     kept.append({
                         "id": item_id,
                         "thumbnail_url": url,
                         "thumbnail_path": name,
+                        "analysis": {
+                            "type": ai_type,
+                            "brand": ai_brand,
+                            "summary": ai_summary,
+                            "tags": ai_tags
+                        },
+                        "title": ai_title or choose_title_from_label_and_analysis("", {"type": ai_type, "summary": ai_summary, "brand": ai_brand, "tags": ai_tags}),
+                        "uploaded_at": int(uploaded_at) if uploaded_at and str(uploaded_at).isdigit() else uploaded_at
                     })
                 else:
                     try:
         log.exception("finalize_detections error: %s", e)
         return jsonify({"error": "internal", "detail": str(e)}), 500
+# ---------- Clear session: delete all temporary files for a session ----------
 @app.route("/clear_session", methods=["POST"])
 def clear_session():
+    """
+    Body JSON: { "session_id": "<id>", "uid": "<optional uid>" }
+    Deletes all detected/<uid>/* blobs where metadata.session_id == session_id and metadata.tmp == "true".
+    """
     try:
         body = request.get_json(force=True)
     except Exception:
         return jsonify({"error": "invalid json"}), 400
     session_id = (body.get("session_id") or request.args.get("session_id") or "").strip()
     uid = (body.get("uid") or request.args.get("uid") or "anon").strip() or "anon"
     if not session_id:
         return jsonify({"error": "session_id required"}), 400
     if not FIREBASE_ADMIN_JSON or not FIREBASE_ADMIN_AVAILABLE:
         return jsonify({"error": "firebase admin not configured"}), 500
     try:
         init_firebase_admin_if_needed()
         bucket = fb_storage.bucket()
     except Exception as e:
         log.exception("Firebase init error in clear_session: %s", e)
         return jsonify({"error": "firebase admin init failed", "detail": str(e)}), 500
     safe_uid = "".join(ch for ch in uid if ch.isalnum() or ch in ("-", "_")) or "anon"
     prefix = f"detected/{safe_uid}/"
     deleted = []
     errors = []
     try: