Spaces:

Pepguy
/

ai_text_dresscode

Running

App Files Files Community

Pepguy committed on Nov 4, 2025

Commit

e364d6d

verified ·

1 Parent(s): 5282e3c

Update app.py

Browse files

Files changed (1) hide show

app.py +77 -170

app.py CHANGED Viewed

@@ -50,7 +50,6 @@ app = Flask(__name__)
 CORS(app)
 # ---------- Category mapping (must match frontend) ----------
-# These values intentionally match the CATEGORY_OPTIONS array on the frontend.
 CATEGORIES = [
     "Heels",
     "Sneakers",
@@ -66,89 +65,96 @@ CATEGORIES = [
     "Shorts",
 ]
-# simple synonyms / keyword -> category mapping (lowercase keys)
 SYNONYMS: Dict[str, str] = {
-    "heel": "Heels",
-    "heels": "Heels",
-    "sneaker": "Sneakers",
-    "sneakers": "Sneakers",
-    "trainer": "Sneakers",
-    "trainers": "Sneakers",
-    "loafer": "Loafers",
-    "loafers": "Loafers",
-    "boot": "Boots",
-    "boots": "Boots",
-    "dress": "Dress",
-    "gown": "Dress",
-    "jean": "Jeans",
-    "jeans": "Jeans",
-    "denim": "Jeans",
     "skirt": "Skirt",
     "jacket": "Jacket",
     "coat": "Coat",
     "blazer": "Blazer",
-    "t-shirt": "T-Shirt",
-    "t shirt": "T-Shirt",
-    "tee": "T-Shirt",
-    "shirt": "T-Shirt",
-    "top": "T-Shirt",
-    "short": "Shorts",
-    "shorts": "Shorts",
-    "shoe": "Sneakers",  # generic shoe -> map to Sneakers as fallback
-    "shoes": "Sneakers",
-    "sandal": "Heels",  # if ambiguous, map sandals to Heels bucket (you can adjust)
-    "sandals": "Heels",
 }
 def normalize_text(s: str) -> str:
     """Lowercase ``s`` and blank out punctuation for keyword matching.

     Every character other than ``a-z``, ``0-9``, whitespace and ``-`` is
     replaced by a single space; leading and trailing whitespace is then
     stripped.  Interior runs of whitespace are left as they are.
     """
     lowered = s.lower()
     cleaned = re.sub(r'[^a-z0-9\s\-]', ' ', lowered)
     return cleaned.strip()
 def choose_category_from_candidates(*candidates: Optional[str], tags: Optional[List[str]] = None) -> str:
-    """
-    Given a list of candidate strings (analysis.type, label, summary, etc.) and optional tags,
-    attempt to pick a category from CATEGORIES. Returns a category string guaranteed to be in CATEGORIES.
-    Falls back to "T-Shirt" if nothing matches.
-    """
-    # try tags first (explicit tag likely to indicate category)
     if tags:
         for t in tags:
-            if not t:
-                continue
             tok = normalize_text(str(t))
-            # direct synonym match
             if tok in SYNONYMS:
                 return SYNONYMS[tok]
-            # partial substring match
             for key, cat in SYNONYMS.items():
                 if key in tok:
                     return cat
-            # try direct category name match
             for cat in CATEGORIES:
                 if tok == cat.lower() or cat.lower() in tok:
                     return cat
-    # iterate through candidate strings in order provided
     for c in candidates:
-        if not c:
-            continue
         s = normalize_text(str(c))
-        # exact category match
         for cat in CATEGORIES:
             if s == cat.lower() or cat.lower() in s:
                 return cat
-        # check synonyms dictionary words
         words = s.split()
         for w in words:
             if w in SYNONYMS:
                 return SYNONYMS[w]
-        # check substrings (e.g., "sneaker" inside longer text)
         for key, cat in SYNONYMS.items():
             if key in s:
                 return cat
-    # If nothing found, return a safe default present in CATEGORIES
     return "T-Shirt"
 # ---------- Firebase init helpers ----------
 _firebase_app = None
@@ -179,28 +185,20 @@ def init_firebase_admin_if_needed():
         raise
 def upload_b64_to_firebase(base64_str: str, path: str, content_type="image/jpeg", metadata: dict = None) -> str:
-    """
-    Upload base64 string to Firebase Storage at `path`.
-    Optionally attach metadata dict (custom metadata).
-    Returns a public URL when possible, otherwise returns gs://<bucket>/<path>.
-    """
     if not FIREBASE_ADMIN_JSON:
         raise RuntimeError("FIREBASE_ADMIN_JSON not set")
     init_firebase_admin_if_needed()
     if not FIREBASE_ADMIN_AVAILABLE:
         raise RuntimeError("firebase-admin not available")
     raw = base64_str
     if raw.startswith("data:"):
         raw = raw.split(",", 1)[1]
     raw = raw.replace("\n", "").replace("\r", "")
     data = base64.b64decode(raw)
     try:
         bucket = fb_storage.bucket()
         blob = bucket.blob(path)
         blob.upload_from_string(data, content_type=content_type)
-        # attach metadata if provided (values must be strings)
         if metadata:
             try:
                 blob.metadata = {k: (json.dumps(v) if not isinstance(v, str) else v) for k, v in metadata.items()}
@@ -219,19 +217,15 @@ def upload_b64_to_firebase(base64_str: str, path: str, content_type="image/jpeg"
 # ---------- Image helpers (with EXIF transpose) ----------
 def read_image_bytes(file_storage) -> Tuple[np.ndarray, int, int, bytes]:
-    """
-    Read bytes, apply EXIF orientation, return BGR numpy, width, height and raw bytes.
-    """
     data = file_storage.read()
     img = Image.open(io.BytesIO(data))
-    # apply EXIF orientation so photos from phones are upright
     try:
         img = ImageOps.exif_transpose(img)
     except Exception:
         pass
     img = img.convert("RGB")
     w, h = img.size
-    arr = np.array(img)[:, :, ::-1]  # RGB -> BGR for OpenCV
     return arr, w, h, data
 def crop_and_b64(bgr_img: np.ndarray, x: int, y: int, w: int, h: int, max_side=512) -> str:
@@ -298,36 +292,23 @@ def fallback_contour_crops(bgr_img, max_items=8) -> List[Dict[str, Any]]:
                 })
     return items
-# ---------- AI analysis helper ----------
 def analyze_crop_with_gemini(jpeg_b64: str) -> Dict[str, Any]:
-    """
-    Run Gemini on the cropped image bytes to extract:
-      type (one-word category like 'shoe', 'jacket', 'dress'),
-      summary (single-line description),
-      brand (string or empty),
-      tags (array of short descriptors)
-    Returns dict, falls back to empty/defaults on error or missing key.
-    """
     if not client:
         return {"type": "unknown", "summary": "", "brand": "", "tags": []}
     try:
-        # prepare prompt
         prompt = (
             "You are an assistant that identifies clothing item characteristics from an image. "
             "Return only a JSON object with keys: type (single word like 'shoe','top','jacket'), "
             "summary (a single short sentence, one line), brand (brand name if visible else empty string), "
-            "tags (an array of short single-word tags describing visible attributes, e.g. ['striped','leather','white']). "
             "Keep values short and concise."
         )
         contents = [
             types.Content(role="user", parts=[types.Part.from_text(text=prompt)])
         ]
-        # attach the image bytes
         image_bytes = base64.b64decode(jpeg_b64)
         contents.append(types.Content(role="user", parts=[types.Part.from_bytes(data=image_bytes, mime_type="image/jpeg")]))
         schema = {
             "type": "object",
             "properties": {
@@ -339,14 +320,11 @@ def analyze_crop_with_gemini(jpeg_b64: str) -> Dict[str, Any]:
             "required": ["type", "summary"]
         }
         cfg = types.GenerateContentConfig(response_mime_type="application/json", response_schema=schema)
-        # call model (use the same model family you used before)
         resp = client.models.generate_content(model="gemini-2.5-flash-lite", contents=contents, config=cfg)
         text = resp.text or ""
         parsed = {}
         try:
             parsed = json.loads(text)
-            # coerce expected shapes
             parsed["type"] = str(parsed.get("type", "")).strip()
             parsed["summary"] = str(parsed.get("summary", "")).strip()
             parsed["brand"] = str(parsed.get("brand", "")).strip()
@@ -373,33 +351,25 @@ def process_image():
     if "photo" not in request.files:
         return jsonify({"error": "missing photo"}), 400
     file = request.files["photo"]
     uid = (request.form.get("uid") or request.args.get("uid") or "anon").strip() or "anon"
     try:
         bgr_img, img_w, img_h, raw_bytes = read_image_bytes(file)
     except Exception as e:
         log.error("invalid image: %s", e)
         return jsonify({"error": "invalid image"}), 400
     session_id = str(uuid.uuid4())
-    # Detection prompt (same as before)
     user_prompt = (
         "You are an assistant that extracts clothing detections from a single image. "
         "Return a JSON object with a single key 'items' which is an array. Each item must have: "
         "label (string, short like 'top','skirt','sneakers'), "
         "bbox with normalized coordinates between 0 and 1: {x, y, w, h} where x,y are top-left relative to width/height, "
-        "confidence (0-1). Example output: {\"items\":[{\"label\":\"top\",\"bbox\":{\"x\":0.1,\"y\":0.2,\"w\":0.3,\"h\":0.4},\"confidence\":0.95}]} "
-        "Output ONLY valid JSON. If you cannot detect any clothing confidently, return {\"items\":[]}."
     )
     try:
         contents = [
             types.Content(role="user", parts=[types.Part.from_text(text=user_prompt)])
         ]
         contents.append(types.Content(role="user", parts=[types.Part.from_bytes(data=raw_bytes, mime_type="image/jpeg")]))
         schema = {
             "type": "object",
             "properties": {
@@ -411,12 +381,7 @@ def process_image():
                             "label": {"type": "string"},
                             "bbox": {
                                 "type": "object",
-                                "properties": {
-                                    "x": {"type": "number"},
-                                    "y": {"type": "number"},
-                                    "w": {"type": "number"},
-                                    "h": {"type": "number"}
-                                },
                                 "required": ["x","y","w","h"]
                             },
                             "confidence": {"type": "number"}
@@ -427,31 +392,24 @@ def process_image():
             },
             "required": ["items"]
         }
         cfg = types.GenerateContentConfig(response_mime_type="application/json", response_schema=schema)
         log.info("Calling Gemini model for detection (gemini-2.5-flash-lite)...")
         model_resp = client.models.generate_content(model="gemini-2.5-flash-lite", contents=contents, config=cfg) if client else None
         raw_text = (model_resp.text or "") if model_resp else ""
         log.info("Gemini raw response length: %d", len(raw_text))
         parsed = None
         try:
             parsed = json.loads(raw_text) if raw_text else None
         except Exception as e:
             log.warning("Could not parse Gemini JSON: %s", e)
             parsed = None
         items_out: List[Dict[str, Any]] = []
         if parsed and isinstance(parsed.get("items"), list) and len(parsed["items"])>0:
             for it in parsed["items"]:
                 try:
                     label = str(it.get("label","unknown"))[:48]
                     bbox = it.get("bbox",{})
-                    nx = float(bbox.get("x",0))
-                    ny = float(bbox.get("y",0))
-                    nw = float(bbox.get("w",0))
-                    nh = float(bbox.get("h",0))
                     nx = max(0.0, min(1.0, nx)); ny = max(0.0,min(1.0,ny))
                     nw = max(0.0, min(1.0, nw)); nh = max(0.0, min(1.0, nh))
                     px = int(nx * img_w); py = int(ny * img_h)
@@ -474,8 +432,7 @@ def process_image():
         else:
             log.info("Gemini returned no items or parse failed — using fallback contour crops.")
             items_out = fallback_contour_crops(bgr_img, max_items=8)
-        # Perform AI analysis per crop (if possible) and auto-upload to firebase with metadata (tmp + session)
         if FIREBASE_ADMIN_JSON and FIREBASE_ADMIN_AVAILABLE:
             try:
                 init_firebase_admin_if_needed()
@@ -483,31 +440,20 @@ def process_image():
             except Exception as e:
                 log.exception("Firebase admin init for upload failed: %s", e)
                 bucket = None
             safe_uid = "".join(ch for ch in uid if ch.isalnum() or ch in ("-", "_")) or "anon"
             for itm in items_out:
                 b64 = itm.get("thumbnail_b64")
                 if not b64:
                     continue
-                # analyze
                 try:
                     analysis = analyze_crop_with_gemini(b64) if client else {"type":"unknown","summary":"","brand":"","tags":[]}
                 except Exception as ae:
                     log.warning("analysis failed: %s", ae)
                     analysis = {"type":"unknown","summary":"","brand":"","tags":[]}
                 itm["analysis"] = analysis
-                # choose a frontend-category-compatible title
-                # prefer analysis.type, then label, then tags, then summary
-                title = choose_category_from_candidates(
-                    analysis.get("type", ""),
-                    itm.get("label", ""),
-                    ' '.join(analysis.get("tags", [])),
-                    tags=analysis.get("tags", [])
-                )
                 itm["title"] = title
                 item_id = itm.get("id") or str(uuid.uuid4())
                 path = f"detected/{safe_uid}/{item_id}.jpg"
                 try:
@@ -516,7 +462,6 @@ def process_image():
                         "session_id": session_id,
                         "uploaded_by": safe_uid,
                         "uploaded_at": str(int(time.time())),
-                        # store AI fields as JSON strings for later inspection
                         "ai_type": analysis.get("type",""),
                         "ai_brand": analysis.get("brand",""),
                         "ai_summary": analysis.get("summary",""),
@@ -531,26 +476,21 @@ def process_image():
                     log.debug("Auto-uploaded thumbnail for %s -> %s (session=%s) title=%s", item_id, url, session_id, title)
                 except Exception as up_e:
                     log.warning("Auto-upload failed for %s: %s", item_id, up_e)
-                    # keep thumbnail_b64 and analysis for client fallback
         else:
             if not FIREBASE_ADMIN_JSON:
                 log.info("FIREBASE_ADMIN_JSON not set; skipping server-side thumbnail upload.")
             else:
                 log.info("Firebase admin SDK not available; skipping server-side thumbnail upload.")
-            # For non-upload path, still add a title derived from label/unknown so frontend has it
             for itm in items_out:
                 if "title" not in itm:
                     analysis = itm.get("analysis") or {"type":"unknown","tags":[]}
-                    title = choose_category_from_candidates(analysis.get("type",""), itm.get("label",""), tags=analysis.get("tags", []))
-                    itm["title"] = title
         return jsonify({"ok": True, "items": items_out, "session_id": session_id, "debug": {"raw_model_text": (raw_text or "")[:1600]}}), 200
     except Exception as ex:
         log.exception("Processing error: %s", ex)
         try:
             items_out = fallback_contour_crops(bgr_img, max_items=8)
-            # give fallback items a default title so frontend can filter
             for itm in items_out:
                 if "title" not in itm:
                     itm["title"] = choose_category_from_candidates(itm.get("label","unknown"))
@@ -559,50 +499,31 @@ def process_image():
             log.exception("Fallback also failed: %s", e2)
             return jsonify({"error": "internal failure", "detail": str(e2)}), 500
-# ---------- Finalize endpoint: keep selected and delete only session's temp files ----------
 @app.route("/finalize_detections", methods=["POST"])
 def finalize_detections():
-    """
-    Body JSON:
-    { "uid": "user123", "keep_ids": ["id1","id2",...], "session_id": "<session id from /process>" }
-    Server will delete only detected/<uid>/* files whose:
-      - metadata.tmp == "true"
-      - metadata.session_id == session_id
-      - item_id NOT in keep_ids
-    Returns:
-      { ok: True, kept: [...], deleted: [...], errors: [...] }
-    """
     try:
         body = request.get_json(force=True)
     except Exception:
         return jsonify({"error": "invalid json"}), 400
     uid = (body.get("uid") or request.args.get("uid") or "anon").strip() or "anon"
     keep_ids = set(body.get("keep_ids") or [])
     session_id = (body.get("session_id") or request.args.get("session_id") or "").strip()
     if not session_id:
         return jsonify({"error": "session_id required for finalize to avoid unsafe deletes"}), 400
     if not FIREBASE_ADMIN_JSON or not FIREBASE_ADMIN_AVAILABLE:
         return jsonify({"error": "firebase admin not configured"}), 500
     try:
         init_firebase_admin_if_needed()
         bucket = fb_storage.bucket()
     except Exception as e:
         log.exception("Firebase init error in finalize: %s", e)
         return jsonify({"error": "firebase admin init failed", "detail": str(e)}), 500
     safe_uid = "".join(ch for ch in uid if ch.isalnum() or ch in ("-", "_")) or "anon"
     prefix = f"detected/{safe_uid}/"
     kept = []
     deleted = []
     errors = []
     try:
         blobs = list(bucket.list_blobs(prefix=prefix))
         for blob in blobs:
@@ -612,21 +533,15 @@ def finalize_detections():
                 if "." not in fname:
                     continue
                 item_id = fname.rsplit(".", 1)[0]
                 md = blob.metadata or {}
-                # only consider temporary files matching this session id
                 if str(md.get("session_id", "")) != session_id or str(md.get("tmp", "")).lower() not in ("true", "1", "yes"):
                     continue
                 if item_id in keep_ids:
-                    # ensure public URL available if possible
                     try:
                         blob.make_public()
                         url = blob.public_url
                     except Exception:
                         url = f"gs://{bucket.name}/{name}"
-                    # extract AI metadata (if present)
                     ai_type = md.get("ai_type") or ""
                     ai_brand = md.get("ai_brand") or ""
                     ai_summary = md.get("ai_summary") or ""
@@ -636,25 +551,27 @@ def finalize_detections():
                         ai_tags = json.loads(ai_tags_raw) if isinstance(ai_tags_raw, str) else ai_tags_raw
                     except Exception:
                         ai_tags = []
-                    # derive title: prefer stored metadata title, then ai_type/tags/summary
                     title = None
                     if title_meta:
                         try:
                             title = json.loads(title_meta) if (title_meta.startswith('[') or title_meta.startswith('{')) else str(title_meta)
                         except Exception:
                             title = str(title_meta)
-                    if not title:
                         title = choose_category_from_candidates(ai_type, ai_summary, tags=ai_tags)
                     kept.append({
                         "id": item_id,
                         "thumbnail_url": url,
                         "thumbnail_path": name,
-                        "analysis": {
-                          "type": ai_type,
-                          "brand": ai_brand,
-                          "summary": ai_summary,
-                          "tags": ai_tags
-                        },
                         "title": title
                     })
                 else:
@@ -670,37 +587,27 @@ def finalize_detections():
         log.exception("finalize_detections error: %s", e)
         return jsonify({"error": "internal", "detail": str(e)}), 500
-# ---------- Clear session: delete all temporary files for a session ----------
 @app.route("/clear_session", methods=["POST"])
 def clear_session():
-    """
-    Body JSON: { "session_id": "<id>", "uid": "<optional uid>" }
-    Deletes all detected/<uid>/* blobs where metadata.session_id == session_id and metadata.tmp == "true".
-    """
     try:
         body = request.get_json(force=True)
     except Exception:
         return jsonify({"error": "invalid json"}), 400
     session_id = (body.get("session_id") or request.args.get("session_id") or "").strip()
     uid = (body.get("uid") or request.args.get("uid") or "anon").strip() or "anon"
     if not session_id:
         return jsonify({"error": "session_id required"}), 400
     if not FIREBASE_ADMIN_JSON or not FIREBASE_ADMIN_AVAILABLE:
         return jsonify({"error": "firebase admin not configured"}), 500
     try:
         init_firebase_admin_if_needed()
         bucket = fb_storage.bucket()
     except Exception as e:
         log.exception("Firebase init error in clear_session: %s", e)
         return jsonify({"error": "firebase admin init failed", "detail": str(e)}), 500
     safe_uid = "".join(ch for ch in uid if ch.isalnum() or ch in ("-", "_")) or "anon"
     prefix = f"detected/{safe_uid}/"
     deleted = []
     errors = []
     try:

 CORS(app)
 # ---------- Category mapping (must match frontend) ----------
 CATEGORIES = [
     "Heels",
     "Sneakers",
     "Shorts",
 ]
 SYNONYMS: Dict[str, str] = {
+    "heel": "Heels", "heels": "Heels",
+    "sneaker": "Sneakers", "sneakers": "Sneakers", "trainer": "Sneakers", "trainers": "Sneakers",
+    "loafer": "Loafers", "loafers": "Loafers",
+    "boot": "Boots", "boots": "Boots",
+    "dress": "Dress", "gown": "Dress",
+    "jean": "Jeans", "jeans": "Jeans", "denim": "Jeans",
     "skirt": "Skirt",
     "jacket": "Jacket",
     "coat": "Coat",
     "blazer": "Blazer",
+    "t-shirt": "T-Shirt", "t shirt": "T-Shirt", "tee": "T-Shirt", "shirt": "T-Shirt", "top": "T-Shirt",
+    "short": "Shorts", "shorts": "Shorts",
+    "shoe": "Sneakers", "shoes": "Sneakers",
+    "sandal": "Heels", "sandals": "Heels",
 }
 def normalize_text(s: str) -> str:
     """Lowercase ``s`` and blank out punctuation for keyword matching.

     Every character other than ``a-z``, ``0-9``, whitespace and ``-`` is
     replaced by a single space; leading and trailing whitespace is then
     stripped.  Interior runs of whitespace are left as they are.
     """
     lowered = s.lower()
     cleaned = re.sub(r'[^a-z0-9\s\-]', ' ', lowered)
     return cleaned.strip()
 def choose_category_from_candidates(*candidates: Optional[str], tags: Optional[List[str]] = None) -> str:
     """Map free-form item descriptions onto a category name.

     Tags are examined first, one at a time.  A normalized tag resolves via an
     exact ``SYNONYMS`` lookup, then via the first ``SYNONYMS`` key contained
     in it, then via the first ``CATEGORIES`` name contained in it.  If no tag
     resolves, each candidate string is tried in the order given: a contained
     ``CATEGORIES`` name, then a whitespace-separated word that is a
     ``SYNONYMS`` key, then any ``SYNONYMS`` key contained in the text.
     Empty tags and empty candidates are skipped.

     Returns:
         The category of the first match, or ``"T-Shirt"`` when nothing matches.
     """
     for raw_tag in tags or ():
         if not raw_tag:
             continue
         token = normalize_text(str(raw_tag))
         # Whole-tag synonym hit.
         if token in SYNONYMS:
             return SYNONYMS[token]
         # Synonym keyword embedded in a longer tag.
         for keyword, category in SYNONYMS.items():
             if keyword in token:
                 return category
         # Category name embedded in the tag (equality is a special case).
         for category in CATEGORIES:
             if category.lower() in token:
                 return category

     for candidate in candidates:
         if not candidate:
             continue
         text = normalize_text(str(candidate))
         # Category names take precedence over synonyms for candidate text.
         for category in CATEGORIES:
             if category.lower() in text:
                 return category
         for word in text.split():
             if word in SYNONYMS:
                 return SYNONYMS[word]
         for keyword, category in SYNONYMS.items():
             if keyword in text:
                 return category

     return "T-Shirt"
+# ---------- New: ask Gemini to pick EXACT allowed category ----------
def pick_allowed_category(preferred_text: Optional[str], label_text: Optional[str], tags: Optional[List[str]] = None) -> str:
    """Pick a category for an item, asking Gemini first and matching locally otherwise.

    The first non-blank value of ``preferred_text`` / ``label_text`` is sent
    to the model together with the names in ``CATEGORIES``.  The reply is
    accepted when, after trimming whitespace and surrounding quotes, it equals
    a category name case-insensitively, or failing that when it contains one
    (which covers replies wrapped in JSON or extra prose).

    The model is not called when ``client`` is falsy or when there is no
    description text to send.

    Args:
        preferred_text: Primary description of the item; used when non-blank.
        label_text: Secondary description, used when ``preferred_text`` is blank.
        tags: Optional tags; only consulted by the local fallback.

    Returns:
        The matching entry of ``CATEGORIES`` when the model names one,
        otherwise the result of ``choose_category_from_candidates``.
    """
    description = ""
    for text in (preferred_text, label_text):
        cleaned = str(text).strip() if text else ""
        if cleaned:
            description = cleaned
            break

    # Without any description the model could only guess, so skip the network
    # round trip and let the local matcher (which also sees the tags) decide.
    if client and description:
        try:
            prompt = (
                "You are given a short description of a clothing item. "
                "From the following list choose the single best category that matches the item. "
                "Return ONLY the category name exactly as shown (case-sensitive match is not required):\n\n"
                f"{', '.join(CATEGORIES)}\n\n"
                f"Item description: {description}\n\n"
                "Output exactly one of the category names above (no JSON, no explanation)."
            )
            contents = [types.Content(role="user", parts=[types.Part.from_text(text=prompt)])]
            # Plain text is requested rather than a schema; the reply is
            # validated against CATEGORIES below instead.
            cfg = types.GenerateContentConfig(response_mime_type="text/plain")
            resp = client.models.generate_content(model="gemini-2.5-flash-lite", contents=contents, config=cfg)
            answer = (resp.text or "").strip().strip('"').strip("'").strip().lower()
            # Exact (case-insensitive) category name.
            for cat in CATEGORIES:
                if answer == cat.lower():
                    return cat
            # Category name embedded in a longer reply.
            for cat in CATEGORIES:
                if cat.lower() in answer:
                    return cat
        except Exception as e:
            # Best effort: any model/API failure falls through to local matching.
            log.warning("pick_allowed_category Gemini call failed: %s", e)
    # Gemini unavailable, skipped, or gave no recognisable category.
    return choose_category_from_candidates(preferred_text, label_text, tags=tags)
 # ---------- Firebase init helpers ----------
 _firebase_app = None
         raise
 def upload_b64_to_firebase(base64_str: str, path: str, content_type="image/jpeg", metadata: dict = None) -> str:
     if not FIREBASE_ADMIN_JSON:
         raise RuntimeError("FIREBASE_ADMIN_JSON not set")
     init_firebase_admin_if_needed()
     if not FIREBASE_ADMIN_AVAILABLE:
         raise RuntimeError("firebase-admin not available")
     raw = base64_str
     if raw.startswith("data:"):
         raw = raw.split(",", 1)[1]
     raw = raw.replace("\n", "").replace("\r", "")
     data = base64.b64decode(raw)
     try:
         bucket = fb_storage.bucket()
         blob = bucket.blob(path)
         blob.upload_from_string(data, content_type=content_type)
         if metadata:
             try:
                 blob.metadata = {k: (json.dumps(v) if not isinstance(v, str) else v) for k, v in metadata.items()}
 # ---------- Image helpers (with EXIF transpose) ----------
 def read_image_bytes(file_storage) -> Tuple[np.ndarray, int, int, bytes]:
     """Decode an uploaded image into a BGR array, keeping the raw bytes.

     The payload is read from ``file_storage``, opened with ``Image.open``,
     rotated according to its EXIF orientation on a best-effort basis, and
     converted to RGB.  The channel axis is then reversed to give BGR order.

     Returns:
         ``(bgr_array, width, height, raw_bytes)`` where width and height are
         taken from the image after the EXIF transpose and RGB conversion.
     """
     payload = file_storage.read()
     picture = Image.open(io.BytesIO(payload))
     try:
         picture = ImageOps.exif_transpose(picture)
     except Exception:
         # Orientation correction is optional; keep the image as decoded.
         pass
     picture = picture.convert("RGB")
     width, height = picture.size
     bgr = np.array(picture)[:, :, ::-1]
     return bgr, width, height, payload
 def crop_and_b64(bgr_img: np.ndarray, x: int, y: int, w: int, h: int, max_side=512) -> str:
                 })
     return items
+# ---------- AI analysis helper (unchanged) ----------
 def analyze_crop_with_gemini(jpeg_b64: str) -> Dict[str, Any]:
     if not client:
         return {"type": "unknown", "summary": "", "brand": "", "tags": []}
     try:
         prompt = (
             "You are an assistant that identifies clothing item characteristics from an image. "
             "Return only a JSON object with keys: type (single word like 'shoe','top','jacket'), "
             "summary (a single short sentence, one line), brand (brand name if visible else empty string), "
+            "tags (an array of short single-word tags describing visible attributes). "
             "Keep values short and concise."
         )
         contents = [
             types.Content(role="user", parts=[types.Part.from_text(text=prompt)])
         ]
         image_bytes = base64.b64decode(jpeg_b64)
         contents.append(types.Content(role="user", parts=[types.Part.from_bytes(data=image_bytes, mime_type="image/jpeg")]))
         schema = {
             "type": "object",
             "properties": {
             "required": ["type", "summary"]
         }
         cfg = types.GenerateContentConfig(response_mime_type="application/json", response_schema=schema)
         resp = client.models.generate_content(model="gemini-2.5-flash-lite", contents=contents, config=cfg)
         text = resp.text or ""
         parsed = {}
         try:
             parsed = json.loads(text)
             parsed["type"] = str(parsed.get("type", "")).strip()
             parsed["summary"] = str(parsed.get("summary", "")).strip()
             parsed["brand"] = str(parsed.get("brand", "")).strip()
     if "photo" not in request.files:
         return jsonify({"error": "missing photo"}), 400
     file = request.files["photo"]
     uid = (request.form.get("uid") or request.args.get("uid") or "anon").strip() or "anon"
     try:
         bgr_img, img_w, img_h, raw_bytes = read_image_bytes(file)
     except Exception as e:
         log.error("invalid image: %s", e)
         return jsonify({"error": "invalid image"}), 400
     session_id = str(uuid.uuid4())
     user_prompt = (
         "You are an assistant that extracts clothing detections from a single image. "
         "Return a JSON object with a single key 'items' which is an array. Each item must have: "
         "label (string, short like 'top','skirt','sneakers'), "
         "bbox with normalized coordinates between 0 and 1: {x, y, w, h} where x,y are top-left relative to width/height, "
+        "confidence (0-1). Output ONLY valid JSON."
     )
     try:
         contents = [
             types.Content(role="user", parts=[types.Part.from_text(text=user_prompt)])
         ]
         contents.append(types.Content(role="user", parts=[types.Part.from_bytes(data=raw_bytes, mime_type="image/jpeg")]))
         schema = {
             "type": "object",
             "properties": {
                             "label": {"type": "string"},
                             "bbox": {
                                 "type": "object",
+                                "properties": {"x": {"type": "number"}, "y": {"type": "number"}, "w": {"type": "number"}, "h": {"type": "number"}},
                                 "required": ["x","y","w","h"]
                             },
                             "confidence": {"type": "number"}
             },
             "required": ["items"]
         }
         cfg = types.GenerateContentConfig(response_mime_type="application/json", response_schema=schema)
         log.info("Calling Gemini model for detection (gemini-2.5-flash-lite)...")
         model_resp = client.models.generate_content(model="gemini-2.5-flash-lite", contents=contents, config=cfg) if client else None
         raw_text = (model_resp.text or "") if model_resp else ""
         log.info("Gemini raw response length: %d", len(raw_text))
         parsed = None
         try:
             parsed = json.loads(raw_text) if raw_text else None
         except Exception as e:
             log.warning("Could not parse Gemini JSON: %s", e)
             parsed = None
         items_out: List[Dict[str, Any]] = []
         if parsed and isinstance(parsed.get("items"), list) and len(parsed["items"])>0:
             for it in parsed["items"]:
                 try:
                     label = str(it.get("label","unknown"))[:48]
                     bbox = it.get("bbox",{})
+                    nx = float(bbox.get("x",0)); ny = float(bbox.get("y",0)); nw = float(bbox.get("w",0)); nh = float(bbox.get("h",0))
                     nx = max(0.0, min(1.0, nx)); ny = max(0.0,min(1.0,ny))
                     nw = max(0.0, min(1.0, nw)); nh = max(0.0, min(1.0, nh))
                     px = int(nx * img_w); py = int(ny * img_h)
         else:
             log.info("Gemini returned no items or parse failed — using fallback contour crops.")
             items_out = fallback_contour_crops(bgr_img, max_items=8)
+        # AI analysis & upload
         if FIREBASE_ADMIN_JSON and FIREBASE_ADMIN_AVAILABLE:
             try:
                 init_firebase_admin_if_needed()
             except Exception as e:
                 log.exception("Firebase admin init for upload failed: %s", e)
                 bucket = None
             safe_uid = "".join(ch for ch in uid if ch.isalnum() or ch in ("-", "_")) or "anon"
             for itm in items_out:
                 b64 = itm.get("thumbnail_b64")
                 if not b64:
                     continue
                 try:
                     analysis = analyze_crop_with_gemini(b64) if client else {"type":"unknown","summary":"","brand":"","tags":[]}
                 except Exception as ae:
                     log.warning("analysis failed: %s", ae)
                     analysis = {"type":"unknown","summary":"","brand":"","tags":[]}
                 itm["analysis"] = analysis
+                # pick allowed category (this is the important change: we ask Gemini to pick allowed category then fallback)
+                title = pick_allowed_category(analysis.get("type",""), itm.get("label",""), tags=analysis.get("tags", []))
                 itm["title"] = title
                 item_id = itm.get("id") or str(uuid.uuid4())
                 path = f"detected/{safe_uid}/{item_id}.jpg"
                 try:
                         "session_id": session_id,
                         "uploaded_by": safe_uid,
                         "uploaded_at": str(int(time.time())),
                         "ai_type": analysis.get("type",""),
                         "ai_brand": analysis.get("brand",""),
                         "ai_summary": analysis.get("summary",""),
                     log.debug("Auto-uploaded thumbnail for %s -> %s (session=%s) title=%s", item_id, url, session_id, title)
                 except Exception as up_e:
                     log.warning("Auto-upload failed for %s: %s", item_id, up_e)
         else:
             if not FIREBASE_ADMIN_JSON:
                 log.info("FIREBASE_ADMIN_JSON not set; skipping server-side thumbnail upload.")
             else:
                 log.info("Firebase admin SDK not available; skipping server-side thumbnail upload.")
+            # ensure a title exists for frontend even if no upload
             for itm in items_out:
                 if "title" not in itm:
                     analysis = itm.get("analysis") or {"type":"unknown","tags":[]}
+                    itm["title"] = pick_allowed_category(analysis.get("type",""), itm.get("label",""), tags=analysis.get("tags", []))
         return jsonify({"ok": True, "items": items_out, "session_id": session_id, "debug": {"raw_model_text": (raw_text or "")[:1600]}}), 200
     except Exception as ex:
         log.exception("Processing error: %s", ex)
         try:
             items_out = fallback_contour_crops(bgr_img, max_items=8)
             for itm in items_out:
                 if "title" not in itm:
                     itm["title"] = choose_category_from_candidates(itm.get("label","unknown"))
             log.exception("Fallback also failed: %s", e2)
             return jsonify({"error": "internal failure", "detail": str(e2)}), 500
+# ---------- Finalize endpoint ----------
 @app.route("/finalize_detections", methods=["POST"])
 def finalize_detections():
     # POST endpoint that finalizes one detection session for a user.
     # Reads "uid", "keep_ids" and "session_id" from the JSON body ("uid" and
     # "session_id" may also be supplied as query-string arguments).
     # Responds 400 for unparseable JSON or a missing session_id, and 500 when
     # Firebase admin is not configured or fails to initialise; the trailing
     # error handler also answers 500 ("internal").
     try:
         # force=True: parse the body as JSON regardless of the request Content-Type.
         body = request.get_json(force=True)
     except Exception:
         return jsonify({"error": "invalid json"}), 400
     # NOTE(review): body.get(...) below runs outside the try above, so a JSON
     # body that is not an object (e.g. a list or null) would raise here — confirm
     # whether that case needs an explicit 400.
     uid = (body.get("uid") or request.args.get("uid") or "anon").strip() or "anon"
     keep_ids = set(body.get("keep_ids") or [])
     session_id = (body.get("session_id") or request.args.get("session_id") or "").strip()
     if not session_id:
         return jsonify({"error": "session_id required for finalize to avoid unsafe deletes"}), 400
     if not FIREBASE_ADMIN_JSON or not FIREBASE_ADMIN_AVAILABLE:
         return jsonify({"error": "firebase admin not configured"}), 500
     try:
         init_firebase_admin_if_needed()
         bucket = fb_storage.bucket()
     except Exception as e:
         log.exception("Firebase init error in finalize: %s", e)
         return jsonify({"error": "firebase admin init failed", "detail": str(e)}), 500
     # Keep only alphanumerics, "-" and "_" because the uid is embedded in the
     # storage path prefix below; an empty result falls back to "anon".
     safe_uid = "".join(ch for ch in uid if ch.isalnum() or ch in ("-", "_")) or "anon"
     prefix = f"detected/{safe_uid}/"
     kept = []
     deleted = []
     errors = []
     try:
         blobs = list(bucket.list_blobs(prefix=prefix))
         for blob in blobs:
                 if "." not in fname:
                     continue
                 item_id = fname.rsplit(".", 1)[0]
                 md = blob.metadata or {}
                 # Only act on blobs tagged with this session_id AND flagged as
                 # temporary ("true"/"1"/"yes"); every other blob is skipped untouched.
                 if str(md.get("session_id", "")) != session_id or str(md.get("tmp", "")).lower() not in ("true", "1", "yes"):
                     continue
                 if item_id in keep_ids:
                     # Prefer a public URL; fall back to a gs:// URI if making
                     # the blob public fails.
                     try:
                         blob.make_public()
                         url = blob.public_url
                     except Exception:
                         url = f"gs://{bucket.name}/{name}"
                     ai_type = md.get("ai_type") or ""
                     ai_brand = md.get("ai_brand") or ""
                     ai_summary = md.get("ai_summary") or ""
                         # Decode ai_tags_raw as JSON when it is a string; any
                         # failure yields an empty tag list.
                         ai_tags = json.loads(ai_tags_raw) if isinstance(ai_tags_raw, str) else ai_tags_raw
                     except Exception:
                         ai_tags = []
                     title = None
                     # A stored title that starts with "[" or "{" is decoded as
                     # JSON; anything else (or a decode failure) is kept as a string.
                     if title_meta:
                         try:
                             title = json.loads(title_meta) if (title_meta.startswith('[') or title_meta.startswith('{')) else str(title_meta)
                         except Exception:
                             title = str(title_meta)
+                    # validate title: if not in allowed set, derive from AI fields
+                    valid = False
+                    if isinstance(title, str) and title.strip():
+                        for cat in CATEGORIES:
+                            if title.strip().lower() == cat.lower():
+                                title = cat
+                                valid = True
+                                break
+                    if not valid:
                         # No case-insensitive match in CATEGORIES: derive the
                         # title from the AI type, summary and tags instead.
                         title = choose_category_from_candidates(ai_type, ai_summary, tags=ai_tags)
                     kept.append({
                         "id": item_id,
                         "thumbnail_url": url,
                         "thumbnail_path": name,
+                        "analysis": {"type": ai_type, "brand": ai_brand, "summary": ai_summary, "tags": ai_tags},
                         "title": title
                     })
                 else:
         log.exception("finalize_detections error: %s", e)
         return jsonify({"error": "internal", "detail": str(e)}), 500
+# ---------- Clear session ----------
 @app.route("/clear_session", methods=["POST"])
 def clear_session():
     try:
         body = request.get_json(force=True)
     except Exception:
         return jsonify({"error": "invalid json"}), 400
     session_id = (body.get("session_id") or request.args.get("session_id") or "").strip()
     uid = (body.get("uid") or request.args.get("uid") or "anon").strip() or "anon"
     if not session_id:
         return jsonify({"error": "session_id required"}), 400
     if not FIREBASE_ADMIN_JSON or not FIREBASE_ADMIN_AVAILABLE:
         return jsonify({"error": "firebase admin not configured"}), 500
     try:
         init_firebase_admin_if_needed()
         bucket = fb_storage.bucket()
     except Exception as e:
         log.exception("Firebase init error in clear_session: %s", e)
         return jsonify({"error": "firebase admin init failed", "detail": str(e)}), 500
     safe_uid = "".join(ch for ch in uid if ch.isalnum() or ch in ("-", "_")) or "anon"
     prefix = f"detected/{safe_uid}/"
     deleted = []
     errors = []
     try: