Pepguy committed on
Commit
a02ad5f
·
verified ·
1 Parent(s): 0900715

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +197 -214
app.py CHANGED
@@ -6,6 +6,7 @@ import base64
6
  import logging
7
  import uuid
8
  import time
 
9
  from typing import List, Dict, Any, Tuple, Optional
10
 
11
  from flask import Flask, request, jsonify
@@ -15,12 +16,8 @@ import numpy as np
15
  import cv2
16
 
17
  # genai client
18
- try:
19
- from google import genai
20
- from google.genai import types
21
- except Exception:
22
- genai = None
23
- types = None
24
 
25
  # Firebase Admin (in-memory JSON init)
26
  try:
@@ -36,17 +33,11 @@ except Exception:
36
  logging.basicConfig(level=logging.INFO)
37
  log = logging.getLogger("wardrobe-server")
38
 
39
- GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "").strip()
40
- if GEMINI_API_KEY and genai:
41
- try:
42
- client = genai.Client(api_key=GEMINI_API_KEY)
43
- except Exception as e:
44
- log.exception("Failed to init genai client: %s", e)
45
- client = None
46
- else:
47
- client = None
48
- if not GEMINI_API_KEY:
49
- log.info("GEMINI_API_KEY not set; model calls disabled.")
50
 
51
  # Firebase config (read service account JSON from env)
52
  FIREBASE_ADMIN_JSON = os.getenv("FIREBASE_ADMIN_JSON", "").strip()
@@ -58,8 +49,9 @@ if FIREBASE_ADMIN_JSON and not FIREBASE_ADMIN_AVAILABLE:
58
  app = Flask(__name__)
59
  CORS(app)
60
 
61
- # ---------- Category options (must match frontend) ----------
62
- CATEGORY_OPTIONS = [
 
63
  "Heels",
64
  "Sneakers",
65
  "Loafers",
@@ -73,8 +65,89 @@ CATEGORY_OPTIONS = [
73
  "Coat",
74
  "Shorts",
75
  ]
76
- # normalized set for quick match
77
- _CATEGORY_RENORM = [c.lower() for c in CATEGORY_OPTIONS]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
 
79
  # ---------- Firebase init helpers ----------
80
  _firebase_app = None
@@ -147,52 +220,19 @@ def upload_b64_to_firebase(base64_str: str, path: str, content_type="image/jpeg"
147
  # ---------- Image helpers (with EXIF transpose) ----------
148
  def read_image_bytes(file_storage) -> Tuple[np.ndarray, int, int, bytes]:
149
  """
150
- Read uploaded bytes, apply EXIF orientation via PIL.ImageOps.exif_transpose,
151
- re-encode to JPEG bytes (EXIF cleared), and return (bgr_numpy, width, height, jpeg_bytes).
152
  """
153
  data = file_storage.read()
154
- if not data:
155
- raise ValueError("No image data uploaded")
156
-
157
- # Try opening with PIL to read EXIF and apply transpose
158
- try:
159
- img = Image.open(io.BytesIO(data))
160
- except Exception as e:
161
- log.warning("PIL failed to open image; falling back to OpenCV decode: %s", e)
162
- arr_np = np.frombuffer(data, np.uint8)
163
- cv_img = cv2.imdecode(arr_np, cv2.IMREAD_COLOR)
164
- if cv_img is None:
165
- raise RuntimeError("Could not decode uploaded image")
166
- h, w = cv_img.shape[:2]
167
- _, jpeg = cv2.imencode(".jpg", cv_img, [int(cv2.IMWRITE_JPEG_QUALITY), 92])
168
- return cv_img, w, h, jpeg.tobytes()
169
-
170
- # log original EXIF orientation when present
171
- try:
172
- exif = img._getexif() or {}
173
- orientation = None
174
- if isinstance(exif, dict):
175
- orientation = exif.get(274) # tag 274 orientation
176
- log.debug("Original EXIF orientation: %s", orientation)
177
- except Exception:
178
- orientation = None
179
-
180
- # physically apply EXIF rotation (so image pixels are upright)
181
  try:
182
  img = ImageOps.exif_transpose(img)
183
- except Exception as e:
184
- log.warning("exif_transpose failed: %s", e)
185
-
186
- # ensure RGB, then re-encode to JPEG to remove orientation tag from bytes
187
  img = img.convert("RGB")
188
  w, h = img.size
189
- buf = io.BytesIO()
190
- img.save(buf, format="JPEG", quality=92, optimize=True)
191
- jpeg_bytes = buf.getvalue()
192
-
193
- # convert to BGR numpy for OpenCV operations
194
- arr = np.array(img)[:, :, ::-1] # RGB -> BGR
195
- return arr, w, h, jpeg_bytes
196
 
197
  def crop_and_b64(bgr_img: np.ndarray, x: int, y: int, w: int, h: int, max_side=512) -> str:
198
  h_img, w_img = bgr_img.shape[:2]
@@ -201,7 +241,6 @@ def crop_and_b64(bgr_img: np.ndarray, x: int, y: int, w: int, h: int, max_side=5
201
  crop = bgr_img[y:y2, x:x2]
202
  if crop.size == 0:
203
  return ""
204
- # resize if too large
205
  max_dim = max(crop.shape[0], crop.shape[1])
206
  if max_dim > max_side:
207
  scale = max_side / max_dim
@@ -263,20 +302,29 @@ def fallback_contour_crops(bgr_img, max_items=8) -> List[Dict[str, Any]]:
263
  def analyze_crop_with_gemini(jpeg_b64: str) -> Dict[str, Any]:
264
  """
265
  Run Gemini on the cropped image bytes to extract:
266
- type, summary, brand, tags
267
- Returns dict, falls back to defaults on error.
 
 
 
268
  """
269
- if not client or not types:
270
  return {"type": "unknown", "summary": "", "brand": "", "tags": []}
271
  try:
 
272
  prompt = (
273
  "You are an assistant that identifies clothing item characteristics from an image. "
274
  "Return only a JSON object with keys: type (single word like 'shoe','top','jacket'), "
275
  "summary (a single short sentence, one line), brand (brand name if visible else empty string), "
276
- "tags (an array of short single-word tags). Keep values short and concise."
 
277
  )
278
 
279
- contents = [types.Content(role="user", parts=[types.Part.from_text(text=prompt)])]
 
 
 
 
280
  image_bytes = base64.b64decode(jpeg_b64)
281
  contents.append(types.Content(role="user", parts=[types.Part.from_bytes(data=image_bytes, mime_type="image/jpeg")]))
282
 
@@ -291,22 +339,24 @@ def analyze_crop_with_gemini(jpeg_b64: str) -> Dict[str, Any]:
291
  "required": ["type", "summary"]
292
  }
293
  cfg = types.GenerateContentConfig(response_mime_type="application/json", response_schema=schema)
 
 
294
  resp = client.models.generate_content(model="gemini-2.5-flash-lite", contents=contents, config=cfg)
295
  text = resp.text or ""
296
  parsed = {}
297
  try:
298
  parsed = json.loads(text)
 
 
 
 
 
 
 
 
299
  except Exception as e:
300
  log.warning("Failed parsing Gemini analysis JSON: %s — raw: %s", e, (text[:300] if text else ""))
301
  parsed = {"type": "unknown", "summary": "", "brand": "", "tags": []}
302
- # coerce
303
- parsed["type"] = str(parsed.get("type","") or "").strip()
304
- parsed["summary"] = str(parsed.get("summary","") or "").strip()
305
- parsed["brand"] = str(parsed.get("brand","") or "").strip()
306
- tags = parsed.get("tags", [])
307
- if not isinstance(tags, list):
308
- tags = []
309
- parsed["tags"] = [str(t).strip() for t in tags if str(t).strip()]
310
  return {
311
  "type": parsed.get("type", "unknown") or "unknown",
312
  "summary": parsed.get("summary", "") or "",
@@ -317,73 +367,6 @@ def analyze_crop_with_gemini(jpeg_b64: str) -> Dict[str, Any]:
317
  log.exception("analyze_crop_with_gemini failure: %s", e)
318
  return {"type": "unknown", "summary": "", "brand": "", "tags": []}
319
 
320
- # ---------- Title mapping helper ----------
321
- def choose_title_from_label_and_analysis(label: str, analysis: Dict[str, Any]) -> str:
322
- """
323
- Return a title that is guaranteed to be one of CATEGORY_OPTIONS.
324
- Heuristics:
325
- - check analysis.type
326
- - check analysis.tags
327
- - check label text
328
- - fallback to 'T-Shirt'
329
- """
330
- def find_match_in_text(txt: str) -> Optional[str]:
331
- if not txt:
332
- return None
333
- s = txt.lower()
334
- # quick synonyms mapping
335
- synonyms = {
336
- "tshirt": "T-Shirt", "t-shirt": "T-Shirt", "tee": "T-Shirt",
337
- "sneaker": "Sneakers", "trainers": "Sneakers",
338
- "jeans": "Jeans", "denim": "Jeans",
339
- "dress": "Dress",
340
- "skirt": "Skirt",
341
- "jacket": "Jacket",
342
- "coat": "Coat",
343
- "blazer": "Blazer",
344
- "boot": "Boots",
345
- "heel": "Heels",
346
- "loafer": "Loafers",
347
- "short": "Shorts",
348
- "shoe": "Sneakers", # generic shoe -> put under Sneakers by default
349
- "sneakers": "Sneakers",
350
- }
351
- for k, v in synonyms.items():
352
- if k in s:
353
- return v
354
- # check direct category words
355
- for idx, cat in enumerate(CATEGORY_OPTIONS):
356
- if cat.lower().replace("-", "").replace(" ", "") in s.replace("-", "").replace(" ", ""):
357
- return CATEGORY_OPTIONS[idx]
358
- return None
359
-
360
- # try analysis.type first
361
- atype = (analysis.get("type") or "").strip()
362
- match = find_match_in_text(atype)
363
- if match:
364
- return match
365
-
366
- # try analysis.tags
367
- tags = analysis.get("tags") or []
368
- if isinstance(tags, list):
369
- for t in tags:
370
- m = find_match_in_text(t)
371
- if m:
372
- return m
373
-
374
- # try label (raw detection label from detection model)
375
- m = find_match_in_text(label or "")
376
- if m:
377
- return m
378
-
379
- # try analysis.summary casual check
380
- m = find_match_in_text(analysis.get("summary", "") or "")
381
- if m:
382
- return m
383
-
384
- # fallback: prefer 'T-Shirt' as generic top fallback (guaranteed category)
385
- return "T-Shirt"
386
-
387
  # ---------- Main / processing ----------
388
  @app.route("/process", methods=["POST"])
389
  def process_image():
@@ -394,15 +377,14 @@ def process_image():
394
  uid = (request.form.get("uid") or request.args.get("uid") or "anon").strip() or "anon"
395
 
396
  try:
397
- # read and get corrected jpeg bytes (EXIF transpose applied)
398
- bgr_img, img_w, img_h, corrected_jpeg_bytes = read_image_bytes(file)
399
  except Exception as e:
400
  log.error("invalid image: %s", e)
401
  return jsonify({"error": "invalid image"}), 400
402
 
403
  session_id = str(uuid.uuid4())
404
 
405
- # Detection prompt (Gemini expects the corrected image bytes)
406
  user_prompt = (
407
  "You are an assistant that extracts clothing detections from a single image. "
408
  "Return a JSON object with a single key 'items' which is an array. Each item must have: "
@@ -414,11 +396,9 @@ def process_image():
414
 
415
  try:
416
  contents = [
417
- types.Content(role="user", parts=[types.Part.from_text(text=user_prompt)]) if types else None
418
  ]
419
- # attach corrected jpeg bytes
420
- if types:
421
- contents.append(types.Content(role="user", parts=[types.Part.from_bytes(data=corrected_jpeg_bytes, mime_type="image/jpeg")]))
422
 
423
  schema = {
424
  "type": "object",
@@ -448,17 +428,12 @@ def process_image():
448
  "required": ["items"]
449
  }
450
 
451
- cfg = types.GenerateContentConfig(response_mime_type="application/json", response_schema=schema) if types else None
452
-
453
- if client and types:
454
- log.info("Calling Gemini model for detection (gemini-2.5-flash-lite)...")
455
- model_resp = client.models.generate_content(model="gemini-2.5-flash-lite", contents=contents, config=cfg)
456
- raw_text = model_resp.text or ""
457
- else:
458
- log.info("Gemini client not configured, skipping model detection — using fallback.")
459
- raw_text = ""
460
 
461
- log.info("Gemini raw response length: %d", len(raw_text) if raw_text else 0)
 
 
 
462
 
463
  parsed = None
464
  try:
@@ -471,7 +446,7 @@ def process_image():
471
  if parsed and isinstance(parsed.get("items"), list) and len(parsed["items"])>0:
472
  for it in parsed["items"]:
473
  try:
474
- raw_label = str(it.get("label","unknown"))[:64]
475
  bbox = it.get("bbox",{})
476
  nx = float(bbox.get("x",0))
477
  ny = float(bbox.get("y",0))
@@ -483,45 +458,24 @@ def process_image():
483
  pw = int(nw * img_w); ph = int(nh * img_h)
484
  if pw <= 8 or ph <= 8:
485
  continue
486
- crop_b64 = crop_and_b64(bgr_img, px, py, pw, ph)
487
- if not crop_b64:
488
  continue
489
-
490
- # analyze crop with Gemini (optional)
491
- analysis = analyze_crop_with_gemini(crop_b64) if client else {"type":"unknown","summary":"","brand":"","tags":[]}
492
-
493
- # choose title within CATEGORY_OPTIONS
494
- title = choose_title_from_label_and_analysis(raw_label, analysis)
495
-
496
- item_id = str(uuid.uuid4())
497
- itm = {
498
- "id": item_id,
499
- "label": raw_label,
500
- "title": title,
501
  "confidence": float(it.get("confidence", 0.5)),
502
  "bbox": {"x": px, "y": py, "w": pw, "h": ph},
503
- "thumbnail_b64": crop_b64,
504
- "analysis": analysis,
505
  "source": "gemini"
506
- }
507
- items_out.append(itm)
508
  except Exception as e:
509
  log.warning("skipping item due to error: %s", e)
510
  else:
511
  log.info("Gemini returned no items or parse failed — using fallback contour crops.")
512
  items_out = fallback_contour_crops(bgr_img, max_items=8)
513
- # do analysis + title mapping for fallback crops
514
- for itm in items_out:
515
- try:
516
- crop_b64 = itm.get("thumbnail_b64")
517
- analysis = analyze_crop_with_gemini(crop_b64) if client else {"type":"unknown","summary":"","brand":"","tags":[]}
518
- itm["analysis"] = analysis
519
- itm["title"] = choose_title_from_label_and_analysis(itm.get("label","unknown"), analysis)
520
- except Exception:
521
- itm["analysis"] = {"type":"unknown","summary":"","brand":"","tags":[]}
522
- itm["title"] = choose_title_from_label_and_analysis(itm.get("label","unknown"), itm["analysis"])
523
-
524
- # Auto-upload thumbnails to Firebase Storage (temporary, marked by session_id)
525
  if FIREBASE_ADMIN_JSON and FIREBASE_ADMIN_AVAILABLE:
526
  try:
527
  init_firebase_admin_if_needed()
@@ -535,6 +489,25 @@ def process_image():
535
  b64 = itm.get("thumbnail_b64")
536
  if not b64:
537
  continue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
538
  item_id = itm.get("id") or str(uuid.uuid4())
539
  path = f"detected/{safe_uid}/{item_id}.jpg"
540
  try:
@@ -543,41 +516,44 @@ def process_image():
543
  "session_id": session_id,
544
  "uploaded_by": safe_uid,
545
  "uploaded_at": str(int(time.time())),
546
- # AI fields
547
- "ai_type": itm.get("analysis", {}).get("type", ""),
548
- "ai_brand": itm.get("analysis", {}).get("brand", ""),
549
- "ai_summary": itm.get("analysis", {}).get("summary", ""),
550
- "ai_tags": json.dumps(itm.get("analysis", {}).get("tags", [])),
551
- "ai_title": itm.get("title", "")
552
  }
553
  url = upload_b64_to_firebase(b64, path, content_type="image/jpeg", metadata=metadata)
554
  itm["thumbnail_url"] = url
555
  itm["thumbnail_path"] = path
556
- # remove raw base64 to keep response small
557
  itm.pop("thumbnail_b64", None)
558
  itm["_session_id"] = session_id
559
- # annotate uploaded_at (unix)
560
- itm["uploaded_at"] = int(time.time())
561
- log.debug("Auto-uploaded thumbnail for %s -> %s (session=%s)", item_id, url, session_id)
562
  except Exception as up_e:
563
  log.warning("Auto-upload failed for %s: %s", item_id, up_e)
564
- # keep thumbnail_b64 as fallback
565
  else:
566
  if not FIREBASE_ADMIN_JSON:
567
  log.info("FIREBASE_ADMIN_JSON not set; skipping server-side thumbnail upload.")
568
  else:
569
  log.info("Firebase admin SDK not available; skipping server-side thumbnail upload.")
 
 
 
 
 
 
570
 
571
- # Final response: items contain id,title,confidence,bbox,thumbnail_url or thumbnail_b64,analysis,uploaded_at if available,source, _session_id
572
  return jsonify({"ok": True, "items": items_out, "session_id": session_id, "debug": {"raw_model_text": (raw_text or "")[:1600]}}), 200
573
 
574
  except Exception as ex:
575
  log.exception("Processing error: %s", ex)
576
  try:
577
  items_out = fallback_contour_crops(bgr_img, max_items=8)
 
578
  for itm in items_out:
579
- itm["analysis"] = analyze_crop_with_gemini(itm.get("thumbnail_b64")) if client else {"type":"unknown","summary":"","brand":"","tags":[]}
580
- itm["title"] = choose_title_from_label_and_analysis(itm.get("label","unknown"), itm["analysis"])
581
  return jsonify({"ok": True, "items": items_out, "session_id": session_id, "debug": {"error": str(ex)}}), 200
582
  except Exception as e2:
583
  log.exception("Fallback also failed: %s", e2)
@@ -597,7 +573,6 @@ def finalize_detections():
597
 
598
  Returns:
599
  { ok: True, kept: [...], deleted: [...], errors: [...] }
600
- kept entries include id, thumbnail_url, thumbnail_path, analysis, title, uploaded_at
601
  """
602
  try:
603
  body = request.get_json(force=True)
@@ -644,35 +619,43 @@ def finalize_detections():
644
  continue
645
 
646
  if item_id in keep_ids:
 
647
  try:
648
  blob.make_public()
649
  url = blob.public_url
650
  except Exception:
651
  url = f"gs://{bucket.name}/{name}"
652
 
 
653
  ai_type = md.get("ai_type") or ""
654
  ai_brand = md.get("ai_brand") or ""
655
  ai_summary = md.get("ai_summary") or ""
656
  ai_tags_raw = md.get("ai_tags") or "[]"
 
657
  try:
658
  ai_tags = json.loads(ai_tags_raw) if isinstance(ai_tags_raw, str) else ai_tags_raw
659
  except Exception:
660
  ai_tags = []
661
- ai_title = md.get("ai_title") or ""
662
- uploaded_at = md.get("uploaded_at") or None
663
-
 
 
 
 
 
 
664
  kept.append({
665
  "id": item_id,
666
  "thumbnail_url": url,
667
  "thumbnail_path": name,
668
  "analysis": {
669
- "type": ai_type,
670
- "brand": ai_brand,
671
- "summary": ai_summary,
672
- "tags": ai_tags
673
  },
674
- "title": ai_title or choose_title_from_label_and_analysis("", {"type": ai_type, "summary": ai_summary, "brand": ai_brand, "tags": ai_tags}),
675
- "uploaded_at": int(uploaded_at) if uploaded_at and str(uploaded_at).isdigit() else uploaded_at
676
  })
677
  else:
678
  try:
 
6
  import logging
7
  import uuid
8
  import time
9
+ import re
10
  from typing import List, Dict, Any, Tuple, Optional
11
 
12
  from flask import Flask, request, jsonify
 
16
  import cv2
17
 
18
  # genai client
19
+ from google import genai
20
+ from google.genai import types
 
 
 
 
21
 
22
  # Firebase Admin (in-memory JSON init)
23
  try:
 
33
  logging.basicConfig(level=logging.INFO)
34
  log = logging.getLogger("wardrobe-server")
35
 
36
+ GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "")
37
+ if not GEMINI_API_KEY:
38
+ log.warning("GEMINI_API_KEY not set — gemini calls will fail (but fallback still works).")
39
+
40
+ client = genai.Client(api_key=GEMINI_API_KEY) if GEMINI_API_KEY else None
 
 
 
 
 
 
41
 
42
  # Firebase config (read service account JSON from env)
43
  FIREBASE_ADMIN_JSON = os.getenv("FIREBASE_ADMIN_JSON", "").strip()
 
49
  app = Flask(__name__)
50
  CORS(app)
51
 
52
+ # ---------- Category mapping (must match frontend) ----------
53
+ # These values intentionally match the CATEGORY_OPTIONS array on the frontend.
54
+ CATEGORIES = [
55
  "Heels",
56
  "Sneakers",
57
  "Loafers",
 
65
  "Coat",
66
  "Shorts",
67
  ]
68
+
69
# Simple synonyms / keyword -> category mapping (lowercase keys).
# Insertion order matters: the substring fallback in _match_category scans
# the keys in this order and returns the first hit.
SYNONYMS: Dict[str, str] = {
    "heel": "Heels",
    "heels": "Heels",
    "sneaker": "Sneakers",
    "sneakers": "Sneakers",
    "trainer": "Sneakers",
    "trainers": "Sneakers",
    "loafer": "Loafers",
    "loafers": "Loafers",
    "boot": "Boots",
    "boots": "Boots",
    "dress": "Dress",
    "gown": "Dress",
    "jean": "Jeans",
    "jeans": "Jeans",
    "denim": "Jeans",
    "skirt": "Skirt",
    "jacket": "Jacket",
    "coat": "Coat",
    "blazer": "Blazer",
    "t-shirt": "T-Shirt",
    "t shirt": "T-Shirt",
    "tee": "T-Shirt",
    "shirt": "T-Shirt",
    "top": "T-Shirt",
    "short": "Shorts",
    "shorts": "Shorts",
    "shoe": "Sneakers",  # generic shoe -> map to Sneakers as fallback
    "shoes": "Sneakers",
    "sandal": "Heels",  # if ambiguous, map sandals to Heels bucket (you can adjust)
    "sandals": "Heels",
}


def normalize_text(s: str) -> str:
    """Lowercase *s* and blank out every character except a-z, 0-9, whitespace and '-'."""
    return re.sub(r'[^a-z0-9\s\-]', ' ', s.lower()).strip()


def _match_category(text: str) -> Optional[str]:
    """Return the category matched by a single piece of text, or None.

    Checks run from most to least specific:
      1. a category name from CATEGORIES appearing anywhere in the text,
      2. a whole word that is a SYNONYMS key,
      3. a SYNONYMS key appearing as a substring (catches compounds such
         as "tshirt" or "miniskirt").
    """
    s = normalize_text(text)
    if not s:
        return None

    for cat in CATEGORIES:
        if cat.lower() in s:
            return cat

    # Whole words are tried before raw substrings so that the result for a
    # multi-word phrase does not depend on SYNONYMS insertion order.
    for word in s.split():
        if word in SYNONYMS:
            return SYNONYMS[word]

    for key, cat in SYNONYMS.items():
        if key in s:
            return cat

    return None


def choose_category_from_candidates(*candidates: Optional[str], tags: Optional[List[str]] = None) -> str:
    """
    Pick a category from free-text hints.

    Args:
        *candidates: strings such as analysis.type, label or summary, tried in
            the order given. Falsy entries are skipped.
        tags: optional tag strings. They are tried before the candidates. A
            plain string is treated as a single tag.

    Returns:
        The first category matched by any tag or candidate, or "T-Shirt" when
        nothing matches.
    """
    if tags is None:
        tag_list: List[Any] = []
    elif isinstance(tags, str):
        # Iterating a bare string would test it one character at a time.
        tag_list = [tags]
    else:
        tag_list = list(tags)

    # Tags and candidates share one matcher so a multi-word tag is resolved
    # the same way as a multi-word candidate.
    for text in (*tag_list, *candidates):
        if not text:
            continue
        match = _match_category(str(text))
        if match:
            return match

    # If nothing found, return the default category.
    return "T-Shirt"
151
 
152
  # ---------- Firebase init helpers ----------
153
  _firebase_app = None
 
220
  # ---------- Image helpers (with EXIF transpose) ----------
221
  def read_image_bytes(file_storage) -> Tuple[np.ndarray, int, int, bytes]:
222
  """
223
+ Read bytes, apply EXIF orientation, return BGR numpy, width, height and raw bytes.
 
224
  """
225
  data = file_storage.read()
226
+ img = Image.open(io.BytesIO(data))
227
+ # apply EXIF orientation so photos from phones are upright
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
228
  try:
229
  img = ImageOps.exif_transpose(img)
230
+ except Exception:
231
+ pass
 
 
232
  img = img.convert("RGB")
233
  w, h = img.size
234
+ arr = np.array(img)[:, :, ::-1] # RGB -> BGR for OpenCV
235
+ return arr, w, h, data
 
 
 
 
 
236
 
237
  def crop_and_b64(bgr_img: np.ndarray, x: int, y: int, w: int, h: int, max_side=512) -> str:
238
  h_img, w_img = bgr_img.shape[:2]
 
241
  crop = bgr_img[y:y2, x:x2]
242
  if crop.size == 0:
243
  return ""
 
244
  max_dim = max(crop.shape[0], crop.shape[1])
245
  if max_dim > max_side:
246
  scale = max_side / max_dim
 
302
  def analyze_crop_with_gemini(jpeg_b64: str) -> Dict[str, Any]:
303
  """
304
  Run Gemini on the cropped image bytes to extract:
305
+ type (one-word category like 'shoe', 'jacket', 'dress'),
306
+ summary (single-line description),
307
+ brand (string or empty),
308
+ tags (array of short descriptors)
309
+ Returns dict, falls back to empty/defaults on error or missing key.
310
  """
311
+ if not client:
312
  return {"type": "unknown", "summary": "", "brand": "", "tags": []}
313
  try:
314
+ # prepare prompt
315
  prompt = (
316
  "You are an assistant that identifies clothing item characteristics from an image. "
317
  "Return only a JSON object with keys: type (single word like 'shoe','top','jacket'), "
318
  "summary (a single short sentence, one line), brand (brand name if visible else empty string), "
319
+ "tags (an array of short single-word tags describing visible attributes, e.g. ['striped','leather','white']). "
320
+ "Keep values short and concise."
321
  )
322
 
323
+ contents = [
324
+ types.Content(role="user", parts=[types.Part.from_text(text=prompt)])
325
+ ]
326
+
327
+ # attach the image bytes
328
  image_bytes = base64.b64decode(jpeg_b64)
329
  contents.append(types.Content(role="user", parts=[types.Part.from_bytes(data=image_bytes, mime_type="image/jpeg")]))
330
 
 
339
  "required": ["type", "summary"]
340
  }
341
  cfg = types.GenerateContentConfig(response_mime_type="application/json", response_schema=schema)
342
+
343
+ # call model (use the same model family you used before)
344
  resp = client.models.generate_content(model="gemini-2.5-flash-lite", contents=contents, config=cfg)
345
  text = resp.text or ""
346
  parsed = {}
347
  try:
348
  parsed = json.loads(text)
349
+ # coerce expected shapes
350
+ parsed["type"] = str(parsed.get("type", "")).strip()
351
+ parsed["summary"] = str(parsed.get("summary", "")).strip()
352
+ parsed["brand"] = str(parsed.get("brand", "")).strip()
353
+ tags = parsed.get("tags", [])
354
+ if not isinstance(tags, list):
355
+ tags = []
356
+ parsed["tags"] = [str(t).strip() for t in tags if str(t).strip()]
357
  except Exception as e:
358
  log.warning("Failed parsing Gemini analysis JSON: %s — raw: %s", e, (text[:300] if text else ""))
359
  parsed = {"type": "unknown", "summary": "", "brand": "", "tags": []}
 
 
 
 
 
 
 
 
360
  return {
361
  "type": parsed.get("type", "unknown") or "unknown",
362
  "summary": parsed.get("summary", "") or "",
 
367
  log.exception("analyze_crop_with_gemini failure: %s", e)
368
  return {"type": "unknown", "summary": "", "brand": "", "tags": []}
369
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
370
  # ---------- Main / processing ----------
371
  @app.route("/process", methods=["POST"])
372
  def process_image():
 
377
  uid = (request.form.get("uid") or request.args.get("uid") or "anon").strip() or "anon"
378
 
379
  try:
380
+ bgr_img, img_w, img_h, raw_bytes = read_image_bytes(file)
 
381
  except Exception as e:
382
  log.error("invalid image: %s", e)
383
  return jsonify({"error": "invalid image"}), 400
384
 
385
  session_id = str(uuid.uuid4())
386
 
387
+ # Detection prompt (same as before)
388
  user_prompt = (
389
  "You are an assistant that extracts clothing detections from a single image. "
390
  "Return a JSON object with a single key 'items' which is an array. Each item must have: "
 
396
 
397
  try:
398
  contents = [
399
+ types.Content(role="user", parts=[types.Part.from_text(text=user_prompt)])
400
  ]
401
+ contents.append(types.Content(role="user", parts=[types.Part.from_bytes(data=raw_bytes, mime_type="image/jpeg")]))
 
 
402
 
403
  schema = {
404
  "type": "object",
 
428
  "required": ["items"]
429
  }
430
 
431
+ cfg = types.GenerateContentConfig(response_mime_type="application/json", response_schema=schema)
 
 
 
 
 
 
 
 
432
 
433
+ log.info("Calling Gemini model for detection (gemini-2.5-flash-lite)...")
434
+ model_resp = client.models.generate_content(model="gemini-2.5-flash-lite", contents=contents, config=cfg) if client else None
435
+ raw_text = (model_resp.text or "") if model_resp else ""
436
+ log.info("Gemini raw response length: %d", len(raw_text))
437
 
438
  parsed = None
439
  try:
 
446
  if parsed and isinstance(parsed.get("items"), list) and len(parsed["items"])>0:
447
  for it in parsed["items"]:
448
  try:
449
+ label = str(it.get("label","unknown"))[:48]
450
  bbox = it.get("bbox",{})
451
  nx = float(bbox.get("x",0))
452
  ny = float(bbox.get("y",0))
 
458
  pw = int(nw * img_w); ph = int(nh * img_h)
459
  if pw <= 8 or ph <= 8:
460
  continue
461
+ b64 = crop_and_b64(bgr_img, px, py, pw, ph)
462
+ if not b64:
463
  continue
464
+ items_out.append({
465
+ "id": str(uuid.uuid4()),
466
+ "label": label,
 
 
 
 
 
 
 
 
 
467
  "confidence": float(it.get("confidence", 0.5)),
468
  "bbox": {"x": px, "y": py, "w": pw, "h": ph},
469
+ "thumbnail_b64": b64,
 
470
  "source": "gemini"
471
+ })
 
472
  except Exception as e:
473
  log.warning("skipping item due to error: %s", e)
474
  else:
475
  log.info("Gemini returned no items or parse failed — using fallback contour crops.")
476
  items_out = fallback_contour_crops(bgr_img, max_items=8)
477
+
478
+ # Perform AI analysis per crop (if possible) and auto-upload to firebase with metadata (tmp + session)
 
 
 
 
 
 
 
 
 
 
479
  if FIREBASE_ADMIN_JSON and FIREBASE_ADMIN_AVAILABLE:
480
  try:
481
  init_firebase_admin_if_needed()
 
489
  b64 = itm.get("thumbnail_b64")
490
  if not b64:
491
  continue
492
+ # analyze
493
+ try:
494
+ analysis = analyze_crop_with_gemini(b64) if client else {"type":"unknown","summary":"","brand":"","tags":[]}
495
+ except Exception as ae:
496
+ log.warning("analysis failed: %s", ae)
497
+ analysis = {"type":"unknown","summary":"","brand":"","tags":[]}
498
+
499
+ itm["analysis"] = analysis
500
+
501
+ # choose a frontend-category-compatible title
502
+ # prefer analysis.type, then label, then tags, then summary
503
+ title = choose_category_from_candidates(
504
+ analysis.get("type", ""),
505
+ itm.get("label", ""),
506
+ ' '.join(analysis.get("tags", [])),
507
+ tags=analysis.get("tags", [])
508
+ )
509
+ itm["title"] = title
510
+
511
  item_id = itm.get("id") or str(uuid.uuid4())
512
  path = f"detected/{safe_uid}/{item_id}.jpg"
513
  try:
 
516
  "session_id": session_id,
517
  "uploaded_by": safe_uid,
518
  "uploaded_at": str(int(time.time())),
519
+ # store AI fields as JSON strings for later inspection
520
+ "ai_type": analysis.get("type",""),
521
+ "ai_brand": analysis.get("brand",""),
522
+ "ai_summary": analysis.get("summary",""),
523
+ "ai_tags": json.dumps(analysis.get("tags", [])),
524
+ "title": title,
525
  }
526
  url = upload_b64_to_firebase(b64, path, content_type="image/jpeg", metadata=metadata)
527
  itm["thumbnail_url"] = url
528
  itm["thumbnail_path"] = path
 
529
  itm.pop("thumbnail_b64", None)
530
  itm["_session_id"] = session_id
531
+ log.debug("Auto-uploaded thumbnail for %s -> %s (session=%s) title=%s", item_id, url, session_id, title)
 
 
532
  except Exception as up_e:
533
  log.warning("Auto-upload failed for %s: %s", item_id, up_e)
534
+ # keep thumbnail_b64 and analysis for client fallback
535
  else:
536
  if not FIREBASE_ADMIN_JSON:
537
  log.info("FIREBASE_ADMIN_JSON not set; skipping server-side thumbnail upload.")
538
  else:
539
  log.info("Firebase admin SDK not available; skipping server-side thumbnail upload.")
540
+ # For non-upload path, still add a title derived from label/unknown so frontend has it
541
+ for itm in items_out:
542
+ if "title" not in itm:
543
+ analysis = itm.get("analysis") or {"type":"unknown","tags":[]}
544
+ title = choose_category_from_candidates(analysis.get("type",""), itm.get("label",""), tags=analysis.get("tags", []))
545
+ itm["title"] = title
546
 
 
547
  return jsonify({"ok": True, "items": items_out, "session_id": session_id, "debug": {"raw_model_text": (raw_text or "")[:1600]}}), 200
548
 
549
  except Exception as ex:
550
  log.exception("Processing error: %s", ex)
551
  try:
552
  items_out = fallback_contour_crops(bgr_img, max_items=8)
553
+ # give fallback items a default title so frontend can filter
554
  for itm in items_out:
555
+ if "title" not in itm:
556
+ itm["title"] = choose_category_from_candidates(itm.get("label","unknown"))
557
  return jsonify({"ok": True, "items": items_out, "session_id": session_id, "debug": {"error": str(ex)}}), 200
558
  except Exception as e2:
559
  log.exception("Fallback also failed: %s", e2)
 
573
 
574
  Returns:
575
  { ok: True, kept: [...], deleted: [...], errors: [...] }
 
576
  """
577
  try:
578
  body = request.get_json(force=True)
 
619
  continue
620
 
621
  if item_id in keep_ids:
622
+ # ensure public URL available if possible
623
  try:
624
  blob.make_public()
625
  url = blob.public_url
626
  except Exception:
627
  url = f"gs://{bucket.name}/{name}"
628
 
629
+ # extract AI metadata (if present)
630
  ai_type = md.get("ai_type") or ""
631
  ai_brand = md.get("ai_brand") or ""
632
  ai_summary = md.get("ai_summary") or ""
633
  ai_tags_raw = md.get("ai_tags") or "[]"
634
+ title_meta = md.get("title") or ""
635
  try:
636
  ai_tags = json.loads(ai_tags_raw) if isinstance(ai_tags_raw, str) else ai_tags_raw
637
  except Exception:
638
  ai_tags = []
639
+ # derive title: prefer stored metadata title, then ai_type/tags/summary
640
+ title = None
641
+ if title_meta:
642
+ try:
643
+ title = json.loads(title_meta) if (title_meta.startswith('[') or title_meta.startswith('{')) else str(title_meta)
644
+ except Exception:
645
+ title = str(title_meta)
646
+ if not title:
647
+ title = choose_category_from_candidates(ai_type, ai_summary, tags=ai_tags)
648
  kept.append({
649
  "id": item_id,
650
  "thumbnail_url": url,
651
  "thumbnail_path": name,
652
  "analysis": {
653
+ "type": ai_type,
654
+ "brand": ai_brand,
655
+ "summary": ai_summary,
656
+ "tags": ai_tags
657
  },
658
+ "title": title
 
659
  })
660
  else:
661
  try: