Pepguy commited on
Commit
b42ec7f
·
verified ·
1 Parent(s): a02ad5f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +551 -673
app.py CHANGED
@@ -1,4 +1,5 @@
1
- # server_gemini_seg.py
 
2
  import os
3
  import io
4
  import json
@@ -6,7 +7,6 @@ import base64
6
  import logging
7
  import uuid
8
  import time
9
- import re
10
  from typing import List, Dict, Any, Tuple, Optional
11
 
12
  from flask import Flask, request, jsonify
@@ -15,713 +15,591 @@ from PIL import Image, ImageOps
15
  import numpy as np
16
  import cv2
17
 
18
- # genai client
 
19
  from google import genai
20
  from google.genai import types
21
 
22
- # Firebase Admin (in-memory JSON init)
 
23
# firebase-admin is optional: without it the server still runs, but
# server-side thumbnail upload and the finalize/clear endpoints are disabled.
try:
    import firebase_admin
    from firebase_admin import credentials as fb_credentials, storage as fb_storage
    FIREBASE_ADMIN_AVAILABLE = True
except Exception:
    firebase_admin = None
    fb_credentials = None
    fb_storage = None
    FIREBASE_ADMIN_AVAILABLE = False

logging.basicConfig(level=logging.INFO)
log = logging.getLogger("wardrobe-server")

# Gemini is likewise optional; with no key, detection uses the contour fallback.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "")
if not GEMINI_API_KEY:
    log.warning("GEMINI_API_KEY not set — gemini calls will fail (but fallback still works).")

client = genai.Client(api_key=GEMINI_API_KEY) if GEMINI_API_KEY else None

# Firebase config (read service account JSON from env)
FIREBASE_ADMIN_JSON = os.getenv("FIREBASE_ADMIN_JSON", "").strip()
FIREBASE_STORAGE_BUCKET = os.getenv("FIREBASE_STORAGE_BUCKET", "").strip()  # optional override

if FIREBASE_ADMIN_JSON and not FIREBASE_ADMIN_AVAILABLE:
    log.warning("FIREBASE_ADMIN_JSON provided but firebase-admin SDK is not installed. Install firebase-admin.")

app = Flask(__name__)
CORS(app)
51
 
52
# ---------- Category mapping (must match frontend) ----------
# These values intentionally mirror the CATEGORY_OPTIONS array on the frontend.
CATEGORIES = [
    "Heels",
    "Sneakers",
    "Loafers",
    "Boots",
    "Dress",
    "Jeans",
    "Skirt",
    "Jacket",
    "Blazer",
    "T-Shirt",
    "Coat",
    "Shorts",
]

# keyword / synonym -> canonical category (keys are lowercase; insertion
# order matters for the substring matching below)
SYNONYMS: Dict[str, str] = {
    "heel": "Heels",
    "heels": "Heels",
    "sneaker": "Sneakers",
    "sneakers": "Sneakers",
    "trainer": "Sneakers",
    "trainers": "Sneakers",
    "loafer": "Loafers",
    "loafers": "Loafers",
    "boot": "Boots",
    "boots": "Boots",
    "dress": "Dress",
    "gown": "Dress",
    "jean": "Jeans",
    "jeans": "Jeans",
    "denim": "Jeans",
    "skirt": "Skirt",
    "jacket": "Jacket",
    "coat": "Coat",
    "blazer": "Blazer",
    "t-shirt": "T-Shirt",
    "t shirt": "T-Shirt",
    "tee": "T-Shirt",
    "shirt": "T-Shirt",
    "top": "T-Shirt",
    "short": "Shorts",
    "shorts": "Shorts",
    "shoe": "Sneakers",  # generic shoe -> map to Sneakers as fallback
    "shoes": "Sneakers",
    "sandal": "Heels",  # if ambiguous, map sandals to Heels bucket (adjustable)
    "sandals": "Heels",
}

def normalize_text(s: str) -> str:
    """Lowercase *s* and blank out everything except letters, digits, whitespace and hyphens."""
    cleaned = re.sub(r'[^a-z0-9\s\-]', ' ', s.lower())
    return cleaned.strip()

def choose_category_from_candidates(*candidates: Optional[str], tags: Optional[List[str]] = None) -> str:
    """Map free-form descriptors onto one of the frontend CATEGORIES.

    Tags are consulted first (an explicit tag is the strongest signal), then
    each candidate string in the order given.  For every string the lookup
    order is: exact synonym, synonym substring, then category-name match
    (for candidates: category match first, then per-word synonym, then
    synonym substring).  Always returns a member of CATEGORIES; defaults to
    "T-Shirt" when nothing matches.
    """
    for raw_tag in (tags or []):
        if not raw_tag:
            continue
        token = normalize_text(str(raw_tag))
        # direct synonym hit
        if token in SYNONYMS:
            return SYNONYMS[token]
        # synonym keyword embedded in a longer tag
        for keyword, category in SYNONYMS.items():
            if keyword in token:
                return category
        # literal category name inside the tag
        for category in CATEGORIES:
            if token == category.lower() or category.lower() in token:
                return category

    for candidate in candidates:
        if not candidate:
            continue
        text = normalize_text(str(candidate))
        # exact or embedded category name
        for category in CATEGORIES:
            if text == category.lower() or category.lower() in text:
                return category
        # per-word synonym lookup
        for word in text.split():
            if word in SYNONYMS:
                return SYNONYMS[word]
        # last resort: synonym keyword anywhere inside the text
        for keyword, category in SYNONYMS.items():
            if keyword in text:
                return category

    # safe default that is guaranteed to be present in CATEGORIES
    return "T-Shirt"
151
-
152
- # ---------- Firebase init helpers ----------
153
_firebase_app = None  # cached firebase_admin app so init runs at most once

def init_firebase_admin_if_needed():
    """Lazily initialize firebase-admin exactly once and return the app.

    Returns the cached app on repeat calls, or None when FIREBASE_ADMIN_JSON
    is not configured.  Raises RuntimeError when the SDK is missing or no
    bucket name can be determined, and re-raises JSON-parse or SDK init
    failures after logging them.
    """
    global _firebase_app
    if _firebase_app is not None:
        return _firebase_app
    if not FIREBASE_ADMIN_JSON:
        log.info("No FIREBASE_ADMIN_JSON env var set; skipping Firebase admin init.")
        return None
    if not FIREBASE_ADMIN_AVAILABLE:
        raise RuntimeError("firebase-admin not installed (pip install firebase-admin)")
    try:
        # the service-account credentials are passed inline via env var, not a file
        sa_obj = json.loads(FIREBASE_ADMIN_JSON)
    except Exception as e:
        log.exception("Failed parsing FIREBASE_ADMIN_JSON: %s", e)
        raise
    # explicit bucket override wins; otherwise derive "<project_id>.appspot.com"
    bucket_name = FIREBASE_STORAGE_BUCKET or (sa_obj.get("project_id") and f"{sa_obj.get('project_id')}.appspot.com")
    if not bucket_name:
        raise RuntimeError("Could not determine storage bucket. Set FIREBASE_STORAGE_BUCKET or include project_id in service account JSON.")
    try:
        cred = fb_credentials.Certificate(sa_obj)
        _firebase_app = firebase_admin.initialize_app(cred, {"storageBucket": bucket_name})
        log.info("Initialized firebase admin with bucket: %s", bucket_name)
        return _firebase_app
    except Exception as e:
        log.exception("Failed to initialize firebase admin: %s", e)
        raise
180
 
181
def upload_b64_to_firebase(base64_str: str, path: str, content_type="image/jpeg", metadata: dict = None) -> str:
    """
    Upload base64 string to Firebase Storage at `path`.
    Optionally attach metadata dict (custom metadata).
    Returns a public URL when possible, otherwise returns gs://<bucket>/<path>.

    Raises RuntimeError when Firebase is not configured/installed and
    re-raises any storage error after logging it.
    """
    if not FIREBASE_ADMIN_JSON:
        raise RuntimeError("FIREBASE_ADMIN_JSON not set")
    init_firebase_admin_if_needed()
    if not FIREBASE_ADMIN_AVAILABLE:
        raise RuntimeError("firebase-admin not available")

    # accept both bare base64 and data-URL form ("data:image/jpeg;base64,...")
    raw = base64_str
    if raw.startswith("data:"):
        raw = raw.split(",", 1)[1]
    raw = raw.replace("\n", "").replace("\r", "")
    data = base64.b64decode(raw)

    try:
        bucket = fb_storage.bucket()
        blob = bucket.blob(path)
        blob.upload_from_string(data, content_type=content_type)
        # attach metadata if provided (values must be strings)
        if metadata:
            try:
                # non-string values are JSON-encoded so they round-trip on read-back
                blob.metadata = {k: (json.dumps(v) if not isinstance(v, str) else v) for k, v in metadata.items()}
                blob.patch()
            except Exception as me:
                # metadata is best-effort; the upload itself already succeeded
                log.warning("Failed to patch metadata for %s: %s", path, me)
        try:
            blob.make_public()
            return blob.public_url
        except Exception as e:
            # bucket may forbid public ACLs — fall back to a gs:// reference
            log.warning("Could not make blob public: %s", e)
            return f"gs://{bucket.name}/{path}"
    except Exception as e:
        log.exception("Firebase upload error for path %s: %s", path, e)
        raise
219
-
220
- # ---------- Image helpers (with EXIF transpose) ----------
 
221
def read_image_bytes(file_storage) -> Tuple[np.ndarray, int, int, bytes]:
    """Decode an uploaded image into an OpenCV-ready array.

    Applies the EXIF orientation tag (phone photos otherwise arrive rotated)
    and returns ``(bgr_array, width, height, original_raw_bytes)``.
    """
    raw = file_storage.read()
    pil_image = Image.open(io.BytesIO(raw))
    try:
        # honor EXIF orientation so the pixel data is upright
        pil_image = ImageOps.exif_transpose(pil_image)
    except Exception:
        pass  # best-effort: keep the un-rotated image
    pil_image = pil_image.convert("RGB")
    width, height = pil_image.size
    # PIL yields RGB; reverse the channel axis to get OpenCV's BGR order
    bgr = np.array(pil_image)[:, :, ::-1]
    return bgr, width, height, raw
236
 
237
def crop_and_b64(bgr_img: np.ndarray, x: int, y: int, w: int, h: int, max_side=512) -> str:
    """Crop ``bgr_img`` to the given box and return a base64 JPEG string.

    The box is clamped to the image bounds and the crop is downscaled so its
    longest side is at most ``max_side``.  Returns "" for an empty/degenerate
    window or if JPEG encoding fails (callers treat "" as "skip this item").
    """
    h_img, w_img = bgr_img.shape[:2]
    x = max(0, int(x)); y = max(0, int(y))
    x2 = min(w_img, int(x + w)); y2 = min(h_img, int(y + h))
    # degenerate or inverted window after clamping -> nothing to crop
    if x2 <= x or y2 <= y:
        return ""
    crop = bgr_img[y:y2, x:x2]
    if crop.size == 0:
        return ""
    max_dim = max(crop.shape[0], crop.shape[1])
    if max_dim > max_side:
        # shrink proportionally; INTER_AREA is the recommended downsampling filter
        scale = max_side / max_dim
        crop = cv2.resize(crop, (int(crop.shape[1] * scale), int(crop.shape[0] * scale)), interpolation=cv2.INTER_AREA)
    # imencode returns (success, buffer); the original ignored the success
    # flag, which could feed an invalid buffer to b64encode on encode failure
    ok, jpeg = cv2.imencode(".jpg", crop, [int(cv2.IMWRITE_JPEG_QUALITY), 82])
    if not ok:
        return ""
    return base64.b64encode(jpeg.tobytes()).decode("ascii")
250
 
251
def fallback_contour_crops(bgr_img, max_items=8) -> List[Dict[str, Any]]:
    """Model-free detection: return up to ``max_items`` crops of large
    foreground contours, shaped like the Gemini detections
    (id/label/confidence/bbox/thumbnail_b64/source).

    Used when Gemini is unavailable, returns nothing, or fails.  If no
    contour passes the area filter, falls back to a fixed 2x2 grid of crops
    so the client always receives something to show.
    """
    # adaptive threshold + morphological close turns the photo into coarse
    # foreground blobs whose bounding boxes approximate item regions
    gray = cv2.cvtColor(bgr_img, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (7,7), 0)
    thresh = cv2.adaptiveThreshold(blur,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,cv2.THRESH_BINARY_INV,15,6)
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9,9))
    closed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
    contours, _ = cv2.findContours(closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    h_img, w_img = bgr_img.shape[:2]
    # ignore contours smaller than 0.5% of the image area (noise)
    min_area = (w_img*h_img) * 0.005
    items = []
    # largest contours first so the most prominent objects win the slots
    for cnt in sorted(contours, key=cv2.contourArea, reverse=True):
        if len(items) >= max_items:
            break
        area = cv2.contourArea(cnt)
        if area < min_area:
            continue
        x,y,w,h = cv2.boundingRect(cnt)
        # pad the box ~7% per side so the crop isn't cut too tight
        pad_x, pad_y = int(w*0.07), int(h*0.07)
        x = max(0, x - pad_x); y = max(0, y - pad_y)
        w = min(w_img - x, w + pad_x*2); h = min(h_img - y, h + pad_y*2)
        b64 = crop_and_b64(bgr_img, x, y, w, h)
        if not b64:
            continue
        items.append({
            "id": str(uuid.uuid4()),
            "label": "unknown",
            # crude confidence proxy: image-area fraction clamped to [0.25, 0.95]
            "confidence": min(0.95, max(0.25, area/(w_img*h_img))),
            "bbox": {"x": x, "y": y, "w": w, "h": h},
            "thumbnail_b64": b64,
            "source": "fallback"
        })
    if not items:
        # last resort: quadrant grid with a fixed low confidence
        h_half, w_half = h_img//2, w_img//2
        rects = [
            (0,0,w_half,h_half), (w_half,0,w_half,h_half),
            (0,h_half,w_half,h_half), (w_half,h_half,w_half,h_half)
        ]
        for r in rects:
            b64 = crop_and_b64(bgr_img, r[0], r[1], r[2], r[3])
            if b64:
                items.append({
                    "id": str(uuid.uuid4()),
                    "label": "unknown",
                    "confidence": 0.3,
                    "bbox": {"x": r[0], "y": r[1], "w": r[2], "h": r[3]},
                    "thumbnail_b64": b64,
                    "source": "fallback-grid"
                })
    return items
300
-
301
- # ---------- AI analysis helper ----------
 
302
def analyze_crop_with_gemini(jpeg_b64: str) -> Dict[str, Any]:
    """
    Run Gemini on a cropped item image (base64 JPEG) to extract:
      type    (one-word category like 'shoe', 'jacket', 'dress'),
      summary (single-line description),
      brand   (string or empty),
      tags    (array of short descriptors)
    Always returns a dict with those four keys; falls back to
    empty/"unknown" defaults when the client is not configured, the call
    fails, or the response is not valid JSON.
    """
    if not client:
        return {"type": "unknown", "summary": "", "brand": "", "tags": []}
    try:
        # prepare prompt
        prompt = (
            "You are an assistant that identifies clothing item characteristics from an image. "
            "Return only a JSON object with keys: type (single word like 'shoe','top','jacket'), "
            "summary (a single short sentence, one line), brand (brand name if visible else empty string), "
            "tags (an array of short single-word tags describing visible attributes, e.g. ['striped','leather','white']). "
            "Keep values short and concise."
        )

        contents = [
            types.Content(role="user", parts=[types.Part.from_text(text=prompt)])
        ]

        # attach the image bytes as a second user turn
        image_bytes = base64.b64decode(jpeg_b64)
        contents.append(types.Content(role="user", parts=[types.Part.from_bytes(data=image_bytes, mime_type="image/jpeg")]))

        # structured-output schema: constrains the model to this JSON shape
        schema = {
            "type": "object",
            "properties": {
                "type": {"type": "string"},
                "summary": {"type": "string"},
                "brand": {"type": "string"},
                "tags": {"type": "array", "items": {"type": "string"}}
            },
            "required": ["type", "summary"]
        }
        cfg = types.GenerateContentConfig(response_mime_type="application/json", response_schema=schema)

        # call model (same model family as the detection call)
        resp = client.models.generate_content(model="gemini-2.5-flash-lite", contents=contents, config=cfg)
        text = resp.text or ""
        parsed = {}
        try:
            parsed = json.loads(text)
            # coerce expected shapes: everything to stripped strings / string list
            parsed["type"] = str(parsed.get("type", "")).strip()
            parsed["summary"] = str(parsed.get("summary", "")).strip()
            parsed["brand"] = str(parsed.get("brand", "")).strip()
            tags = parsed.get("tags", [])
            if not isinstance(tags, list):
                tags = []
            parsed["tags"] = [str(t).strip() for t in tags if str(t).strip()]
        except Exception as e:
            log.warning("Failed parsing Gemini analysis JSON: %s — raw: %s", e, (text[:300] if text else ""))
            parsed = {"type": "unknown", "summary": "", "brand": "", "tags": []}
        return {
            "type": parsed.get("type", "unknown") or "unknown",
            "summary": parsed.get("summary", "") or "",
            "brand": parsed.get("brand", "") or "",
            "tags": parsed.get("tags", []) or []
        }
    except Exception as e:
        log.exception("analyze_crop_with_gemini failure: %s", e)
        return {"type": "unknown", "summary": "", "brand": "", "tags": []}
369
-
370
- # ---------- Main / processing ----------
 
371
@app.route("/process", methods=["POST"])
def process_image():
    """Detect clothing items in an uploaded photo.

    Multipart form fields: ``photo`` (required image file), optional ``uid``.
    Pipeline: decode image -> Gemini bbox detection (contour fallback when
    Gemini is unavailable or returns nothing) -> per-crop Gemini analysis ->
    optional server-side Firebase upload of thumbnails tagged with
    tmp/session metadata -> JSON response {ok, items, session_id, debug}.
    Every item carries a ``title`` drawn from the frontend category list via
    choose_category_from_candidates.
    """
    if "photo" not in request.files:
        return jsonify({"error": "missing photo"}), 400
    file = request.files["photo"]

    uid = (request.form.get("uid") or request.args.get("uid") or "anon").strip() or "anon"

    try:
        bgr_img, img_w, img_h, raw_bytes = read_image_bytes(file)
    except Exception as e:
        log.error("invalid image: %s", e)
        return jsonify({"error": "invalid image"}), 400

    # one session id per request; uploaded temp blobs are tagged with it so
    # /finalize_detections and /clear_session can scope their deletes
    session_id = str(uuid.uuid4())

    # Detection prompt (same as before)
    user_prompt = (
        "You are an assistant that extracts clothing detections from a single image. "
        "Return a JSON object with a single key 'items' which is an array. Each item must have: "
        "label (string, short like 'top','skirt','sneakers'), "
        "bbox with normalized coordinates between 0 and 1: {x, y, w, h} where x,y are top-left relative to width/height, "
        "confidence (0-1). Example output: {\"items\":[{\"label\":\"top\",\"bbox\":{\"x\":0.1,\"y\":0.2,\"w\":0.3,\"h\":0.4},\"confidence\":0.95}]} "
        "Output ONLY valid JSON. If you cannot detect any clothing confidently, return {\"items\":[]}."
    )

    try:
        contents = [
            types.Content(role="user", parts=[types.Part.from_text(text=user_prompt)])
        ]
        # NOTE(review): raw_bytes is sent with a hardcoded image/jpeg mime
        # type even if the upload was e.g. PNG — confirm Gemini tolerates this
        contents.append(types.Content(role="user", parts=[types.Part.from_bytes(data=raw_bytes, mime_type="image/jpeg")]))

        # structured-output schema for the detection response
        schema = {
            "type": "object",
            "properties": {
                "items": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "label": {"type": "string"},
                            "bbox": {
                                "type": "object",
                                "properties": {
                                    "x": {"type": "number"},
                                    "y": {"type": "number"},
                                    "w": {"type": "number"},
                                    "h": {"type": "number"}
                                },
                                "required": ["x","y","w","h"]
                            },
                            "confidence": {"type": "number"}
                        },
                        "required": ["label","bbox","confidence"]
                    }
                }
            },
            "required": ["items"]
        }

        cfg = types.GenerateContentConfig(response_mime_type="application/json", response_schema=schema)

        log.info("Calling Gemini model for detection (gemini-2.5-flash-lite)...")
        # client may be None when GEMINI_API_KEY is unset; fall through to fallback
        model_resp = client.models.generate_content(model="gemini-2.5-flash-lite", contents=contents, config=cfg) if client else None
        raw_text = (model_resp.text or "") if model_resp else ""
        log.info("Gemini raw response length: %d", len(raw_text))

        parsed = None
        try:
            parsed = json.loads(raw_text) if raw_text else None
        except Exception as e:
            log.warning("Could not parse Gemini JSON: %s", e)
            parsed = None

        items_out: List[Dict[str, Any]] = []
        if parsed and isinstance(parsed.get("items"), list) and len(parsed["items"])>0:
            for it in parsed["items"]:
                try:
                    label = str(it.get("label","unknown"))[:48]
                    bbox = it.get("bbox",{})
                    # normalized [0,1] coords from the model, clamped defensively
                    nx = float(bbox.get("x",0))
                    ny = float(bbox.get("y",0))
                    nw = float(bbox.get("w",0))
                    nh = float(bbox.get("h",0))
                    nx = max(0.0, min(1.0, nx)); ny = max(0.0,min(1.0,ny))
                    nw = max(0.0, min(1.0, nw)); nh = max(0.0, min(1.0, nh))
                    # convert to pixel space; discard boxes under 8px a side
                    px = int(nx * img_w); py = int(ny * img_h)
                    pw = int(nw * img_w); ph = int(nh * img_h)
                    if pw <= 8 or ph <= 8:
                        continue
                    b64 = crop_and_b64(bgr_img, px, py, pw, ph)
                    if not b64:
                        continue
                    items_out.append({
                        "id": str(uuid.uuid4()),
                        "label": label,
                        "confidence": float(it.get("confidence", 0.5)),
                        "bbox": {"x": px, "y": py, "w": pw, "h": ph},
                        "thumbnail_b64": b64,
                        "source": "gemini"
                    })
                except Exception as e:
                    # one malformed item must not sink the whole detection
                    log.warning("skipping item due to error: %s", e)
        else:
            log.info("Gemini returned no items or parse failed — using fallback contour crops.")
            items_out = fallback_contour_crops(bgr_img, max_items=8)

        # Perform AI analysis per crop (if possible) and auto-upload to firebase with metadata (tmp + session)
        if FIREBASE_ADMIN_JSON and FIREBASE_ADMIN_AVAILABLE:
            try:
                init_firebase_admin_if_needed()
                bucket = fb_storage.bucket()
            except Exception as e:
                # NOTE(review): bucket is never read below — upload_b64_to_firebase
                # fetches its own; this assignment just records the init failure
                log.exception("Firebase admin init for upload failed: %s", e)
                bucket = None

            # uid becomes a storage path segment, so strip unsafe characters
            safe_uid = "".join(ch for ch in uid if ch.isalnum() or ch in ("-", "_")) or "anon"
            for itm in items_out:
                b64 = itm.get("thumbnail_b64")
                if not b64:
                    continue
                # analyze
                try:
                    analysis = analyze_crop_with_gemini(b64) if client else {"type":"unknown","summary":"","brand":"","tags":[]}
                except Exception as ae:
                    log.warning("analysis failed: %s", ae)
                    analysis = {"type":"unknown","summary":"","brand":"","tags":[]}

                itm["analysis"] = analysis

                # choose a frontend-category-compatible title
                # prefer analysis.type, then label, then tags, then summary
                title = choose_category_from_candidates(
                    analysis.get("type", ""),
                    itm.get("label", ""),
                    ' '.join(analysis.get("tags", [])),
                    tags=analysis.get("tags", [])
                )
                itm["title"] = title

                item_id = itm.get("id") or str(uuid.uuid4())
                path = f"detected/{safe_uid}/{item_id}.jpg"
                try:
                    # tmp + session_id let finalize/clear endpoints scope deletes
                    metadata = {
                        "tmp": "true",
                        "session_id": session_id,
                        "uploaded_by": safe_uid,
                        "uploaded_at": str(int(time.time())),
                        # store AI fields as JSON strings for later inspection
                        "ai_type": analysis.get("type",""),
                        "ai_brand": analysis.get("brand",""),
                        "ai_summary": analysis.get("summary",""),
                        "ai_tags": json.dumps(analysis.get("tags", [])),
                        "title": title,
                    }
                    url = upload_b64_to_firebase(b64, path, content_type="image/jpeg", metadata=metadata)
                    itm["thumbnail_url"] = url
                    itm["thumbnail_path"] = path
                    # drop the inline payload once a URL exists
                    itm.pop("thumbnail_b64", None)
                    itm["_session_id"] = session_id
                    log.debug("Auto-uploaded thumbnail for %s -> %s (session=%s) title=%s", item_id, url, session_id, title)
                except Exception as up_e:
                    log.warning("Auto-upload failed for %s: %s", item_id, up_e)
                    # keep thumbnail_b64 and analysis for client fallback
        else:
            if not FIREBASE_ADMIN_JSON:
                log.info("FIREBASE_ADMIN_JSON not set; skipping server-side thumbnail upload.")
            else:
                log.info("Firebase admin SDK not available; skipping server-side thumbnail upload.")
            # For non-upload path, still add a title derived from label/unknown so frontend has it
            for itm in items_out:
                if "title" not in itm:
                    analysis = itm.get("analysis") or {"type":"unknown","tags":[]}
                    title = choose_category_from_candidates(analysis.get("type",""), itm.get("label",""), tags=analysis.get("tags", []))
                    itm["title"] = title

        return jsonify({"ok": True, "items": items_out, "session_id": session_id, "debug": {"raw_model_text": (raw_text or "")[:1600]}}), 200

    except Exception as ex:
        # any unexpected failure after decode: degrade to contour crops
        log.exception("Processing error: %s", ex)
        try:
            items_out = fallback_contour_crops(bgr_img, max_items=8)
            # give fallback items a default title so frontend can filter
            for itm in items_out:
                if "title" not in itm:
                    itm["title"] = choose_category_from_candidates(itm.get("label","unknown"))
            return jsonify({"ok": True, "items": items_out, "session_id": session_id, "debug": {"error": str(ex)}}), 200
        except Exception as e2:
            log.exception("Fallback also failed: %s", e2)
            return jsonify({"error": "internal failure", "detail": str(e2)}), 500
561
-
562
- # ---------- Finalize endpoint: keep selected and delete only session's temp files ----------
563
@app.route("/finalize_detections", methods=["POST"])
def finalize_detections():
    """
    Body JSON:
      { "uid": "user123", "keep_ids": ["id1","id2",...], "session_id": "<session id from /process>" }

    Server will delete only detected/<uid>/* files whose:
      - metadata.tmp == "true"
      - metadata.session_id == session_id
      - item_id NOT in keep_ids

    Returns:
      { ok: True, kept: [...], deleted: [...], errors: [...] }
    """
    try:
        body = request.get_json(force=True)
    except Exception:
        return jsonify({"error": "invalid json"}), 400

    uid = (body.get("uid") or request.args.get("uid") or "anon").strip() or "anon"
    keep_ids = set(body.get("keep_ids") or [])
    session_id = (body.get("session_id") or request.args.get("session_id") or "").strip()

    # refusing to run without a session id prevents deleting another
    # session's temp files under the same uid
    if not session_id:
        return jsonify({"error": "session_id required for finalize to avoid unsafe deletes"}), 400

    if not FIREBASE_ADMIN_JSON or not FIREBASE_ADMIN_AVAILABLE:
        return jsonify({"error": "firebase admin not configured"}), 500

    try:
        init_firebase_admin_if_needed()
        bucket = fb_storage.bucket()
    except Exception as e:
        log.exception("Firebase init error in finalize: %s", e)
        return jsonify({"error": "firebase admin init failed", "detail": str(e)}), 500

    # uid is used as a storage path segment, so strip anything unsafe
    safe_uid = "".join(ch for ch in uid if ch.isalnum() or ch in ("-", "_")) or "anon"
    prefix = f"detected/{safe_uid}/"

    kept = []
    deleted = []
    errors = []

    try:
        blobs = list(bucket.list_blobs(prefix=prefix))
        for blob in blobs:
            try:
                name = blob.name
                fname = name.split("/")[-1]
                if "." not in fname:
                    continue
                # item id is the filename without its extension
                item_id = fname.rsplit(".", 1)[0]

                md = blob.metadata or {}
                # only consider temporary files matching this session id
                if str(md.get("session_id", "")) != session_id or str(md.get("tmp", "")).lower() not in ("true", "1", "yes"):
                    continue

                if item_id in keep_ids:
                    # ensure public URL available if possible
                    try:
                        blob.make_public()
                        url = blob.public_url
                    except Exception:
                        url = f"gs://{bucket.name}/{name}"

                    # extract AI metadata (if present)
                    ai_type = md.get("ai_type") or ""
                    ai_brand = md.get("ai_brand") or ""
                    ai_summary = md.get("ai_summary") or ""
                    ai_tags_raw = md.get("ai_tags") or "[]"
                    title_meta = md.get("title") or ""
                    try:
                        # ai_tags was stored as a JSON string at upload time
                        ai_tags = json.loads(ai_tags_raw) if isinstance(ai_tags_raw, str) else ai_tags_raw
                    except Exception:
                        ai_tags = []
                    # derive title: prefer stored metadata title, then ai_type/tags/summary
                    title = None
                    if title_meta:
                        try:
                            title = json.loads(title_meta) if (title_meta.startswith('[') or title_meta.startswith('{')) else str(title_meta)
                        except Exception:
                            title = str(title_meta)
                    if not title:
                        title = choose_category_from_candidates(ai_type, ai_summary, tags=ai_tags)
                    kept.append({
                        "id": item_id,
                        "thumbnail_url": url,
                        "thumbnail_path": name,
                        "analysis": {
                            "type": ai_type,
                            "brand": ai_brand,
                            "summary": ai_summary,
                            "tags": ai_tags
                        },
                        "title": title
                    })
                else:
                    # temp file from this session that the user did not keep
                    try:
                        blob.delete()
                        deleted.append(item_id)
                    except Exception as de:
                        errors.append({"id": item_id, "error": str(de)})
            except Exception as e:
                # per-blob failures are collected, not fatal
                errors.append({"blob": getattr(blob, "name", None), "error": str(e)})
        return jsonify({"ok": True, "kept": kept, "deleted": deleted, "errors": errors}), 200
    except Exception as e:
        log.exception("finalize_detections error: %s", e)
        return jsonify({"error": "internal", "detail": str(e)}), 500
672
-
673
- # ---------- Clear session: delete all temporary files for a session ----------
674
@app.route("/clear_session", methods=["POST"])
def clear_session():
    """Delete every temporary blob belonging to one /process session.

    Body JSON: { "session_id": "<id>", "uid": "<optional uid>" }
    Removes all detected/<uid>/* blobs whose metadata marks them tmp and
    whose session_id matches.  Responds with the deleted item ids plus any
    per-blob errors.
    """
    try:
        body = request.get_json(force=True)
    except Exception:
        return jsonify({"error": "invalid json"}), 400

    session_id = (body.get("session_id") or request.args.get("session_id") or "").strip()
    uid = (body.get("uid") or request.args.get("uid") or "anon").strip() or "anon"

    if not session_id:
        return jsonify({"error": "session_id required"}), 400

    if not FIREBASE_ADMIN_JSON or not FIREBASE_ADMIN_AVAILABLE:
        return jsonify({"error": "firebase admin not configured"}), 500

    try:
        init_firebase_admin_if_needed()
        bucket = fb_storage.bucket()
    except Exception as e:
        log.exception("Firebase init error in clear_session: %s", e)
        return jsonify({"error": "firebase admin init failed", "detail": str(e)}), 500

    # uid is a storage path segment: keep only filename-safe characters
    safe_uid = "".join(ch for ch in uid if ch.isalnum() or ch in ("-", "_")) or "anon"

    deleted = []
    errors = []
    try:
        for blob in list(bucket.list_blobs(prefix=f"detected/{safe_uid}/")):
            try:
                meta = blob.metadata or {}
                is_tmp = str(meta.get("tmp", "")).lower() in ("true", "1", "yes")
                if not is_tmp or str(meta.get("session_id", "")) != session_id:
                    continue
                try:
                    blob.delete()
                    # report the bare item id (filename without extension)
                    deleted.append(blob.name.split("/")[-1].rsplit(".", 1)[0])
                except Exception as de:
                    errors.append({"blob": blob.name, "error": str(de)})
            except Exception as e:
                errors.append({"blob": getattr(blob, "name", None), "error": str(e)})
        return jsonify({"ok": True, "deleted": deleted, "errors": errors}), 200
    except Exception as e:
        log.exception("clear_session error: %s", e)
        return jsonify({"error": "internal", "detail": str(e)}), 500
723
-
724
if __name__ == "__main__":
    # port is injected by the hosting platform (e.g. HF Spaces sets PORT)
    port = int(os.getenv("PORT", 7860))
    # Werkzeug's debug mode exposes an interactive debugger that allows
    # arbitrary code execution — never enable it by default on a reachable
    # server.  Opt in explicitly with FLASK_DEBUG=1 for local development.
    debug = os.getenv("FLASK_DEBUG", "").strip().lower() in ("1", "true", "yes")
    log.info("Starting server on 0.0.0.0:%d (debug=%s)", port, debug)
    app.run(host="0.0.0.0", port=port, debug=debug)
 
 
1
+ # server_gemini_seg.py — item titles are constrained to the frontend's CATEGORY_OPTIONS list so filtering works reliably (no generic "top"/"unknown" titles).
2
+
3
  import os
4
  import io
5
  import json
 
7
  import logging
8
  import uuid
9
  import time
 
10
  from typing import List, Dict, Any, Tuple, Optional
11
 
12
  from flask import Flask, request, jsonify
 
15
  import numpy as np
16
  import cv2
17
 
18
+ # genai client
19
+
20
  from google import genai
21
  from google.genai import types
22
 
23
+ # Firebase Admin (in-memory JSON init)
24
+
25
  try:
26
+ import firebase_admin
27
+ from firebase_admin import credentials as fb_credentials, storage as fb_storage
28
+ FIREBASE_ADMIN_AVAILABLE = True
29
  except Exception:
30
+ firebase_admin = None
31
+ fb_credentials = None
32
+ fb_storage = None
33
+ FIREBASE_ADMIN_AVAILABLE = False
34
 
35
logging.basicConfig(level=logging.INFO)
log = logging.getLogger("wardrobe-server")

GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "")
if not GEMINI_API_KEY:
    log.warning("GEMINI_API_KEY not set — gemini calls will fail (but fallback still works).")

client = genai.Client(api_key=GEMINI_API_KEY) if GEMINI_API_KEY else None

# Firebase config (read service account JSON from env)
FIREBASE_ADMIN_JSON = os.getenv("FIREBASE_ADMIN_JSON", "").strip()
FIREBASE_STORAGE_BUCKET = os.getenv("FIREBASE_STORAGE_BUCKET", "").strip()  # optional override

if FIREBASE_ADMIN_JSON and not FIREBASE_ADMIN_AVAILABLE:
    log.warning("FIREBASE_ADMIN_JSON provided but firebase-admin SDK is not installed. Install firebase-admin.")

# Fix: this revision read `Flask(name)` — a NameError; the markdown/diff
# rendering stripped the dunder underscores from `__name__`.
app = Flask(__name__)
CORS(app)

# ---------- Firebase init helpers ----------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  _firebase_app = None
58
 
59
  def init_firebase_admin_if_needed():
60
+ global _firebase_app
61
+ if _firebase_app is not None:
62
+ return _firebase_app
63
+ if not FIREBASE_ADMIN_JSON:
64
+ log.info("No FIREBASE_ADMIN_JSON env var set; skipping Firebase admin init.")
65
+ return None
66
+ if not FIREBASE_ADMIN_AVAILABLE:
67
+ raise RuntimeError("firebase-admin not installed (pip install firebase-admin)")
68
+ try:
69
+ sa_obj = json.loads(FIREBASE_ADMIN_JSON)
70
+ except Exception as e:
71
+ log.exception("Failed parsing FIREBASE_ADMIN_JSON: %s", e)
72
+ raise
73
+ bucket_name = FIREBASE_STORAGE_BUCKET or (sa_obj.get("project_id") and f"{sa_obj.get('project_id')}.appspot.com")
74
+ if not bucket_name:
75
+ raise RuntimeError("Could not determine storage bucket. Set FIREBASE_STORAGE_BUCKET or include project_id in service account JSON.")
76
+ try:
77
+ cred = fb_credentials.Certificate(sa_obj)
78
+ _firebase_app = firebase_admin.initialize_app(cred, {"storageBucket": bucket_name})
79
+ log.info("Initialized firebase admin with bucket: %s", bucket_name)
80
+ return _firebase_app
81
+ except Exception as e:
82
+ log.exception("Failed to initialize firebase admin: %s", e)
83
+ raise
84
 
85
def upload_b64_to_firebase(base64_str: str, path: str, content_type="image/jpeg", metadata: dict = None) -> str:
    """
    Decode a base64 image and store it in Firebase Storage under *path*.

    Accepts raw base64 or a data-URL ("data:...;base64,....") payload.
    Optional *metadata* is attached as custom blob metadata (non-string
    values are JSON-encoded, since custom metadata values must be strings).
    Returns the blob's public URL when it can be made public, otherwise a
    gs://<bucket>/<path> reference.
    """
    if not FIREBASE_ADMIN_JSON:
        raise RuntimeError("FIREBASE_ADMIN_JSON not set")
    init_firebase_admin_if_needed()
    if not FIREBASE_ADMIN_AVAILABLE:
        raise RuntimeError("firebase-admin not available")

    # Strip a data-URL header if present, then remove any embedded line breaks
    # before decoding.
    payload = base64_str.split(",", 1)[1] if base64_str.startswith("data:") else base64_str
    decoded = base64.b64decode(payload.replace("\n", "").replace("\r", ""))

    try:
        bucket = fb_storage.bucket()
        blob = bucket.blob(path)
        blob.upload_from_string(decoded, content_type=content_type)
        if metadata:
            # Best-effort metadata patch: a failure here must not lose the upload.
            try:
                blob.metadata = {
                    key: (value if isinstance(value, str) else json.dumps(value))
                    for key, value in metadata.items()
                }
                blob.patch()
            except Exception as me:
                log.warning("Failed to patch metadata for %s: %s", path, me)
        try:
            blob.make_public()
            return blob.public_url
        except Exception as e:
            # Bucket may forbid public ACLs; fall back to a gs:// reference.
            log.warning("Could not make blob public: %s", e)
            return f"gs://{bucket.name}/{path}"
    except Exception as e:
        log.exception("Firebase upload error for path %s: %s", path, e)
        raise
123
+
124
# ---------- Image helpers (with EXIF transpose) ----------
125
+
126
def read_image_bytes(file_storage) -> Tuple[np.ndarray, int, int, bytes]:
    """
    Decode an uploaded image file into an OpenCV-ready array.

    Applies the EXIF orientation tag (so phone photos come out upright) and
    converts to RGB before flipping channel order to BGR for OpenCV.
    Returns (bgr_array, width, height, original_raw_bytes).
    """
    raw = file_storage.read()
    pil_img = Image.open(io.BytesIO(raw))
    try:
        # Honour EXIF orientation; silently skip images without usable EXIF.
        pil_img = ImageOps.exif_transpose(pil_img)
    except Exception:
        pass
    pil_img = pil_img.convert("RGB")
    width, height = pil_img.size
    bgr = np.array(pil_img)[:, :, ::-1]  # PIL yields RGB; OpenCV expects BGR
    return bgr, width, height, raw
141
 
142
def crop_and_b64(bgr_img: np.ndarray, x: int, y: int, w: int, h: int, max_side=512) -> str:
    """
    Crop a rectangle out of a BGR image and return it as base64 JPEG.

    The rectangle is clamped to the image bounds; if it is reduced to an
    empty region, "" is returned. Crops larger than *max_side* on their
    longest edge are downscaled to fit before encoding (quality 82).
    """
    img_h, img_w = bgr_img.shape[:2]
    left = max(0, int(x))
    top = max(0, int(y))
    right = min(img_w, int(x + w))
    bottom = min(img_h, int(y + h))
    region = bgr_img[top:bottom, left:right]
    if region.size == 0:
        return ""
    longest = max(region.shape[0], region.shape[1])
    if longest > max_side:
        factor = max_side / longest
        new_size = (int(region.shape[1] * factor), int(region.shape[0] * factor))
        region = cv2.resize(region, new_size, interpolation=cv2.INTER_AREA)
    _, jpeg = cv2.imencode(".jpg", region, [int(cv2.IMWRITE_JPEG_QUALITY), 82])
    return base64.b64encode(jpeg.tobytes()).decode("ascii")
155
 
156
def fallback_contour_crops(bgr_img, max_items=8) -> List[Dict[str, Any]]:
    """
    Heuristic (non-AI) detector used when Gemini is unavailable or finds nothing.

    Adaptive-thresholds the image, morphologically closes gaps, and turns the
    largest external contours into padded bounding-box crops. If no contour
    passes the minimum-area filter, falls back to a fixed 2x2 grid of quadrant
    crops so the client always has something to offer the user.

    Returns a list of item dicts shaped like the Gemini detections:
    {id, label, confidence, bbox{x,y,w,h}, thumbnail_b64, source}.
    """
    gray = cv2.cvtColor(bgr_img, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (7, 7), 0)
    thresh = cv2.adaptiveThreshold(blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 15, 6)
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9, 9))
    closed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
    contours, _ = cv2.findContours(closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    h_img, w_img = bgr_img.shape[:2]
    # Ignore specks smaller than 0.5% of the frame area.
    # (Fix: the multiplication operators had been lost — "w_imgh_img" is a NameError.)
    min_area = (w_img * h_img) * 0.005
    items = []
    for cnt in sorted(contours, key=cv2.contourArea, reverse=True):
        if len(items) >= max_items:
            break
        area = cv2.contourArea(cnt)
        if area < min_area:
            continue
        x, y, w, h = cv2.boundingRect(cnt)
        # Pad each box by 7% per side, clamped to the image bounds.
        pad_x, pad_y = int(w * 0.07), int(h * 0.07)
        x = max(0, x - pad_x); y = max(0, y - pad_y)
        w = min(w_img - x, w + pad_x * 2); h = min(h_img - y, h + pad_y * 2)
        b64 = crop_and_b64(bgr_img, x, y, w, h)
        if not b64:
            continue
        items.append({
            "id": str(uuid.uuid4()),
            "label": "unknown",
            # Crude confidence proxy: fraction of the frame covered, clamped
            # to [0.25, 0.95] so it is never treated as certain or as noise.
            "confidence": min(0.95, max(0.25, area / (w_img * h_img))),
            "bbox": {"x": x, "y": y, "w": w, "h": h},
            "thumbnail_b64": b64,
            "source": "fallback"
        })
    if not items:
        # Nothing detected: return the four image quadrants so the user can
        # still pick a region manually.
        h_half, w_half = h_img // 2, w_img // 2
        rects = [
            (0, 0, w_half, h_half), (w_half, 0, w_half, h_half),
            (0, h_half, w_half, h_half), (w_half, h_half, w_half, h_half)
        ]
        for r in rects:
            b64 = crop_and_b64(bgr_img, r[0], r[1], r[2], r[3])
            if b64:
                items.append({
                    "id": str(uuid.uuid4()),
                    "label": "unknown",
                    "confidence": 0.3,
                    "bbox": {"x": r[0], "y": r[1], "w": r[2], "h": r[3]},
                    "thumbnail_b64": b64,
                    "source": "fallback-grid"
                })
    return items
205
+
206
# ---------- AI analysis helper ----------
207
+
208
def analyze_crop_with_gemini(jpeg_b64: str) -> Dict[str, Any]:
    """
    Run Gemini on the cropped image bytes to extract:
      type (one-word category like 'shoe', 'jacket', 'dress'),
      summary (single-line description),
      brand (string or empty),
      tags (array of short descriptors)
    Returns dict, falls back to empty/defaults on error or missing key.
    """
    if not client:
        # No GEMINI_API_KEY at startup: degrade to neutral defaults, never fail.
        return {"type": "unknown", "summary": "", "brand": "", "tags": []}
    try:
        # prepare prompt
        prompt = (
            "You are an assistant that identifies clothing item characteristics from an image. "
            "Return only a JSON object with keys: type (single word like 'shoe','top','jacket'), "
            "summary (a single short sentence, one line), brand (brand name if visible else empty string), "
            "tags (an array of short single-word tags describing visible attributes, e.g. ['striped','leather','white']). "
            "Keep values short and concise."
        )

        contents = [
            types.Content(role="user", parts=[types.Part.from_text(text=prompt)])
        ]

        # attach the image bytes
        image_bytes = base64.b64decode(jpeg_b64)
        contents.append(types.Content(role="user", parts=[types.Part.from_bytes(data=image_bytes, mime_type="image/jpeg")]))

        # Structured-output schema: constrains the model to emit parseable JSON.
        schema = {
            "type": "object",
            "properties": {
                "type": {"type": "string"},
                "summary": {"type": "string"},
                "brand": {"type": "string"},
                "tags": {"type": "array", "items": {"type": "string"}}
            },
            "required": ["type", "summary"]
        }
        cfg = types.GenerateContentConfig(response_mime_type="application/json", response_schema=schema)

        # call model (use the same model family you used before)
        resp = client.models.generate_content(model="gemini-2.5-flash-lite", contents=contents, config=cfg)
        text = resp.text or ""
        parsed = {}
        try:
            parsed = json.loads(text)
            # coerce expected shapes — the model may return non-strings / non-lists
            parsed["type"] = str(parsed.get("type", "")).strip()
            parsed["summary"] = str(parsed.get("summary", "")).strip()
            parsed["brand"] = str(parsed.get("brand", "")).strip()
            tags = parsed.get("tags", [])
            if not isinstance(tags, list):
                tags = []
            parsed["tags"] = [str(t).strip() for t in tags if str(t).strip()]
        except Exception as e:
            # Truncate raw text so a huge malformed response cannot flood the log.
            log.warning("Failed parsing Gemini analysis JSON: %s — raw: %s", e, (text[:300] if text else ""))
            parsed = {"type": "unknown", "summary": "", "brand": "", "tags": []}
        # Normalize falsy values so callers always get the four keys populated.
        return {
            "type": parsed.get("type", "unknown") or "unknown",
            "summary": parsed.get("summary", "") or "",
            "brand": parsed.get("brand", "") or "",
            "tags": parsed.get("tags", []) or []
        }
    except Exception as e:
        # Network/API failures also degrade to defaults so the pipeline keeps going.
        log.exception("analyze_crop_with_gemini failure: %s", e)
        return {"type": "unknown", "summary": "", "brand": "", "tags": []}
275
+
276
# ---------- Main / processing ----------
277
+
278
@app.route("/process", methods=["POST"])
def process_image():
    """
    Detect clothing items in an uploaded photo.

    Form fields: "photo" (required image file), "uid" (optional user id,
    defaults to "anon"). Pipeline:
      1. Ask Gemini for normalized bounding boxes of clothing items.
      2. Crop each box into a base64 JPEG thumbnail.
      3. If Gemini returns nothing (or parsing fails), use contour-based
         fallback crops instead.
      4. When Firebase admin is configured, analyze each crop (type/brand/
         summary/tags) and upload it as a *temporary* blob tagged with a
         per-request session_id + tmp=true metadata, so /finalize_detections
         or /clear_session can clean it up later.

    Response JSON: { ok, items, session_id, debug }. Items carry
    thumbnail_url/thumbnail_path when uploaded, otherwise thumbnail_b64.
    """
    if "photo" not in request.files:
        return jsonify({"error": "missing photo"}), 400
    file = request.files["photo"]

    uid = (request.form.get("uid") or request.args.get("uid") or "anon").strip() or "anon"

    try:
        bgr_img, img_w, img_h, raw_bytes = read_image_bytes(file)
    except Exception as e:
        log.error("invalid image: %s", e)
        return jsonify({"error": "invalid image"}), 400

    # One session id per request; uploaded blobs are tagged with it so the
    # cleanup endpoints can delete exactly this request's temp files.
    session_id = str(uuid.uuid4())

    # Detection prompt (same as before)
    user_prompt = (
        "You are an assistant that extracts clothing detections from a single image. "
        "Return a JSON object with a single key 'items' which is an array. Each item must have: "
        "label (string, short like 'top','skirt','sneakers'), "
        "bbox with normalized coordinates between 0 and 1: {x, y, w, h} where x,y are top-left relative to width/height, "
        "confidence (0-1). Example output: {\"items\":[{\"label\":\"top\",\"bbox\":{\"x\":0.1,\"y\":0.2,\"w\":0.3,\"h\":0.4},\"confidence\":0.95}]} "
        "Output ONLY valid JSON. If you cannot detect any clothing confidently, return {\"items\":[]}."
    )

    try:
        contents = [
            types.Content(role="user", parts=[types.Part.from_text(text=user_prompt)])
        ]
        contents.append(types.Content(role="user", parts=[types.Part.from_bytes(data=raw_bytes, mime_type="image/jpeg")]))

        # Structured-output schema mirroring the prompt's item shape.
        schema = {
            "type": "object",
            "properties": {
                "items": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "label": {"type": "string"},
                            "bbox": {
                                "type": "object",
                                "properties": {
                                    "x": {"type": "number"},
                                    "y": {"type": "number"},
                                    "w": {"type": "number"},
                                    "h": {"type": "number"}
                                },
                                "required": ["x","y","w","h"]
                            },
                            "confidence": {"type": "number"}
                        },
                        "required": ["label","bbox","confidence"]
                    }
                }
            },
            "required": ["items"]
        }

        cfg = types.GenerateContentConfig(response_mime_type="application/json", response_schema=schema)

        log.info("Calling Gemini model for detection (gemini-2.5-flash-lite)...")
        # client is None when GEMINI_API_KEY is unset; we then fall through to the fallback.
        model_resp = client.models.generate_content(model="gemini-2.5-flash-lite", contents=contents, config=cfg) if client else None
        raw_text = (model_resp.text or "") if model_resp else ""
        log.info("Gemini raw response length: %d", len(raw_text))

        parsed = None
        try:
            parsed = json.loads(raw_text) if raw_text else None
        except Exception as e:
            log.warning("Could not parse Gemini JSON: %s", e)
            parsed = None

        items_out: List[Dict[str, Any]] = []
        if parsed and isinstance(parsed.get("items"), list) and len(parsed["items"])>0:
            for it in parsed["items"]:
                try:
                    label = str(it.get("label","unknown"))[:48]
                    bbox = it.get("bbox",{})
                    # Normalized coords from the model; clamp to [0,1] defensively.
                    nx = float(bbox.get("x",0))
                    ny = float(bbox.get("y",0))
                    nw = float(bbox.get("w",0))
                    nh = float(bbox.get("h",0))
                    nx = max(0.0, min(1.0, nx)); ny = max(0.0,min(1.0,ny))
                    nw = max(0.0, min(1.0, nw)); nh = max(0.0, min(1.0, nh))
                    # Convert to pixel-space box.
                    px = int(nx * img_w); py = int(ny * img_h)
                    pw = int(nw * img_w); ph = int(nh * img_h)
                    if pw <= 8 or ph <= 8:
                        # Too small to be a usable thumbnail.
                        continue
                    b64 = crop_and_b64(bgr_img, px, py, pw, ph)
                    if not b64:
                        continue
                    items_out.append({
                        "id": str(uuid.uuid4()),
                        "label": label,
                        "confidence": float(it.get("confidence", 0.5)),
                        "bbox": {"x": px, "y": py, "w": pw, "h": ph},
                        "thumbnail_b64": b64,
                        "source": "gemini"
                    })
                except Exception as e:
                    # One malformed item must not sink the whole detection set.
                    log.warning("skipping item due to error: %s", e)
        else:
            log.info("Gemini returned no items or parse failed — using fallback contour crops.")
            items_out = fallback_contour_crops(bgr_img, max_items=8)

        # Perform AI analysis per crop (if possible) and auto-upload to firebase with metadata (tmp + session)
        if FIREBASE_ADMIN_JSON and FIREBASE_ADMIN_AVAILABLE:
            try:
                init_firebase_admin_if_needed()
                # NOTE(review): `bucket` is assigned but the upload below goes
                # through upload_b64_to_firebase, which fetches its own bucket.
                bucket = fb_storage.bucket()
            except Exception as e:
                log.exception("Firebase admin init for upload failed: %s", e)
                bucket = None

            # Sanitize uid so it is safe to embed in a storage path.
            safe_uid = "".join(ch for ch in uid if ch.isalnum() or ch in ("-", "_")) or "anon"
            for itm in items_out:
                b64 = itm.get("thumbnail_b64")
                if not b64:
                    continue
                # analyze
                try:
                    analysis = analyze_crop_with_gemini(b64) if client else {"type":"unknown","summary":"","brand":"","tags":[]}
                except Exception as ae:
                    log.warning("analysis failed: %s", ae)
                    analysis = {"type":"unknown","summary":"","brand":"","tags":[]}

                itm["analysis"] = analysis

                item_id = itm.get("id") or str(uuid.uuid4())
                path = f"detected/{safe_uid}/{item_id}.jpg"
                try:
                    # tmp=true + session_id make these blobs discoverable/deletable
                    # by /finalize_detections and /clear_session.
                    metadata = {
                        "tmp": "true",
                        "session_id": session_id,
                        "uploaded_by": safe_uid,
                        "uploaded_at": str(int(time.time())),
                        # store AI fields as JSON strings for later inspection
                        "ai_type": analysis.get("type",""),
                        "ai_brand": analysis.get("brand",""),
                        "ai_summary": analysis.get("summary",""),
                        "ai_tags": json.dumps(analysis.get("tags", [])),
                    }
                    url = upload_b64_to_firebase(b64, path, content_type="image/jpeg", metadata=metadata)
                    itm["thumbnail_url"] = url
                    itm["thumbnail_path"] = path
                    # Drop the inline image once it lives in storage (smaller response).
                    itm.pop("thumbnail_b64", None)
                    itm["_session_id"] = session_id
                    log.debug("Auto-uploaded thumbnail for %s -> %s (session=%s)", item_id, url, session_id)
                except Exception as up_e:
                    log.warning("Auto-upload failed for %s: %s", item_id, up_e)
                    # keep thumbnail_b64 and analysis for client fallback
        else:
            # Without Firebase the client receives raw thumbnail_b64 values and
            # no per-item analysis is performed.
            if not FIREBASE_ADMIN_JSON:
                log.info("FIREBASE_ADMIN_JSON not set; skipping server-side thumbnail upload.")
            else:
                log.info("Firebase admin SDK not available; skipping server-side thumbnail upload.")

        return jsonify({"ok": True, "items": items_out, "session_id": session_id, "debug": {"raw_model_text": (raw_text or "")[:1600]}}), 200

    except Exception as ex:
        log.exception("Processing error: %s", ex)
        # bgr_img/session_id are safe to use: both are assigned before the try block.
        try:
            items_out = fallback_contour_crops(bgr_img, max_items=8)
            return jsonify({"ok": True, "items": items_out, "session_id": session_id, "debug": {"error": str(ex)}}), 200
        except Exception as e2:
            log.exception("Fallback also failed: %s", e2)
            return jsonify({"error": "internal failure", "detail": str(e2)}), 500
447
+
448
# ---------- Finalize endpoint: keep selected and delete only session's temp files ----------
449
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
450
@app.route("/finalize_detections", methods=["POST"])
def finalize_detections():
    """
    Body JSON:
      { "uid": "user123", "keep_ids": ["id1","id2",...], "session_id": "<session id from /process>" }

    Server will delete only detected/<uid>/* files whose:
      - metadata.tmp == "true"
      - metadata.session_id == session_id
      - item_id NOT in keep_ids

    Kept files are made public (when possible) and returned with the AI
    analysis that /process stored in their custom metadata.

    Returns:
      { ok: True, kept: [...], deleted: [...], errors: [...] }
    """
    try:
        body = request.get_json(force=True)
    except Exception:
        return jsonify({"error": "invalid json"}), 400

    uid = (body.get("uid") or request.args.get("uid") or "anon").strip() or "anon"
    keep_ids = set(body.get("keep_ids") or [])
    session_id = (body.get("session_id") or request.args.get("session_id") or "").strip()

    # Without a session id we could delete another session's temp files — refuse.
    if not session_id:
        return jsonify({"error": "session_id required for finalize to avoid unsafe deletes"}), 400

    if not FIREBASE_ADMIN_JSON or not FIREBASE_ADMIN_AVAILABLE:
        return jsonify({"error": "firebase admin not configured"}), 500

    try:
        init_firebase_admin_if_needed()
        bucket = fb_storage.bucket()
    except Exception as e:
        log.exception("Firebase init error in finalize: %s", e)
        return jsonify({"error": "firebase admin init failed", "detail": str(e)}), 500

    # Sanitize uid the same way /process does, so the prefix matches uploads.
    safe_uid = "".join(ch for ch in uid if ch.isalnum() or ch in ("-", "_")) or "anon"
    prefix = f"detected/{safe_uid}/"

    kept = []
    deleted = []
    errors = []

    try:
        blobs = list(bucket.list_blobs(prefix=prefix))
        for blob in blobs:
            try:
                name = blob.name
                fname = name.split("/")[-1]
                if "." not in fname:
                    continue
                # item_id is the filename without its extension (see /process upload path).
                item_id = fname.rsplit(".", 1)[0]

                md = blob.metadata or {}
                # only consider temporary files matching this session id
                if str(md.get("session_id", "")) != session_id or str(md.get("tmp", "")).lower() not in ("true", "1", "yes"):
                    continue

                if item_id in keep_ids:
                    # ensure public URL available if possible
                    try:
                        blob.make_public()
                        url = blob.public_url
                    except Exception:
                        url = f"gs://{bucket.name}/{name}"

                    # extract AI metadata (if present)
                    ai_type = md.get("ai_type") or ""
                    ai_brand = md.get("ai_brand") or ""
                    ai_summary = md.get("ai_summary") or ""
                    ai_tags_raw = md.get("ai_tags") or "[]"
                    try:
                        # ai_tags was stored as a JSON string; tolerate either form.
                        ai_tags = json.loads(ai_tags_raw) if isinstance(ai_tags_raw, str) else ai_tags_raw
                    except Exception:
                        ai_tags = []
                    kept.append({
                        "id": item_id,
                        "thumbnail_url": url,
                        "thumbnail_path": name,
                        "analysis": {
                            "type": ai_type,
                            "brand": ai_brand,
                            "summary": ai_summary,
                            "tags": ai_tags
                        }
                    })
                else:
                    try:
                        blob.delete()
                        deleted.append(item_id)
                    except Exception as de:
                        errors.append({"id": item_id, "error": str(de)})
            except Exception as e:
                # Per-blob failures are collected, not fatal for the whole pass.
                errors.append({"blob": getattr(blob, "name", None), "error": str(e)})
        return jsonify({"ok": True, "kept": kept, "deleted": deleted, "errors": errors}), 200
    except Exception as e:
        log.exception("finalize_detections error: %s", e)
        return jsonify({"error": "internal", "detail": str(e)}), 500
548
+
549
# ---------- Clear session: delete all temporary files for a session ----------
550
+
 
 
 
 
 
 
 
 
 
 
551
@app.route("/clear_session", methods=["POST"])
def clear_session():
    """
    Delete every temporary detection blob belonging to one /process session.

    Body JSON: { "session_id": "<id>", "uid": "<optional uid>" }
    Only blobs under detected/<uid>/ whose custom metadata carries this
    session_id and tmp == "true" are removed; everything else is untouched.
    """
    try:
        payload = request.get_json(force=True)
    except Exception:
        return jsonify({"error": "invalid json"}), 400

    sid = (payload.get("session_id") or request.args.get("session_id") or "").strip()
    user_id = (payload.get("uid") or request.args.get("uid") or "anon").strip() or "anon"

    if not sid:
        return jsonify({"error": "session_id required"}), 400

    if not (FIREBASE_ADMIN_JSON and FIREBASE_ADMIN_AVAILABLE):
        return jsonify({"error": "firebase admin not configured"}), 500

    try:
        init_firebase_admin_if_needed()
        bucket = fb_storage.bucket()
    except Exception as e:
        log.exception("Firebase init error in clear_session: %s", e)
        return jsonify({"error": "firebase admin init failed", "detail": str(e)}), 500

    # Same uid sanitization as the upload path, so the prefix lines up.
    sanitized = "".join(ch for ch in user_id if ch.isalnum() or ch in ("-", "_")) or "anon"

    removed = []
    problems = []
    try:
        for blob in list(bucket.list_blobs(prefix=f"detected/{sanitized}/")):
            try:
                meta = blob.metadata or {}
                same_session = str(meta.get("session_id", "")) == sid
                is_temp = str(meta.get("tmp", "")).lower() in ("true", "1", "yes")
                if not (same_session and is_temp):
                    continue
                try:
                    blob.delete()
                    # Report the item id (filename minus extension).
                    removed.append(blob.name.split("/")[-1].rsplit(".", 1)[0])
                except Exception as de:
                    problems.append({"blob": blob.name, "error": str(de)})
            except Exception as e:
                problems.append({"blob": getattr(blob, "name", None), "error": str(e)})
        return jsonify({"ok": True, "deleted": removed, "errors": problems}), 200
    except Exception as e:
        log.exception("clear_session error: %s", e)
        return jsonify({"error": "internal", "detail": str(e)}), 500
600
+
601
# Script entrypoint — the markdown-mangled "if name == \"main\"" would raise
# NameError; restore the standard dunder guard.
if __name__ == "__main__":
    port = int(os.getenv("PORT", 7860))
    log.info("Starting server on 0.0.0.0:%d", port)
    # NOTE(review): debug=True enables the Werkzeug reloader and interactive
    # debugger (remote code execution if exposed) — disable before deploying.
    app.run(host="0.0.0.0", port=port, debug=True)
605
+