mlbench123 committed on
Commit
4017b6f
·
verified ·
1 Parent(s): 415eb49

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +336 -200
app.py CHANGED
@@ -1,202 +1,317 @@
1
  """
2
- Amazon Trailer Inspector
3
- HuggingFace Spaces + Gradio 5.x pipeline
4
- Gemma-3 (primary) β†’ Llama-3.2-Vision β†’ Qwen2.5-VL (fallbacks)
 
 
 
 
 
 
 
5
  """
6
 
7
  import gradio as gr
8
  import base64
9
  import concurrent.futures
10
  import json
11
- import re
12
  import os
13
- from PIL import Image
14
  import io
15
- from huggingface_hub import InferenceClient, HfApi
16
 
17
- # ──────────────────────────────────────────────────────────────
18
- # Model chain (tried in order, first success wins)
19
- # ──────────────────────────────────────────────────────────────
 
 
 
20
  MODELS = [
21
- "meta-llama/Llama-3.2-11B-Vision-Instruct", # Most reliable free vision model
22
- "Qwen/Qwen2.5-VL-7B-Instruct", # Fallback 1
23
- "google/gemma-3-4b-it", # Fallback 2
24
  ]
25
 
26
- DETECTION_PROMPT = """You are a precise visual inspector for Amazon trailer fleets.
27
- Carefully examine the trailer image and locate these 4 components:
28
 
29
- 1. SENSORS β€” Two silver/beige DIAMOND (rhombus) shaped metal plates near the rear bottom of the trailer back doors.
30
- 2. GPS_DEVICE β€” A small white or light-gray rectangular electronic box at the upper corner of the trailer rear face.
31
- 3. PRIME_LOGO β€” The Amazon Prime logo: blue swooping arrow/checkmark. Can be full or partial, on rear or side.
32
- 4. TRAILER_ID β€” A vertical fluorescent green or yellow-green ID label strip on the corner post (shows a number like SV2602705).
33
-
34
- Reply ONLY with valid JSON β€” no extra text, no markdown fences:
 
 
 
 
 
 
 
 
 
 
35
  {
36
  "sensors": {"found": true, "confidence": "high", "notes": "two diamond plates visible lower-left"},
37
- "gps_device": {"found": false, "confidence": "medium", "notes": "top corner obscured"},
38
- "prime_logo": {"found": true, "confidence": "high", "notes": "partial prime swoosh on rear panel"},
39
- "trailer_id": {"found": true, "confidence": "high", "notes": "SV2602705 label on right corner post"}
40
  }"""
41
 
42
  KEYS = ["sensors", "gps_device", "prime_logo", "trailer_id"]
43
 
 
 
 
44
 
45
- # ──────────────────────────────────────────────────────────────
46
- # Token validation (runs once at startup)
47
- # ──────────────────────────────────────────────────────────────
 
 
 
 
 
48
 
49
- def check_token() -> tuple[bool, str]:
50
- token = os.environ.get("HF_TOKEN", "").strip()
51
- if not token:
52
- return False, "HF_TOKEN secret is not set. Go to Space Settings β†’ Repository Secrets β†’ add HF_TOKEN."
53
- try:
54
- api = HfApi(token=token)
55
- api.whoami()
56
- return True, "Token OK"
57
- except Exception as e:
58
- return False, f"HF_TOKEN is invalid or expired: {e}"
59
 
60
- TOKEN_OK, TOKEN_MSG = check_token()
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
 
63
- # ──────────────────────────────────────────────────────────────
64
- # Image helpers
65
- # ──────────────────────────────────────────────────────────────
66
 
67
- def pil_to_b64(img: Image.Image, max_side: int = 1024) -> str:
68
- """Resize and encode to base64 JPEG."""
69
- if max(img.size) > max_side:
70
- img = img.copy()
71
- img.thumbnail((max_side, max_side), Image.LANCZOS)
72
- buf = io.BytesIO()
73
- img.save(buf, format="JPEG", quality=85)
74
- return base64.b64encode(buf.getvalue()).decode()
75
 
 
 
 
 
 
76
 
77
- # ──────────────────────────────────────────────────────────────
78
- # LLM call β€” with detailed error capture
79
- # ──────────────────────────────────────────────────────────────
 
 
80
 
81
- def call_model(img: Image.Image, model: str) -> dict:
82
- """Call one vision LLM. Raises ValueError with a descriptive message on failure."""
83
- token = os.environ.get("HF_TOKEN", "").strip() or None
84
- client = InferenceClient(token=token)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  b64 = pil_to_b64(img)
86
 
87
- messages = [{
88
- "role": "user",
89
- "content": [
90
- {
91
- "type": "image_url",
92
- "image_url": {"url": f"data:image/jpeg;base64,{b64}"},
93
- },
 
94
  {
95
- "type": "text",
96
- "text": DETECTION_PROMPT,
97
- },
 
 
 
 
 
 
 
 
 
98
  ],
99
- }]
 
 
 
100
 
101
- resp = client.chat_completion(
102
- model=model,
103
- messages=messages,
104
- max_tokens=512,
105
- temperature=0.05,
106
- )
107
- raw = resp.choices[0].message.content.strip()
108
 
109
- # Strip accidental markdown fences
110
- raw = re.sub(r"^```(?:json)?", "", raw).strip()
111
- raw = re.sub(r"```$", "", raw).strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
 
113
- m = re.search(r"\{[\s\S]*\}", raw)
114
- if not m:
115
- raise ValueError(f"Model returned no JSON.\nRaw output: {raw[:300]}")
116
 
117
- try:
118
- return json.loads(m.group())
119
- except json.JSONDecodeError as e:
120
- raise ValueError(f"JSON parse error: {e}\nRaw: {m.group()[:300]}")
121
 
 
 
 
122
 
123
- def analyze_one(img: Image.Image) -> tuple:
124
  """
125
- Try each model in MODELS order.
126
  Returns (result_dict, model_short_name) on success,
127
- (None, error_summary_string) on total failure.
128
  """
129
- attempt_log = []
130
  for model in MODELS:
131
  short = model.split("/")[-1]
132
  try:
133
- result = call_model(img, model)
 
134
  return result, short
135
- except Exception as e:
136
  msg = str(e)
137
- # Shorten common HTTP error noise
138
- if "429" in msg:
139
- msg = "rate-limited (429)"
140
- elif "401" in msg or "403" in msg:
141
- msg = "auth error β€” check HF_TOKEN"
142
- elif "404" in msg:
143
- msg = "model not found (404)"
144
- elif "503" in msg or "502" in msg:
145
- msg = "model loading / unavailable"
146
- attempt_log.append(f"{short}: {msg}")
147
-
148
- return None, " | ".join(attempt_log)
149
-
150
-
151
- # ──────────────────────────────────────────────────────────────
152
- # Result merging
153
- # ──────────────────────────────────────────────────────────────
154
-
155
- def merge(results: list) -> dict:
156
- RANK = {"high": 3, "medium": 2, "low": 1, "": 0}
157
  merged = {k: {"found": False, "confidence": "low", "notes": ""} for k in KEYS}
158
  for res in results:
159
  if not res:
160
  continue
161
  for k in KEYS:
162
- item = res.get(k, {})
163
- if item.get("found"):
164
  merged[k]["found"] = True
165
- if RANK.get(item.get("confidence", ""), 0) > RANK.get(merged[k]["confidence"], 0):
166
- merged[k]["confidence"] = item.get("confidence", "low")
167
- if item.get("found") and item.get("notes") and not merged[k]["notes"]:
168
- merged[k]["notes"] = item["notes"]
169
  return merged
170
 
171
 
172
- # ──────────────────────────────────────────────────────────────
173
- # Load images from Gradio 5.x file paths
174
- # ──────────────────────────────────────────────────────────────
175
-
176
- def load_images(file_paths) -> list:
177
- imgs = []
178
- if not file_paths:
179
- return imgs
180
- if isinstance(file_paths, str):
181
- file_paths = [file_paths]
182
- for p in file_paths:
183
- try:
184
- path = p if isinstance(p, str) else getattr(p, "name", str(p))
185
- imgs.append(Image.open(path).convert("RGB"))
186
- except Exception as e:
187
- print(f"[load] skipped {p}: {e}")
188
- return imgs
189
-
190
-
191
- # ──────────────────────────────────────────────────────────────
192
- # Main Gradio callback
193
- # ──────────────────────────────────────────────────────────────
194
 
195
  def analyze(file_paths):
196
- # ── Token guard ──
197
- if not TOKEN_OK:
 
 
198
  return (
199
- _error(f"<b>Setup required:</b> {TOKEN_MSG}"),
 
 
 
 
 
 
 
 
200
  _status("error"),
201
  )
202
 
@@ -205,10 +320,13 @@ def analyze(file_paths):
205
  return _placeholder(), _status("idle")
206
 
207
  n = len(images)
 
 
208
  all_results, all_errors, models_used = [], [], set()
209
 
 
210
  with concurrent.futures.ThreadPoolExecutor(max_workers=min(n, 4)) as pool:
211
- futs = [pool.submit(analyze_one, img) for img in images]
212
  for fut in concurrent.futures.as_completed(futs):
213
  res, meta = fut.result()
214
  if res is not None:
@@ -218,50 +336,64 @@ def analyze(file_paths):
218
  all_errors.append(meta)
219
 
220
  if not all_results:
221
- # Show the REAL error from each model attempt
222
- err_detail = "<br>".join(all_errors) if all_errors else "Unknown error"
 
 
223
  return (
224
  _error(
225
- f"<b>All models failed.</b><br><br>"
226
- f"<code style='font-size:12px;line-height:1.8;'>{err_detail}</code><br><br>"
227
- f"Common causes:<br>"
228
- f"β€’ HF_TOKEN missing/expired β†’ Space Settings β†’ Secrets<br>"
229
- f"β€’ Models overloaded (rate limit 429) β†’ retry in a minute<br>"
230
- f"β€’ Image too large β†’ try a smaller/compressed photo"
 
 
231
  ),
232
  _status("error"),
233
  )
234
 
235
  merged = merge(all_results)
236
  model_str = " Β· ".join(sorted(models_used)) or "AI"
237
- warn = (f"<br><small style='color:#d97706;'>⚠️ {len(all_errors)} image(s) failed: "
238
- f"{all_errors[0][:80]}</small>" if all_errors else "")
 
 
 
 
239
 
240
  return build_cards(merged, n, model_str, warn), _status("done", n, len(all_results))
241
 
242
 
243
- # ──────────────────────────────────────────────────────────────
244
- # HTML builders
245
- # ──────────────────────────────────────────────────────────────
246
 
247
  COMP_META = [
248
  ("sensors", "πŸ”·", "Sensors", "Two diamond-shaped sensor plates", "#f59e0b", "#fef3c7"),
249
  ("gps_device", "πŸ“‘", "GPS Device", "White electronic box β€” upper corner", "#3b82f6", "#dbeafe"),
250
- ("prime_logo", "πŸ”΅", "Prime Logo", "Amazon Prime arrow/swoosh mark", "#8b5cf6", "#ede9fe"),
251
  ("trailer_id", "🏷️", "Trailer ID Label", "Vertical strip on the corner post", "#10b981", "#d1fae5"),
252
  ]
253
 
 
 
254
 
255
  def build_cards(merged: dict, img_n: int, model_str: str, warn: str) -> str:
256
  found_n = sum(1 for k, *_ in COMP_META if merged.get(k, {}).get("found"))
257
  total = len(COMP_META)
258
  all_ok = found_n == total
259
 
260
- sc = "#16a34a" if all_ok else ("#d97706" if found_n >= 2 else "#dc2626")
261
- sb = "#f0fdf4" if all_ok else ("#fffbeb" if found_n >= 2 else "#fef2f2")
262
- se = "#86efac" if all_ok else ("#fde68a" if found_n >= 2 else "#fca5a5")
263
- si = "βœ…" if all_ok else ("⚠️" if found_n >= 2 else "❌")
264
- sl = "All Clear" if all_ok else ("Partial" if found_n >= 2 else "Issues Found")
 
 
 
 
265
 
266
  rows = ""
267
  for key, icon, name, desc, accent, pill in COMP_META:
@@ -270,23 +402,22 @@ def build_cards(merged: dict, img_n: int, model_str: str, warn: str) -> str:
270
  conf = d.get("confidence", "low")
271
  notes = d.get("notes", "")
272
 
273
- rbg = "#f0fdf4" if found else "#fef2f2"
274
- rbd = "#bbf7d0" if found else "#fecaca"
275
- stc = "#15803d" if found else "#b91c1c"
276
- stx = "βœ… Found" if found else "❌ Missing"
277
- cdc = {"high": "#16a34a", "medium": "#d97706", "low": "#dc2626"}.get(conf, "#9ca3af")
278
  note_html = (
279
  f'<div style="margin-top:8px;padding-top:8px;border-top:1px solid {rbd};'
280
  f'font-size:12px;color:#4b5563;font-style:italic;line-height:1.5;">"{notes}"</div>'
281
- if notes else ""
282
- )
283
 
284
  rows += f"""
285
  <div style="background:{rbg};border:1.5px solid {rbd};border-radius:12px;
286
  padding:14px 16px;margin-bottom:10px;">
287
  <div style="display:flex;align-items:flex-start;gap:12px;">
288
  <div style="background:{pill};border-radius:10px;padding:9px 11px;
289
- font-size:20px;line-height:1;flex-shrink:0;">{icon}</div>
290
  <div style="flex:1;min-width:0;">
291
  <div style="font-weight:700;font-size:14px;color:#111827;">{name}</div>
292
  <div style="font-size:11px;color:#9ca3af;margin-top:1px;">{desc}</div>
@@ -300,14 +431,12 @@ def build_cards(merged: dict, img_n: int, model_str: str, warn: str) -> str:
300
  </div>"""
301
 
302
  return f"""
303
- <div style="font-family:-apple-system,BlinkMacSystemFont,'Segoe UI',sans-serif;">
304
  <div style="background:{sb};border:2px solid {se};border-radius:14px;
305
  padding:16px 20px;margin-bottom:18px;
306
  display:flex;align-items:center;justify-content:space-between;gap:12px;">
307
  <div>
308
- <div style="font-size:18px;font-weight:800;color:{sc};">
309
- {si} {found_n}/{total} β€” {sl}
310
- </div>
311
  <div style="font-size:12px;color:#6b7280;margin-top:3px;">
312
  {img_n} image{'s' if img_n > 1 else ''} Β· {model_str}{warn}
313
  </div>
@@ -320,7 +449,8 @@ def build_cards(merged: dict, img_n: int, model_str: str, warn: str) -> str:
320
 
321
  def _placeholder() -> str:
322
  return """
323
- <div style="text-align:center;padding:60px 20px;color:#94a3b8;font-family:sans-serif;">
 
324
  <div style="font-size:48px;margin-bottom:14px;">πŸ“·</div>
325
  <div style="font-size:15px;font-weight:600;color:#64748b;">Upload trailer images to begin</div>
326
  <div style="font-size:13px;margin-top:6px;">Front view, rear view, or both β€” all work</div>
@@ -343,39 +473,40 @@ def _status(state: str, total: int = 0, ok: int = 0) -> str:
343
  def _error(msg: str) -> str:
344
  return (
345
  f'<div style="background:#fef2f2;border:1.5px solid #fca5a5;border-radius:12px;'
346
- f'padding:18px 20px;color:#b91c1c;font-family:sans-serif;font-size:13px;line-height:1.7;">'
347
- f'{msg}</div>'
348
  )
349
 
350
 
351
- # ──────────────────────────────────────────────────────────────
352
- # Startup banner (shown in Space logs)
353
- # ──────────────────────────────────────────────────────────────
354
-
355
- print("=" * 55)
356
- print(" Amazon Trailer Inspector β€” starting up")
357
- print(f" Token status : {TOKEN_MSG}")
358
- print(f" Models : {[m.split('/')[-1] for m in MODELS]}")
359
- print("=" * 55)
360
 
 
 
 
 
 
 
361
 
362
- # ──────────────────────────────────────────────────────────────
363
- # Gradio UI
364
- # ──────────────────────────────────────────────────────────────
365
 
366
- TOKEN_BANNER = "" if TOKEN_OK else (
367
  '<div style="background:#fef3c7;border:1.5px solid #fde68a;border-radius:10px;'
368
- 'padding:12px 16px;margin-bottom:14px;font-size:13px;color:#92400e;font-family:sans-serif;">'
369
- '⚠️ <b>HF_TOKEN not set.</b> Go to Space <b>Settings β†’ Repository Secrets</b> '
370
- 'and add <code>HF_TOKEN</code> with your HuggingFace Read token. '
371
- 'Get one free at <a href="https://huggingface.co/settings/tokens" target="_blank">'
372
- 'huggingface.co/settings/tokens</a></div>'
 
373
  )
374
 
375
  CSS = """
376
  .gradio-container { max-width: 980px !important; margin: auto !important; }
377
  #analyze-btn { font-size: 15px !important; font-weight: 700 !important;
378
- letter-spacing: .02em; border-radius: 10px !important; }
379
  footer { display: none !important; }
380
  """
381
 
@@ -388,7 +519,8 @@ THEME = gr.themes.Soft(
388
  with gr.Blocks(title="πŸš› Amazon Trailer Inspector", theme=THEME, css=CSS) as demo:
389
 
390
  gr.HTML(f"""
391
- <div style="text-align:center;padding:30px 0 18px;font-family:sans-serif;">
 
392
  <div style="font-size:46px;margin-bottom:10px;">πŸš›</div>
393
  <h1 style="font-size:26px;font-weight:800;color:#0f172a;margin:0 0 6px;">
394
  Amazon Trailer Inspector
@@ -401,10 +533,12 @@ with gr.Blocks(title="πŸš› Amazon Trailer Inspector", theme=THEME, css=CSS) as d
401
 
402
  with gr.Row(equal_height=False):
403
 
 
404
  with gr.Column(scale=1, min_width=280):
405
  gr.HTML("""
406
  <div style="background:#f8fafc;border:1px solid #e2e8f0;border-radius:14px;
407
- padding:16px 18px;margin-bottom:14px;">
 
408
  <div style="font-weight:700;font-size:12px;color:#475569;
409
  letter-spacing:.06em;text-transform:uppercase;margin-bottom:12px;">
410
  What we check
@@ -419,7 +553,7 @@ with gr.Blocks(title="πŸš› Amazon Trailer Inspector", theme=THEME, css=CSS) as d
419
  <span><b>GPS Device</b> β€” white box, top corner</span>
420
  </div>
421
  <div style="display:flex;align-items:center;gap:10px;">
422
- <span style="background:#ede9fe;border-radius:7px;padding:4px 9px;">πŸ”΅</span>
423
  <span><b>Prime Logo</b> β€” Amazon Prime mark</span>
424
  </div>
425
  <div style="display:flex;align-items:center;gap:10px;">
@@ -437,8 +571,9 @@ with gr.Blocks(title="πŸš› Amazon Trailer Inspector", theme=THEME, css=CSS) as d
437
  )
438
 
439
  gr.HTML("""
440
- <p style="font-size:12px;color:#94a3b8;text-align:center;margin:8px 0 14px;">
441
- πŸ’‘ Upload both rear &amp; side views for best results
 
442
  </p>""")
443
 
444
  analyze_btn = gr.Button(
@@ -450,12 +585,13 @@ with gr.Blocks(title="πŸš› Amazon Trailer Inspector", theme=THEME, css=CSS) as d
450
 
451
  status_html = gr.HTML(_status("idle"))
452
 
 
453
  with gr.Column(scale=1, min_width=320):
454
  result_html = gr.HTML(_placeholder())
455
 
456
  gr.HTML("""
457
  <div style="text-align:center;padding:20px 0 10px;color:#94a3b8;
458
- font-size:12px;font-family:sans-serif;">
459
  Llama 3.2 Vision Β· Qwen2.5-VL Β· Gemma 3 &nbsp;|&nbsp;
460
  Images processed in parallel &nbsp;|&nbsp; No data stored
461
  </div>""")
 
1
  """
2
+ Amazon Trailer Inspector — app.py
3
+ HuggingFace Spaces · Gradio 5.x · Free vision LLMs
4
+
5
+ FIXES over previous version:
6
+ - Uses requests directly (avoids huggingface_hub API version breakage)
7
+ - Correct chat-completions endpoint format for HF Serverless Inference
8
+ - Updated model list to currently-working free vision models
9
+ - Removed blocking whoami() startup check
10
+ - Robust JSON extraction with multi-pass recovery
11
+ - Detailed per-model error logging to Space logs
12
  """
13
 
14
  import gradio as gr
15
  import base64
16
  import concurrent.futures
17
  import json
 
18
  import os
19
+ import re
20
  import io
 
21
 
22
+ import requests
23
+ from PIL import Image
24
+
25
+ # ──────────────────────────────────────────────────────────────────────────────
26
+ # MODELS β€” ordered by reliability on HF free tier (most reliable first)
27
+ # ──────────────────────────────────────────────────────────────────────────────
28
  MODELS = [
29
+ "meta-llama/Llama-3.2-11B-Vision-Instruct", # Best free vision model on HF
30
+ "Qwen/Qwen2.5-VL-7B-Instruct", # Good fallback
31
+ "google/gemma-3-4b-it", # Smaller, faster fallback
32
  ]
33
 
34
+ # HF Serverless Inference β€” chat completions endpoint
35
+ HF_CHAT_URL = "https://api-inference.huggingface.co/models/{model}/v1/chat/completions"
36
 
37
+ # ──────────────────────────────────────────────────────────────────────────────
38
+ # DETECTION PROMPT
39
+ # ──────────────────────────────────────────────────────────────────────────────
40
+ DETECTION_PROMPT = """You are a precise visual inspector for Amazon trailer fleets.
41
+ Carefully examine the full trailer image and locate these 4 components:
42
+
43
+ 1. SENSORS β€” Exactly TWO silver/beige DIAMOND (rhombus/rotated-square) shaped metal plates.
44
+ They are mounted near the lower-rear area on the back doors of the trailer.
45
+ 2. GPS_DEVICE β€” A small white or light-gray rectangular electronic box mounted at the upper
46
+ corner of the trailer rear face. About the size of a paperback book.
47
+ 3. PRIME_LOGO β€” The Amazon Prime branding: the word "prime" OR the Amazon arrow/smile logo
48
+ OR both. Can be full or partially visible, on rear or side of trailer.
49
+ 4. TRAILER_ID β€” A vertical fluorescent-green or yellow-green label strip on the corner post/pillar,
50
+ showing an alphanumeric code like "SV2602705".
51
+
52
+ IMPORTANT: Reply ONLY with valid JSON β€” absolutely no extra text before or after, no markdown fences:
53
  {
54
  "sensors": {"found": true, "confidence": "high", "notes": "two diamond plates visible lower-left"},
55
+ "gps_device": {"found": false, "confidence": "medium", "notes": "top corner not visible in this angle"},
56
+ "prime_logo": {"found": true, "confidence": "high", "notes": "prime word visible on rear panel"},
57
+ "trailer_id": {"found": true, "confidence": "high", "notes": "SV2602705 on right corner post"}
58
  }"""
59
 
60
  KEYS = ["sensors", "gps_device", "prime_logo", "trailer_id"]
61
 
62
+ # ──────────────────────────────────────────────────────────────────────────────
63
+ # IMAGE HELPERS
64
+ # ──────────────────────────────────────────────────────────────────────────────
65
 
66
def pil_to_b64(img: Image.Image, max_side: int = 1024) -> str:
    """Encode an image as a base64 JPEG string, downscaling it first if needed.

    Args:
        img: Source image; it is never modified.
        max_side: Longest allowed edge in pixels. Larger images are shrunk
            in place on a private copy, preserving aspect ratio.

    Returns:
        The JPEG bytes (quality 82) encoded as a base64 text string.
    """
    # convert() always returns a new image, so no separate copy() is needed
    # to protect the caller's object from the in-place thumbnail() below.
    rgb = img.convert("RGB")
    if max(rgb.size) > max_side:
        rgb.thumbnail((max_side, max_side), Image.LANCZOS)
    buf = io.BytesIO()
    rgb.save(buf, format="JPEG", quality=82)
    return base64.b64encode(buf.getvalue()).decode("utf-8")
74
 
 
 
 
 
 
 
 
 
 
 
75
 
76
def load_images(file_paths) -> list[Image.Image]:
    """Load RGB PIL images from the paths handed over by the Gradio file input.

    Args:
        file_paths: ``None``/empty, a single path string, or an iterable whose
            items are path strings or objects exposing the path as ``.name``.

    Returns:
        The successfully decoded images, in input order. Files that cannot be
        opened or decoded are skipped (and reported on stdout) rather than
        failing the whole batch.
    """
    if not file_paths:
        return []
    if isinstance(file_paths, str):
        file_paths = [file_paths]

    imgs = []
    for p in file_paths:
        try:
            path = p if isinstance(p, str) else getattr(p, "name", str(p))
            # Image.open() is lazy and keeps the file open; the context manager
            # releases the handle once convert() has produced an in-memory copy.
            with Image.open(path) as src:
                imgs.append(src.convert("RGB"))
        except Exception as e:  # deliberate best-effort: one bad upload must not abort the rest
            print(f"[load_images] skipped {p}: {e}")
    return imgs
90
 
91
 
92
+ # ──────────────────────────────────────────────────────────────────────────────
93
+ # JSON EXTRACTION β€” multi-pass recovery
94
+ # ──────────────────────────────────────────────────────────────────────────────
95
 
96
+ def extract_json(text: str) -> dict | None:
97
+ """Try multiple strategies to pull valid JSON from LLM output."""
98
+ if not text:
99
+ return None
100
+
101
+ # Strip markdown code fences
102
+ text = re.sub(r"```(?:json)?", "", text, flags=re.IGNORECASE).replace("```", "").strip()
 
103
 
104
+ # Find outermost { ... } block
105
+ m = re.search(r"\{[\s\S]*\}", text)
106
+ if not m:
107
+ return None
108
+ raw = m.group()
109
 
110
+ # Pass 1: direct parse
111
+ try:
112
+ return json.loads(raw)
113
+ except json.JSONDecodeError:
114
+ pass
115
 
116
+ # Pass 2: fix trailing commas
117
+ fixed = re.sub(r",\s*([}\]])", r"\1", raw)
118
+ try:
119
+ return json.loads(fixed)
120
+ except json.JSONDecodeError:
121
+ pass
122
+
123
+ # Pass 3: extract only the lines containing our keys
124
+ try:
125
+ rebuilt = {
126
+ key: json.loads(
127
+ re.search(
128
+ rf'"{key}"\s*:\s*(\{{[^}}]+\}})', raw, re.DOTALL
129
+ ).group(1)
130
+ )
131
+ for key in KEYS
132
+ if re.search(rf'"{key}"\s*:\s*\{{', raw)
133
+ }
134
+ if rebuilt:
135
+ return rebuilt
136
+ except Exception:
137
+ pass
138
+
139
+ return None
140
+
141
+
142
+ def validate_result(data: dict) -> dict | None:
143
+ """Ensure result has all keys and correct types; coerce where possible."""
144
+ if not data:
145
+ return None
146
+ out = {}
147
+ for key in KEYS:
148
+ item = data.get(key)
149
+ if not isinstance(item, dict):
150
+ return None # hard fail β€” missing a required key
151
+ found = item.get("found", False)
152
+ if isinstance(found, str):
153
+ found = found.lower() in ("true", "yes", "1")
154
+ out[key] = {
155
+ "found": bool(found),
156
+ "confidence": item.get("confidence", "low") or "low",
157
+ "notes": (item.get("notes") or "").strip(),
158
+ }
159
+ return out
160
+
161
+
162
+ # ──────────────────────────────────────────────────────────────────────────────
163
+ # LLM CALL β€” direct requests, no huggingface_hub dependency for inference
164
+ # ──────────────────────────────────────────────────────────────────────────────
165
+
166
def call_model(img: Image.Image, model: str, token: str) -> dict:
    """Ask one hosted vision model to inspect an image.

    Posts the image (as a base64 JPEG data URL) together with
    ``DETECTION_PROMPT`` to the chat-completions URL built from
    ``HF_CHAT_URL``, then extracts and validates the JSON verdict.

    Args:
        img: Image to inspect.
        model: Full model id, e.g. ``"org/name"``.
        token: Bearer token sent in the ``Authorization`` header.

    Returns:
        The validated result dict produced by ``validate_result``.

    Raises:
        RuntimeError: On any transport error, non-200 status, unexpected
            response shape, or unparsable model output. The message is
            prefixed with the short model name.
    """
    short = model.split("/")[-1]
    b64 = pil_to_b64(img)

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {token}",
    }

    payload = {
        "model": model,
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{b64}"},
                    },
                    {
                        "type": "text",
                        "text": DETECTION_PROMPT,
                    },
                ],
            }
        ],
        "max_tokens": 512,
        "temperature": 0.05,
        "stream": False,
    }

    url = HF_CHAT_URL.format(model=model)

    try:
        resp = requests.post(url, headers=headers, json=payload, timeout=90)
    except requests.exceptions.Timeout as e:
        raise RuntimeError(f"{short}: request timed out (90s)") from e
    except requests.exceptions.ConnectionError as e:
        raise RuntimeError(f"{short}: connection error — {e}") from e
    except requests.exceptions.RequestException as e:
        # Any other transport failure must still surface as RuntimeError so
        # the caller's model-fallback loop keeps going.
        raise RuntimeError(f"{short}: request failed — {e}") from e

    # ── HTTP-level error handling ────────────────────────────────────────────
    status = resp.status_code
    known_failures = {
        401: "401 Unauthorized — HF_TOKEN is missing or invalid",
        403: "403 Forbidden — token may not have access to this model",
        404: "404 Not Found — model not available on serverless endpoint",
        422: "422 Unprocessable — model may not support vision input",
        429: "429 Rate Limited — try again in ~60 seconds",
        502: "502 Service Unavailable — model is loading",
        503: "503 Service Unavailable — model is loading",
    }
    if status in known_failures:
        raise RuntimeError(f"{short}: {known_failures[status]}")
    if status != 200:
        body_preview = resp.text[:200].replace("\n", " ")
        raise RuntimeError(f"{short}: HTTP {status} — {body_preview}")

    # ── Parse response ───────────────────────────────────────────────────────
    try:
        body = resp.json()
        content = body["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError, ValueError) as e:
        # ValueError covers invalid JSON; TypeError covers a body that is not
        # the expected nesting of dicts/lists.
        raise RuntimeError(
            f"{short}: unexpected response shape — {e} | body: {resp.text[:200]}"
        ) from e
    if not isinstance(content, str):
        raise RuntimeError(
            f"{short}: unexpected response shape — message content is not text"
            f" | body: {resp.text[:200]}"
        )

    print(f"[{short}] raw LLM output: {content[:300]}")  # visible in Space logs

    try:
        result = validate_result(extract_json(content))
    except (AttributeError, TypeError, ValueError) as e:
        raise RuntimeError(
            f"{short}: malformed model output — {e}\n"
            f"Raw output (first 300 chars): {content[:300]}"
        ) from e
    if result is None:
        raise RuntimeError(
            f"{short}: could not extract valid JSON.\n"
            f"Raw output (first 300 chars): {content[:300]}"
        )

    return result
 
 
246
 
 
 
 
 
247
 
248
+ # ──────────────────────────────────────────────────────────────────────────────
249
+ # PER-IMAGE ANALYSIS β€” try each model in order
250
+ # ──────────────────────────────────────────────────────────────────────────────
251
 
252
def analyze_one(img: Image.Image, token: str) -> tuple[dict | None, str]:
    """Run one image through the model chain until a model succeeds.

    Models are tried in ``MODELS`` order; the first successful reply wins.

    Args:
        img: Image to inspect.
        token: API token forwarded to ``call_model``.

    Returns:
        ``(result_dict, model_short_name)`` on success, or
        ``(None, joined_error_string)`` when every model failed. This function
        never raises for a model failure, so one bad image cannot abort the
        caller's thread-pool loop.
    """
    errors = []
    for model in MODELS:
        short = model.split("/")[-1]
        try:
            result = call_model(img, model, token)
        except RuntimeError as e:
            msg = str(e)
        except Exception as e:
            # Boundary catch: an unexpected failure in one model must fall
            # through to the next model instead of escaping via fut.result().
            msg = f"{short}: unexpected {type(e).__name__} — {e}"
        else:
            print(f"[analyze_one] SUCCESS with {short}")
            return result, short
        print(f"[analyze_one] FAIL {msg}")
        errors.append(msg)

    return None, " | ".join(errors)
271
+
272
+
273
+ # ──────────────────────────────────────────────────────────────────────────────
274
+ # RESULT MERGING
275
+ # ──────────────────────────────────────────────────────────────────────────────
276
+
277
# Ordering of confidence labels; unknown labels rank lowest.
CONF_RANK = {"high": 3, "medium": 2, "low": 1, "": 0}


def merge(results: list[dict], keys=None) -> dict:
    """Combine per-image results into one verdict per component.

    A component counts as found if any image found it. Confidence and notes
    are taken from the detections that agree with that verdict: from the
    positive detections when there is at least one, otherwise from the
    negative ones. This keeps a "not visible" note or an absence confidence
    from being attached to a component that another image did find.

    Args:
        results: Per-image result dicts; falsy entries are ignored.
        keys: Component names to merge. Defaults to the module-level ``KEYS``.

    Returns:
        ``{component: {"found": bool, "confidence": str, "notes": str}}`` for
        every requested component. Components with no usable data default to
        not found / ``"low"`` / empty notes.
    """
    wanted = KEYS if keys is None else keys

    def label(entry: dict) -> str:
        return str(entry.get("confidence") or "").strip().lower()

    def rank(entry: dict) -> int:
        return CONF_RANK.get(label(entry), 0)

    merged = {}
    for k in wanted:
        entries = [
            res[k]
            for res in (results or [])
            if isinstance(res, dict) and isinstance(res.get(k), dict)
        ]
        positives = [e for e in entries if e.get("found")]
        # Stable sort: among equal confidence, earlier images keep priority.
        pool = sorted(positives or entries, key=rank, reverse=True)

        confidence = "low"
        if pool and rank(pool[0]) > CONF_RANK["low"]:
            confidence = label(pool[0])
        notes = next((e["notes"] for e in pool if e.get("notes")), "")

        merged[k] = {
            "found": bool(positives),
            "confidence": confidence,
            "notes": notes,
        }
    return merged
294
 
295
 
296
+ # ──────────────────────────────────────────────────────────────────────────────
297
+ # MAIN GRADIO CALLBACK
298
+ # ──────────────────────────────────────────────────────────────────────────────
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
299
 
300
  def analyze(file_paths):
301
+ token = os.environ.get("HF_TOKEN", "").strip()
302
+
303
+ # ── Token guard β€” show actionable message ───────────────────────────────
304
+ if not token:
305
  return (
306
+ _error(
307
+ "<b>Setup required: HF_TOKEN not set.</b><br><br>"
308
+ "Go to your Space β†’ <b>Settings β†’ Repository Secrets</b> "
309
+ "β†’ add a secret named <code>HF_TOKEN</code> with your "
310
+ "HuggingFace Read token.<br>"
311
+ "Get a free token at "
312
+ "<a href='https://huggingface.co/settings/tokens' target='_blank'>"
313
+ "huggingface.co/settings/tokens</a>"
314
+ ),
315
  _status("error"),
316
  )
317
 
 
320
  return _placeholder(), _status("idle")
321
 
322
  n = len(images)
323
+ print(f"[analyze] processing {n} image(s)")
324
+
325
  all_results, all_errors, models_used = [], [], set()
326
 
327
+ # Parallel: one thread per image (up to 4)
328
  with concurrent.futures.ThreadPoolExecutor(max_workers=min(n, 4)) as pool:
329
+ futs = {pool.submit(analyze_one, img, token): i for i, img in enumerate(images)}
330
  for fut in concurrent.futures.as_completed(futs):
331
  res, meta = fut.result()
332
  if res is not None:
 
336
  all_errors.append(meta)
337
 
338
  if not all_results:
339
+ err_lines = "<br>".join(
340
+ f"<code style='font-size:11px;'>{e}</code>" for e in all_errors
341
+ ) or "<code>Unknown error</code>"
342
+
343
  return (
344
  _error(
345
+ f"<b>All models failed for all images.</b><br><br>"
346
+ f"<b>Exact errors:</b><br>{err_lines}<br><br>"
347
+ f"<b>Most likely fixes:</b><br>"
348
+ f"β€’ <b>401/403</b> β†’ HF_TOKEN is wrong or expired β€” regenerate at "
349
+ f"<a href='https://huggingface.co/settings/tokens' target='_blank'>hf.co/settings/tokens</a><br>"
350
+ f"β€’ <b>429</b> β†’ Rate limited β€” wait 60 seconds and retry<br>"
351
+ f"β€’ <b>404</b> β†’ Model temporarily unavailable β€” retry or report as issue<br>"
352
+ f"β€’ <b>503</b> β†’ Model is loading (cold start) β€” wait 30s and retry"
353
  ),
354
  _status("error"),
355
  )
356
 
357
  merged = merge(all_results)
358
  model_str = " Β· ".join(sorted(models_used)) or "AI"
359
+ warn = ""
360
+ if all_errors:
361
+ warn = (
362
+ f"<br><small style='color:#d97706;'>⚠️ {len(all_errors)} image(s) failed β€” "
363
+ f"{all_errors[0][:100]}</small>"
364
+ )
365
 
366
  return build_cards(merged, n, model_str, warn), _status("done", n, len(all_results))
367
 
368
 
369
+ # ──────────────────────────────────────────────────────────────────────────────
370
+ # HTML BUILDERS
371
+ # ──────────────────────────────────────────────────────────────────────────────
372
 
373
  COMP_META = [
374
  ("sensors", "πŸ”·", "Sensors", "Two diamond-shaped sensor plates", "#f59e0b", "#fef3c7"),
375
  ("gps_device", "πŸ“‘", "GPS Device", "White electronic box β€” upper corner", "#3b82f6", "#dbeafe"),
376
+ ("prime_logo", "πŸ”Ά", "Prime Logo", "Amazon Prime logo (full or partial)", "#f97316", "#fff7ed"),
377
  ("trailer_id", "🏷️", "Trailer ID Label", "Vertical strip on the corner post", "#10b981", "#d1fae5"),
378
  ]
379
 
380
+ CONF_COLOR = {"high": "#15803d", "medium": "#b45309", "low": "#b91c1c"}
381
+
382
 
383
  def build_cards(merged: dict, img_n: int, model_str: str, warn: str) -> str:
384
  found_n = sum(1 for k, *_ in COMP_META if merged.get(k, {}).get("found"))
385
  total = len(COMP_META)
386
  all_ok = found_n == total
387
 
388
+ # Banner colours
389
+ if all_ok:
390
+ sc, sb, se, si, sl = "#16a34a", "#f0fdf4", "#86efac", "βœ…", "All Clear β€” All Components Found"
391
+ elif found_n >= 3:
392
+ sc, sb, se, si, sl = "#d97706", "#fffbeb", "#fde68a", "⚠️", "Mostly Complete"
393
+ elif found_n >= 2:
394
+ sc, sb, se, si, sl = "#ea580c", "#fff7ed", "#fed7aa", "⚠️", "Partially Complete"
395
+ else:
396
+ sc, sb, se, si, sl = "#dc2626", "#fef2f2", "#fca5a5", "❌", "Missing Components"
397
 
398
  rows = ""
399
  for key, icon, name, desc, accent, pill in COMP_META:
 
402
  conf = d.get("confidence", "low")
403
  notes = d.get("notes", "")
404
 
405
+ rbg = "#f0fdf4" if found else "#fef2f2"
406
+ rbd = "#bbf7d0" if found else "#fecaca"
407
+ stc = "#15803d" if found else "#b91c1c"
408
+ stx = "✅ Found" if found else "❌ Missing"
409
+ cdc = CONF_COLOR.get(conf, "#9ca3af")
410
  note_html = (
411
  f'<div style="margin-top:8px;padding-top:8px;border-top:1px solid {rbd};'
412
  f'font-size:12px;color:#4b5563;font-style:italic;line-height:1.5;">"{notes}"</div>'
413
+ ) if notes else ""
 
414
 
415
  rows += f"""
416
  <div style="background:{rbg};border:1.5px solid {rbd};border-radius:12px;
417
  padding:14px 16px;margin-bottom:10px;">
418
  <div style="display:flex;align-items:flex-start;gap:12px;">
419
  <div style="background:{pill};border-radius:10px;padding:9px 11px;
420
+ font-size:22px;line-height:1;flex-shrink:0;">{icon}</div>
421
  <div style="flex:1;min-width:0;">
422
  <div style="font-weight:700;font-size:14px;color:#111827;">{name}</div>
423
  <div style="font-size:11px;color:#9ca3af;margin-top:1px;">{desc}</div>
 
431
  </div>"""
432
 
433
  return f"""
434
+ <div style="font-family:-apple-system,BlinkMacSystemFont,'Segoe UI',sans-serif;max-width:600px;">
435
  <div style="background:{sb};border:2px solid {se};border-radius:14px;
436
  padding:16px 20px;margin-bottom:18px;
437
  display:flex;align-items:center;justify-content:space-between;gap:12px;">
438
  <div>
439
+ <div style="font-size:18px;font-weight:800;color:{sc};">{si} {found_n}/{total} — {sl}</div>
 
 
440
  <div style="font-size:12px;color:#6b7280;margin-top:3px;">
441
  {img_n} image{'s' if img_n > 1 else ''} · {model_str}{warn}
442
  </div>
 
449
 
450
  def _placeholder() -> str:
451
  return """
452
+ <div style="text-align:center;padding:60px 20px;color:#94a3b8;
453
+ font-family:-apple-system,BlinkMacSystemFont,'Segoe UI',sans-serif;">
454
  <div style="font-size:48px;margin-bottom:14px;">📷</div>
455
  <div style="font-size:15px;font-weight:600;color:#64748b;">Upload trailer images to begin</div>
456
  <div style="font-size:13px;margin-top:6px;">Front view, rear view, or both — all work</div>
 
473
  def _error(msg: str) -> str:
474
  return (
475
  f'<div style="background:#fef2f2;border:1.5px solid #fca5a5;border-radius:12px;'
476
+ f'padding:18px 20px;color:#b91c1c;font-family:-apple-system,sans-serif;'
477
+ f'font-size:13px;line-height:1.8;">{msg}</div>'
478
  )
479
 
480
 
481
+ # ──────────────────────────────────────────────────────────────────────────────
482
+ # STARTUP LOG
483
+ # ──────────────────────────────────────────────────────────────────────────────
 
 
 
 
 
 
484
 
485
+ _tok = os.environ.get("HF_TOKEN", "")
486
+ print("=" * 60)
487
+ print(" Amazon Trailer Inspector — startup")
488
+ print(f" HF_TOKEN : {'SET (' + str(len(_tok)) + ' chars)' if _tok else 'NOT SET ← add to Space Secrets!'}")
489
+ print(f" Models : {[m.split('/')[-1] for m in MODELS]}")
490
+ print("=" * 60)
491
 
492
+ # ──────────────────────────────────────────────────────────────────────────────
493
+ # GRADIO UI
494
+ # ──────────────────────────────────────────────────────────────────────────────
495
 
496
+ TOKEN_BANNER = "" if _tok else (
497
  '<div style="background:#fef3c7;border:1.5px solid #fde68a;border-radius:10px;'
498
+ 'padding:12px 16px;margin-bottom:14px;font-size:13px;color:#92400e;'
499
+ 'font-family:-apple-system,sans-serif;">'
500
+ '⚠️ <b>HF_TOKEN not set.</b> Space Settings → Repository Secrets → add '
501
+ '<code>HF_TOKEN</code> = your Read token from '
502
+ '<a href="https://huggingface.co/settings/tokens" target="_blank">huggingface.co/settings/tokens</a>'
503
+ '</div>'
504
  )
505
 
506
  CSS = """
507
  .gradio-container { max-width: 980px !important; margin: auto !important; }
508
  #analyze-btn { font-size: 15px !important; font-weight: 700 !important;
509
+ letter-spacing: .02em !important; border-radius: 10px !important; }
510
  footer { display: none !important; }
511
  """
512
 
 
519
  with gr.Blocks(title="🚛 Amazon Trailer Inspector", theme=THEME, css=CSS) as demo:
520
 
521
  gr.HTML(f"""
522
+ <div style="text-align:center;padding:30px 0 18px;
523
+ font-family:-apple-system,BlinkMacSystemFont,'Segoe UI',sans-serif;">
524
  <div style="font-size:46px;margin-bottom:10px;">🚛</div>
525
  <h1 style="font-size:26px;font-weight:800;color:#0f172a;margin:0 0 6px;">
526
  Amazon Trailer Inspector
 
533
 
534
  with gr.Row(equal_height=False):
535
 
536
+ # LEFT COLUMN — upload + checklist
537
  with gr.Column(scale=1, min_width=280):
538
  gr.HTML("""
539
  <div style="background:#f8fafc;border:1px solid #e2e8f0;border-radius:14px;
540
+ padding:16px 18px;margin-bottom:14px;
541
+ font-family:-apple-system,BlinkMacSystemFont,'Segoe UI',sans-serif;">
542
  <div style="font-weight:700;font-size:12px;color:#475569;
543
  letter-spacing:.06em;text-transform:uppercase;margin-bottom:12px;">
544
  What we check
 
553
  <span><b>GPS Device</b> — white box, top corner</span>
554
  </div>
555
  <div style="display:flex;align-items:center;gap:10px;">
556
+ <span style="background:#fff7ed;border-radius:7px;padding:4px 9px;">🔶</span>
557
  <span><b>Prime Logo</b> — Amazon Prime mark</span>
558
  </div>
559
  <div style="display:flex;align-items:center;gap:10px;">
 
571
  )
572
 
573
  gr.HTML("""
574
+ <p style="font-size:12px;color:#94a3b8;text-align:center;margin:8px 0 14px;
575
+ font-family:-apple-system,sans-serif;">
576
+ 💡 Upload front, rear, or side views — more angles = better accuracy
577
  </p>""")
578
 
579
  analyze_btn = gr.Button(
 
585
 
586
  status_html = gr.HTML(_status("idle"))
587
 
588
+ # RIGHT COLUMN — results
589
  with gr.Column(scale=1, min_width=320):
590
  result_html = gr.HTML(_placeholder())
591
 
592
  gr.HTML("""
593
  <div style="text-align:center;padding:20px 0 10px;color:#94a3b8;
594
+ font-size:12px;font-family:-apple-system,sans-serif;">
595
  Llama 3.2 Vision · Qwen2.5-VL · Gemma 3 &nbsp;|&nbsp;
596
  Images processed in parallel &nbsp;|&nbsp; No data stored
597
  </div>""")