mlbench123 committed on
Commit
46d5b05
Β·
verified Β·
1 Parent(s): 42b37db

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +404 -0
app.py ADDED
@@ -0,0 +1,404 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Amazon Trailer Inspector
3
+ HuggingFace Spaces + Gradio pipeline
4
+ Gemma-3 (primary) → Llama-3.2-Vision → Qwen2.5-VL (fallbacks)
5
+ Parallel multi-image inference, clean results UI.
6
+ """
7
+
8
+ import gradio as gr
9
+ import base64
10
+ import concurrent.futures
11
+ import json
12
+ import re
13
+ import os
14
+ from PIL import Image
15
+ import io
16
+ from huggingface_hub import InferenceClient
17
+
18
+ # ──────────────────────────────────────────────────────────────
19
+ # Model chain (Gemma first, automatic fallback)
20
+ # ──────────────────────────────────────────────────────────────
21
# Vision models tried in order; the first one that returns parseable JSON wins.
MODELS = [
    "google/gemma-3-4b-it",                      # Primary – Gemma 3 multimodal (free)
    "meta-llama/Llama-3.2-11B-Vision-Instruct",  # Fallback 1
    "Qwen/Qwen2.5-VL-7B-Instruct",               # Fallback 2
]

# Instruction sent with every image. The example object at the end doubles as
# the response schema: one entry per name in KEYS, each carrying
# "found" / "confidence" / "notes".
DETECTION_PROMPT = """You are a precise visual inspector for Amazon trailer fleets.
Carefully examine the trailer image and locate these 4 components:

1. SENSORS — Two silver/beige DIAMOND (rhombus) shaped metal plates near the rear bottom of the trailer back doors.
2. GPS_DEVICE — A small white or light-gray rectangular electronic box at the upper corner of the trailer rear face.
3. PRIME_LOGO — The Amazon Prime logo: blue swooping arrow/checkmark. Can be full or partial, on rear or side.
4. TRAILER_ID — A vertical fluorescent green or yellow-green ID label strip on the corner post (shows a number like SV2602705).

Reply ONLY with valid JSON — absolutely no extra text, no markdown code fences:
{
  "sensors": {"found": true, "confidence": "high", "notes": "two diamond plates visible lower-left"},
  "gps_device": {"found": false, "confidence": "medium", "notes": "top corner obscured"},
  "prime_logo": {"found": true, "confidence": "high", "notes": "partial prime swoosh on rear panel"},
  "trailer_id": {"found": true, "confidence": "high", "notes": "SV2602705 label on right corner post"}
}"""

# Component keys expected in every model response, in display order.
KEYS = ["sensors", "gps_device", "prime_logo", "trailer_id"]
44
+
45
+ # ──────────────────────────────────────────────────────────────
46
+ # Vision helpers
47
+ # ──────────────────────────────────────────────────────────────
48
+
49
def pil_to_b64(img: Image.Image, max_side: int = 1120) -> str:
    """Encode *img* as a base64 JPEG string, downscaling large images first.

    Args:
        img: Source image. It is never modified; resizing and mode
            conversion operate on a copy.
        max_side: Maximum width/height in pixels. Larger images are shrunk
            (aspect ratio preserved) so the longest side fits this bound.

    Returns:
        The JPEG bytes (quality 88) encoded as an ASCII base64 string.
    """
    if max(img.size) > max_side:
        # thumbnail() resizes in place, so work on a copy of the caller's image.
        img = img.copy()
        img.thumbnail((max_side, max_side), Image.LANCZOS)
    # JPEG has no alpha channel or palette support: saving RGBA / LA / P
    # images raises, so normalise everything except RGB and greyscale.
    if img.mode not in ("RGB", "L"):
        img = img.convert("RGB")
    buf = io.BytesIO()
    img.save(buf, format="JPEG", quality=88)
    return base64.b64encode(buf.getvalue()).decode()
56
+
57
+
58
def call_model(img: Image.Image, model: str) -> dict:
    """Run one vision-chat request against *model* and return the parsed JSON.

    Args:
        img: Image to inspect; sent inline as a base64 JPEG data URL.
        model: Model repo id passed to ``InferenceClient``.

    Returns:
        The first JSON object found in the model's reply, as a dict.

    Raises:
        ValueError: If the reply contains no ``{`` or the text starting at
            the first ``{`` is not valid JSON (``json.JSONDecodeError`` is a
            ``ValueError`` subclass).
        Exception: Whatever the inference client raises on request failure.
    """
    token = os.environ.get("HF_TOKEN")
    client = InferenceClient(model=model, token=token)
    b64 = pil_to_b64(img)

    messages = [{
        "role": "user",
        "content": [
            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{b64}"}},
            {"type": "text", "text": DETECTION_PROMPT},
        ],
    }]

    resp = client.chat_completion(messages=messages, max_tokens=512, temperature=0.05)
    # content can be None on an empty completion; treat it as an empty reply.
    raw = (resp.choices[0].message.content or "").strip()

    start = raw.find("{")
    if start == -1:
        raise ValueError(f"No JSON in response: {raw[:200]}")
    # raw_decode parses exactly one JSON value and ignores whatever follows,
    # so trailing prose or a closing code fence (even one containing braces)
    # no longer breaks parsing the way a greedy "{...}" regex match does.
    result, _ = json.JSONDecoder().raw_decode(raw, start)
    return result
79
+
80
+
81
def analyze_one(img: Image.Image) -> tuple:
    """Analyze one image, falling back through MODELS until one succeeds.

    Args:
        img: Image to inspect.

    Returns:
        ``(result, short_model_name)`` from the first model that answers, or
        ``(None, error_text)`` describing the last failure when every model
        fails. ``short_model_name`` is the repo id without its owner prefix.
    """
    last_err = "no models tried"
    for model in MODELS:
        short_name = model.split("/")[-1]
        try:
            result = call_model(img, model)
        except Exception as e:  # fallback boundary: any failure moves on to the next model
            last_err = f"{short_name}: {e}"
            # Log every failure; otherwise only the final model's error
            # survives and earlier causes are lost.
            print(f"[model] failed {last_err}")
        else:
            return result, short_name
    return None, last_err
91
+
92
+
93
+ # ──────────────────────────────────────────────────────────────
94
+ # Result merging
95
+ # ──────────────────────────────────────────────────────────────
96
+
97
def merge(results: list, keys=None) -> dict:
    """Combine per-image detection results into one verdict per component.

    Union across all images:
      - a component is FOUND if any image found it
      - the highest confidence among the images that found it wins
      - the first non-empty notes from an image that found it are kept

    Model output is not trusted to follow the schema: non-dict results or
    component entries are ignored, confidence is matched case-insensitively,
    and string booleans such as "false" are interpreted rather than treated
    as truthy.

    Args:
        results: Parsed model responses, one per image. Falsy or non-dict
            entries are skipped.
        keys: Component names to merge. Defaults to the module-level KEYS.

    Returns:
        ``{key: {"found": bool, "confidence": str, "notes": str}}`` with one
        entry per key; components never found keep confidence "low" and
        empty notes.
    """
    RANK = {"high": 3, "medium": 2, "low": 1, "": 0}
    TRUE_WORDS = ("true", "yes", "1")

    if keys is None:
        keys = KEYS
    merged = {k: {"found": False, "confidence": "low", "notes": ""} for k in keys}

    for res in results or []:
        if not isinstance(res, dict):
            continue
        for k in keys:
            item = res.get(k)
            if not isinstance(item, dict):
                # e.g. the model answered `"sensors": true` instead of an object
                continue

            found = item.get("found")
            if isinstance(found, str):
                found = found.strip().lower() in TRUE_WORDS
            if not found:
                continue

            slot = merged[k]
            slot["found"] = True

            # Normalise so "High" / " HIGH " rank the same as "high".
            conf = str(item.get("confidence") or "").strip().lower()
            if RANK.get(conf, 0) > RANK[slot["confidence"]]:
                slot["confidence"] = conf

            notes = item.get("notes")
            if notes and not slot["notes"]:
                slot["notes"] = str(notes)
    return merged
119
+
120
+
121
+ # ──────────────────────────────────────────────────────────────
122
+ # Main pipeline function (called by Gradio)
123
+ # ──────────────────────────────────────────────────────────────
124
+
125
def load_images(file_paths):
    """Open uploaded files as RGB PIL images, skipping anything unreadable.

    Args:
        file_paths: A single path, a list of paths, or a list of legacy
            file-like objects exposing the path as ``.name``. ``None`` or an
            empty value yields an empty list.

    Returns:
        A list of RGB images, in input order, for the files that could be
        opened. Failures are logged with a ``[load]`` prefix and skipped.
    """
    imgs = []
    if not file_paths:
        return imgs
    if isinstance(file_paths, (str, os.PathLike)):
        file_paths = [file_paths]
    for p in file_paths:
        try:
            if isinstance(p, (str, os.PathLike)):
                # pathlib.Path also has a `.name`, but that is only the
                # basename, so path-like objects must be used as they are.
                path = p
            else:
                path = getattr(p, "name", None) or str(p)
            # convert() returns an independent, fully loaded image, so the
            # source file handle can be closed straight away.
            with Image.open(path) as src:
                imgs.append(src.convert("RGB"))
        except Exception as e:  # best-effort: one bad upload must not fail the batch
            print(f"[load] skipped {p}: {e}")
    return imgs
142
+
143
+
144
def analyze(file_paths):
    """Main Gradio callback: inspect every uploaded image and render results.

    Images are analyzed concurrently (at most 4 worker threads) and the
    per-image results are merged into one verdict per component.

    Args:
        file_paths: Value of the upload component (see ``load_images``).

    Returns:
        ``(result_html, status_html)``: the results panel markup and the
        one-line status markup shown under the button.
    """
    images = load_images(file_paths)

    if not images:
        return _placeholder(), _status("idle")

    n = len(images)
    all_results, errors, models_used = [], [], set()

    with concurrent.futures.ThreadPoolExecutor(max_workers=min(n, 4)) as pool:
        futs = [pool.submit(analyze_one, img) for img in images]
        for fut in concurrent.futures.as_completed(futs):
            res, meta = fut.result()
            if res is not None:
                all_results.append(res)
                models_used.add(meta)  # meta is the model name on success
            else:
                errors.append(meta)    # ...and the error text on failure

    # The UI only shows a failure count, so keep the actual reasons in the log.
    for err in errors:
        print(f"[analyze] image failed: {err}")

    if not all_results:
        return (
            _error("Analysis failed — all models returned errors.<br>"
                   "Make sure <b>HF_TOKEN</b> is set in Space Secrets."),
            _status("error"),
        )

    merged = merge(all_results)
    model_str = " · ".join(sorted(models_used)) or "AI"
    warn = (f"<br><small style='color:#d97706;'>⚠️ {len(errors)} image(s) failed</small>"
            if errors else "")
    result_h = build_cards(merged, n, model_str, warn)
    status_h = _status("done", n, len(all_results))

    return result_h, status_h
185
+
186
+
187
+ # ──────────────────────────────────────────────────────────────
188
+ # HTML builders
189
+ # ──────────────────────────────────────────────────────────────
190
+
191
# Display metadata per component, in card order:
# (key, icon, title, description, accent colour, icon-pill background)
COMP_META = [
    ("sensors",    "🔷", "Sensors",          "Two diamond-shaped sensor plates",    "#f59e0b", "#fef3c7"),
    ("gps_device", "📡", "GPS Device",       "White electronic box — upper corner", "#3b82f6", "#dbeafe"),
    ("prime_logo", "🔵", "Prime Logo",       "Amazon Prime arrow/swoosh mark",      "#8b5cf6", "#ede9fe"),
    ("trailer_id", "🏷️", "Trailer ID Label", "Vertical strip on the corner post",   "#10b981", "#d1fae5"),
]
197
+
198
+
199
def build_cards(merged: dict, img_n: int, model_str: str, warn: str) -> str:
    """Render the results panel: a summary banner plus one card per component.

    Args:
        merged: Output of ``merge``: ``{key: {"found", "confidence", "notes"}}``.
        img_n: Number of images analyzed (shown in the banner).
        model_str: Human-readable list of the models that produced results.
        warn: Pre-built HTML appended to the banner subtitle; inserted as-is.

    Returns:
        An HTML fragment for the results pane.
    """
    # Local import so this block does not depend on the file's import header.
    from html import escape

    found_n = sum(1 for k, *_ in COMP_META if merged.get(k, {}).get("found"))
    total = len(COMP_META)

    # Banner palette: green when everything is present, amber when at least
    # two components were found, red otherwise.
    if found_n == total:
        sc, sb, se, si, sl = "#16a34a", "#f0fdf4", "#86efac", "✅", "All Clear"
    elif found_n >= 2:
        sc, sb, se, si, sl = "#d97706", "#fffbeb", "#fde68a", "⚠️", "Partial"
    else:
        sc, sb, se, si, sl = "#dc2626", "#fef2f2", "#fca5a5", "❌", "Issues Found"

    conf_colors = {"high": "#16a34a", "medium": "#d97706", "low": "#dc2626"}

    rows = []
    for key, icon, name, desc, _accent, pill in COMP_META:
        d = merged.get(key, {})
        found = d.get("found", False)
        conf = str(d.get("confidence", "low"))
        notes = d.get("notes", "")

        rbg = "#f0fdf4" if found else "#fef2f2"
        rbd = "#bbf7d0" if found else "#fecaca"
        stc = "#15803d" if found else "#b91c1c"
        stx = "✅ Found" if found else "❌ Missing"
        cdc = conf_colors.get(conf, "#9ca3af")

        # notes is free text produced by the model (and may echo text read
        # from the photo), so it must be escaped before going into HTML.
        note = (
            f'<div style="margin-top:8px;padding-top:8px;border-top:1px solid {rbd};'
            f'font-size:12px;color:#4b5563;font-style:italic;line-height:1.5;">'
            f'"{escape(str(notes))}"</div>'
            if notes else ""
        )

        rows.append(f"""
<div style="background:{rbg};border:1.5px solid {rbd};border-radius:12px;
            padding:14px 16px;margin-bottom:10px;">
  <div style="display:flex;align-items:flex-start;gap:12px;">
    <div style="background:{pill};border-radius:10px;padding:9px 11px;
                font-size:20px;line-height:1;flex-shrink:0;">{icon}</div>
    <div style="flex:1;min-width:0;">
      <div style="font-weight:700;font-size:14px;color:#111827;">{name}</div>
      <div style="font-size:11px;color:#9ca3af;margin-top:1px;">{desc}</div>
      {note}
    </div>
    <div style="text-align:right;flex-shrink:0;padding-left:8px;">
      <div style="font-weight:700;color:{stc};font-size:13px;white-space:nowrap;">{stx}</div>
      <div style="font-size:11px;color:{cdc};margin-top:3px;">● {escape(conf.capitalize())}</div>
    </div>
  </div>
</div>""")

    plural = "s" if img_n > 1 else ""
    return f"""
<div style="font-family:-apple-system,BlinkMacSystemFont,'Segoe UI',sans-serif;">
  <div style="background:{sb};border:2px solid {se};border-radius:14px;
              padding:16px 20px;margin-bottom:18px;
              display:flex;align-items:center;justify-content:space-between;gap:12px;">
    <div>
      <div style="font-size:18px;font-weight:800;color:{sc};">
        {si} {found_n}/{total} — {sl}
      </div>
      <div style="font-size:12px;color:#6b7280;margin-top:3px;">
        {img_n} image{plural} · {model_str}{warn}
      </div>
    </div>
    <div style="font-size:36px;">🚛</div>
  </div>
  {"".join(rows)}
</div>"""
263
+
264
+
265
def _placeholder() -> str:
    """Return the empty-state markup shown in the results pane before any upload."""
    return """
<div style="text-align:center;padding:60px 20px;color:#94a3b8;
            font-family:-apple-system,sans-serif;">
  <div style="font-size:48px;margin-bottom:14px;">📷</div>
  <div style="font-size:15px;font-weight:600;color:#64748b;">Upload trailer images to begin</div>
  <div style="font-size:13px;margin-top:6px;">Front view, rear view, or both — all work</div>
</div>"""
273
+
274
+
275
def _status(state: str, total: int = 0, ok: int = 0) -> str:
    """Return the one-line status markup shown under the Analyze button.

    Args:
        state: One of "idle", "done" or "error"; anything else renders as
            "idle".
        total: Number of images submitted for analysis (used by "done").
        ok: Number of images analyzed successfully (used by "done").

    Returns:
        A small centred ``<div>`` with a coloured icon and message.
    """
    msgs = {
        "idle":  ("🟡", "#d97706", "Waiting for images"),
        "done":  ("🟢", "#16a34a", f"{ok}/{total} image{'s' if total > 1 else ''} processed"),
        "error": ("🔴", "#dc2626", "Analysis failed — check HF_TOKEN secret"),
    }
    icon, color, text = msgs.get(state, msgs["idle"])
    return (
        f'<div style="font-size:12px;color:{color};text-align:center;'
        f'padding:6px 0 2px;">{icon} {text}</div>'
    )
286
+
287
+
288
def _error(msg: str) -> str:
    """Wrap *msg* in the red error card used by the results pane.

    *msg* is inserted as-is, so callers may pass inline HTML.
    """
    box_style = (
        "background:#fef2f2;border:1.5px solid #fca5a5;border-radius:12px;"
        "padding:20px;color:#b91c1c;font-family:sans-serif;font-size:14px;"
    )
    return f'<div style="{box_style}">⚠️ {msg}</div>'
294
+
295
+
296
+ # ──────────────────────────────────────────────────────────────
297
+ # Gradio UI
298
+ # ──────────────────────────────────────────────────────────────
299
+
300
# Page-level style overrides: centred fixed-width layout, rounded upload box
# and button, and Gradio's default footer hidden.
CSS = """
.gradio-container { max-width: 980px !important; margin: auto !important; }
#upload-box .wrap { border-radius: 12px !important; min-height: 120px; }
#analyze-btn { font-size: 15px !important; font-weight: 700 !important;
               letter-spacing: .02em; border-radius: 10px !important; }
footer { display: none !important; }
"""

THEME = gr.themes.Soft(
    primary_hue=gr.themes.colors.blue,
    neutral_hue=gr.themes.colors.slate,
    font=[gr.themes.GoogleFont("DM Sans"), "sans-serif"],
)

with gr.Blocks(title="🚛 Amazon Trailer Inspector", theme=THEME, css=CSS) as demo:

    # ── Header ────────────────────────────────────────────────
    gr.HTML("""
    <div style="text-align:center;padding:30px 0 18px;font-family:sans-serif;">
      <div style="font-size:46px;margin-bottom:10px;">🚛</div>
      <h1 style="font-size:26px;font-weight:800;color:#0f172a;margin:0 0 6px;">
        Amazon Trailer Inspector
      </h1>
      <p style="color:#64748b;font-size:14px;margin:0;">
        AI-powered verification of required trailer components from photos
      </p>
    </div>""")

    # ── Two-column layout ─────────────────────────────────────
    with gr.Row(equal_height=False):

        # Left – upload + checklist
        with gr.Column(scale=1, min_width=280):

            gr.HTML("""
            <div style="background:#f8fafc;border:1px solid #e2e8f0;border-radius:14px;
                        padding:16px 18px;margin-bottom:14px;">
              <div style="font-weight:700;font-size:12px;color:#475569;
                          letter-spacing:.06em;text-transform:uppercase;margin-bottom:12px;">
                What we check
              </div>
              <div style="display:grid;gap:9px;font-size:13px;color:#334155;">
                <div style="display:flex;align-items:center;gap:10px;">
                  <span style="background:#fef3c7;border-radius:7px;padding:4px 9px;font-size:15px;">🔷</span>
                  <span><b>Sensors</b> — two diamond-shaped plates</span>
                </div>
                <div style="display:flex;align-items:center;gap:10px;">
                  <span style="background:#dbeafe;border-radius:7px;padding:4px 9px;font-size:15px;">📡</span>
                  <span><b>GPS Device</b> — white box, top corner</span>
                </div>
                <div style="display:flex;align-items:center;gap:10px;">
                  <span style="background:#ede9fe;border-radius:7px;padding:4px 9px;font-size:15px;">🔵</span>
                  <span><b>Prime Logo</b> — Amazon Prime mark</span>
                </div>
                <div style="display:flex;align-items:center;gap:10px;">
                  <span style="background:#d1fae5;border-radius:7px;padding:4px 9px;font-size:15px;">🏷️</span>
                  <span><b>Trailer ID</b> — corner post label strip</span>
                </div>
              </div>
            </div>""")

            file_input = gr.File(
                label="Upload Trailer Image(s)",
                file_count="multiple",
                file_types=["image"],
                type="filepath",  # HF Spaces: returns plain string paths
                elem_id="upload-box",
            )

            gr.HTML("""
            <p style="font-size:12px;color:#94a3b8;text-align:center;margin:8px 0 14px;">
              💡 Upload both rear &amp; side views for best results
            </p>""")

            analyze_btn = gr.Button(
                "🔍 Analyze Trailer",
                variant="primary",
                size="lg",
                elem_id="analyze-btn",
            )

            status_html = gr.HTML(_status("idle"))

        # Right – results panel
        with gr.Column(scale=1, min_width=320):
            result_html = gr.HTML(_placeholder())

    # ── Footer ────────────────────────────────────────────────
    gr.HTML("""
    <div style="text-align:center;padding:20px 0 10px;color:#94a3b8;
                font-size:12px;font-family:sans-serif;">
      Gemma 3 · Llama 3.2 Vision · Qwen2.5-VL &nbsp;|&nbsp;
      Images processed in parallel &nbsp;|&nbsp; No data is stored
    </div>""")

    # ── Wiring ────────────────────────────────────────────────
    # One click runs the whole pipeline and refreshes both the results panel
    # and the status line.
    analyze_btn.click(
        fn=analyze,
        inputs=[file_input],
        outputs=[result_html, status_html],
    )


# HF Spaces handles host/port — no arguments needed
demo.launch()