"""Amazon Trailer Inspector — app.py

HuggingFace Spaces · Gradio 5.x · Free vision LLMs

FIXES over previous version:
- Uses huggingface_hub InferenceClient(provider="auto") — the HF router picks a
  working provider (api-inference.huggingface.co is deprecated)
- Correct chat-completions message format for HF Serverless Inference
- Updated model list to currently-working free vision models
- Removed blocking whoami() startup check
- Robust JSON extraction with multi-pass recovery
- Detailed per-model error logging to Space logs
"""

import base64
import concurrent.futures
import io
import json
import os
import re

import gradio as gr
from PIL import Image
from huggingface_hub import InferenceClient

# ──────────────────────────────────────────────────────────────────────────────
# MODELS — ordered by reliability on HF free tier (most reliable first)
# ──────────────────────────────────────────────────────────────────────────────
# Confirmed DEPLOYED vision models — verified from HF inference/models table April 9 2026
#   google/gemma-4-31B-it     → novita (cheapest) + together (fastest) ✅ VISION
#   google/gemma-4-26B-A4B-it → novita ✅ VISION (MoE: faster/cheaper)
#   Qwen/Qwen3-VL-8B-Instruct → novita + together ✅ VISION (VL = Vision-Language)
MODELS = [
    "google/gemma-4-31B-it",      # Primary — best quality, novita + together
    "google/gemma-4-26B-A4B-it",  # Fallback 1 — MoE variant, faster (4B active params)
    "Qwen/Qwen3-VL-8B-Instruct",  # Fallback 2 — dedicated VL model, novita + together
]

# ──────────────────────────────────────────────────────────────────────────────
# DETECTION PROMPT
# ──────────────────────────────────────────────────────────────────────────────
# NOTE: a previous edit left literal diff markers (+/-) and a duplicated
# GPS_DEVICE bullet inside this prompt string; the section has been merged
# into a single clean description.
DETECTION_PROMPT = """You are a precise visual inspector for Amazon trailer fleets.
Carefully examine the full trailer image and locate these 4 components:

1. SENSORS — Exactly TWO silver/beige DIAMOND (rhombus/rotated-square) shaped metal plates.
   They are mounted near the lower-rear area on the back doors of the trailer.

2. GPS_DEVICE — A small rectangular electronic tracking box mounted on the upper
   rear area / upper corner of the trailer rear face. About the size of a paperback book.
   It may:
   - be white, gray, or black
   - include cables, mounts, or connectors
   - appear inside a recessed panel or metal frame
   - not be a perfect rectangle

3. PRIME_LOGO — The Amazon Prime branding logo: the word "prime" OR "amazon" OR the
   Amazon arrow/smile logo OR both. Can be full or partially visible, on rear or side
   of trailer. Find it carefully. It can be partial, small/tiny, large etc.

4. TRAILER_ID — A vertical fluorescent-green or yellow-green label strip on the
   corner post/pillar, showing an alphanumeric code like "SV2602705".

IMPORTANT: Reply ONLY with valid JSON — absolutely no extra text before or after, no markdown fences:
{
 "sensors": {"found": true, "confidence": "high", "notes": "two diamond plates visible lower-left"},
 "gps_device": {"found": false, "confidence": "medium", "notes": "top corner not visible in this angle"},
 "prime_logo": {"found": true, "confidence": "high", "notes": "prime word visible on rear panel"},
 "trailer_id": {"found": true, "confidence": "high", "notes": "SV2602705 on right corner post"}
}"""

# The four component keys every model response must contain.
KEYS = ["sensors", "gps_device", "prime_logo", "trailer_id"]


# ──────────────────────────────────────────────────────────────────────────────
# IMAGE HELPERS
# ──────────────────────────────────────────────────────────────────────────────

def pil_to_b64(img: Image.Image, max_side: int = 1024) -> str:
    """Resize large images (longest side <= max_side) and encode as base64 JPEG."""
    img = img.copy().convert("RGB")
    if max(img.size) > max_side:
        img.thumbnail((max_side, max_side), Image.LANCZOS)
    buf = io.BytesIO()
    img.save(buf, format="JPEG", quality=82)
    return base64.b64encode(buf.getvalue()).decode("utf-8")


def load_images(file_paths) -> list[Image.Image]:
    """Load PIL images from Gradio 5.x file paths (str or filepath objects).

    Unreadable entries are skipped with a log line rather than aborting the batch.
    """
    imgs: list[Image.Image] = []
    if not file_paths:
        return imgs
    if isinstance(file_paths, str):
        file_paths = [file_paths]
    for p in file_paths:
        try:
            # Gradio may hand back plain strings or tempfile-like objects with .name
            path = p if isinstance(p, str) else getattr(p, "name", str(p))
            imgs.append(Image.open(path).convert("RGB"))
        except Exception as e:
            print(f"[load_images] skipped {p}: {e}")
    return imgs


# ──────────────────────────────────────────────────────────────────────────────
# JSON EXTRACTION — multi-pass recovery
# ──────────────────────────────────────────────────────────────────────────────

def extract_json(text: str) -> dict | None:
    """Try multiple strategies to pull valid JSON from LLM output.

    Pass 1: parse the outermost {...} block directly.
    Pass 2: strip trailing commas, then parse.
    Pass 3: salvage per-key sub-objects ("sensors": {...}, etc.) individually.
    Returns None when nothing parseable is found.
    """
    if not text:
        return None
    # Strip markdown code fences the model may add despite instructions
    text = re.sub(r"```(?:json)?", "", text, flags=re.IGNORECASE).replace("```", "").strip()
    # Find outermost { ... } block
    m = re.search(r"\{[\s\S]*\}", text)
    if not m:
        return None
    raw = m.group()

    # Pass 1: direct parse
    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        pass

    # Pass 2: fix trailing commas before } or ]
    fixed = re.sub(r",\s*([}\]])", r"\1", raw)
    try:
        return json.loads(fixed)
    except json.JSONDecodeError:
        pass

    # Pass 3: extract only the sub-objects for our known keys
    try:
        rebuilt = {
            key: json.loads(
                re.search(rf'"{key}"\s*:\s*(\{{[^}}]+\}})', raw, re.DOTALL).group(1)
            )
            for key in KEYS
            if re.search(rf'"{key}"\s*:\s*\{{', raw)
        }
        if rebuilt:
            return rebuilt
    except Exception:
        pass
    return None


def validate_result(data: dict) -> dict | None:
    """Ensure result has all KEYS with correct types; coerce where possible.

    Returns a normalized dict, or None (hard fail) when any key is missing —
    a partial answer is treated as a model failure so a fallback model runs.
    """
    if not data:
        return None
    out = {}
    for key in KEYS:
        item = data.get(key)
        if not isinstance(item, dict):
            return None  # hard fail — missing a required key
        found = item.get("found", False)
        if isinstance(found, str):
            # Models sometimes answer "true"/"yes" as strings
            found = found.lower() in ("true", "yes", "1")
        # Normalize confidence to lowercase and the known scale so that the
        # CONF_RANK / CONF_COLOR lookups elsewhere (lowercase keys) match —
        # previously "High" would rank 0 and render as the fallback color.
        conf = str(item.get("confidence") or "low").strip().lower()
        if conf not in ("high", "medium", "low"):
            conf = "low"
        out[key] = {
            "found": bool(found),
            "confidence": conf,
            "notes": (item.get("notes") or "").strip(),
        }
    return out


# ──────────────────────────────────────────────────────────────────────────────
# LLM CALL — InferenceClient with provider routing
# ──────────────────────────────────────────────────────────────────────────────

def call_model(img: Image.Image, model: str, token: str) -> dict:
    """Call one HF vision model via InferenceClient with provider='auto'.

    This is the official HF-recommended approach after api-inference deprecation.
    Returns a validated result dict on success.
    Raises RuntimeError with a clear, short message on failure (auth, rate
    limit, cold start, or unparseable output).
    """
    b64 = pil_to_b64(img)
    short = model.split("/")[-1]
    try:
        # provider="auto" = HF router picks best available provider for this model.
        # This works for vision LLMs unlike hf-inference which is CPU-only.
        client = InferenceClient(provider="auto", api_key=token)
        resp = client.chat_completion(
            model=model,
            messages=[{
                "role": "user",
                "content": [
                    {"type": "image_url",
                     "image_url": {"url": f"data:image/jpeg;base64,{b64}"}},
                    {"type": "text", "text": DETECTION_PROMPT},
                ],
            }],
            max_tokens=512,
            temperature=0.05,
        )
        raw_content = resp.choices[0].message.content
    except Exception as e:
        # Map common HTTP status codes (found in the exception text) to
        # actionable messages; truncate provider error bodies for readability.
        err = str(e)
        if "401" in err or "403" in err:
            raise RuntimeError(f"{short}: auth error — check HF_TOKEN ({err[:120]})")
        elif "404" in err:
            raise RuntimeError(f"{short}: 404 — model not on free serverless tier ({err[:120]})")
        elif "429" in err:
            raise RuntimeError(f"{short}: rate limited — retry in ~60s")
        elif "503" in err or "502" in err:
            raise RuntimeError(f"{short}: model loading/unavailable — retry shortly")
        else:
            raise RuntimeError(f"{short}: {err[:200]}")

    print(f"[{short}] raw LLM output: {raw_content[:300]}")  # visible in Space logs
    data = extract_json(raw_content)
    result = validate_result(data)
    if result is None:
        raise RuntimeError(
            f"{short}: could not extract valid JSON.\n"
            f"Raw output (first 300 chars): {raw_content[:300]}"
        )
    return result
# ──────────────────────────────────────────────────────────────────────────────
# PER-IMAGE ANALYSIS — try each model in order
# ──────────────────────────────────────────────────────────────────────────────

def analyze_one(img: Image.Image, token: str) -> tuple[dict | None, str]:
    """Try MODELS in order for a single image.

    Returns (result_dict, model_short_name) on success,
            (None, joined_error_string) on total failure.
    """
    errors: list[str] = []
    for model in MODELS:
        short = model.split("/")[-1]
        try:
            result = call_model(img, model, token)
            print(f"[analyze_one] SUCCESS with {short}")
            return result, short
        except RuntimeError as e:
            msg = str(e)
            print(f"[analyze_one] FAIL {msg}")
            errors.append(msg)
    return None, " | ".join(errors)


# ──────────────────────────────────────────────────────────────────────────────
# RESULT MERGING
# ──────────────────────────────────────────────────────────────────────────────

# Ordering of confidence labels; "" ranks lowest so missing values never win.
CONF_RANK = {"high": 3, "medium": 2, "low": 1, "": 0}


def merge(results: list[dict]) -> dict:
    """Merge per-image results: found=True wins across images; highest
    confidence wins; the first non-empty notes string is kept."""
    merged = {k: {"found": False, "confidence": "low", "notes": ""} for k in KEYS}
    for res in results:
        if not res:
            continue
        for k in KEYS:
            src = res.get(k, {})
            if src.get("found"):
                merged[k]["found"] = True
                if CONF_RANK.get(src.get("confidence", ""), 0) > CONF_RANK.get(merged[k]["confidence"], 0):
                    merged[k]["confidence"] = src["confidence"]
                if src.get("notes") and not merged[k]["notes"]:
                    merged[k]["notes"] = src["notes"]
    return merged


# ──────────────────────────────────────────────────────────────────────────────
# MAIN GRADIO CALLBACK
# ──────────────────────────────────────────────────────────────────────────────

def analyze(file_paths):
    """Main Gradio callback. Returns (result_html, status_html)."""
    token = os.environ.get("HF_TOKEN", "").strip()

    # ── Token guard — show actionable message ──────────────────────────────
    if not token:
        return (
            _error(
                "Setup required: HF_TOKEN not set.<br><br>"
                "Go to your Space → Settings → Repository Secrets "
                "→ add a secret named <b>HF_TOKEN</b> with your "
                "HuggingFace Read token.<br>"
                "Get a free token at "
                '<a href="https://huggingface.co/settings/tokens" target="_blank">'
                "huggingface.co/settings/tokens</a>"
            ),
            _status("error"),
        )

    images = load_images(file_paths)
    if not images:
        return _placeholder(), _status("idle")

    n = len(images)
    print(f"[analyze] processing {n} image(s)")

    all_results, all_errors, models_used = [], [], set()

    # Parallel: one thread per image (up to 4). Outcomes are collected keyed by
    # input index and then read back in order — previously results were appended
    # in as_completed order, which made merge()'s "first notes win" rule
    # nondeterministic across runs.
    outcomes: dict[int, tuple[dict | None, str]] = {}
    with concurrent.futures.ThreadPoolExecutor(max_workers=min(n, 4)) as pool:
        futs = {pool.submit(analyze_one, img, token): i
                for i, img in enumerate(images)}
        for fut in concurrent.futures.as_completed(futs):
            outcomes[futs[fut]] = fut.result()

    for i in sorted(outcomes):
        res, meta = outcomes[i]
        if res is not None:
            all_results.append(res)
            models_used.add(meta)
        else:
            all_errors.append(meta)

    if not all_results:
        err_lines = "<br>".join(all_errors) or "Unknown error"
        return (
            _error(
                f"All models failed for all images.<br><br>"
                f"Exact errors:<br>{err_lines}<br><br>"
                f"Most likely fixes:<br>"
                f"• 401/403 → HF_TOKEN is wrong or expired — regenerate at "
                f"hf.co/settings/tokens<br>"
                f"• 429 → Rate limited — wait 60 seconds and retry<br>"
                f"• 404 → Model temporarily unavailable — retry or report as issue<br>"
                f"• 503 → Model is loading (cold start) — wait 30s and retry"
            ),
            _status("error"),
        )

    merged = merge(all_results)
    model_str = " · ".join(sorted(models_used)) or "AI"
    warn = ""
    if all_errors:
        warn = (
            f"<br>⚠️ {len(all_errors)} image(s) failed — "
            f"{all_errors[0][:100]}"
        )

    return build_cards(merged, n, model_str, warn), _status("done", n, len(all_results))


# ──────────────────────────────────────────────────────────────────────────────
# HTML BUILDERS
# ──────────────────────────────────────────────────────────────────────────────
# NOTE(review): the HTML markup inside the f-strings below was corrupted in
# this copy of the file (tags stripped, only text fragments remained). The
# markup has been reconstructed minimally, carrying the same texts, emojis and
# color codes that survived — verify the rendering against the deployed Space.

# (key, icon, display name, description, accent color, pill background)
COMP_META = [
    ("sensors", "🔷", "Sensors", "Two diamond-shaped sensor plates", "#f59e0b", "#fef3c7"),
    ("gps_device", "📡", "GPS Device", "White electronic box — upper corner", "#3b82f6", "#dbeafe"),
    ("prime_logo", "🔶", "Prime Logo", "Amazon Prime logo (full or partial)", "#f97316", "#fff7ed"),
    ("trailer_id", "🏷️", "Trailer ID Label", "Vertical strip on the corner post", "#10b981", "#d1fae5"),
]

# Text color per confidence level (lowercase keys — validate_result normalizes).
CONF_COLOR = {"high": "#15803d", "medium": "#b45309", "low": "#b91c1c"}


def build_cards(merged: dict, img_n: int, model_str: str, warn: str) -> str:
    """Render the merged detection result as an HTML report card."""
    found_n = sum(1 for k, *_ in COMP_META if merged.get(k, {}).get("found"))
    total = len(COMP_META)
    all_ok = found_n == total

    # Banner colours: (accent, background, border, icon, label)
    if all_ok:
        sc, sb, se, si, sl = "#16a34a", "#f0fdf4", "#86efac", "✅", "All Clear — All Components Found"
    elif found_n >= 3:
        sc, sb, se, si, sl = "#d97706", "#fffbeb", "#fde68a", "⚠️", "Mostly Complete"
    elif found_n >= 2:
        sc, sb, se, si, sl = "#ea580c", "#fff7ed", "#fed7aa", "⚠️", "Partially Complete"
    else:
        sc, sb, se, si, sl = "#dc2626", "#fef2f2", "#fca5a5", "❌", "Missing Components"

    rows = ""
    for key, icon, name, desc, accent, pill in COMP_META:
        d = merged.get(key, {})
        found = d.get("found", False)
        conf = d.get("confidence", "low")
        notes = d.get("notes", "")
        rbg = "#f0fdf4" if found else "#fef2f2"   # row background
        rbd = "#bbf7d0" if found else "#fecaca"   # row border
        stc = "#15803d" if found else "#b91c1c"   # status text color
        stx = "✅ Found" if found else "❌ Missing"
        cdc = CONF_COLOR.get(conf, "#9ca3af")
        note_html = (
            f'<div style="font-size:12px;color:#64748b;margin-top:2px">'
            f'"{notes}"</div>'
        ) if notes else ""
        rows += f"""
        <div style="display:flex;align-items:center;gap:10px;padding:10px 12px;
                    margin:6px 0;border-radius:10px;background:{rbg};
                    border:1px solid {rbd}">
          <div style="font-size:22px;background:{pill};border-radius:8px;
                      padding:6px;border-left:3px solid {accent}">{icon}</div>
          <div style="flex:1">
            <div style="font-weight:700">{name}</div>
            <div style="font-size:12px;color:#64748b">{desc}</div>
            {note_html}
          </div>
          <div style="text-align:right">
            <div style="font-weight:700;color:{stc}">{stx}</div>
            <div style="font-size:12px;color:{cdc}">● {conf.capitalize()}</div>
          </div>
        </div>"""

    return f"""
    <div style="border:1px solid {se};border-radius:14px;overflow:hidden">
      <div style="background:{sb};border-bottom:1px solid {se};padding:14px 16px">
        <div style="font-size:18px;font-weight:800;color:{sc}">
          {si} {found_n}/{total} — {sl}
        </div>
        <div style="font-size:12px;color:#64748b">
          {img_n} image{'s' if img_n > 1 else ''} · {model_str}{warn}
        </div>
      </div>
      <div style="padding:8px 12px">
        <div style="font-size:26px;text-align:center">🚛</div>
        {rows}
      </div>
    </div>"""


def _placeholder() -> str:
    """HTML shown before any image has been analyzed."""
    return """
    <div style="text-align:center;padding:48px 16px;color:#64748b;
                border:2px dashed #cbd5e1;border-radius:14px">
      <div style="font-size:40px">📷</div>
      <div style="font-weight:700;margin-top:8px">Upload trailer images to begin</div>
      <div style="font-size:13px">Front view, rear view, or both — all work</div>
    </div>"""


def _status(state: str, total: int = 0, ok: int = 0) -> str:
    """Small status pill: idle / done (with counts) / error."""
    msgs = {
        "idle": ("🟡", "#d97706", "Waiting for images"),
        "done": ("🟢", "#16a34a", f"{ok}/{total} image{'s' if total > 1 else ''} processed"),
        "error": ("🔴", "#dc2626", "See error details →"),
    }
    icon, color, text = msgs.get(state, msgs["idle"])
    return (
        f'<div style="text-align:center;padding:6px;font-size:13px">'
        f'{icon} <span style="color:{color};font-weight:600">{text}</span></div>'
    )


def _error(msg: str) -> str:
    """Red error panel wrapping an HTML message."""
    return (
        f'<div style="background:#fef2f2;border:1px solid #fca5a5;color:#b91c1c;'
        f'border-radius:12px;padding:16px;font-size:13px;line-height:1.6">{msg}</div>'
    )


# ──────────────────────────────────────────────────────────────────────────────
# STARTUP LOG
# ──────────────────────────────────────────────────────────────────────────────

_tok = os.environ.get("HF_TOKEN", "")
print("=" * 60)
print(" Amazon Trailer Inspector — startup")
print(f" HF_TOKEN : {'SET (' + str(len(_tok)) + ' chars)' if _tok else 'NOT SET ← add to Space Secrets!'}")
print(f" Models : {[m.split('/')[-1] for m in MODELS]}")
print(f" Method : InferenceClient(provider='auto') — router selects best provider")
print("=" * 60)

# ──────────────────────────────────────────────────────────────────────────────
# GRADIO UI
# ──────────────────────────────────────────────────────────────────────────────

# Yellow warning banner shown in the header when HF_TOKEN is missing.
TOKEN_BANNER = "" if _tok else (
    '<div style="background:#fffbeb;border:1px solid #fde68a;color:#92400e;'
    'border-radius:10px;padding:10px 14px;font-size:13px;margin:8px auto;max-width:720px">'
    '⚠️ <b>HF_TOKEN not set.</b> Space Settings → Repository Secrets → add '
    '<b>HF_TOKEN</b> = your Read token from '
    '<a href="https://huggingface.co/settings/tokens" target="_blank">'
    'huggingface.co/settings/tokens</a>'
    '</div>'
)

CSS = """
.gradio-container { max-width: 980px !important; margin: auto !important; }
#analyze-btn { font-size: 15px !important; font-weight: 700 !important;
               letter-spacing: .02em !important; border-radius: 10px !important; }
footer { display: none !important; }
"""

THEME = gr.themes.Soft(
    primary_hue=gr.themes.colors.blue,
    neutral_hue=gr.themes.colors.slate,
    font=[gr.themes.GoogleFont("DM Sans"), "sans-serif"],
)

with gr.Blocks(title="🚛 Amazon Trailer Inspector", theme=THEME, css=CSS) as demo:
    gr.HTML(f"""
    <div style="text-align:center;padding:18px 0 4px">
      <div style="font-size:42px">🚛</div>
      <h1 style="margin:4px 0;font-size:26px">Amazon Trailer Inspector</h1>
      <p style="color:#64748b;margin:0;font-size:14px">
        AI-powered verification of required trailer components from photos
      </p>
    </div>
    {TOKEN_BANNER}""")

    with gr.Row(equal_height=False):
        # LEFT COLUMN — upload + checklist
        with gr.Column(scale=1, min_width=280):
            gr.HTML("""
            <div style="border:1px solid #e2e8f0;border-radius:12px;padding:12px 14px;font-size:13px">
              <div style="font-weight:700;margin-bottom:6px">What we check</div>
              <div>🔷 <b>Sensors</b> — two diamond-shaped plates</div>
              <div>📡 <b>GPS Device</b> — white box, top corner</div>
              <div>🔶 <b>Prime Logo</b> — Amazon Prime mark</div>
              <div>🏷️ <b>Trailer ID</b> — corner post label strip</div>
            </div>""")
            file_input = gr.File(
                label="Upload Trailer Image(s)",
                file_count="multiple",
                file_types=["image"],
                type="filepath",
            )
            gr.HTML("""
            <div style="font-size:12px;color:#64748b;padding:4px 2px">
              💡 Upload front, rear, or side views — more angles = better accuracy
            </div>""")
            analyze_btn = gr.Button(
                "🔍 Analyze Trailer",
                variant="primary",
                size="lg",
                elem_id="analyze-btn",
            )
            status_html = gr.HTML(_status("idle"))

        # RIGHT COLUMN — results
        with gr.Column(scale=1, min_width=320):
            result_html = gr.HTML(_placeholder())

    # Footer derived from MODELS so it can never drift out of sync with the
    # actual model list (a previous copy hard-coded stale model names here).
    gr.HTML(
        '<div style="text-align:center;font-size:12px;color:#94a3b8;padding:10px">'
        + " · ".join(m.split("/")[-1] for m in MODELS)
        + " &nbsp;|&nbsp; Images processed in parallel &nbsp;|&nbsp; No data stored"
        + "</div>"
    )

    analyze_btn.click(
        fn=analyze,
        inputs=[file_input],
        outputs=[result_html, status_html],
    )

demo.launch()