mudflap_LLM / app.py
mlbench123's picture
Update app.py
14785c0 verified
"""
Amazon Trailer Inspector — app.py
HuggingFace Spaces · Gradio 5.x · Free vision LLMs
FIXES over previous version:
- Calls models through huggingface_hub.InferenceClient with provider="auto"
- Correct chat-completions message format for HF Serverless Inference
- Updated model list to currently-working free vision models
- Removed blocking whoami() startup check
- Robust JSON extraction with multi-pass recovery
- Detailed per-model error logging to Space logs
"""
import base64
import concurrent.futures
import html
import io
import json
import os
import re

import gradio as gr
from huggingface_hub import InferenceClient
from PIL import Image
# ──────────────────────────────────────────────────────────────────────────────
# MODELS — ordered by reliability on HF free tier (most reliable first)
# ──────────────────────────────────────────────────────────────────────────────
# Confirmed DEPLOYED vision models — verified from the HF inference/models
# table on April 9 2026. Providers serving each model:
#   google/gemma-4-31B-it      -> novita (cheapest) + together (fastest)
#   google/gemma-4-26B-A4B-it  -> novita (MoE: faster/cheaper)
#   Qwen/Qwen3-VL-8B-Instruct  -> novita + together (VL = Vision-Language)
_PRIMARY_MODEL = "google/gemma-4-31B-it"  # best quality
_MOE_FALLBACK_MODEL = "google/gemma-4-26B-A4B-it"  # MoE variant, 4B active params
_VL_FALLBACK_MODEL = "Qwen/Qwen3-VL-8B-Instruct"  # dedicated vision-language model

# Models are attempted in list order; later entries are fallbacks.
MODELS = [_PRIMARY_MODEL, _MOE_FALLBACK_MODEL, _VL_FALLBACK_MODEL]
# HF Serverless Inference — new router endpoint (api-inference.huggingface.co is deprecated as of 2026)
# ──────────────────────────────────────────────────────────────────────────────
# DETECTION PROMPT
# ──────────────────────────────────────────────────────────────────────────────
# Instruction sent with every image. It describes the four components to look
# for and pins the reply format to a bare JSON object keyed by KEYS.
# NOTE: the GPS_DEVICE entry previously contained leftover diff markers
# ("- GPS_DEVICE ...", "+ It may:") which duplicated and contradicted the
# description; the two variants are merged into one coherent entry here.
DETECTION_PROMPT = """You are a precise visual inspector for Amazon trailer fleets.
Carefully examine the full trailer image and locate these 4 components:

1. SENSORS — Exactly TWO silver/beige DIAMOND (rhombus/rotated-square) shaped metal plates.
   They are mounted near the lower-rear area on the back doors of the trailer.

2. GPS_DEVICE — A tracking device mounted on the upper rear area of the trailer, usually a
   small rectangular electronic box at the upper corner of the trailer rear face.
   About the size of a paperback book.
   It may:
   - be white, gray, or black
   - include cables, mounts, or connectors
   - appear inside a recessed panel or metal frame
   - not be a perfect rectangle

3. PRIME_LOGO — The Amazon Prime branding logo: the word "prime" OR "amazon" OR the Amazon arrow/smile logo
   OR both. Can be full or partially visible, on rear or side of trailer. Find it carefully. It can be partial, small/tiny, large etc.

4. TRAILER_ID — A vertical fluorescent-green or yellow-green label strip on the corner post/pillar,
   showing an alphanumeric code like "SV2602705".

IMPORTANT: Reply ONLY with valid JSON — absolutely no extra text before or after, no markdown fences:
{
  "sensors": {"found": true, "confidence": "high", "notes": "two diamond plates visible lower-left"},
  "gps_device": {"found": false, "confidence": "medium", "notes": "top corner not visible in this angle"},
  "prime_logo": {"found": true, "confidence": "high", "notes": "prime word visible on rear panel"},
  "trailer_id": {"found": true, "confidence": "high", "notes": "SV2602705 on right corner post"}
}"""

# Top-level keys every model reply must contain, in display order.
KEYS = ["sensors", "gps_device", "prime_logo", "trailer_id"]
# ──────────────────────────────────────────────────────────────────────────────
# IMAGE HELPERS
# ──────────────────────────────────────────────────────────────────────────────
def pil_to_b64(img: Image.Image, max_side: int = 1024) -> str:
    """Return ``img`` as a base64-encoded JPEG string.

    The caller's image is left untouched: a copy is converted to RGB, shrunk
    in place (aspect ratio preserved) when its longest side exceeds
    ``max_side``, and saved as JPEG at quality 82.
    """
    rgb = img.copy().convert("RGB")
    longest_side = max(rgb.size)
    if longest_side > max_side:
        rgb.thumbnail((max_side, max_side), Image.LANCZOS)

    with io.BytesIO() as buffer:
        rgb.save(buffer, format="JPEG", quality=82)
        jpeg_bytes = buffer.getvalue()
    return base64.b64encode(jpeg_bytes).decode("utf-8")
def load_images(file_paths) -> list[Image.Image]:
    """Load RGB PIL images from the value of a Gradio file input.

    Args:
        file_paths: ``None``/empty, a single path, or an iterable of entries.
            Each entry may be a ``str``/``os.PathLike`` path or an object
            exposing the path as a ``name`` attribute.

    Returns:
        The images that could be opened, in input order. Entries that fail
        to load are logged and skipped (best effort), never raised.
    """
    if not file_paths:
        return []
    # A lone path must be wrapped, otherwise a str would be iterated per
    # character and a Path is not iterable at all.
    if isinstance(file_paths, (str, os.PathLike)):
        file_paths = [file_paths]

    imgs = []
    for p in file_paths:
        try:
            if isinstance(p, (str, os.PathLike)):
                # Use path-likes as-is: Path.name would be only the basename.
                path = p
            else:
                path = getattr(p, "name", str(p))
            # convert() loads the pixel data, so the converted image stays
            # valid after the context manager closes the underlying file.
            with Image.open(path) as src:
                imgs.append(src.convert("RGB"))
        except Exception as e:  # deliberate best effort: skip unreadable files
            print(f"[load_images] skipped {p}: {e}")
    return imgs
# ──────────────────────────────────────────────────────────────────────────────
# JSON EXTRACTION — multi-pass recovery
# ──────────────────────────────────────────────────────────────────────────────
def extract_json(text: str) -> dict | None:
"""Try multiple strategies to pull valid JSON from LLM output."""
if not text:
return None
# Strip markdown code fences
text = re.sub(r"```(?:json)?", "", text, flags=re.IGNORECASE).replace("```", "").strip()
# Find outermost { ... } block
m = re.search(r"\{[\s\S]*\}", text)
if not m:
return None
raw = m.group()
# Pass 1: direct parse
try:
return json.loads(raw)
except json.JSONDecodeError:
pass
# Pass 2: fix trailing commas
fixed = re.sub(r",\s*([}\]])", r"\1", raw)
try:
return json.loads(fixed)
except json.JSONDecodeError:
pass
# Pass 3: extract only the lines containing our keys
try:
rebuilt = {
key: json.loads(
re.search(
rf'"{key}"\s*:\s*(\{{[^}}]+\}})', raw, re.DOTALL
).group(1)
)
for key in KEYS
if re.search(rf'"{key}"\s*:\s*\{{', raw)
}
if rebuilt:
return rebuilt
except Exception:
pass
return None
def validate_result(data: dict) -> dict | None:
"""Ensure result has all keys and correct types; coerce where possible."""
if not data:
return None
out = {}
for key in KEYS:
item = data.get(key)
if not isinstance(item, dict):
return None # hard fail β€” missing a required key
found = item.get("found", False)
if isinstance(found, str):
found = found.lower() in ("true", "yes", "1")
out[key] = {
"found": bool(found),
"confidence": item.get("confidence", "low") or "low",
"notes": (item.get("notes") or "").strip(),
}
return out
# ──────────────────────────────────────────────────────────────────────────────
# LLM CALL — via huggingface_hub InferenceClient (provider="auto")
# ──────────────────────────────────────────────────────────────────────────────
def call_model(img: Image.Image, model: str, token: str) -> dict:
    """
    Call one HF vision model via InferenceClient with provider='auto'.

    The image is sent as a base64 JPEG data URL together with
    DETECTION_PROMPT in a single chat-completion user message.

    Args:
        img: Image to inspect.
        model: Full model id; the part after the last "/" is used in messages.
        token: HuggingFace API token.

    Returns:
        The validated result dict (see validate_result).

    Raises:
        RuntimeError: on any API failure (with a hint for 401/403, 404, 429
            and 502/503), on an empty reply, or when no valid JSON result can
            be extracted from the reply.
    """
    b64 = pil_to_b64(img)
    short = model.split("/")[-1]
    try:
        # provider="auto" = HF router picks best available provider for this model
        client = InferenceClient(provider="auto", api_key=token)
        resp = client.chat_completion(
            model=model,
            messages=[{
                "role": "user",
                "content": [
                    {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{b64}"}},
                    {"type": "text", "text": DETECTION_PROMPT},
                ],
            }],
            max_tokens=512,
            temperature=0.05,
        )
        raw_content = resp.choices[0].message.content
    except Exception as e:
        err = str(e)
        # Prefer the real HTTP status when the exception carries a response;
        # fall back to scanning the message text otherwise.
        status = getattr(getattr(e, "response", None), "status_code", None)

        def is_status(*codes: int) -> bool:
            if isinstance(status, int):
                return status in codes
            return any(str(code) in err for code in codes)

        if is_status(401, 403):
            raise RuntimeError(f"{short}: auth error — check HF_TOKEN ({err[:120]})") from e
        if is_status(404):
            raise RuntimeError(f"{short}: 404 — model not on free serverless tier ({err[:120]})") from e
        if is_status(429):
            raise RuntimeError(f"{short}: rate limited — retry in ~60s") from e
        if is_status(503, 502):
            raise RuntimeError(f"{short}: model loading/unavailable — retry shortly") from e
        raise RuntimeError(f"{short}: {err[:200]}") from e

    # The message content can be None/empty; slicing it below would raise a
    # TypeError that callers (which expect RuntimeError) would not handle.
    if not isinstance(raw_content, str) or not raw_content.strip():
        raise RuntimeError(f"{short}: model returned an empty response")

    print(f"[{short}] raw LLM output: {raw_content[:300]}")  # visible in Space logs
    data = extract_json(raw_content)
    result = validate_result(data)
    if result is None:
        raise RuntimeError(
            f"{short}: could not extract valid JSON.\n"
            f"Raw output (first 300 chars): {raw_content[:300]}"
        )
    return result
# ──────────────────────────────────────────────────────────────────────────────
# PER-IMAGE ANALYSIS — try each model in order
# ──────────────────────────────────────────────────────────────────────────────
def analyze_one(img: Image.Image, token: str) -> tuple[dict | None, str]:
    """
    Try MODELS in order for a single image.

    Returns (result_dict, model_short_name) on the first success, or
    (None, joined_error_string) when every model failed. Each failure is
    logged. Unexpected (non-RuntimeError) exceptions are treated as a failure
    of that model too, so one bad reply cannot abort the fallback chain or
    the caller's whole batch.
    """
    errors = []
    for model in MODELS:
        short = model.split("/")[-1]
        try:
            result = call_model(img, model, token)
        except RuntimeError as e:
            # call_model already prefixes its messages with the model name.
            msg = str(e)
        except Exception as e:  # per-model boundary: log and fall through
            msg = f"{short}: unexpected {type(e).__name__}: {str(e)[:200]}"
        else:
            print(f"[analyze_one] SUCCESS with {short}")
            return result, short
        print(f"[analyze_one] FAIL {msg}")
        errors.append(msg)
    return None, " | ".join(errors)
# ──────────────────────────────────────────────────────────────────────────────
# RESULT MERGING
# ──────────────────────────────────────────────────────────────────────────────
# Ordering of confidence labels; unknown labels are looked up with rank 0.
CONF_RANK = {label: rank for rank, label in enumerate(("", "low", "medium", "high"))}


def merge(results: list[dict]) -> dict:
    """Combine per-image results into one verdict per component.

    A component counts as found if any result reports it found. Only results
    that found the component contribute: the confidence is the highest-ranked
    one among them (starting from "low"), and the notes are the first
    non-empty notes among them. Falsy entries in ``results`` are ignored.
    """
    combined = {}
    for key in KEYS:
        found_any = False
        best_conf = "low"
        first_notes = ""
        for result in results:
            if not result:
                continue
            entry = result.get(key, {})
            if not entry.get("found"):
                continue
            found_any = True
            candidate_rank = CONF_RANK.get(entry.get("confidence", ""), 0)
            if candidate_rank > CONF_RANK.get(best_conf, 0):
                best_conf = entry["confidence"]
            if not first_notes and entry.get("notes"):
                first_notes = entry["notes"]
        combined[key] = {
            "found": found_any,
            "confidence": best_conf,
            "notes": first_notes,
        }
    return combined
# ──────────────────────────────────────────────────────────────────────────────
# MAIN GRADIO CALLBACK
# ──────────────────────────────────────────────────────────────────────────────
def analyze(file_paths):
    """Gradio callback: inspect the uploaded images and render the outcome.

    Args:
        file_paths: Value of the file input (path or list of paths).

    Returns:
        ``(result_html, status_html)``: a setup error when HF_TOKEN is
        missing, the placeholder when no image could be loaded, an error
        panel when every image failed, otherwise the merged result cards.
    """
    token = os.environ.get("HF_TOKEN", "").strip()
    # ── Token guard — show actionable message ────────────────────────────────
    if not token:
        return (
            _error(
                "<b>Setup required: HF_TOKEN not set.</b><br><br>"
                "Go to your Space → <b>Settings → Repository Secrets</b> "
                "→ add a secret named <code>HF_TOKEN</code> with your "
                "HuggingFace Read token.<br>"
                "Get a free token at "
                "<a href='https://huggingface.co/settings/tokens' target='_blank'>"
                "huggingface.co/settings/tokens</a>"
            ),
            _status("error"),
        )

    images = load_images(file_paths)
    if not images:
        return _placeholder(), _status("idle")

    n = len(images)
    print(f"[analyze] processing {n} image(s)")
    all_results, all_errors, models_used = [], [], set()

    # Parallel: one thread per image (up to 4)
    with concurrent.futures.ThreadPoolExecutor(max_workers=min(n, 4)) as pool:
        futs = [pool.submit(analyze_one, img, token) for img in images]
        for fut in concurrent.futures.as_completed(futs):
            try:
                res, meta = fut.result()
            except Exception as e:
                # A crashed worker counts as one failed image instead of
                # aborting the whole request.
                print(f"[analyze] worker crashed: {type(e).__name__}: {e}")
                all_errors.append(f"unexpected {type(e).__name__}: {e}")
                continue
            if res is not None:
                all_results.append(res)
                models_used.add(meta)
            else:
                all_errors.append(meta)

    if not all_results:
        # Error text may embed raw model output, so escape it before putting
        # it into HTML.
        err_lines = "<br>".join(
            f"<code style='font-size:11px;'>{html.escape(e)}</code>" for e in all_errors
        ) or "<code>Unknown error</code>"
        return (
            _error(
                f"<b>All models failed for all images.</b><br><br>"
                f"<b>Exact errors:</b><br>{err_lines}<br><br>"
                f"<b>Most likely fixes:</b><br>"
                f"• <b>401/403</b> → HF_TOKEN is wrong or expired — regenerate at "
                f"<a href='https://huggingface.co/settings/tokens' target='_blank'>hf.co/settings/tokens</a><br>"
                f"• <b>429</b> → Rate limited — wait 60 seconds and retry<br>"
                f"• <b>404</b> → Model temporarily unavailable — retry or report as issue<br>"
                f"• <b>503</b> → Model is loading (cold start) — wait 30s and retry"
            ),
            _status("error"),
        )

    merged = merge(all_results)
    model_str = " · ".join(sorted(models_used)) or "AI"
    warn = ""
    if all_errors:
        warn = (
            f"<br><small style='color:#d97706;'>⚠️ {len(all_errors)} image(s) failed — "
            f"{html.escape(all_errors[0][:100])}</small>"
        )
    return build_cards(merged, n, model_str, warn), _status("done", n, len(all_results))
# ──────────────────────────────────────────────────────────────────────────────
# HTML BUILDERS
# ──────────────────────────────────────────────────────────────────────────────
# One row per inspected component:
# (result key, icon, display name, description, accent colour, icon pill colour)
COMP_META = [
    ("sensors", "🔷", "Sensors", "Two diamond-shaped sensor plates", "#f59e0b", "#fef3c7"),
    ("gps_device", "📡", "GPS Device", "White electronic box — upper corner", "#3b82f6", "#dbeafe"),
    ("prime_logo", "🔶", "Prime Logo", "Amazon Prime logo (full or partial)", "#f97316", "#fff7ed"),
    ("trailer_id", "🏷️", "Trailer ID Label", "Vertical strip on the corner post", "#10b981", "#d1fae5"),
]

# Text colour for each confidence label; other labels fall back to grey.
CONF_COLOR = {"high": "#15803d", "medium": "#b45309", "low": "#b91c1c"}


def build_cards(merged: dict, img_n: int, model_str: str, warn: str) -> str:
    """Render the summary banner and one card per component as HTML.

    Args:
        merged: ``{key: {"found", "confidence", "notes"}}`` per component.
        img_n: Number of images analysed (shown in the banner).
        model_str: Label of the model(s) used (shown in the banner).
        warn: Pre-built HTML appended to the banner sub-line (may be empty).

    Returns:
        An HTML fragment. Notes and confidence come from the model, so they
        are HTML-escaped before being embedded.
    """
    found_n = sum(1 for k, *_ in COMP_META if merged.get(k, {}).get("found"))
    total = len(COMP_META)

    # Banner colours: (text, background, border, icon, label)
    if found_n == total:
        sc, sb, se, si, sl = "#16a34a", "#f0fdf4", "#86efac", "✅", "All Clear — All Components Found"
    elif found_n >= 3:
        sc, sb, se, si, sl = "#d97706", "#fffbeb", "#fde68a", "⚠️", "Mostly Complete"
    elif found_n >= 2:
        sc, sb, se, si, sl = "#ea580c", "#fff7ed", "#fed7aa", "⚠️", "Partially Complete"
    else:
        sc, sb, se, si, sl = "#dc2626", "#fef2f2", "#fca5a5", "❌", "Missing Components"

    cards = []
    for key, icon, name, desc, _accent, pill in COMP_META:
        d = merged.get(key, {})
        found = d.get("found", False)
        conf = str(d.get("confidence", "low"))
        notes = str(d.get("notes", "") or "")

        rbg = "#f0fdf4" if found else "#fef2f2"
        rbd = "#bbf7d0" if found else "#fecaca"
        stc = "#15803d" if found else "#b91c1c"
        stx = "✅ Found" if found else "❌ Missing"
        cdc = CONF_COLOR.get(conf, "#9ca3af")

        note_html = (
            f'<div style="margin-top:8px;padding-top:8px;border-top:1px solid {rbd};'
            f'font-size:12px;color:#4b5563;font-style:italic;line-height:1.5;">'
            f'"{html.escape(notes)}"</div>'
        ) if notes else ""

        cards.append(f"""
<div style="background:{rbg};border:1.5px solid {rbd};border-radius:12px;
            padding:14px 16px;margin-bottom:10px;">
  <div style="display:flex;align-items:flex-start;gap:12px;">
    <div style="background:{pill};border-radius:10px;padding:9px 11px;
                font-size:22px;line-height:1;flex-shrink:0;">{icon}</div>
    <div style="flex:1;min-width:0;">
      <div style="font-weight:700;font-size:14px;color:#111827;">{name}</div>
      <div style="font-size:11px;color:#9ca3af;margin-top:1px;">{desc}</div>
      {note_html}
    </div>
    <div style="text-align:right;flex-shrink:0;padding-left:8px;">
      <div style="font-weight:700;color:{stc};font-size:13px;white-space:nowrap;">{stx}</div>
      <div style="font-size:11px;color:{cdc};margin-top:3px;">● {html.escape(conf.capitalize())}</div>
    </div>
  </div>
</div>""")

    rows = "".join(cards)
    plural = "s" if img_n > 1 else ""
    return f"""
<div style="font-family:-apple-system,BlinkMacSystemFont,'Segoe UI',sans-serif;max-width:600px;">
  <div style="background:{sb};border:2px solid {se};border-radius:14px;
              padding:16px 20px;margin-bottom:18px;
              display:flex;align-items:center;justify-content:space-between;gap:12px;">
    <div>
      <div style="font-size:18px;font-weight:800;color:{sc};">{si} {found_n}/{total} — {sl}</div>
      <div style="font-size:12px;color:#6b7280;margin-top:3px;">
        {img_n} image{plural} · {model_str}{warn}
      </div>
    </div>
    <div style="font-size:36px;">🚛</div>
  </div>
  {rows}
</div>"""
def _placeholder() -> str:
return """
<div style="text-align:center;padding:60px 20px;color:#94a3b8;
font-family:-apple-system,BlinkMacSystemFont,'Segoe UI',sans-serif;">
<div style="font-size:48px;margin-bottom:14px;">πŸ“·</div>
<div style="font-size:15px;font-weight:600;color:#64748b;">Upload trailer images to begin</div>
<div style="font-size:13px;margin-top:6px;">Front view, rear view, or both β€” all work</div>
</div>"""
def _status(state: str, total: int = 0, ok: int = 0) -> str:
msgs = {
"idle": ("🟑", "#d97706", "Waiting for images"),
"done": ("🟒", "#16a34a", f"{ok}/{total} image{'s' if total > 1 else ''} processed"),
"error": ("πŸ”΄", "#dc2626", "See error details β†’"),
}
icon, color, text = msgs.get(state, msgs["idle"])
return (
f'<div style="font-size:12px;color:{color};text-align:center;padding:6px 0 2px;">'
f'{icon} {text}</div>'
)
def _error(msg: str) -> str:
return (
f'<div style="background:#fef2f2;border:1.5px solid #fca5a5;border-radius:12px;'
f'padding:18px 20px;color:#b91c1c;font-family:-apple-system,sans-serif;'
f'font-size:13px;line-height:1.8;">{msg}</div>'
)
# ──────────────────────────────────────────────────────────────────────────────
# STARTUP LOG
# ──────────────────────────────────────────────────────────────────────────────
# Token as seen at import time. It is stripped the same way analyze() strips
# it, so a whitespace-only value is reported as "NOT SET" here as well.
# Only the token's length is logged, never its value.
_tok = os.environ.get("HF_TOKEN", "").strip()
_tok_state = f"SET ({len(_tok)} chars)" if _tok else "NOT SET ← add to Space Secrets!"
print("=" * 60)
print(" Amazon Trailer Inspector — startup")
print(f" HF_TOKEN : {_tok_state}")
print(f" Models : {[m.split('/')[-1] for m in MODELS]}")
print(" Method : InferenceClient(provider='auto') — router selects best provider")
print("=" * 60)
# ──────────────────────────────────────────────────────────────────────────────
# GRADIO UI
# ──────────────────────────────────────────────────────────────────────────────
# Warning banner embedded in the page header; empty when a token was present
# at startup.
TOKEN_BANNER = "" if _tok else (
    '<div style="background:#fef3c7;border:1.5px solid #fde68a;border-radius:10px;'
    'padding:12px 16px;margin-bottom:14px;font-size:13px;color:#92400e;'
    'font-family:-apple-system,sans-serif;">'
    '⚠️ <b>HF_TOKEN not set.</b> Space Settings → Repository Secrets → add '
    '<code>HF_TOKEN</code> = your Read token from '
    '<a href="https://huggingface.co/settings/tokens" target="_blank">huggingface.co/settings/tokens</a>'
    '</div>'
)
# Custom stylesheet passed to gr.Blocks: caps the page width, styles the
# analyze button (targeted through its elem_id "analyze-btn") and hides the
# page's <footer> element.
CSS = """
.gradio-container { max-width: 980px !important; margin: auto !important; }
#analyze-btn { font-size: 15px !important; font-weight: 700 !important;
letter-spacing: .02em !important; border-radius: 10px !important; }
footer { display: none !important; }
"""
# Soft theme with a blue primary hue, slate neutrals and the "DM Sans" Google
# font, falling back to the generic sans-serif family.
THEME = gr.themes.Soft(
    primary_hue=gr.themes.colors.blue,
    neutral_hue=gr.themes.colors.slate,
    font=[gr.themes.GoogleFont("DM Sans"), "sans-serif"],
)
# Page layout: header, a two-column row (left: checklist, upload, button and
# status; right: results) and a footer. The analyze button feeds the uploaded
# files to analyze() and writes its two return values to the results and
# status panels.
with gr.Blocks(title="🚛 Amazon Trailer Inspector", theme=THEME, css=CSS) as demo:
    gr.HTML(f"""
    <div style="text-align:center;padding:30px 0 18px;
                font-family:-apple-system,BlinkMacSystemFont,'Segoe UI',sans-serif;">
      <div style="font-size:46px;margin-bottom:10px;">🚛</div>
      <h1 style="font-size:26px;font-weight:800;color:#0f172a;margin:0 0 6px;">
        Amazon Trailer Inspector
      </h1>
      <p style="color:#64748b;font-size:14px;margin:0;">
        AI-powered verification of required trailer components from photos
      </p>
    </div>
    {TOKEN_BANNER}""")

    with gr.Row(equal_height=False):
        # LEFT COLUMN — upload + checklist
        with gr.Column(scale=1, min_width=280):
            gr.HTML("""
            <div style="background:#f8fafc;border:1px solid #e2e8f0;border-radius:14px;
                        padding:16px 18px;margin-bottom:14px;
                        font-family:-apple-system,BlinkMacSystemFont,'Segoe UI',sans-serif;">
              <div style="font-weight:700;font-size:12px;color:#475569;
                          letter-spacing:.06em;text-transform:uppercase;margin-bottom:12px;">
                What we check
              </div>
              <div style="display:grid;gap:9px;font-size:13px;color:#000000;">
                <div style="display:flex;align-items:center;gap:10px;">
                  <span style="background:#fef3c7;border-radius:7px;padding:4px 9px;">🔷</span>
                  <span style="color:#000000 !important;">Sensors — two diamond-shaped plates</span>
                </div>
                <div style="display:flex;align-items:center;gap:10px;">
                  <span style="background:#dbeafe;border-radius:7px;padding:4px 9px;">📡</span>
                  <span style="color:#000000 !important;">GPS Device — white box, top corner</span>
                </div>
                <div style="display:flex;align-items:center;gap:10px;">
                  <span style="background:#fff7ed;border-radius:7px;padding:4px 9px;">🔶</span>
                  <span style="color:#000000 !important;">Prime Logo — Amazon Prime mark</span>
                </div>
                <div style="display:flex;align-items:center;gap:10px;">
                  <span style="background:#d1fae5;border-radius:7px;padding:4px 9px;">🏷️</span>
                  <span style="color:#000000 !important;">Trailer ID — corner post label strip</span>
                </div>
              </div>
            </div>""")
            file_input = gr.File(
                label="Upload Trailer Image(s)",
                file_count="multiple",
                file_types=["image"],
                type="filepath",
            )
            gr.HTML("""
            <p style="font-size:12px;color:#94a3b8;text-align:center;margin:8px 0 14px;
                      font-family:-apple-system,sans-serif;">
              💡 Upload front, rear, or side views — more angles = better accuracy
            </p>""")
            analyze_btn = gr.Button(
                "🔍 Analyze Trailer",
                variant="primary",
                size="lg",
                elem_id="analyze-btn",
            )
            status_html = gr.HTML(_status("idle"))

        # RIGHT COLUMN — results
        with gr.Column(scale=1, min_width=320):
            result_html = gr.HTML(_placeholder())

    # The footer lists the models actually configured in MODELS; the previous
    # hard-coded names did not match the configured list.
    _footer_models = " · ".join(m.split("/")[-1] for m in MODELS)
    gr.HTML(f"""
    <div style="text-align:center;padding:20px 0 10px;color:#94a3b8;
                font-size:12px;font-family:-apple-system,sans-serif;">
      {_footer_models} &nbsp;|&nbsp;
      Images processed in parallel &nbsp;|&nbsp; No data stored
    </div>""")

    analyze_btn.click(
        fn=analyze,
        inputs=[file_input],
        outputs=[result_html, status_html],
    )
# Start the Gradio app. There is no __main__ guard, so this runs whenever the
# module is executed or imported.
demo.launch()