"""
Amazon Trailer Inspector — app.py
HuggingFace Spaces · Gradio 5.x · Free vision LLMs
FIXES over previous version:
- Uses requests directly (avoids huggingface_hub API version breakage)
- Correct chat-completions endpoint format for HF Serverless Inference
- Updated model list to currently-working free vision models
- Removed blocking whoami() startup check
- Robust JSON extraction with multi-pass recovery
- Detailed per-model error logging to Space logs
"""
import gradio as gr
import base64
import concurrent.futures
import json
import os
import re
import io
from PIL import Image
from huggingface_hub import InferenceClient
# ──────────────────────────────────────────────────────────────────────────────
# MODELS — ordered by reliability on HF free tier (most reliable first)
# ──────────────────────────────────────────────────────────────────────────────
# Confirmed DEPLOYED vision models — verified from HF inference/models table April 9 2026
# google/gemma-4-31B-it → novita (cheapest) + together (fastest) ✅ VISION
# google/gemma-4-26B-A4B-it → novita ✅ VISION (MoE: faster/cheaper)
# Qwen/Qwen3-VL-8B-Instruct → novita + together ✅ VISION (VL = Vision-Language)
# Ordered fallback cascade: analyze_one() tries each model top-to-bottom and
# returns the first successful, validated result. Keep the most reliable
# model first — later entries are only hit after an earlier one errors.
MODELS = [
    "google/gemma-4-31B-it",       # Primary — best quality, novita + together
    "google/gemma-4-26B-A4B-it",   # Fallback 1 — MoE variant, faster (4B active params)
    "Qwen/Qwen3-VL-8B-Instruct",   # Fallback 2 — dedicated VL model, novita + together
]
# HF Serverless Inference — new router endpoint (api-inference.huggingface.co is deprecated as of 2026)
# ──────────────────────────────────────────────────────────────────────────────
# DETECTION PROMPT
# ──────────────────────────────────────────────────────────────────────────────
# NOTE(review): the previous prompt string contained leftover diff/merge
# artifacts — a duplicated GPS_DEVICE description plus lines prefixed with
# literal "+" markers — pasted straight into the runtime text. They are merged
# below into a single coherent GPS_DEVICE entry; all other wording is unchanged.
DETECTION_PROMPT = """You are a precise visual inspector for Amazon trailer fleets.
Carefully examine the full trailer image and locate these 4 components:
1. SENSORS — Exactly TWO silver/beige DIAMOND (rhombus/rotated-square) shaped metal plates.
They are mounted near the lower-rear area on the back doors of the trailer.
2. GPS_DEVICE — A small rectangular electronic tracking box mounted at the upper
corner of the trailer rear face. About the size of a paperback book.
It may:
- be white, gray, or black
- include cables, mounts, or connectors
- appear inside a recessed panel or metal frame
- not be a perfect rectangle
3. PRIME_LOGO — The Amazon Prime branding logo: the word "prime" OR "amazon" OR the Amazon arrow/smile logo
OR both. Can be full or partially visible, on rear or side of trailer. Find it carefully. It can be partial, small/tiny, large etc.
4. TRAILER_ID — A vertical fluorescent-green or yellow-green label strip on the corner post/pillar,
showing an alphanumeric code like "SV2602705".
IMPORTANT: Reply ONLY with valid JSON — absolutely no extra text before or after, no markdown fences:
{
"sensors": {"found": true, "confidence": "high", "notes": "two diamond plates visible lower-left"},
"gps_device": {"found": false, "confidence": "medium", "notes": "top corner not visible in this angle"},
"prime_logo": {"found": true, "confidence": "high", "notes": "prime word visible on rear panel"},
"trailer_id": {"found": true, "confidence": "high", "notes": "SV2602705 on right corner post"}
}"""
# Required keys of every model response — also the merge/render order downstream.
KEYS = ["sensors", "gps_device", "prime_logo", "trailer_id"]
# ──────────────────────────────────────────────────────────────────────────────
# IMAGE HELPERS
# ──────────────────────────────────────────────────────────────────────────────
def pil_to_b64(img: Image.Image, max_side: int = 1024) -> str:
    """Encode *img* as a base64 JPEG string, shrinking it so its longest side <= max_side."""
    rgb = img.copy().convert("RGB")
    # thumbnail() resizes in place, preserving aspect ratio; only shrink, never upscale.
    if max(rgb.size) > max_side:
        rgb.thumbnail((max_side, max_side), Image.LANCZOS)
    buffer = io.BytesIO()
    rgb.save(buffer, format="JPEG", quality=82)
    return base64.b64encode(buffer.getvalue()).decode("utf-8")
def load_images(file_paths) -> list[Image.Image]:
"""Load PIL images from Gradio 5.x file paths (str or filepath objects)."""
imgs = []
if not file_paths:
return imgs
if isinstance(file_paths, str):
file_paths = [file_paths]
for p in file_paths:
try:
path = p if isinstance(p, str) else getattr(p, "name", str(p))
imgs.append(Image.open(path).convert("RGB"))
except Exception as e:
print(f"[load_images] skipped {p}: {e}")
return imgs
# ──────────────────────────────────────────────────────────────────────────────
# JSON EXTRACTION — multi-pass recovery
# ──────────────────────────────────────────────────────────────────────────────
def extract_json(text: str) -> dict | None:
"""Try multiple strategies to pull valid JSON from LLM output."""
if not text:
return None
# Strip markdown code fences
text = re.sub(r"```(?:json)?", "", text, flags=re.IGNORECASE).replace("```", "").strip()
# Find outermost { ... } block
m = re.search(r"\{[\s\S]*\}", text)
if not m:
return None
raw = m.group()
# Pass 1: direct parse
try:
return json.loads(raw)
except json.JSONDecodeError:
pass
# Pass 2: fix trailing commas
fixed = re.sub(r",\s*([}\]])", r"\1", raw)
try:
return json.loads(fixed)
except json.JSONDecodeError:
pass
# Pass 3: extract only the lines containing our keys
try:
rebuilt = {
key: json.loads(
re.search(
rf'"{key}"\s*:\s*(\{{[^}}]+\}})', raw, re.DOTALL
).group(1)
)
for key in KEYS
if re.search(rf'"{key}"\s*:\s*\{{', raw)
}
if rebuilt:
return rebuilt
except Exception:
pass
return None
def validate_result(data: dict) -> dict | None:
    """Normalize a parsed detection dict.

    Returns None (hard fail) when *data* is falsy or any required key is not a
    dict. Otherwise coerces each entry: "found" becomes a real bool (accepting
    "true"/"yes"/"1" strings), "confidence" defaults to "low", and "notes" is
    stripped text.
    """
    if not data:
        return None
    normalized = {}
    for key in KEYS:
        entry = data.get(key)
        if not isinstance(entry, dict):
            return None  # missing or malformed required key
        flag = entry.get("found", False)
        if isinstance(flag, str):
            # Models sometimes emit "true"/"yes" strings instead of JSON booleans.
            flag = flag.lower() in ("true", "yes", "1")
        normalized[key] = {
            "found": bool(flag),
            "confidence": entry.get("confidence", "low") or "low",
            "notes": (entry.get("notes") or "").strip(),
        }
    return normalized
# ──────────────────────────────────────────────────────────────────────────────
# LLM CALL — direct requests, no huggingface_hub dependency for inference
# ──────────────────────────────────────────────────────────────────────────────
def call_model(img: Image.Image, model: str, token: str) -> dict:
    """
    Run one HF vision model against *img* via InferenceClient with provider="auto".

    Returns the validated detection dict on success.
    Raises RuntimeError with a short, status-specific message on any failure
    (auth, missing model, rate limit, cold start, or unparseable output).
    """
    encoded = pil_to_b64(img)
    short = model.split("/")[-1]
    try:
        # provider="auto": the HF router picks an available provider for this
        # model (vision LLMs are not served by the CPU-only hf-inference backend).
        client = InferenceClient(provider="auto", api_key=token)
        resp = client.chat_completion(
            model=model,
            messages=[{
                "role": "user",
                "content": [
                    {"type": "image_url",
                     "image_url": {"url": f"data:image/jpeg;base64,{encoded}"}},
                    {"type": "text", "text": DETECTION_PROMPT},
                ],
            }],
            max_tokens=512,
            temperature=0.05,
        )
        raw_content = resp.choices[0].message.content
    except Exception as e:
        # Translate the raw error into an actionable message by matching HTTP
        # status substrings; checked in priority order, first match wins.
        err = str(e)
        if "401" in err or "403" in err:
            raise RuntimeError(f"{short}: auth error — check HF_TOKEN ({err[:120]})")
        if "404" in err:
            raise RuntimeError(f"{short}: 404 — model not on free serverless tier ({err[:120]})")
        if "429" in err:
            raise RuntimeError(f"{short}: rate limited — retry in ~60s")
        if "503" in err or "502" in err:
            raise RuntimeError(f"{short}: model loading/unavailable — retry shortly")
        raise RuntimeError(f"{short}: {err[:200]}")
    print(f"[{short}] raw LLM output: {raw_content[:300]}")  # visible in Space logs
    parsed = extract_json(raw_content)
    result = validate_result(parsed)
    if result is None:
        raise RuntimeError(
            f"{short}: could not extract valid JSON.\n"
            f"Raw output (first 300 chars): {raw_content[:300]}"
        )
    return result
# ──────────────────────────────────────────────────────────────────────────────
# PER-IMAGE ANALYSIS — try each model in order
# ──────────────────────────────────────────────────────────────────────────────
def analyze_one(img: Image.Image, token: str) -> tuple[dict | None, str]:
    """
    Run the MODELS cascade on a single image.

    Returns (result_dict, model_short_name) from the first model that
    succeeds, or (None, " | "-joined error messages) when every model fails.
    """
    failures: list[str] = []
    for model in MODELS:
        short = model.split("/")[-1]
        try:
            result = call_model(img, model, token)
        except RuntimeError as exc:
            message = str(exc)
            print(f"[analyze_one] FAIL {message}")
            failures.append(message)
            continue  # fall through to the next model in the cascade
        print(f"[analyze_one] SUCCESS with {short}")
        return result, short
    return None, " | ".join(failures)
# ──────────────────────────────────────────────────────────────────────────────
# RESULT MERGING
# ──────────────────────────────────────────────────────────────────────────────
# Ordering for confidence strings; unknown values rank 0 (never win).
CONF_RANK = {"high": 3, "medium": 2, "low": 1, "": 0}
def merge(results: list[dict]) -> dict:
    """Combine per-image detections into one dict keyed by KEYS.

    Any image reporting found=True wins; the highest confidence seen wins
    (even from a not-found image); the first non-empty note is kept.
    """
    combined = {key: {"found": False, "confidence": "low", "notes": ""} for key in KEYS}
    for result in results:
        if not result:
            continue
        for key in KEYS:
            candidate = result.get(key, {})
            if candidate.get("found"):
                combined[key]["found"] = True
            candidate_rank = CONF_RANK.get(candidate.get("confidence", ""), 0)
            if candidate_rank > CONF_RANK.get(combined[key]["confidence"], 0):
                combined[key]["confidence"] = candidate["confidence"]
            if candidate.get("notes") and not combined[key]["notes"]:
                combined[key]["notes"] = candidate["notes"]
    return combined
# ──────────────────────────────────────────────────────────────────────────────
# MAIN GRADIO CALLBACK
# ──────────────────────────────────────────────────────────────────────────────
def analyze(file_paths):
    """
    Main Gradio callback: run the detection cascade over every uploaded image
    in parallel and return (results_html, status_html).

    NOTE(review): this function's source arrived whitespace/markup-mangled —
    raw newlines inside single-quoted string literals and stripped HTML tags
    made it a syntax error. The user-facing strings below are reconstructed
    with <br> separators, since the output is rendered through gr.HTML;
    confirm the exact markup against the deployed Space.
    """
    token = os.environ.get("HF_TOKEN", "").strip()
    # ── Token guard — fail early with an actionable setup message ───────────
    if not token:
        return (
            _error(
                "Setup required: HF_TOKEN not set.<br>"
                "Go to your Space → Settings → Repository Secrets "
                "→ add a secret named HF_TOKEN with your "
                "HuggingFace Read token.<br>"
                "Get a free token at huggingface.co/settings/tokens"
            ),
            _status("error"),
        )
    images = load_images(file_paths)
    if not images:
        return _placeholder(), _status("idle")
    n = len(images)
    print(f"[analyze] processing {n} image(s)")
    all_results, all_errors, models_used = [], [], set()
    # Parallel: one worker thread per image, capped at 4 to stay within the
    # free-tier rate limits (more concurrency mostly yields 429s).
    with concurrent.futures.ThreadPoolExecutor(max_workers=min(n, 4)) as pool:
        futs = {pool.submit(analyze_one, img, token): i for i, img in enumerate(images)}
        for fut in concurrent.futures.as_completed(futs):
            res, meta = fut.result()
            if res is not None:
                all_results.append(res)
                models_used.add(meta)   # meta = short model name on success
            else:
                all_errors.append(meta) # meta = joined error string on failure
    # Total failure: surface exact per-model errors plus likely remedies.
    if not all_results:
        err_lines = "<br>".join(f"{e}" for e in all_errors) or "Unknown error"
        return (
            _error(
                f"All models failed for all images.<br>"
                f"Exact errors:<br>{err_lines}<br>"
                f"Most likely fixes:<br>"
                f"• 401/403 → HF_TOKEN is wrong or expired — regenerate at "
                f"hf.co/settings/tokens<br>"
                f"• 429 → Rate limited — wait 60 seconds and retry<br>"
                f"• 404 → Model temporarily unavailable — retry or report as issue<br>"
                f"• 503 → Model is loading (cold start) — wait 30s and retry"
            ),
            _status("error"),
        )
    merged = merge(all_results)
    model_str = " · ".join(sorted(models_used)) or "AI"
    # Partial failure: still show merged results, but append a warning banner
    # carrying the first error (truncated) so the user knows coverage is partial.
    warn = ""
    if all_errors:
        warn = (
            f"<br>⚠️ {len(all_errors)} image(s) failed — "
            f"{all_errors[0][:100]}"
        )
    return build_cards(merged, n, model_str, warn), _status("done", n, len(all_results))
# ──────────────────────────────────────────────────────────────────────────────
# HTML BUILDERS
# ──────────────────────────────────────────────────────────────────────────────
# Per-component render metadata:
# (key, icon, display name, description, accent colour, pill colour).
# Keys match KEYS / the JSON schema produced by the detection prompt.
COMP_META = [
    ("sensors", "🔷", "Sensors", "Two diamond-shaped sensor plates", "#f59e0b", "#fef3c7"),
    ("gps_device", "📡", "GPS Device", "White electronic box — upper corner", "#3b82f6", "#dbeafe"),
    ("prime_logo", "🔶", "Prime Logo", "Amazon Prime logo (full or partial)", "#f97316", "#fff7ed"),
    ("trailer_id", "🏷️", "Trailer ID Label", "Vertical strip on the corner post", "#10b981", "#d1fae5"),
]
# Confidence level → text colour used when rendering result cards.
CONF_COLOR = {"high": "#15803d", "medium": "#b45309", "low": "#b91c1c"}
def build_cards(merged: dict, img_n: int, model_str: str, warn: str) -> str:
found_n = sum(1 for k, *_ in COMP_META if merged.get(k, {}).get("found"))
total = len(COMP_META)
all_ok = found_n == total
# Banner colours
if all_ok:
sc, sb, se, si, sl = "#16a34a", "#f0fdf4", "#86efac", "✅", "All Clear — All Components Found"
elif found_n >= 3:
sc, sb, se, si, sl = "#d97706", "#fffbeb", "#fde68a", "⚠️", "Mostly Complete"
elif found_n >= 2:
sc, sb, se, si, sl = "#ea580c", "#fff7ed", "#fed7aa", "⚠️", "Partially Complete"
else:
sc, sb, se, si, sl = "#dc2626", "#fef2f2", "#fca5a5", "❌", "Missing Components"
rows = ""
for key, icon, name, desc, accent, pill in COMP_META:
d = merged.get(key, {})
found = d.get("found", False)
conf = d.get("confidence", "low")
notes = d.get("notes", "")
rbg = "#f0fdf4" if found else "#fef2f2"
rbd = "#bbf7d0" if found else "#fecaca"
stc = "#15803d" if found else "#b91c1c"
stx = "✅ Found" if found else "❌ Missing"
cdc = CONF_COLOR.get(conf, "#9ca3af")
note_html = (
f'
HF_TOKEN = your Read token from '
'huggingface.co/settings/tokens'
'AI-powered verification of required trailer components from photos
💡 Upload front, rear, or side views — more angles = better accuracy
""") analyze_btn = gr.Button( "🔍 Analyze Trailer", variant="primary", size="lg", elem_id="analyze-btn", ) status_html = gr.HTML(_status("idle")) # RIGHT COLUMN — results with gr.Column(scale=1, min_width=320): result_html = gr.HTML(_placeholder()) gr.HTML("""