# Source: Hugging Face Space NeilDriscoll/InstaAutoApp_TeamDataMavericks
# Space status at capture: Sleeping · File size: 29,512 bytes · Revision: 4466abe

"""
Insta-AutoApp v3 — 6-Agent Pipeline with InferenceClient LLM
AI-powered symptom triage for 2023 Ford Bronco owners.

Agents:
  1. IntakeAgent         — validates & normalizes user symptom input
  2. ProfileAgent        — injects vehicle profile context
  3. ClarificationAgent  — generates Bronco-specific follow-up questions
  4. RetrievalAgent      — RAG retrieval from OEM manual (FAISS + keyword fallback)
  5. DiagnosticAgent     — LLM-powered triage producing structured 4-field output
  6. PresentationAgent   — formats branded Triage Card with safety disclaimer

Team Data Mavericks · Nasser Chaudhry · Miriam Camacho · Neil Driscoll
ANLY 601 · Mays Business School · Texas A&M University
"""

import html
import logging
import os
import re
import time
from dataclasses import dataclass, field
from typing import Optional

import gradio as gr
from huggingface_hub import InferenceClient

from config import (
    APP_TITLE, APP_DESCRIPTION, DISCLAIMER_BANNER, DISCLAIMER_RESPONSE,
    ERROR_API_UNAVAILABLE, ERROR_NOT_IN_MANUAL,
    TRIM_OPTIONS, ENGINE_OPTIONS, PACKAGE_OPTIONS, TOP_TYPE_OPTIONS,
    MILEAGE_MIN, MILEAGE_MAX, MILEAGE_DEFAULT,
    FALLBACK_FOLLOWUP_QUESTIONS, SAFETY_CRITICAL_KEYWORDS, HF_API_TOKEN,
    MAX_RETRIES, RETRY_DELAY,
)
from prompts import (
    FOLLOWUP_SYSTEM_PROMPT, TRIAGE_SYSTEM_PROMPT,
    format_vehicle_profile, format_followup_context, format_retrieved_context,
)
from rag_pipeline import get_rag_pipeline, initialize_rag

logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)

# ──────────────────────────────────────────────────────────────────────
# LLM Client — tries a chain of models until one answers
# ──────────────────────────────────────────────────────────────────────
# HF Inference Providers sometimes route to backends that 404 (e.g. Novita for
# Mistral). A fallback chain makes this robust: first model that answers wins,
# and we cache it for subsequent calls.
# Candidate models in priority order. An optional HF_MODEL_ID override is tried
# first, followed by the built-in fallbacks.
MODEL_CHAIN = [
    os.getenv("HF_MODEL_ID", "").strip(),
    "deepseek-ai/DeepSeek-V3-0324",
    "meta-llama/Llama-3.3-70B-Instruct",
    "Qwen/Qwen2.5-7B-Instruct",
    "HuggingFaceH4/zephyr-7b-beta",
]
# Drop the blank entry left by an unset override and remove duplicates while
# keeping order, so an override that repeats a built-in model is not attempted
# twice in the same pass over the chain.
MODEL_CHAIN = list(dict.fromkeys(m for m in MODEL_CHAIN if m))


class LLMClient:
    """InferenceClient wrapper that tries a chain of models until one answers.

    The first model that returns non-empty content is remembered and tried
    first on later calls. If it stops answering, the remaining models in the
    chain are tried again.
    """

    def __init__(self):
        """Create the underlying InferenceClient when an API token is set."""
        self.token = HF_API_TOKEN
        self.models = MODEL_CHAIN
        self._client = None          # stays None without a token or on init failure
        self._working_model = None   # last model that produced an answer
        if self.token:
            try:
                self._client = InferenceClient(token=self.token, timeout=30)
                logger.info(f"LLMClient initialized. Chain: {self.models}")
            except Exception as e:
                logger.error(f"Failed to initialize InferenceClient: {e}")
        else:
            logger.warning("HF_API_TOKEN not set. LLM calls will fail.")

    def is_configured(self) -> bool:
        """Return True when an InferenceClient was created successfully."""
        return self._client is not None

    def _try_model(self, model, prompt, max_new_tokens):
        """Send ``prompt`` to one model as a single user message.

        Returns the stripped completion text, or None when the call raises or
        the response carries no content. Failures are logged, never raised, so
        the caller can move on to the next model.
        """
        try:
            response = self._client.chat_completion(
                model=model,
                messages=[{"role": "user", "content": prompt}],
                max_tokens=max_new_tokens,
                temperature=0.7,
                top_p=0.9,
            )
            if hasattr(response, "choices") and response.choices:
                content = response.choices[0].message.content
                if content:
                    return content.strip()
        except Exception as e:
            logger.warning(f"Model {model} failed: {type(e).__name__}: {str(e)[:200]}")
        return None

    def generate(self, prompt: str, max_new_tokens: int = 1024) -> Optional[str]:
        """Return a completion for ``prompt`` from the first model that answers.

        Args:
            prompt: Full prompt text, sent as one user message.
            max_new_tokens: Upper bound on generated tokens.

        Returns:
            The completion text, or None when the client is not configured or
            every model in the chain failed.
        """
        if not self._client:
            return None

        just_failed = None
        if self._working_model:
            result = self._try_model(self._working_model, prompt, max_new_tokens)
            if result:
                return result
            logger.warning(f"Cached model {self._working_model} failed, re-trying chain")
            just_failed = self._working_model
            self._working_model = None

        for model in self.models:
            if model == just_failed:
                # This model failed with this exact prompt a moment ago;
                # calling it again only adds latency before the fallbacks.
                continue
            logger.info(f"Trying model: {model}")
            result = self._try_model(model, prompt, max_new_tokens)
            if result:
                self._working_model = model
                logger.info(f"✓ {model} succeeded, caching")
                return result
        logger.error(f"All {len(self.models)} models in chain failed")
        return None


# Lazily created, module-wide LLMClient instance (None until first requested).
_llm_client: Optional[LLMClient] = None


def get_llm_client() -> LLMClient:
    """Return the shared LLMClient, constructing it on the first call."""
    global _llm_client
    if _llm_client is not None:
        return _llm_client
    _llm_client = LLMClient()
    return _llm_client


# ──────────────────────────────────────────────────────────────────────
# Pipeline Context
# ──────────────────────────────────────────────────────────────────────

@dataclass
class PipelineContext:
    """Mutable state handed from agent to agent during one triage run."""

    # Intake / validation
    raw_symptom: str = ""
    normalized_symptom: str = ""
    vehicle_profile: str = ""
    is_valid: bool = False
    validation_error: str = ""
    # Clarification
    followup_questions: list = field(default_factory=list)
    followup_answers: list = field(default_factory=list)
    using_fallback_questions: bool = False
    # Retrieval, diagnosis and rendering
    retrieved_chunks: list = field(default_factory=list)
    triage_fields: dict = field(default_factory=dict)
    html_output: str = ""
    # Bookkeeping
    pipeline_trace: list = field(default_factory=list)
    safety_flagged: bool = False

    def trace(self, agent, status, msg):
        """Append one trace entry for ``agent`` and mirror it to the log.

        ``status`` selects the icon ("ok", "warn", "skip" or "fail"); any
        other value gets a neutral dot.
        """
        icons = {"ok": "✓", "warn": "⚠", "skip": "⊘", "fail": "✗"}
        entry = {
            "agent": agent,
            "status": status,
            "icon": icons.get(status, "·"),
            "msg": msg,
        }
        self.pipeline_trace.append(entry)
        logger.info(f"[{agent}] {status.upper()}: {msg}")


# ──────────────────────────────────────────────────────────────────────
# Agents
# ──────────────────────────────────────────────────────────────────────

class IntakeAgent:
    """Agent 1: reject empty/too-short input, then normalize whitespace."""

    MIN_LEN = 8  # minimum number of characters after trimming

    def process(self, ctx):
        """Validate ``ctx.raw_symptom`` and populate the normalized fields.

        On rejection, ``validation_error`` is set and ``is_valid`` is False.
        On success, whitespace runs are collapsed to single spaces and the
        context is flagged when any safety-critical keyword appears.
        """
        text = (ctx.raw_symptom or "").strip()

        rejection = None
        if not text:
            rejection = (
                "Please describe your symptom before submitting.",
                "Empty input rejected",
            )
        elif len(text) < self.MIN_LEN:
            rejection = (
                "Please provide a bit more detail about what's happening.",
                f"Input too short ({len(text)} chars)",
            )
        if rejection is not None:
            ctx.validation_error, reason = rejection
            ctx.is_valid = False
            ctx.trace("IntakeAgent", "fail", reason)
            return ctx

        ctx.normalized_symptom = re.sub(r"\s+", " ", text)
        ctx.is_valid = True
        ctx.trace("IntakeAgent", "ok", f"Normalized {len(text)} chars of input")

        lowered = ctx.normalized_symptom.lower()
        for keyword in SAFETY_CRITICAL_KEYWORDS:
            if keyword in lowered:
                ctx.safety_flagged = True
                ctx.trace("IntakeAgent", "warn", "Safety-critical keywords detected → conservative bias engaged")
                break
        return ctx


class ProfileAgent:
    """Agent 2: attach the formatted vehicle profile to the context."""

    def process(self, ctx, trim, engine, package, top_type, mileage):
        """Store the formatted profile on ``ctx`` and trace a short summary.

        The mileage shown in the trace falls back to 0 when the value is
        missing or cannot be converted to an integer.
        """
        ctx.vehicle_profile = format_vehicle_profile(trim, engine, package, top_type, mileage)

        miles = 0
        if mileage:
            try:
                miles = int(mileage)
            except (TypeError, ValueError):
                miles = 0

        summary = f"{trim} · {engine} · {package} · {miles:,} mi"
        ctx.trace("ProfileAgent", "ok", summary)
        return ctx


class ClarificationAgent:
    """Agent 3: decide whether follow-up questions are needed and produce them."""

    # Keyword pairs that, when both are present, suggest several symptoms at once.
    MULTI_SYMPTOM_TRIGGERS = [
        ("check engine", "4x4"), ("check engine", "transmission"),
        ("smell", "light"), ("noise", "light"), ("brake", "steering"),
        ("burning", "light"), ("4x4", "hesitat"),
    ]

    # Vague wording. "off" must stand alone: a bare substring test also fires
    # on "off-road", "coffee", "offset", ... and would force clarification for
    # almost every off-roading description.
    _VAGUE_RE = re.compile(r"something|weird|strange|acting up|(?<![\w-])off(?![\w-])")

    # Leading list decoration on a line ("1.", "2)", "-", "•", single "*").
    # Digits are only removed when followed by "." or ")", so a question that
    # starts with "4x4" or "2023" keeps its first word.
    _LIST_MARKER_RE = re.compile(r"^(?:\s*(?:\d+[.)]|[-•]|\*(?!\*)))+\s*")

    MAX_QUESTIONS = 2

    def _needs_clarification(self, symptom):
        """Return True when the symptom is multi-part, short (< 12 words) or vague."""
        s = symptom.lower()
        if any(a in s and b in s for a, b in self.MULTI_SYMPTOM_TRIGGERS):
            return True
        if len(symptom.split()) < 12:
            return True
        return bool(self._VAGUE_RE.search(s))

    @staticmethod
    def _parse_questions(response):
        """Extract up to MAX_QUESTIONS question lines from raw LLM text.

        A line counts as a question when, after removing list markers, it is
        longer than 10 characters and contains a question mark.
        """
        questions = []
        for line in response.strip().split("\n"):
            line = ClarificationAgent._LIST_MARKER_RE.sub("", line.strip()).strip()
            if len(line) > 10 and "?" in line:
                questions.append(line)
        return questions[:ClarificationAgent.MAX_QUESTIONS]

    def _use_fallback(self, ctx, reason):
        """Install the standard follow-up questions and trace why."""
        ctx.followup_questions = FALLBACK_FOLLOWUP_QUESTIONS.copy()
        ctx.using_fallback_questions = True
        ctx.trace("ClarificationAgent", "warn", reason)
        return ctx

    def process(self, ctx):
        """Populate ``ctx.followup_questions`` when clarification is warranted.

        Uses the LLM when available; falls back to the standard question list
        when the LLM is not configured, the call fails, or no question can be
        parsed from its answer.
        """
        if not self._needs_clarification(ctx.normalized_symptom):
            ctx.trace("ClarificationAgent", "skip", "Input specific enough — follow-ups skipped")
            return ctx

        llm = get_llm_client()
        if not llm.is_configured():
            return self._use_fallback(ctx, "LLM not configured — using standard follow-ups")

        prompt = FOLLOWUP_SYSTEM_PROMPT.format(
            vehicle_profile=ctx.vehicle_profile, symptom=ctx.normalized_symptom,
        )
        response = llm.generate(prompt, max_new_tokens=256)
        if response is None:
            return self._use_fallback(ctx, "LLM call failed — using standard follow-ups")

        questions = self._parse_questions(response)
        if not questions:
            return self._use_fallback(ctx, "No valid questions parsed — using fallback")

        ctx.followup_questions = questions
        ctx.trace("ClarificationAgent", "ok", f"Generated {len(questions)} Bronco-specific follow-up(s)")
        return ctx


class RetrievalAgent:
    """Agent 4: fetch OEM-manual chunks relevant to the symptom."""

    def process(self, ctx):
        """Build a query from the symptom plus answered follow-ups and retrieve.

        Leaves ``ctx.retrieved_chunks`` empty (with a "fail" trace) when the
        RAG pipeline reports it is not loaded.
        """
        answered = len(ctx.followup_answers)
        qa_pairs = zip(ctx.followup_questions[:answered], ctx.followup_answers)
        query = " ".join(
            [ctx.normalized_symptom] + [f"{question} {answer}" for question, answer in qa_pairs]
        )

        rag = get_rag_pipeline()
        if rag.is_loaded():
            ctx.retrieved_chunks = rag.retrieve(query)
            if getattr(rag, "_use_faiss", False):
                mode = "FAISS semantic"
            else:
                mode = "keyword fallback"
            ctx.trace(
                "RetrievalAgent", "ok",
                f"Retrieved {len(ctx.retrieved_chunks)} OEM manual chunks ({mode})",
            )
            return ctx

        ctx.retrieved_chunks = []
        ctx.trace("RetrievalAgent", "fail", "RAG pipeline not loaded")
        return ctx


class DiagnosticAgent:
    """Agent 5: ask the LLM for a triage and parse it into four fields."""

    # A field header is a known label at the START of a line (optionally behind
    # a list marker or markdown emphasis) followed by a separator, closing
    # emphasis, or end of line. Anchoring matters: an unanchored search finds
    # "cause" inside "because", "action" inside "traction" and "source" inside
    # "resource", and then captures the wrong text.
    _HEADER_RE = re.compile(
        r"^[ \t]*(?:(?:[-*•>#]+|\d+[.)])[ \t]*)?[*_]*[ \t]*"
        r"(?:(?P<urgency>urgency(?:[ \t]+level)?)"
        r"|(?P<meaning>(?:(?:likely|probable|possible)[ \t]+)?(?:meaning|causes?))"
        r"|(?P<next_step>(?:recommended[ \t]+)?(?:next[ \t]+steps?|actions?))"
        r"|(?P<citation>(?:oem[ \t]+)?(?:citation|source|reference)s?))"
        r"(?![A-Za-z0-9])[ \t]*"
        r"(?:[*_]+[ \t]*[:\-–—]?|[:\-–—][ \t]*[*_]*|(?=\r?$))",
        re.IGNORECASE | re.MULTILINE,
    )

    # "not urgent" / "non-urgent" must not be mistaken for "urgent".
    _NOT_URGENT_RE = re.compile(r"\b(?:not|non)[\s-]*urgent\b")

    def process(self, ctx):
        """Generate the triage and store the parsed fields on ``ctx``.

        Leaves ``ctx.triage_fields`` untouched (with a "fail" trace) when the
        LLM is not configured or returns nothing. When intake flagged the
        symptom as safety-critical, any urgency below "Urgent" is escalated.
        """
        llm = get_llm_client()
        if not llm.is_configured():
            ctx.trace("DiagnosticAgent", "fail", "LLM not configured")
            return ctx
        followup_ctx = format_followup_context(
            ctx.followup_questions[:len(ctx.followup_answers)], ctx.followup_answers,
        )
        prompt = TRIAGE_SYSTEM_PROMPT.format(
            vehicle_profile=ctx.vehicle_profile, symptom=ctx.normalized_symptom,
            followup_context=followup_ctx,
            retrieved_context=format_retrieved_context(ctx.retrieved_chunks),
        )
        response = llm.generate(prompt, max_new_tokens=1024)
        if response is None:
            ctx.trace("DiagnosticAgent", "fail", "LLM generation failed after retries")
            return ctx
        fields = self._parse(response)
        # Conservative bias: an exact-match test on "safe"/"monitor" would let
        # an empty urgency or "Monitor - check soon" slip through unescalated.
        if ctx.safety_flagged and self._needs_escalation(fields.get("urgency", "")):
            fields["urgency"] = "Urgent"
            ctx.trace("DiagnosticAgent", "warn", "Urgency escalated to Urgent (safety-critical keywords)")
        ctx.triage_fields = fields
        ctx.trace("DiagnosticAgent", "ok", f"Triage generated — Urgency: {fields.get('urgency', '?')}")
        return ctx

    @staticmethod
    def _needs_escalation(urgency):
        """Return True unless ``urgency`` already says Urgent or Do Not Drive."""
        level = DiagnosticAgent._NOT_URGENT_RE.sub(" ", (urgency or "").lower())
        return "urgent" not in level and "do not drive" not in level

    @staticmethod
    def _parse(text):
        """Split raw LLM output into urgency / meaning / next_step / citation.

        The text is cut at the first header line found for each field; a
        field's value runs from its header to the next header (or the end).
        Urgency keeps only its first non-empty line and the citation stops at
        the first blank line. Values are stripped of markdown bold markers and
        capped at 800 characters. When no header is found at all, the whole
        text becomes the meaning and the other fields get generic defaults.
        """
        fields = {"urgency": "", "meaning": "", "next_step": "", "citation": ""}
        text = text or ""

        first_headers = {}
        for match in DiagnosticAgent._HEADER_RE.finditer(text):
            # Only the first header per field defines a section boundary.
            first_headers.setdefault(match.lastgroup, match)
        ordered = sorted(first_headers.items(), key=lambda item: item[1].start())

        for idx, (key, match) in enumerate(ordered):
            stop = ordered[idx + 1][1].start() if idx + 1 < len(ordered) else len(text)
            section = text[match.end():stop]
            if key == "urgency":
                section = next((ln for ln in section.splitlines() if ln.strip()), "")
            elif key == "citation":
                section = section.strip().split("\n\n", 1)[0]
            val = section.strip().strip("*").strip()
            val = re.sub(r"\*\*", "", val)
            fields[key] = val[:800]

        if not any(fields.values()):
            fields["meaning"] = text.strip()[:500]
            fields["urgency"] = "Monitor"
            fields["next_step"] = "Consult a Ford-certified technician for inspection."
            fields["citation"] = "See 2023 Ford Bronco Owner's Manual."
        return fields


class PresentationAgent:
    """Agent 6: render the triage result (or an error notice) as HTML."""

    # urgency key -> (foreground color, background color, banner label)
    URGENCY_STYLES = {
        "safe":         ("#1F7A3A", "#E8F5EB", "SAFE"),
        "monitor":      ("#B68B00", "#FFF7D6", "MONITOR"),
        "urgent":       ("#C84A1A", "#FFEDE0", "URGENT"),
        "do not drive": ("#A01818", "#FDE6E6", "DO NOT DRIVE"),
    }

    _DO_NOT_DRIVE_RE = re.compile(r"\bdo(?:\s+not|n['’]t)\s+drive\b")
    _NEGATED_SAFE_RE = re.compile(r"\bunsafe\b|\bnot\s+safe\b")

    def process(self, ctx):
        """Fill ``ctx.html_output`` with a triage card or an error notice.

        Shows the API-unavailable notice when there are no triage fields and
        the not-in-manual notice when retrieval returned nothing.
        """
        if not ctx.triage_fields:
            ctx.html_output = self._error_card(ERROR_API_UNAVAILABLE)
            ctx.trace("PresentationAgent", "fail", "No triage fields to render")
            return ctx
        if not ctx.retrieved_chunks:
            ctx.html_output = self._error_card(ERROR_NOT_IN_MANUAL)
            ctx.trace("PresentationAgent", "warn", "No retrieved chunks — showing not-in-manual notice")
            return ctx
        ctx.html_output = self._triage_card(ctx.triage_fields)
        ctx.trace("PresentationAgent", "ok", "Triage card rendered")
        return ctx

    @classmethod
    def _match_urgency(cls, urgency):
        """Map free-text urgency onto a URGENCY_STYLES key, most severe first.

        Checking "safe" first by plain substring would render "Unsafe - do not
        drive" or "Urgent, not safe to drive" as SAFE. Severity is therefore
        tested from the top down, and a negated "safe" counts as urgent.
        Unrecognized or missing text falls back to "monitor".
        """
        text = (urgency or "").lower().strip()
        if cls._DO_NOT_DRIVE_RE.search(text):
            return "do not drive"
        if "urgent" in text:
            return "urgent"
        if cls._NEGATED_SAFE_RE.search(text):
            return "urgent"
        if "monitor" in text:
            return "monitor"
        if "safe" in text:
            return "safe"
        return "monitor"

    @staticmethod
    def _esc(value):
        """HTML-escape ``value``, substituting an em dash for empty values."""
        return html.escape(value or "—")

    def _triage_card(self, f):
        """Return the branded triage card HTML for the parsed fields ``f``."""
        fg, bg, label = self.URGENCY_STYLES[self._match_urgency(f.get("urgency"))]
        meaning = self._esc(f.get("meaning"))
        next_step = self._esc(f.get("next_step"))
        citation = self._esc(f.get("citation"))
        disclaimer = self._esc(DISCLAIMER_RESPONSE)
        return f"""
<div style="font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif; max-width: 780px;">
  <div style="background: #FFFFFF; border: 1px solid #E8D9C0; border-radius: 8px; overflow: hidden; box-shadow: 0 2px 8px rgba(26,42,68,0.08);">
    <div style="background: #1A2A44; color: #F5EFE6 !important; padding: 14px 20px; display: flex; justify-content: space-between; align-items: center;">
      <div style="font-size: 11px; font-weight: 700; letter-spacing: 2px; color: #E88A5C !important;">TRIAGE RESULT</div>
      <div style="font-size: 11px; color: #E8D9C0 !important;">OEM-GROUNDED · 2023 FORD BRONCO</div>
    </div>
    <div style="background: {bg}; padding: 18px 20px; border-bottom: 3px solid {fg};">
      <div style="font-size: 10px; letter-spacing: 2px; color: {fg}; font-weight: 700; margin-bottom: 4px;">URGENCY LEVEL</div>
      <div style="font-size: 28px; font-weight: 800; color: {fg}; font-family: Georgia, serif;">{label}</div>
    </div>
    <div style="padding: 20px; background: #FFFFFF;">
      <div style="margin-bottom: 18px;">
        <div style="font-size: 10px; letter-spacing: 2px; color: #B04A2C; font-weight: 700; margin-bottom: 6px;">LIKELY MEANING</div>
        <div style="font-size: 15px; color: #1A2A44; line-height: 1.5;">{meaning}</div>
      </div>
      <div style="margin-bottom: 18px;">
        <div style="font-size: 10px; letter-spacing: 2px; color: #B04A2C; font-weight: 700; margin-bottom: 6px;">RECOMMENDED NEXT STEP</div>
        <div style="font-size: 15px; color: #1A2A44; line-height: 1.5; font-weight: 500;">{next_step}</div>
      </div>
      <div style="background: #F5EFE6; padding: 12px 14px; border-left: 3px solid #1A2A44; border-radius: 3px;">
        <div style="font-size: 10px; letter-spacing: 2px; color: #8B7355; font-weight: 700; margin-bottom: 4px;">OEM CITATION</div>
        <div style="font-size: 13px; color: #1A2A44; font-style: italic;">{citation}</div>
      </div>
    </div>
    <div style="background: #FFF4E5; border-top: 1px solid #E8D9C0; padding: 10px 20px; font-size: 11px; color: #8B5A00; line-height: 1.4;">
      ⚠ <strong>{disclaimer}</strong>
    </div>
  </div>
</div>
"""

    def _error_card(self, msg):
        """Return a red notice card containing the HTML-escaped ``msg``."""
        return f"""
<div style="font-family: -apple-system, sans-serif; max-width: 780px; background: #FDE6E6; border: 1px solid #A01818; border-radius: 8px; padding: 20px;">
  <div style="font-size: 11px; letter-spacing: 2px; color: #A01818; font-weight: 700; margin-bottom: 8px;">NOTICE</div>
  <div style="font-size: 15px; color: #1A2A44;">{html.escape(msg)}</div>
</div>
"""


# ──────────────────────────────────────────────────────────────────────
# Pipeline
# ──────────────────────────────────────────────────────────────────────

class TriagePipeline:
    """Runs the six agents in two stages around the optional follow-up step."""

    def __init__(self):
        """Instantiate one of each agent, numbered in execution order."""
        self.a1 = IntakeAgent()
        self.a2 = ProfileAgent()
        self.a3 = ClarificationAgent()
        self.a4 = RetrievalAgent()
        self.a5 = DiagnosticAgent()
        self.a6 = PresentationAgent()

    def stage1(self, symptom, trim, engine, package, top_type, mileage):
        """Run intake, then (only for valid input) profile and clarification."""
        ctx = self.a1.process(PipelineContext(raw_symptom=symptom))
        if ctx.is_valid:
            ctx = self.a2.process(ctx, trim, engine, package, top_type, mileage)
            ctx = self.a3.process(ctx)
        return ctx

    def stage2(self, ctx):
        """Run retrieval, diagnosis and presentation, in that order."""
        for agent in (self.a4, self.a5, self.a6):
            ctx = agent.process(ctx)
        return ctx


# Module-level pipeline instance used by the Gradio handlers
# (on_submit_symptom and on_submit_followup).
PIPELINE = TriagePipeline()


def render_trace(trace):
    """Render pipeline trace entries as an HTML list.

    Each entry is a dict with "agent", "status", "icon" and "msg" keys. An
    empty or missing trace yields a placeholder message instead.
    """
    if not trace:
        return "<div style='color:#8B7355; font-style:italic; padding:12px;'>Pipeline has not run yet.</div>"

    palette = {"ok": "#1F7A3A", "warn": "#B68B00", "skip": "#8B7355", "fail": "#A01818"}

    def _row(entry):
        # Unknown statuses fall back to the neutral navy color.
        color = palette.get(entry["status"], "#1A2A44")
        agent = html.escape(entry["agent"])
        message = html.escape(entry["msg"])
        return f"""<div style="display:flex; gap:10px; padding:8px 12px; border-left:3px solid {color}; background:#FFFFFF; margin-bottom:4px; font-family:-apple-system,sans-serif; font-size:12px; border-radius:0 3px 3px 0;">
  <span style="color:{color}; font-weight:700; min-width:18px;">{entry['icon']}</span>
  <span style="color:#B04A2C; font-weight:700; min-width:160px;">{agent}</span>
  <span style="color:#1A2A44;">{message}</span>
</div>"""

    body = "".join(_row(entry) for entry in trace)
    return f"<div style='background:#F5EFE6; padding:10px; border-radius:6px;'>{body}</div>"


# ──────────────────────────────────────────────────────────────────────
# Gradio Handlers
# ──────────────────────────────────────────────────────────────────────

def on_submit_symptom(symptom, trim, engine, package, top_type, mileage):
    """Handle the main submit button: run stage 1 and decide what to show.

    Returns an 8-tuple of (status, follow-up markdown, follow-up group, triage
    card, answer 1, answer 2, trace HTML, pipeline context). Exactly one of
    the first four components is made visible: the validation error, the
    follow-up questions (with their answer group), or the triage card when no
    follow-ups are needed and stage 2 runs immediately.
    """
    def hidden():
        return gr.update(visible=False)

    ctx = PIPELINE.stage1(symptom, trim, engine, package, top_type, mileage)
    stage1_trace = render_trace(ctx.pipeline_trace)

    if not ctx.is_valid:
        banner = f"<div style='color:#A01818; padding:10px;'>⚠ {html.escape(ctx.validation_error)}</div>"
        return (
            gr.update(value=banner, visible=True),
            hidden(), hidden(), hidden(),
            "", "", stage1_trace, ctx,
        )

    if ctx.followup_questions:
        pieces = ["**Quick clarifying questions:**\n\n"]
        if ctx.using_fallback_questions:
            pieces.append("_(using standard Bronco follow-ups)_\n\n")
        pieces.extend(
            f"{number}. {question}\n\n"
            for number, question in enumerate(ctx.followup_questions, 1)
        )
        return (
            hidden(),
            gr.update(value="".join(pieces), visible=True),
            gr.update(visible=True),
            hidden(),
            "", "", stage1_trace, ctx,
        )

    # Nothing to clarify: go straight through retrieval, diagnosis, rendering.
    ctx = PIPELINE.stage2(ctx)
    return (
        hidden(), hidden(), hidden(),
        gr.update(value=ctx.html_output, visible=True),
        "", "", render_trace(ctx.pipeline_trace), ctx,
    )


def on_submit_followup(answer1, answer2, ctx):
    """Handle the follow-up submit button: record answers and run stage 2.

    Args:
        answer1: Text typed for the first follow-up question (may be blank).
        answer2: Text typed for the second follow-up question (may be blank).
        ctx: PipelineContext stored by the stage-1 handler, or None.

    Returns:
        A 4-tuple of (follow-up group update, triage card update, trace HTML,
        pipeline context).
    """
    if ctx is None:
        return gr.update(visible=False), gr.update(visible=False), "", ctx

    # Keep each answer paired with ITS question. Collecting only the non-blank
    # answers into a list would pair a lone second answer with the first
    # question, because later stages match questions and answers by position.
    answered = [
        (question, (answer or "").strip())
        for question, answer in zip(ctx.followup_questions, (answer1, answer2))
        if (answer or "").strip()
    ]
    # Later stages read questions[:len(answers)], so store only the questions
    # that were actually answered, in the same order as their answers.
    ctx.followup_questions = [question for question, _ in answered]
    ctx.followup_answers = [answer for _, answer in answered]

    ctx = PIPELINE.stage2(ctx)
    return (
        gr.update(visible=False),
        gr.update(value=ctx.html_output, visible=True),
        render_trace(ctx.pipeline_trace),
        ctx,
    )


def on_new_query():
    """Reset the form for a fresh query.

    Returns values for, in order: symptom text, answer 1, answer 2, mileage,
    status banner, follow-up markdown, follow-up group, triage card, trace
    panel and stored pipeline context.
    """
    return (
        "", "", "",
        # Restore the widget's configured starting value; a literal 0 differs
        # from the initial state and may fall below MILEAGE_MIN.
        MILEAGE_DEFAULT,
        gr.update(value="", visible=False),
        gr.update(visible=False),
        gr.update(visible=False),
        gr.update(visible=False),
        # An empty trace renders the "Pipeline has not run yet." placeholder.
        render_trace([]),
        None,
    )


# ──────────────────────────────────────────────────────────────────────
# UI
# ──────────────────────────────────────────────────────────────────────

# Extra CSS passed to gr.Blocks: page background plus primary-button colors
# (normal and hover states).
CUSTOM_CSS = """
.gradio-container { background: #F5EFE6 !important; }
.gr-button-primary { background: #B04A2C !important; border: none !important; color: #F5EFE6 !important; font-weight: 700 !important; }
.gr-button-primary:hover { background: #8F3A20 !important; }
"""


def create_app():
    """Build and return the Gradio Blocks UI.

    Calls initialize_rag() first, then lays out a two-column page: the vehicle
    profile, symptom input, follow-up answers and triage card on the left; the
    live pipeline trace and a static architecture summary on the right. The
    three buttons are wired to on_submit_symptom, on_submit_followup and
    on_new_query.
    """
    initialize_rag()
    with gr.Blocks(title=APP_TITLE, theme=gr.themes.Soft(primary_hue="orange", neutral_hue="stone"), css=CUSTOM_CSS) as app:
        # Holds the PipelineContext between the stage-1 and stage-2 handlers.
        state = gr.State(None)

        # Page header and disclaimer banner.
        gr.HTML("""
<div style="background: linear-gradient(135deg, #1A2A44 0%, #0F1A2E 100%) !important; padding: 24px 28px; border-radius: 10px; margin-bottom: 12px;">
  <div style="font-size: 11px; letter-spacing: 3px; color: #E88A5C !important; font-weight: 700; margin-bottom: 8px;">AIAAS · AUTOMOTIVE · 6-AGENT PIPELINE</div>
  <div style="font-family: Georgia, 'Times New Roman', serif; font-size: 38px; font-weight: 800; line-height: 1.1; color: #FFFFFF !important; margin-bottom: 6px;">Insta-AutoApp</div>
  <div style="font-size: 14px; color: #F5EFE6 !important; font-style: italic;">OEM-grounded symptom triage for 2023 Ford Bronco owners.</div>
</div>
""")
        gr.HTML(f"""<div style="background: #FFF4E5; border-left: 4px solid #B04A2C; padding: 12px 16px; font-size: 13px; color: #8B5A00; border-radius: 0 4px 4px 0; margin-bottom: 16px;">⚠ {html.escape(DISCLAIMER_BANNER)}</div>""")

        with gr.Row():
            # Left column: inputs, follow-ups and the triage result.
            with gr.Column(scale=3):
                with gr.Accordion("🚙 Vehicle Profile", open=True):
                    with gr.Row():
                        trim = gr.Dropdown(choices=TRIM_OPTIONS, value=TRIM_OPTIONS[0], label="Trim Level")
                        engine = gr.Dropdown(choices=ENGINE_OPTIONS, value=ENGINE_OPTIONS[0], label="Engine")
                    with gr.Row():
                        package = gr.Dropdown(choices=PACKAGE_OPTIONS, value=PACKAGE_OPTIONS[0], label="Package")
                        top_type = gr.Dropdown(choices=TOP_TYPE_OPTIONS, value=TOP_TYPE_OPTIONS[0], label="Top Type")
                    mileage = gr.Number(value=MILEAGE_DEFAULT, minimum=MILEAGE_MIN, maximum=MILEAGE_MAX, label="Mileage", precision=0)

                gr.Markdown("### 🔍 Describe Your Symptom")
                symptom_input = gr.Textbox(
                    placeholder="Example: My check engine light came on and the truck feels sluggish in 4H.",
                    label="What's happening with your vehicle?", lines=4,
                )
                submit_btn = gr.Button("🚀 Run 6-Agent Triage Pipeline", variant="primary", size="lg")
                # Hidden until a validation error needs to be shown.
                status_output = gr.HTML(visible=False)

                # Follow-up questions and answer boxes start hidden; the
                # submit handler reveals them when questions were generated.
                followup_display = gr.Markdown(visible=False)
                with gr.Group(visible=False) as followup_group:
                    answer1 = gr.Textbox(label="Answer 1", lines=2)
                    answer2 = gr.Textbox(label="Answer 2 (if shown)", lines=2)
                    followup_submit_btn = gr.Button("📋 Submit Answers & Continue Pipeline", variant="primary")

                triage_output = gr.HTML(visible=False)
                new_query_btn = gr.Button("🔄 New Query", variant="secondary")

            # Right column: pipeline trace and a static description of the agents.
            with gr.Column(scale=2):
                gr.HTML("""<div style='font-size: 10px; letter-spacing: 2px; color: #B04A2C; font-weight: 700; margin-bottom: 4px;'>LIVE PIPELINE TRACE</div>
<div style='font-size: 13px; color: #1A2A44; margin-bottom: 8px;'>Six agents. Every step visible.</div>""")
                trace_display = gr.HTML(value="<div style='color:#8B7355; font-style:italic; padding:12px;'>Pipeline has not run yet.</div>")

                gr.HTML("""
<div style='margin-top: 20px; padding: 16px; background: #1A2A44 !important; border-radius: 6px; font-size: 12px;'>
  <div style='letter-spacing: 2px; color: #E88A5C !important; font-weight: 700; margin-bottom: 10px; font-size: 11px;'>6-AGENT ARCHITECTURE</div>
  <div style='line-height: 1.9;'>
    <div style='color: #F5EFE6 !important;'><span style='color: #E88A5C !important; font-weight: 700;'>1.</span> <strong style='color: #FFFFFF !important;'>IntakeAgent</strong> <span style='color: #D4C5A9 !important;'>— validate &amp; normalize</span></div>
    <div style='color: #F5EFE6 !important;'><span style='color: #E88A5C !important; font-weight: 700;'>2.</span> <strong style='color: #FFFFFF !important;'>ProfileAgent</strong> <span style='color: #D4C5A9 !important;'>— vehicle context</span></div>
    <div style='color: #F5EFE6 !important;'><span style='color: #E88A5C !important; font-weight: 700;'>3.</span> <strong style='color: #FFFFFF !important;'>ClarificationAgent</strong> <span style='color: #D4C5A9 !important;'>— Bronco follow-ups</span></div>
    <div style='color: #F5EFE6 !important;'><span style='color: #E88A5C !important; font-weight: 700;'>4.</span> <strong style='color: #FFFFFF !important;'>RetrievalAgent</strong> <span style='color: #D4C5A9 !important;'>— FAISS + keyword fallback</span></div>
    <div style='color: #F5EFE6 !important;'><span style='color: #E88A5C !important; font-weight: 700;'>5.</span> <strong style='color: #FFFFFF !important;'>DiagnosticAgent</strong> <span style='color: #D4C5A9 !important;'>— LLM triage + safety bias</span></div>
    <div style='color: #F5EFE6 !important;'><span style='color: #E88A5C !important; font-weight: 700;'>6.</span> <strong style='color: #FFFFFF !important;'>PresentationAgent</strong> <span style='color: #D4C5A9 !important;'>— branded Triage Card</span></div>
  </div>
</div>
""")

        # Footer credits.
        gr.HTML("""
<div style='margin-top: 24px; padding: 14px; font-size: 11px; color: #8B7355; text-align: center; border-top: 1px solid #E8D9C0;'>
  <strong>Team Data Mavericks</strong> · Nasser Chaudhry · Miriam Camacho · Neil Driscoll · ANLY 601 · Mays Business School, Texas A&amp;M
</div>
""")

        # Event wiring. The order of each outputs list must match the order of
        # the values in the tuple its handler returns.
        submit_btn.click(
            fn=on_submit_symptom,
            inputs=[symptom_input, trim, engine, package, top_type, mileage],
            outputs=[status_output, followup_display, followup_group, triage_output, answer1, answer2, trace_display, state],
        )
        followup_submit_btn.click(
            fn=on_submit_followup,
            inputs=[answer1, answer2, state],
            outputs=[followup_group, triage_output, trace_display, state],
        )
        new_query_btn.click(
            fn=on_new_query, inputs=[],
            outputs=[symptom_input, answer1, answer2, mileage, status_output, followup_display, followup_group, triage_output, trace_display, state],
        )

    return app


# Script entry point: log the configured model chain, build the UI, and serve
# it on all interfaces at port 7860 without a public share link.
if __name__ == "__main__":
    logger.info(f"Starting Insta-AutoApp v3 (6-Agent Pipeline) — model chain: {MODEL_CHAIN}")
    app = create_app()
    app.launch(server_name="0.0.0.0", server_port=7860, share=False)