"""Streamlit demo for Hugging Face Spaces: paste a clause, see prediction.

Run: streamlit run app.py
"""
from __future__ import annotations

import json
import sys
from pathlib import Path

import joblib
import numpy as np
import pandas as pd
import streamlit as st

# Directory that contains this file. It is pushed to the front of sys.path
# before the `src.*` imports below run (hence their E402 suppressions) --
# presumably so the `src` package next to this file resolves regardless of
# the working directory the app is launched from.
ROOT = Path(__file__).resolve().parent
sys.path.insert(0, str(ROOT))

from src.config import (  # noqa: E402
    DEFAULT_ABSTAIN_THRESHOLD,
    MODELS_DIR,
    OUTPUTS_DIR,
    SEVERITY_TIERS,
    label_to_severity,
)
from src.epcc import load_excerpts  # noqa: E402


@st.cache_resource(show_spinner="Loading classifier...")
def load_model():
    """Load the persisted classifier bundle and build its text encoder.

    The bundle is read from ``MODELS_DIR / "embed_lr.joblib"``; its
    ``"encoder_name"`` entry selects the embedding model handed to
    ``EmbeddingEncoder``.

    Returns:
        A ``(bundle, encoder)`` tuple. Wrapped in ``st.cache_resource`` so
        the result is reused instead of being rebuilt on each call.
    """
    # Imported at call time rather than at module import time.
    from src.features import EmbeddingEncoder

    # NOTE: joblib artifacts are pickle-based; only load files you trust.
    model_bundle = joblib.load(MODELS_DIR / "embed_lr.joblib")
    text_encoder = EmbeddingEncoder(model_name=model_bundle["encoder_name"])
    return model_bundle, text_encoder


@st.cache_data
def load_predictions_cache():
    """Return the parsed ``epcc_predictions.json`` from ``OUTPUTS_DIR``.

    Returns:
        The decoded JSON content, or ``None`` when the file does not exist.
    """
    cache_file = OUTPUTS_DIR / "epcc_predictions.json"
    if not cache_file.exists():
        return None
    return json.loads(cache_file.read_text())


def predict_one(text: str, bundle, encoder, threshold: float) -> dict:
    """Classify a single clause and summarise the result.

    Args:
        text: Clause text to classify.
        bundle: Mapping holding the fitted classifier under ``"model"``
            (must expose ``predict_proba``) and its class labels under
            ``"classes_"``.
        encoder: Object whose ``encode(texts, batch_size=..., show_progress=...)``
            turns a list of strings into the feature matrix the classifier
            expects.
        threshold: Confidence below which the prediction is flagged as
            an abstention.

    Returns:
        A dict with keys:
            ``predicted_label``: label with the highest probability.
            ``confidence``: that probability as a Python float.
            ``severity_from_label``: severity tier looked up via
                ``label_to_severity()``, or ``"unmapped"`` if absent.
            ``abstain``: ``True`` when ``confidence < threshold``.
            ``top3``: up to three ``(label, probability)`` pairs, most
                probable first.
    """
    clf = bundle["model"]
    classes = np.asarray(bundle["classes_"])
    X = encoder.encode([text], batch_size=1, show_progress=False)
    probs = clf.predict_proba(X)[0]

    # Indices of the classes sorted by descending probability.
    order = np.argsort(probs)[::-1]
    best = order[0]
    pred = classes[best]
    conf = float(probs[best])
    sev_map = label_to_severity()
    return {
        "predicted_label": pred,
        "confidence": conf,
        "severity_from_label": sev_map.get(pred, "unmapped"),
        "abstain": conf < threshold,
        # The key promises three entries; the slice previously took five.
        "top3": [(classes[i], float(probs[i])) for i in order[:3]],
    }


def severity_badge(sev: str) -> str:
    """Return an inline-styled HTML ``<span>`` showing *sev* in upper case.

    The background colour is chosen per severity tier; any tier not in the
    palette falls back to grey (``#888``).
    """
    palette = {
        "critical": "#d62728",
        "moderate": "#ff7f0e",
        "low": "#2ca02c",
        "unmapped": "#888",
    }
    background = palette.get(sev, "#888")
    css = (
        f"background:{background};color:white;"
        "padding:3px 10px;border-radius:6px;font-size:0.85rem;"
    )
    return f"<span style='{css}'>{sev.upper()}</span>"


def _render_classify_tab() -> None:
    """Render the interactive tab: choose or type a clause and classify it."""
    bundle, encoder = load_model()
    excerpts, _ = load_excerpts()
    sample_options = {f"[{e['id']}] {e['document']}": e["text"] for e in excerpts}
    sample_options["(custom)"] = ""

    choice = st.selectbox("Pick a sample clause or write your own", list(sample_options))
    text = st.text_area("Clause text", value=sample_options[choice], height=160)
    threshold = st.slider(
        "Abstain threshold",
        0.0,
        1.0,
        DEFAULT_ABSTAIN_THRESHOLD,
        0.05,
        help="Below this confidence, the system abstains and recommends human review.",
    )

    if not st.button("Classify", type="primary"):
        return
    if not text.strip():
        # Give feedback instead of silently ignoring the click.
        st.warning("Enter some clause text to classify.")
        return

    with st.spinner("Embedding + classifying..."):
        res = predict_one(text, bundle, encoder, threshold)

    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric("Predicted CUAD label", res["predicted_label"])
    with col2:
        st.metric("Confidence", f"{res['confidence']:.3f}")
    with col3:
        st.markdown("**Severity (label)**")
        # The badge is raw HTML, so it needs unsafe_allow_html to render.
        st.markdown(severity_badge(res["severity_from_label"]), unsafe_allow_html=True)

    # Abstention takes precedence over the severity-based routing message.
    if res["abstain"]:
        st.warning(
            f"Confidence below threshold ({threshold:.2f}) — "
            "route to human reviewer."
        )
    elif res["severity_from_label"] == "critical":
        st.info("Critical-tier label — mandatory human review.")
    else:
        st.success("Standard review.")

    st.markdown("**Top predictions**")
    top_df = pd.DataFrame(res["top3"], columns=["label", "probability"])
    st.bar_chart(top_df.set_index("label"))


def _render_packet_tab() -> None:
    """Render the tab that tabulates the cached predictions for the EPCC excerpts."""
    st.markdown(
        "Pre-computed predictions on the 15 synthetic EPCC excerpts using the "
        "default abstain threshold. This is what the product would produce as a "
        "first-pass risk register for a bid-stage triage."
    )
    records = load_predictions_cache()
    if records is None:
        st.warning(
            "No cached predictions found. Run `python -m src.epcc` first."
        )
        return

    # Record field -> displayed column header; insertion order is the
    # column order of the table.
    column_titles = {
        "excerpt_id": "ID",
        "document": "Document",
        "epcc_risk_area": "EPCC risk area",
        "predicted_label": "Predicted label",
        "confidence": "Conf.",
        "severity_from_risk_area": "Sev (risk area)",
        "severity_from_label": "Sev (label)",
        "recommended_owner": "Owner",
        "escalation": "Escalation",
    }
    df = pd.DataFrame(records)
    display = df[list(column_titles)].rename(columns=column_titles)
    st.dataframe(display, use_container_width=True, hide_index=True)
    with st.expander("Show clause text for each row"):
        for r in records:
            st.markdown(f"**{r['excerpt_id']} — {r['document']}**")
            st.write(r["text"])


def _render_about_tab() -> None:
    """Render the tab with stored metrics, the severity tiers and limitations."""
    st.markdown("### Model")
    metrics_path = OUTPUTS_DIR / "metrics.json"
    if metrics_path.exists():
        st.json(json.loads(metrics_path.read_text()))
    else:
        st.info("Run `python -m src.evaluate` to populate metrics.")

    st.markdown("### Severity tiers (CUAD labels viewed from an EPC contractor)")
    for tier, labels in SEVERITY_TIERS.items():
        st.markdown(f"**{tier.title()}** ({len(labels)} labels)")
        st.write(", ".join(labels))

    st.markdown(
        "### Limitations\n"
        "- Trained on CUAD (commercial / M&A contracts), not EPCC contracts. "
        "Demo shows transferable methodology, not production readiness.\n"
        "- Single-label per clause; real EPCC review is multi-label.\n"
        "- The system is assistive. Every prediction is subject to human review."
    )


def main() -> None:
    """Configure the page and render the three tabs of the demo."""
    st.set_page_config(
        page_title="EPCC Contract Intelligence — MVP demo",
        layout="wide",
        page_icon=":scroll:",
    )
    st.title("EPCC Contract Intelligence — clause triage demo")
    st.caption(
        "Local CUAD-trained clause classifier with EPCC playbook routing. "
        "Assistive only; legal & commercial teams remain final decision-makers."
    )

    classify_tab, packet_tab, about_tab = st.tabs(
        ["Classify a clause", "Synthetic EPCC packet (C1-C15)", "About / metrics"]
    )
    with classify_tab:
        _render_classify_tab()
    with packet_tab:
        _render_packet_tab()
    with about_tab:
        _render_about_tab()


# Build the UI when this file is executed as a script
# (the module docstring's `streamlit run app.py`).
if __name__ == "__main__":
    main()