"""Streamlit demo for Hugging Face Spaces: paste a clause, see prediction. Run: streamlit run app.py """ from __future__ import annotations import json import sys from pathlib import Path import joblib import numpy as np import pandas as pd import streamlit as st ROOT = Path(__file__).resolve().parent sys.path.insert(0, str(ROOT)) from src.config import ( # noqa: E402 DEFAULT_ABSTAIN_THRESHOLD, MODELS_DIR, OUTPUTS_DIR, SEVERITY_TIERS, label_to_severity, ) from src.epcc import load_excerpts # noqa: E402 @st.cache_resource(show_spinner="Loading classifier...") def load_model(): from src.features import EmbeddingEncoder bundle = joblib.load(MODELS_DIR / "embed_lr.joblib") encoder = EmbeddingEncoder(model_name=bundle["encoder_name"]) return bundle, encoder @st.cache_data def load_predictions_cache(): path = OUTPUTS_DIR / "epcc_predictions.json" if path.exists(): return json.loads(path.read_text()) return None def predict_one(text: str, bundle, encoder, threshold: float) -> dict: clf = bundle["model"] classes = np.asarray(bundle["classes_"]) X = encoder.encode([text], batch_size=1, show_progress=False) probs = clf.predict_proba(X)[0] order = np.argsort(probs)[::-1] pred = classes[order[0]] conf = float(probs[order[0]]) sev_map = label_to_severity() return { "predicted_label": pred, "confidence": conf, "severity_from_label": sev_map.get(pred, "unmapped"), "abstain": conf < threshold, "top3": [(classes[i], float(probs[i])) for i in order[:5]], } def severity_badge(sev: str) -> str: colors = {"critical": "#d62728", "moderate": "#ff7f0e", "low": "#2ca02c", "unmapped": "#888"} return ( f"" f"{sev.upper()}" ) def main() -> None: st.set_page_config( page_title="EPCC Contract Intelligence — MVP demo", layout="wide", page_icon=":scroll:", ) st.title("EPCC Contract Intelligence — clause triage demo") st.caption( "Local CUAD-trained clause classifier with EPCC playbook routing. " "Assistive only; legal & commercial teams remain final decision-makers." ) tab1, tab2, tab3 = st.tabs( ["Classify a clause", "Synthetic EPCC packet (C1-C15)", "About / metrics"] ) with tab1: bundle, encoder = load_model() excerpts, _ = load_excerpts() sample_options = {f"[{e['id']}] {e['document']}": e["text"] for e in excerpts} sample_options["(custom)"] = "" choice = st.selectbox("Pick a sample clause or write your own", list(sample_options.keys())) default_text = sample_options[choice] text = st.text_area("Clause text", value=default_text, height=160) threshold = st.slider( "Abstain threshold", 0.0, 1.0, DEFAULT_ABSTAIN_THRESHOLD, 0.05, help="Below this confidence, the system abstains and recommends human review.", ) if st.button("Classify", type="primary") and text.strip(): with st.spinner("Embedding + classifying..."): res = predict_one(text, bundle, encoder, threshold) col1, col2, col3 = st.columns(3) with col1: st.metric("Predicted CUAD label", res["predicted_label"]) with col2: st.metric("Confidence", f"{res['confidence']:.3f}") with col3: st.markdown("**Severity (label)**", unsafe_allow_html=True) st.markdown(severity_badge(res["severity_from_label"]), unsafe_allow_html=True) if res["abstain"]: st.warning( f"Confidence below threshold ({threshold:.2f}) — " "route to human reviewer." ) elif res["severity_from_label"] == "critical": st.info("Critical-tier label — mandatory human review.") else: st.success("Standard review.") st.markdown("**Top predictions**") top_df = pd.DataFrame(res["top3"], columns=["label", "probability"]) st.bar_chart(top_df.set_index("label")) with tab2: st.markdown( "Pre-computed predictions on the 15 synthetic EPCC excerpts using the " "default abstain threshold. This is what the product would produce as a " "first-pass risk register for a bid-stage triage." ) records = load_predictions_cache() if records is None: st.warning( "No cached predictions found. Run `python -m src.epcc` first." ) else: df = pd.DataFrame(records) display = df[ [ "excerpt_id", "document", "epcc_risk_area", "predicted_label", "confidence", "severity_from_risk_area", "severity_from_label", "recommended_owner", "escalation", ] ].rename( columns={ "excerpt_id": "ID", "document": "Document", "epcc_risk_area": "EPCC risk area", "predicted_label": "Predicted label", "confidence": "Conf.", "severity_from_risk_area": "Sev (risk area)", "severity_from_label": "Sev (label)", "recommended_owner": "Owner", "escalation": "Escalation", } ) st.dataframe(display, use_container_width=True, hide_index=True) with st.expander("Show clause text for each row"): for r in records: st.markdown(f"**{r['excerpt_id']} — {r['document']}**") st.write(r["text"]) with tab3: st.markdown("### Model") metrics_path = OUTPUTS_DIR / "metrics.json" if metrics_path.exists(): metrics = json.loads(metrics_path.read_text()) st.json(metrics) else: st.info("Run `python -m src.evaluate` to populate metrics.") st.markdown("### Severity tiers (CUAD labels viewed from an EPC contractor)") for tier, labels in SEVERITY_TIERS.items(): st.markdown(f"**{tier.title()}** ({len(labels)} labels)") st.write(", ".join(labels)) st.markdown( "### Limitations\n" "- Trained on CUAD (commercial / M&A contracts), not EPCC contracts. " "Demo shows transferable methodology, not production readiness.\n" "- Single-label per clause; real EPCC review is multi-label.\n" "- The system is assistive. Every prediction is subject to human review." ) if __name__ == "__main__": main()