# app.py # ClauseXplain v5.3 — hardening pass # Changes vs v5.2: # • ICA_007 (uncapped indemnity) now requires has_uncapped_signal — no more # auto-firing on every "indemnify" mention # • analyze_document uses level_from_score() from inference.py (single source # of truth for the new 0.50 / 0.80 risk-level cutoffs) from __future__ import annotations import os import re import gc import tempfile import threading import numpy as np import torch import torch.nn as nn import gradio as gr from pathlib import Path from transformers import LongformerTokenizer, LongformerModel from sklearn.preprocessing import MultiLabelBinarizer from huggingface_hub import hf_hub_download from feature_extractor import ClauseFeatureExtractor from explanation import generate_explanation from inference import level_from_score # v5.3: single source of truth from utils import highlight_keywords # ── Optional / fail-soft integrations ───────────────────────────────────────── try: from nl_summary import NLSummarizer nl_summarizer = NLSummarizer() except Exception as _e: print(f"[WARN] NLSummarizer disabled: {_e}") nl_summarizer = None try: from local_interpreters import LocalExplainer, build_predict_fn_for_manager local_explainer = LocalExplainer(num_samples=25, timeout_seconds=25.0) except Exception as _e: print(f"[WARN] LocalExplainer disabled: {_e}") local_explainer = None build_predict_fn_for_manager = None try: from attention_visualization import attention_heatmap_html, lime_html except Exception as _e: print(f"[WARN] attention_visualization disabled: {_e}") def attention_heatmap_html(*_a, **_k): return "" def lime_html(*_a, **_k): return "" try: from report import generate_report except Exception as _e: print(f"[WARN] report disabled: {_e}") generate_report = None DEVICE = torch.device("cpu") CLAUSE_CLASSES = [ "Cap On Liability", "Change Of Control", "Covenant Not To Sue", "Exclusivity", "Governing Law", "IP Ownership Assignment", "Irrevocable Or Perpetual License", "Joint IP Ownership", "License Grant", "Liquidated Damages", "Minimum Commitment", "Most Favored Nation", "No-Solicit Of Customers", "No-Solicit Of Employees", "Non-Compete", "Notice Period To Terminate Renewal", "Post-Termination Services", "Price Restrictions", "Revenue/Profit Sharing", "Renewal Term", "Source Code Escrow", "Uncapped Liability", "Unlimited/All-You-Can-Eat-License", "Volume Restriction", "Warranty Duration", "Anti-Assignment", "Audit Rights", "Competitive Restriction Exception", "Expiration Date", "Insurance", "Ip Indemnification", "Limitation Of Liability", "Non-Disparagement", "Parties", "Permitted Development", "Rofr/Rofo/Rofn", "Third Party Beneficiary", "Termination For Convenience", "Affiliate License-Licensor", "Affiliate License-Licensee", "Agreement Date", ] RISK_CLASSES = ["ambiguity", "enforceability", "financial", "ip", "structural"] # ───────────────────────────────────────────────────────────────────────────── # Symbolic rules — v5.3 tightened # ───────────────────────────────────────────────────────────────────────────── SYMBOLIC_RULES = [ {"rule_id": "ICA_001", "name": "Unconscionable Liability Cap", "reference": "Indian Contract Act 1872, S.23", "penalty": 0.45, "category": "financial", "condition": lambda f: f.get("has_liability_cap") and f.get("excludes_gross_negligence")}, {"rule_id": "ICA_002", "name": "Unilateral Termination Without Notice", "reference": "Indian Contract Act 1872, S.39", "penalty": 0.35, "category": "enforceability", "condition": lambda f: f.get("unilateral_termination") and not f.get("notice_period_defined")}, {"rule_id": "ICA_003", "name": "Non-Compete Exceeding 2 Years", "reference": "Indian Contract Act 1872, S.27", "penalty": 0.55, "category": "enforceability", "condition": lambda f: f.get("non_compete_years", 0) > 2}, {"rule_id": "ICA_004", "name": "Penalty Clause Exceeds Actual Damage", "reference": "Indian Contract Act 1872, S.74", "penalty": 0.40, "category": "financial", "condition": lambda f: f.get("has_liquidated_damages") and f.get("damages_exceed_loss")}, # ICA_005: only fires on explicit gambling vocab — no more "contingent on closing" {"rule_id": "ICA_005", "name": "Wagering / Gambling Agreement", "reference": "Indian Contract Act 1872, S.30", "penalty": 0.70, "category": "enforceability", "condition": lambda f: f.get("is_wagering_clause")}, {"rule_id": "ICA_006", "name": "Restraint of Legal Proceedings", "reference": "Indian Contract Act 1872, S.28", "penalty": 0.60, "category": "enforceability", "condition": lambda f: f.get("restrains_legal_proceedings")}, # ICA_007 TIGHTENED: indemnity + explicit uncapped signal + no cap {"rule_id": "ICA_007", "name": "Uncapped Indemnity Obligation", "reference": "Indian Contract Act 1872, S.124", "penalty": 0.50, "category": "financial", "condition": lambda f: ( f.get("has_indemnity_clause") and f.get("has_uncapped_signal") and not f.get("indemnity_capped") )}, {"rule_id": "ICA_008", "name": "Auto-Renewal Without Opt-Out Window", "reference": "Indian Contract Act 1872 + CPA 2019", "penalty": 0.35, "category": "enforceability", "condition": lambda f: f.get("has_auto_renewal") and not f.get("has_opt_out_window")}, {"rule_id": "ICA_009", "name": "Arbitration in Distant Venue", "reference": "Arbitration and Conciliation Act 1996, S.20", "penalty": 0.40, "category": "enforceability", "condition": lambda f: f.get("has_arbitration") and f.get("arbitration_distant_venue")}, # ICA_010 narrowed via tightened has_exclusivity patterns in feature_extractor {"rule_id": "ICA_010", "name": "Indefinite Exclusivity", "reference": "Indian Contract Act 1872, S.27", "penalty": 0.50, "category": "enforceability", "condition": lambda f: f.get("has_exclusivity") and not f.get("exclusivity_term_defined")}, {"rule_id": "ICA_011", "name": "Unilateral Price Modification", "reference": "Indian Contract Act 1872, S.62 + CPA 2019", "penalty": 0.45, "category": "financial", "condition": lambda f: f.get("unilateral_price_change")}, {"rule_id": "DPDPA_001", "name": "Missing Data Retention Clause", "reference": "DPDPA 2023, S.8(7)", "penalty": 0.50, "category": "compliance", "condition": lambda f: f.get("processes_personal_data") and not f.get("has_data_retention_clause")}, {"rule_id": "DPDPA_002", "name": "Broad Pre-existing IP Assignment", "reference": "ICA 1872, S.27 + DPDPA 2023", "penalty": 0.40, "category": "ip", "condition": lambda f: f.get("assigns_all_ip") and f.get("includes_pre_existing_ip")}, {"rule_id": "DPDPA_003", "name": "No Data Principal Consent Mechanism", "reference": "DPDPA 2023, S.6", "penalty": 0.55, "category": "compliance", "condition": lambda f: f.get("processes_sensitive_data") and not f.get("has_consent_clause")}, {"rule_id": "DPDPA_004", "name": "No Data Breach Notification Clause", "reference": "DPDPA 2023, S.8(6)", "penalty": 0.45, "category": "compliance", "condition": lambda f: f.get("processes_personal_data") and not f.get("has_breach_notification")}, {"rule_id": "ITA_001", "name": "No Cybersecurity Obligation", "reference": "IT Act 2000, S.43A", "penalty": 0.35, "category": "compliance", "condition": lambda f: f.get("handles_digital_data") and not f.get("has_security_clause")}, {"rule_id": "CPA_001", "name": "Unfair Contract Term (Consumer)", "reference": "Consumer Protection Act 2019, S.2(46)", "penalty": 0.50, "category": "enforceability", "condition": lambda f: f.get("is_consumer_contract") and f.get("has_one_sided_clause")}, ] # ───────────────────────────────────────────────────────────────────────────── # Model (unchanged) # ───────────────────────────────────────────────────────────────────────────── class ClauseXplainV5(nn.Module): def __init__(self, num_clause_labels: int, num_risk_labels: int): super().__init__() self.encoder = LongformerModel.from_pretrained("allenai/longformer-base-4096") hidden = self.encoder.config.hidden_size self.dropout = nn.Dropout(0.1) self.clause_head = nn.Linear(hidden, num_clause_labels) self.risk_head = nn.Linear(hidden, num_risk_labels) self.risk_fusion = nn.Sequential( nn.Linear(hidden + num_risk_labels + num_clause_labels, 256), nn.ReLU(), nn.Dropout(0.1), nn.Linear(256, 1), ) self.risk_level_classifier = nn.Linear(1, 3) def forward(self, input_ids, attention_mask): out = self.encoder(input_ids=input_ids, attention_mask=attention_mask) pooled = self.dropout(out.last_hidden_state[:, 0]) clause_logits = self.clause_head(pooled) risk_logits = self.risk_head(pooled) clause_probs = torch.sigmoid(clause_logits) risk_probs = torch.sigmoid(risk_logits) fusion_input = torch.cat([pooled, risk_probs, clause_probs], dim=1) risk_score = torch.sigmoid(self.risk_fusion(fusion_input)) risk_level_logits = self.risk_level_classifier(risk_score) return clause_logits, risk_logits, risk_score, risk_level_logits, pooled # ───────────────────────────────────────────────────────────────────────────── # ModelManager # ───────────────────────────────────────────────────────────────────────────── class ModelManager: def __init__(self): self.model: ClauseXplainV5 | None = None self.tokenizer: LongformerTokenizer | None = None self.clause_mlb: MultiLabelBinarizer | None = None self.risk_mlb: MultiLabelBinarizer | None = None self.feature_extractor = ClauseFeatureExtractor() self.is_ready: bool = False self.load_error: str = "" self._lock = threading.Lock() self._load_attempted: bool = False def _build_mlbs(self): clause_mlb = MultiLabelBinarizer(classes=CLAUSE_CLASSES) clause_mlb.fit([[c] for c in CLAUSE_CLASSES]) risk_mlb = MultiLabelBinarizer(classes=RISK_CLASSES) risk_mlb.fit([[r] for r in RISK_CLASSES]) return clause_mlb, risk_mlb def ensure_loaded(self): if self._load_attempted: return with self._lock: if self._load_attempted: return self._load_attempted = True self._do_load() def _do_load(self): try: print("[INFO] Loading tokenizer…") self.tokenizer = LongformerTokenizer.from_pretrained( "allenai/longformer-base-4096" ) self.clause_mlb, self.risk_mlb = self._build_mlbs() print("[INFO] Building model architecture…") self.model = ClauseXplainV5( num_clause_labels=len(self.clause_mlb.classes_), num_risk_labels=len(self.risk_mlb.classes_), ) print("[INFO] Downloading checkpoint from HuggingFace Hub…") ckpt_path = hf_hub_download( repo_id="riyasuryawanshi746/clauseXplain", filename="clausexplain_v5_best.pt", ) checkpoint = torch.load( ckpt_path, map_location=torch.device("cpu"), weights_only=False, mmap=True, ) if isinstance(checkpoint, dict) and "model_state" in checkpoint: state_dict = checkpoint["model_state"] elif isinstance(checkpoint, dict) and "state_dict" in checkpoint: state_dict = checkpoint["state_dict"] else: state_dict = checkpoint cleaned = {k.replace("module.", "", 1): v for k, v in state_dict.items()} self.model.load_state_dict(cleaned, strict=False) self.model.eval() del checkpoint, cleaned gc.collect() self.is_ready = True print("[INFO] ✓ Model loaded and ready (CPU mode)") except Exception as e: self.load_error = str(e) self.is_ready = False print(f"[ERROR] Model load failed: {e}") def analyze_clause(self, text: str) -> dict: from inference import ( _symbolic_rule_score, _neuro_symbolic_fusion, _compute_confidence, IP_CLAUSE_TYPES, ) self.ensure_loaded() features, evidence = self.feature_extractor.extract(text) sym_result = _symbolic_rule_score(features, SYMBOLIC_RULES) # v5.4: Sanity-check the clause text before inference. # After pdf_utils filtering, this should always be real legal prose. word_count = len(text.split()) print(f"[DEBUG] analyze_clause: {word_count} words | " f"preview: {text[:80].replace(chr(10),' ')!r}") if self.is_ready and self.model is not None: try: enc = self.tokenizer( text, padding="max_length", truncation=True, max_length=256, return_tensors="pt", ) with torch.no_grad(): clause_logits, risk_logits, risk_score_tensor, _, _ = self.model( enc["input_ids"], enc["attention_mask"], ) clause_probs = torch.sigmoid(clause_logits).numpy()[0] top3_idx = clause_probs.argsort()[::-1][:3] top_clauses = [ (self.clause_mlb.classes_[i], round(float(clause_probs[i]), 3)) for i in top3_idx if clause_probs[i] > 0.05 ] risk_probs = torch.sigmoid(risk_logits).numpy()[0] top2_idx = risk_probs.argsort()[::-1][:2] top_risks = [ (self.risk_mlb.classes_[i], round(float(risk_probs[i]), 3)) for i in top2_idx if risk_probs[i] > 0.05 ] neural_score = round(float(risk_score_tensor.item()), 3) top_clause_name = top_clauses[0][0] if top_clauses else "" is_ip = top_clause_name in IP_CLAUSE_TYPES neural_loaded = True except Exception as e: print(f"[WARN] Neural inference failed: {e}") neural_score, top_clauses, top_risks, is_ip = 0.0, [], [], False neural_loaded = False else: neural_score, top_clauses, top_risks, is_ip = 0.0, [], [], False neural_loaded = False fusion = _neuro_symbolic_fusion(neural_score, sym_result["symbolic_score"], is_ip) confidence = _compute_confidence( neural=neural_score, symbolic=sym_result["symbolic_score"], fused=fusion["score"], num_triggered=len(sym_result["triggered_rules"]), neural_loaded=neural_loaded, ) triggered_clean = [ {"rule_id": r["rule_id"], "name": r["name"], "reference": r["reference"], "penalty": r["penalty"], "category": r["category"]} for r in sym_result["triggered_rules"] ] return { "risk_score": fusion["score"], "neural_score": neural_score, "symbolic_score": sym_result["symbolic_score"], "risk_level": f"{fusion['emoji']} {fusion['level']}", "risk_level_raw": fusion["level"], "top_clauses": top_clauses, "top_risk_cats": top_risks, "triggered_rules": triggered_clean, "features": {k: v for k, v in features.items() if v}, "evidence": evidence, "score_breakdown": fusion["breakdown"], "confidence": confidence, } def analyze_document(self, text: str, max_clauses: int = 50) -> dict: from pdf_utils import split_into_clauses_with_metadata clauses_meta = split_into_clauses_with_metadata(text)[:max_clauses] if not clauses_meta: clauses_meta = [{"text": text[:2000], "number": None, "kind": "paragraph"}] results = [] for idx, meta in enumerate(clauses_meta): clause_text = meta["text"] try: r = self.analyze_clause(clause_text) except Exception as e: print(f"[WARN] Clause {idx+1} failed: {e}") r = { "risk_score": 0.0, "neural_score": 0.0, "symbolic_score": 0.0, "risk_level": "🟢 Low", "risk_level_raw": "Low", "top_clauses": [], "top_risk_cats": [], "triggered_rules": [], "features": {}, "evidence": {}, "score_breakdown": None, "confidence": None, } r["clause_index"] = idx + 1 r["clause_text"] = clause_text r["clause_number"] = meta.get("number") r["clause_kind"] = meta.get("kind") results.append(r) scores = [r["risk_score"] for r in results] overall = round(0.70 * max(scores) + 0.30 * (sum(scores) / len(scores)), 3) # v5.3: single source of truth for thresholds level, _ = level_from_score(overall) return { "overall_risk": overall, "overall_level": level, "num_clauses": len(results), "top_risks": sorted(results, key=lambda x: x["risk_score"], reverse=True)[:3], "clauses": results, } manager = ModelManager() # ═══════════════════════════════════════════════════════════════════════════════ # UI helpers (unchanged from v5.2) # ═══════════════════════════════════════════════════════════════════════════════ LEVEL_COLOR = {"Low": "🟢", "Medium": "🟡", "High": "🔴"} LEVEL_HEX = {"Low": "#10b981", "Medium": "#f59e0b", "High": "#ef4444"} CONF_HEX = {"Low": "#f87171", "Medium": "#fbbf24", "High": "#34d399"} CAT_ICON = {"financial": "💰", "enforceability": "⚖️", "compliance": "🛡️", "ip": "🧠", "structural": "🏗️", "ambiguity": "❓"} def _risk_gauge_html(pct: int, level: str) -> str: color = LEVEL_HEX.get(level, "#6b7280") dash = round(169.6 * pct / 100, 1) return f"""
{pct}% {level} RISK
""" def _mini_bar(pct: int, level: str) -> str: color = LEVEL_HEX.get(level, "#6b7280") return (f'
' f'
') def _confidence_badge(confidence) -> str: if not confidence: return "" lvl = confidence.get("level", "Medium") pct = int(confidence.get("score", 0) * 100) col = CONF_HEX.get(lvl, "#94a3b8") return (f'' f'CONF · {lvl.upper()} {pct}%') def _evidence_pills(evidence_dict: dict) -> str: if not evidence_dict: return "" seen, pills = set(), [] for feat, hits in evidence_dict.items(): for h in hits: phrase = h.get("phrase", "").strip() key = phrase.lower() if phrase and key not in seen: seen.add(key) pills.append(f'“{phrase}”') if len(pills) >= 6: break if len(pills) >= 6: break return "".join(pills) def _score_breakdown_html(breakdown) -> str: if not breakdown: return "" w = breakdown["weights"] return f"""
Neural {breakdown['neural_score']:.3f} × {w['neural']:.2f}
Symbolic {breakdown['symbolic_score']:.3f} × {w['symbolic']:.2f}

{breakdown['formula']}
Final {breakdown['final']:.3f}
""" # ═══════════════════════════════════════════════════════════════════════════════ # Analysis flow (unchanged structurally) # ═══════════════════════════════════════════════════════════════════════════════ def _run_analysis(text: str): if not text or len(text.strip()) < 30: return None, "⚠️ Input too short — please paste at least one full clause." try: return manager.analyze_document(text), "" except Exception as e: return None, f"❌ Analysis error: {e}" def analyze_pdf(pdf_file): if pdf_file is None: return _empty_outputs("No file uploaded.") from pdf_utils import extract_text_from_pdf try: text = extract_text_from_pdf(pdf_file) except Exception as e: return _empty_outputs(f"❌ PDF read error: {e}") return _build_outputs(text) def analyze_text(raw_text: str): return _build_outputs(raw_text) def _empty_outputs(msg: str): html = f'
{msg}
' return html, html, "", gr.update(choices=[], value=None), None, gr.update(visible=False, value=None) def _build_outputs(text: str): doc, err = _run_analysis(text) if doc is None: return _empty_outputs(err) overall_level = doc["overall_level"] overall_score = doc["overall_risk"] num_clauses = doc["num_clauses"] pct = int(overall_score * 100) high_n = sum(1 for r in doc["clauses"] if r["risk_level_raw"] == "High") med_n = sum(1 for r in doc["clauses"] if r["risk_level_raw"] == "Medium") low_n = sum(1 for r in doc["clauses"] if r["risk_level_raw"] == "Low") model_note = "" if not manager.is_ready and manager._load_attempted: model_note = (f'
⚠️ Neural model unavailable — ' f'{manager.load_error[:100]}. Symbolic only.
') if nl_summarizer is None or not nl_summarizer.enabled: gem_status = nl_summarizer.last_error if nl_summarizer else "module missing" model_note += (f'
ℹ️ Gemini summaries disabled ' f'({gem_status}). Template summaries will be used.
') gauge = _risk_gauge_html(pct, overall_level) summary_html = f"""
{gauge}
Clauses
{num_clauses}
🔴 High
{high_n}
🟡 Medium
{med_n}
🟢 Low
{low_n}
{model_note}""" top_parts = ['
🔥 Top Risk Clauses
', '
'] for r in doc["top_risks"]: lvl = r["risk_level_raw"] color = LEVEL_HEX.get(lvl, "#6b7280") cpct = int(r["risk_score"] * 100) bar = _mini_bar(cpct, lvl) preview = highlight_keywords(r["clause_text"][:220].replace("\n", " ")) conf_html = _confidence_badge(r.get("confidence")) clause_no = r.get("clause_number") no_str = f' · {clause_no}' if clause_no else '' pills = "".join( f'' f'{CAT_ICON.get(rule["category"],"⚠️")} {rule["rule_id"]}' for rule in r["triggered_rules"][:3] ) or 'No violations' top_parts.append(f"""
#{r['clause_index']}{no_str} {lvl} {conf_html} {cpct}%
{bar}
{preview}{'…' if len(r['clause_text']) > 220 else ''}
{pills}
""") top_parts.append("
") top_html = "\n".join(top_parts) rows = [ "## 📄 All Clauses\n", "| # | Marker | Level | Score | Confidence | Symbolic | Preview |", "|---|--------|-------|-------|------------|----------|---------|", ] for r in doc["clauses"]: preview = r["clause_text"][:55].replace("\n", " ").replace("|", "|") conf = r.get("confidence") or {} clvl = conf.get("level", "—") cscore = int(conf.get("score", 0) * 100) if conf else 0 marker = r.get("clause_number") or "—" rows.append( f"| {r['clause_index']} | {marker} | " f"{LEVEL_COLOR.get(r['risk_level_raw'],'⚪')} {r['risk_level_raw']} | " f"`{int(r['risk_score']*100)}%` | `{clvl} {cscore}%` | " f"`{r['symbolic_score']}` | {preview}… |" ) breakdown_md = "\n".join(rows) clause_choices = [ f"#{r['clause_index']}" f"{(' ' + r['clause_number']) if r.get('clause_number') else ''}" f" | {LEVEL_COLOR.get(r['risk_level_raw'],'⚪')} " f"{r['risk_level_raw']} {int(r['risk_score']*100)}% | " f"{r['clause_text'][:55].replace(chr(10), ' ')}…" for r in doc["clauses"] ] pdf_update = gr.update(visible=True, value=None) return summary_html, top_html, breakdown_md, gr.update(choices=clause_choices, value=None), doc, pdf_update def show_clause_explanation(choice: str, doc_state: dict): if not choice or not doc_state: return '
← Select a clause above to see its full legal analysis.
' try: idx = int(choice.split("|")[0].split()[0].strip().lstrip("#")) - 1 r = doc_state["clauses"][idx] except (ValueError, IndexError): return '
Could not load clause.
' explanation = generate_explanation(r["clause_text"], r) if nl_summarizer is not None: nl_text = nl_summarizer.generate_summary(explanation, r["clause_text"]) explanation["natural_language_summary"] = nl_text r["nl_summary"] = nl_text else: explanation["natural_language_summary"] = "" lime_words = [] if local_explainer is not None and build_predict_fn_for_manager is not None: try: manager.ensure_loaded() predict_fn = build_predict_fn_for_manager(manager) lime_words = local_explainer.explain_with_lime(r["clause_text"], predict_fn) except Exception as e: print(f"[WARN] LIME path failed: {e}") attn_tokens = [] if local_explainer is not None and manager.is_ready: try: attn_tokens = local_explainer.get_attention_map( r["clause_text"], manager.model, manager.tokenizer, ) except Exception as e: print(f"[WARN] Attention path failed: {e}") lvl = r["risk_level_raw"] color = LEVEL_HEX.get(lvl, "#6b7280") cpct = int(r["risk_score"] * 100) bar = _mini_bar(cpct, lvl) highlighted = highlight_keywords(r["clause_text"]) sym_note = ('
ℹ️ Neural model not loaded — symbolic score only.
' if not manager.is_ready else "") breakdown_html = _score_breakdown_html(r.get("score_breakdown")) conf_badge = _confidence_badge(r.get("confidence")) evidence_pills = _evidence_pills(r.get("evidence", {})) evidence_block = ( f'
🔍 Evidence Detected
' f'
{evidence_pills}
' ) if evidence_pills else "" nl_block = "" nl_text = explanation.get("natural_language_summary", "").strip() if nl_text: gem_tag = ("🤖 Gemini" if (nl_summarizer and nl_summarizer.enabled) else "📝 Template") nl_block = ( f'
{gem_tag} AI Summary
' f'
{nl_text}
' ) lime_block = "" if lime_words: lime_block = (f'
🧪 LIME — Key Legal Terms Driving Risk
' f'{lime_html(lime_words)}') attn_block = "" if attn_tokens: attn_block = (f'
👁️ Attention Heatmap
' f'{attention_heatmap_html(attn_tokens)}') bd_text_block = "" if explanation.get("score_breakdown_text"): bd_text_block = f'
{explanation["score_breakdown_text"]}
' rules_html = "" for rule_data in explanation.get("rules") or []: rid = rule_data["rule_id"] icon = CAT_ICON.get(rule_data.get("category", ""), "⚠️") ev_html = "" if rule_data.get("evidence"): ev_html = '
Matched' + \ "".join(f'“{e["phrase"]}”' for e in rule_data["evidence"]) + '
' rules_html += f"""
{icon} [{rid}] {rule_data['name']} {rule_data['reference']}
Why flagged {rule_data.get('why','—')}
What it means {rule_data.get('meaning','—')}
{ev_html}
💡 Fix {rule_data.get('suggestion','—')}
""" if not rules_html: rules_html = '
No specific rule violations detected.
' overview = explanation.get("overview", "") general_tip = explanation.get("general_tip", "") tip_block = (f'
💡 General Guidance
' f'
{general_tip}
') if general_tip else "" clause_no = r.get("clause_number") title_note = f" · {clause_no}" if clause_no else "" return f"""
Clause #{r['clause_index']}{title_note}
{lvl} RISK {conf_badge} Fused {cpct}% Neural {r['neural_score']} Symbolic {r['symbolic_score']}
{bar}
{sym_note} {nl_block}
📋 Overview
{overview}
🧮 Score Breakdown
{breakdown_html} {bd_text_block} {evidence_block} {lime_block} {attn_block}
🔦 Clause Text
{highlighted}
⚖️ Rule Analysis
{rules_html} {tip_block}
""" def build_pdf_report(doc_state: dict): if not doc_state: return gr.update(visible=False, value=None) if generate_report is None: return gr.update(visible=True, value=None) try: tmp = tempfile.NamedTemporaryFile(prefix="clausexplain_", suffix=".pdf", delete=False) tmp.close() out = generate_report(doc_state, tmp.name) return gr.update(visible=True, value=out) except Exception as e: print(f"[ERROR] PDF report generation failed: {e}") return gr.update(visible=True, value=None) EXAMPLES = [ ("⚡ High Risk", """1. Liability Cap The total liability of either party shall not exceed Rs. 50,000 under any circumstances, including gross negligence or wilful misconduct of either party. 2. Non-Compete Employee shall not compete with the company in any capacity for 3 years following termination of this agreement, within the territory of India. 3. Indemnity The Service Provider shall indemnify and hold harmless the Client against any and all claims, damages, losses, and expenses arising out of or related to this agreement."""), ("🟡 Medium Risk", """1. Auto-Renewal This agreement shall automatically renew for successive one-year terms. 2. Arbitration Any dispute arising out of this agreement shall be referred to arbitration with the seat of arbitration in Singapore. 3. Pricing The Company may modify the prices and fees charged under this agreement at its sole discretion to modify the terms upon written notice."""), ("🟢 Low Risk", """1. Renewal This agreement renews automatically every year unless either party provides 30 days written notice before the renewal date. 2. Governing Law This agreement is governed by the laws of India."""), ("🧪 Benign (M&A-style)", """Compensation paid hereunder shall be exclusive of the Company's contributions to statutory benefits. Payment of the closing bonus is contingent on the occurrence of the closing of the merger transaction and continued employment through such date."""), ] CUSTOM_CSS = """ @import url('https://fonts.googleapis.com/css2?family=DM+Sans:ital,opsz,wght@0,9..40,300;0,9..40,400;0,9..40,500;0,9..40,600;0,9..40,700;1,9..40,400&family=DM+Mono:wght@400;500&display=swap'); * { box-sizing: border-box; } body, .gradio-container { background:#080d1a !important; font-family:'DM Sans',sans-serif !important; color:#e2e8f0 !important; } footer { display:none !important; } .gradio-container { max-width:1080px !important; margin:0 auto !important; } .cx-hero { text-align:center; padding:52px 24px 36px; background:linear-gradient(135deg,#0f172a 0%,#1a1040 60%,#0f172a 100%); border-radius:16px; margin-bottom:8px; position:relative; overflow:hidden; } .cx-hero::before { content:''; position:absolute; inset:0; background:radial-gradient(ellipse 70% 60% at 50% -10%,#6366f135 0%,transparent 70%); pointer-events:none; } .cx-hero-icon { font-size:44px; margin-bottom:14px; } .cx-hero-title { font-size:38px; font-weight:700; letter-spacing:-.025em; background:linear-gradient(135deg,#f1f5f9 20%,#a5b4fc 80%); -webkit-background-clip:text; -webkit-text-fill-color:transparent; margin:0 0 10px; line-height:1.1; } .cx-hero-sub { font-size:15px; color:#94a3b8; margin:0 0 22px; font-weight:400; } .cx-badges { display:flex; gap:8px; flex-wrap:wrap; justify-content:center; } .cx-badge-hero { font-size:11px; font-weight:600; letter-spacing:.07em; text-transform:uppercase; padding:5px 12px; border-radius:20px; border:1px solid #2d3a55; background:#131c30; color:#8b9fc7; } .cx-model-notice { background:#111827; border:1px solid #1e293b; border-radius:10px; padding:11px 16px; font-size:13px; color:#94a3b8; display:flex; align-items:center; gap:10px; margin-bottom:4px; } .cx-model-notice strong { color:#a5b4fc; } .cx-card { background:#111827; border:1px solid #1e293b; border-radius:12px; padding:16px 20px; } .cx-summary-grid { display:grid; grid-template-columns:180px 1fr 1fr 1fr 1fr; gap:12px; align-items:stretch; margin:4px 0 8px; } @media(max-width:720px){ .cx-summary-grid { grid-template-columns:1fr 1fr; } } .cx-gauge-card { display:flex; align-items:center; justify-content:center; padding:20px; } .cx-stat-card { display:flex; flex-direction:column; justify-content:center; gap:6px; } .cx-stat-label { font-size:11px; font-weight:600; letter-spacing:.06em; text-transform:uppercase; color:#4b5563; } .cx-stat-val { font-size:30px; font-weight:700; font-family:'DM Mono',monospace; line-height:1; } .cx-note { background:#1e293b; border-radius:8px; padding:10px 14px; font-size:13px; color:#94a3b8; margin:6px 0; } .cx-warn { border-left:3px solid #f59e0b; color:#fcd34d !important; } .cx-section-title, .cx-section-label { font-size:11px; font-weight:700; letter-spacing:.08em; text-transform:uppercase; color:#4b5563; margin:20px 0 10px; } .cx-top-grid { display:flex; flex-direction:column; gap:12px; } .cx-clause-card { background:#111827; border:1px solid #1e293b; border-radius:12px; padding:16px 18px; transition:border-color .15s; } .cx-clause-card:hover { border-color:#2d3748; } .cx-clause-header { display:flex; align-items:center; gap:8px; margin-bottom:10px; flex-wrap:wrap; } .cx-clause-num { font-family:'DM Mono',monospace; font-size:12px; color:#4b5563; min-width:28px; } .cx-badge { font-size:10px; font-weight:700; letter-spacing:.07em; text-transform:uppercase; padding:3px 9px; border-radius:20px; } .cx-badge-sm { background:#1e293b !important; color:#64748b !important; } .cx-score-label { font-family:'DM Mono',monospace; font-size:14px; font-weight:600; color:#e2e8f0; margin-left:auto; } .cx-bar-wrap { margin-bottom:12px; } .cx-clause-preview { font-size:13px; color:#94a3b8; line-height:1.65; margin-bottom:12px; } .cx-clause-preview strong { color:#fca5a5; background:#7f1d1d28; border-radius:3px; padding:0 2px; } .cx-pills { display:flex; flex-wrap:wrap; gap:6px; } .cx-pill { font-size:11px; font-weight:500; padding:3px 8px; border-radius:6px; border:1px solid; letter-spacing:.02em; } .cx-pill-none { font-size:12px; color:#374151; } .cx-ev-pill { font-size:11px; padding:3px 8px; border-radius:6px; background:#1e293b; color:#a5b4fc; border:1px solid #312e81; font-family:'DM Mono',monospace; } .cx-ev-pill-sm { font-size:11px; padding:2px 7px; border-radius:5px; background:#1e293b; color:#a5b4fc; border:1px solid #312e81; font-family:'DM Mono',monospace; margin-right:4px; display:inline-block; } .cx-breakdown { background:#0c1525; border:1px solid #1e293b; border-radius:10px; padding:14px 16px; font-family:'DM Mono',monospace; font-size:13px; color:#cbd5e1; } .cx-bd-row { display:grid; grid-template-columns:80px 80px 1fr; align-items:center; padding:4px 0; } .cx-bd-k { color:#6b7280; font-size:11px; text-transform:uppercase; letter-spacing:.06em; } .cx-bd-v { color:#e2e8f0; font-weight:600; } .cx-bd-w { color:#94a3b8; } .cx-bd-sep { border:none; border-top:1px solid #1e293b; margin:8px 0; } .cx-bd-formula { color:#a5b4fc; font-size:13px; padding:4px 0 8px; } .cx-bd-final { display:grid; grid-template-columns:80px 1fr; padding-top:4px; } .cx-bd-final-v { color:#34d399; font-weight:700; font-size:16px; } .cx-bd-text { font-family:'DM Mono',monospace; font-size:12px; color:#94a3b8; padding:6px 14px; } .cx-divider { border:none; border-top:1px solid #1a2332; margin:24px 0; } .cx-empty { color:#374151; font-size:14px; padding:28px 0; text-align:center; } .cx-exp-wrap { display:flex; flex-direction:column; gap:14px; } .cx-exp-header { background:#111827; border-radius:12px; padding:16px 20px; display:flex; align-items:flex-start; justify-content:space-between; gap:16px; } .cx-exp-title { font-size:17px; font-weight:700; margin-bottom:8px; } .cx-badges-row { display:flex; gap:6px; flex-wrap:wrap; } .cx-overview, .cx-nl { background:#111827; border-radius:10px; padding:14px 16px; font-size:14px; color:#cbd5e1; line-height:1.75; } .cx-nl { border-left:3px solid #8b5cf6; } .cx-clause-text { background:#0c1525; border:1px solid #1e293b; border-radius:10px; padding:16px; font-size:13px; line-height:1.9; color:#94a3b8; font-family:'DM Mono',monospace; white-space:pre-wrap; } .cx-clause-text strong { color:#fca5a5; background:#7f1d1d2a; border-radius:3px; padding:1px 3px; } .cx-rule-card { background:#111827; border-radius:10px; padding:14px 16px; display:flex; flex-direction:column; gap:8px; margin-bottom:8px; } .cx-rule-header { display:flex; align-items:flex-start; justify-content:space-between; gap:12px; font-size:14px; font-weight:600; } .cx-ref { font-size:11px; color:#6366f1; font-family:'DM Mono',monospace; white-space:nowrap; padding-top:2px; flex-shrink:0; } .cx-rule-row { display:grid; grid-template-columns:100px 1fr; gap:8px; font-size:13px; color:#94a3b8; align-items:baseline; } .cx-rule-k { font-size:10px; font-weight:700; letter-spacing:.05em; text-transform:uppercase; color:#374151; } .cx-suggestion span:last-child { color:#6ee7b7; } .cx-tip { background:#0a1f16; border:1px solid #064e3b40; border-radius:10px; padding:14px 16px; font-size:13px; color:#6ee7b7; line-height:1.75; } .cx-attn-wrap, .cx-lime-wrap { background:#0c1525; border:1px solid #1e293b; border-radius:10px; padding:14px 16px; } .cx-attn-title, .cx-lime-title { font-size:11px; font-weight:700; letter-spacing:.07em; text-transform:uppercase; color:#6b7280; margin-bottom:10px; } .cx-attn-grid { display:flex; flex-wrap:wrap; gap:4px; } .cx-attn-chip { display:inline-flex; align-items:center; gap:4px; padding:3px 7px; border-radius:5px; font-family:'DM Mono',monospace; font-size:12px; color:#e2e8f0; } .cx-attn-w { font-size:9px; opacity:.6; } .cx-attn-legend, .cx-lime-legend { font-size:11px; color:#4b5563; margin-top:10px; } .cx-lime-list { display:flex; flex-direction:column; gap:6px; } .cx-lime-row { display:grid; grid-template-columns:120px 1fr 80px; align-items:center; gap:10px; } .cx-lime-word { font-family:'DM Mono',monospace; font-size:12px; color:#cbd5e1; overflow:hidden; text-overflow:ellipsis; white-space:nowrap; } .cx-lime-bar-wrap { background:#1e293b; border-radius:4px; height:6px; } .cx-lime-bar { height:6px; border-radius:4px; } .cx-lime-w { font-family:'DM Mono',monospace; font-size:11px; text-align:right; } .gr-button, button { border-radius:8px !important; font-weight:600 !important; } button.primary { background:linear-gradient(135deg,#6366f1,#8b5cf6) !important; border:none !important; color:#fff !important; letter-spacing:.02em !important; transition:opacity .15s !important; } button.secondary { background:#1e293b !important; border:1px solid #334155 !important; color:#e2e8f0 !important; } button:hover { opacity:.88 !important; } .gr-box, .gr-form { background:#111827 !important; border-color:#1e293b !important; border-radius:12px !important; } textarea, input[type=text] { background:#0c1525 !important; border:1px solid #1e293b !important; color:#e2e8f0 !important; border-radius:8px !important; font-family:'DM Sans',sans-serif !important; } label > span { color:#64748b !important; font-size:13px !important; } .gr-file { background:#0c1525 !important; border:1px dashed #2d3748 !important; border-radius:10px !important; } select, .gr-dropdown { background:#0c1525 !important; border-color:#1e293b !important; color:#e2e8f0 !important; border-radius:8px !important; } .gr-accordion > .label-wrap { background:#111827 !important; border-color:#1e293b !important; border-radius:10px !important; color:#94a3b8 !important; } """ def build_ui(): with gr.Blocks( title="ClauseXplain — AI Legal Risk Dashboard", theme=gr.themes.Base( primary_hue=gr.themes.colors.indigo, neutral_hue=gr.themes.colors.slate, font=[gr.themes.GoogleFont("DM Sans"), "sans-serif"], ), css=CUSTOM_CSS, ) as demo: doc_state = gr.State(value=None) gr.HTML("""
⚖️

ClauseXplain

International contract neural backbone, localised via Indian neuro-symbolic legal reasoning

ICA 1872 DPDPA 2023 IT Act 2000 CPA 2019 Arbitration Act 1996 Gemini · LIME · Attention
""") gr.HTML("""
⏳  The neural model (~2 GB) loads on your first analysis request — expect 60–90 s. Per-clause LIME + attention run lazily when you inspect a clause (~15–25 s).
""") with gr.Row(equal_height=True): with gr.Column(): gr.HTML('
📂 Upload PDF
') pdf_input = gr.File(label="Contract PDF", file_types=[".pdf"], type="filepath") pdf_btn = gr.Button("Analyse PDF →", variant="primary") with gr.Column(): gr.HTML('
✏️ Paste Text
') text_input = gr.Textbox(label="", placeholder="Paste one or more contract clauses here…", lines=6) text_btn = gr.Button("Analyse Text →", variant="secondary") gr.HTML('
Try an example
') with gr.Row(): for label, content in EXAMPLES: gr.Button(label, size="sm").click(fn=lambda c=content: c, outputs=text_input) gr.HTML('
') summary_out = gr.HTML('
Upload a PDF or paste contract text to begin analysis.
') gr.HTML('
') top_risks_out = gr.HTML("") with gr.Accordion("📄 Full Clause Breakdown", open=False): breakdown_out = gr.Markdown("") with gr.Row(): pdf_dl_btn = gr.Button("📥 Download PDF Report", variant="primary") pdf_file_out = gr.File(label="Compliance Report", visible=False, interactive=False) gr.HTML('
') gr.HTML('
🔎 Clause Explorer
') clause_selector = gr.Dropdown(label="Select a clause to inspect", choices=[], interactive=True) explanation_out = gr.HTML('
← Select a clause above to see its full legal analysis.
') gr.HTML("""
Built for Indian Contract Intelligence  ·  Neuro-Symbolic AI  ·  For informational purposes only — not legal advice. Consult a qualified lawyer.
""") shared = [summary_out, top_risks_out, breakdown_out, clause_selector, doc_state, pdf_file_out] pdf_btn.click(fn=analyze_pdf, inputs=[pdf_input], outputs=shared) text_btn.click(fn=analyze_text, inputs=[text_input], outputs=shared) clause_selector.change(fn=show_clause_explanation, inputs=[clause_selector, doc_state], outputs=[explanation_out]) pdf_dl_btn.click(fn=build_pdf_report, inputs=[doc_state], outputs=[pdf_file_out]) return demo if __name__ == "__main__": demo = build_ui() demo.launch(server_name="0.0.0.0", server_port=7860)