Spaces:
Sleeping
Sleeping
# app.py
# ClauseXplain v5.3 — hardening pass
# Changes vs v5.2:
#   • ICA_007 (uncapped indemnity) now requires has_uncapped_signal — no more
#     auto-firing on every "indemnify" mention
#   • analyze_document uses level_from_score() from inference.py (single source
#     of truth for the new 0.50 / 0.80 risk-level cutoffs)
| from __future__ import annotations | |
| import os | |
| import re | |
| import gc | |
| import tempfile | |
| import threading | |
| import numpy as np | |
| import torch | |
| import torch.nn as nn | |
| import gradio as gr | |
| from pathlib import Path | |
| from transformers import LongformerTokenizer, LongformerModel | |
| from sklearn.preprocessing import MultiLabelBinarizer | |
| from huggingface_hub import hf_hub_download | |
| from feature_extractor import ClauseFeatureExtractor | |
| from explanation import generate_explanation | |
| from inference import level_from_score # v5.3: single source of truth | |
| from utils import highlight_keywords | |
# ── Optional / fail-soft integrations ─────────────────────────────────────────
# Every helper below is optional. If its import (or constructor) raises, a
# warning is printed and the name is bound to None / a no-op stand-in so the
# rest of the module can still be imported.

# Natural-language summarizer.
nl_summarizer = None
try:
    from nl_summary import NLSummarizer
    nl_summarizer = NLSummarizer()
except Exception as _err:
    print(f"[WARN] NLSummarizer disabled: {_err}")

# LIME / attention explainer. Both names are reset together on any failure so
# callers can rely on them being either both usable or both None.
local_explainer = None
build_predict_fn_for_manager = None
try:
    from local_interpreters import LocalExplainer, build_predict_fn_for_manager
    local_explainer = LocalExplainer(num_samples=25, timeout_seconds=25.0)
except Exception as _err:
    print(f"[WARN] LocalExplainer disabled: {_err}")
    local_explainer = None
    build_predict_fn_for_manager = None

# HTML renderers for the explainer output; fall back to empty markup.
try:
    from attention_visualization import attention_heatmap_html, lime_html
except Exception as _err:
    print(f"[WARN] attention_visualization disabled: {_err}")

    def attention_heatmap_html(*_args, **_kwargs):
        """Fallback renderer: accept anything, render nothing."""
        return ""

    def lime_html(*_args, **_kwargs):
        """Fallback renderer: accept anything, render nothing."""
        return ""

# PDF report generator.
generate_report = None
try:
    from report import generate_report
except Exception as _err:
    print(f"[WARN] report disabled: {_err}")
# Inference device for this deployment.
DEVICE = torch.device("cpu")

# Clause-type labels. ORDER MATTERS: ModelManager builds its label binarizer
# from this list and looks labels up by position, so do not sort or reorder.
CLAUSE_CLASSES = [
    "Cap On Liability",
    "Change Of Control",
    "Covenant Not To Sue",
    "Exclusivity",
    "Governing Law",
    "IP Ownership Assignment",
    "Irrevocable Or Perpetual License",
    "Joint IP Ownership",
    "License Grant",
    "Liquidated Damages",
    "Minimum Commitment",
    "Most Favored Nation",
    "No-Solicit Of Customers",
    "No-Solicit Of Employees",
    "Non-Compete",
    "Notice Period To Terminate Renewal",
    "Post-Termination Services",
    "Price Restrictions",
    "Revenue/Profit Sharing",
    "Renewal Term",
    "Source Code Escrow",
    "Uncapped Liability",
    "Unlimited/All-You-Can-Eat-License",
    "Volume Restriction",
    "Warranty Duration",
    "Anti-Assignment",
    "Audit Rights",
    "Competitive Restriction Exception",
    "Expiration Date",
    "Insurance",
    "Ip Indemnification",
    "Limitation Of Liability",
    "Non-Disparagement",
    "Parties",
    "Permitted Development",
    "Rofr/Rofo/Rofn",
    "Third Party Beneficiary",
    "Termination For Convenience",
    "Affiliate License-Licensor",
    "Affiliate License-Licensee",
    "Agreement Date",
]

# Risk-category labels (same positional contract as CLAUSE_CLASSES).
RISK_CLASSES = [
    "ambiguity",
    "enforceability",
    "financial",
    "ip",
    "structural",
]
| # ───────────────────────────────────────────────────────────────────────────── | |
| # Symbolic rules — v5.3 tightened | |
| # ───────────────────────────────────────────────────────────────────────────── | |
# Each rule is a dict with:
#   rule_id / name / reference  identifiers surfaced in the UI
#   penalty                     numeric weight (consumed by the symbolic scorer
#                               in inference.py — exact use not visible here)
#   category                    grouping key, also used to pick an icon
#   condition                   predicate over the extracted feature dict;
#                               a truthy result means the rule fires
# Conditions use f.get(...) throughout so a missing feature never raises.
SYMBOLIC_RULES = [
    {
        "rule_id": "ICA_001",
        "name": "Unconscionable Liability Cap",
        "reference": "Indian Contract Act 1872, S.23",
        "penalty": 0.45,
        "category": "financial",
        "condition": lambda f: f.get("has_liability_cap") and f.get("excludes_gross_negligence"),
    },
    {
        "rule_id": "ICA_002",
        "name": "Unilateral Termination Without Notice",
        "reference": "Indian Contract Act 1872, S.39",
        "penalty": 0.35,
        "category": "enforceability",
        "condition": lambda f: f.get("unilateral_termination") and not f.get("notice_period_defined"),
    },
    {
        "rule_id": "ICA_003",
        "name": "Non-Compete Exceeding 2 Years",
        "reference": "Indian Contract Act 1872, S.27",
        "penalty": 0.55,
        "category": "enforceability",
        # `or 0` guards against the key being present with a None value,
        # which would otherwise raise TypeError on the comparison.
        "condition": lambda f: (f.get("non_compete_years") or 0) > 2,
    },
    {
        "rule_id": "ICA_004",
        "name": "Penalty Clause Exceeds Actual Damage",
        "reference": "Indian Contract Act 1872, S.74",
        "penalty": 0.40,
        "category": "financial",
        "condition": lambda f: f.get("has_liquidated_damages") and f.get("damages_exceed_loss"),
    },
    # ICA_005: only fires on explicit gambling vocab — no more "contingent on closing"
    {
        "rule_id": "ICA_005",
        "name": "Wagering / Gambling Agreement",
        "reference": "Indian Contract Act 1872, S.30",
        "penalty": 0.70,
        "category": "enforceability",
        "condition": lambda f: f.get("is_wagering_clause"),
    },
    {
        "rule_id": "ICA_006",
        "name": "Restraint of Legal Proceedings",
        "reference": "Indian Contract Act 1872, S.28",
        "penalty": 0.60,
        "category": "enforceability",
        "condition": lambda f: f.get("restrains_legal_proceedings"),
    },
    # ICA_007 TIGHTENED: indemnity + explicit uncapped signal + no cap
    {
        "rule_id": "ICA_007",
        "name": "Uncapped Indemnity Obligation",
        "reference": "Indian Contract Act 1872, S.124",
        "penalty": 0.50,
        "category": "financial",
        "condition": lambda f: (
            f.get("has_indemnity_clause")
            and f.get("has_uncapped_signal")
            and not f.get("indemnity_capped")
        ),
    },
    {
        "rule_id": "ICA_008",
        "name": "Auto-Renewal Without Opt-Out Window",
        "reference": "Indian Contract Act 1872 + CPA 2019",
        "penalty": 0.35,
        "category": "enforceability",
        "condition": lambda f: f.get("has_auto_renewal") and not f.get("has_opt_out_window"),
    },
    {
        "rule_id": "ICA_009",
        "name": "Arbitration in Distant Venue",
        "reference": "Arbitration and Conciliation Act 1996, S.20",
        "penalty": 0.40,
        "category": "enforceability",
        "condition": lambda f: f.get("has_arbitration") and f.get("arbitration_distant_venue"),
    },
    # ICA_010 narrowed via tightened has_exclusivity patterns in feature_extractor
    {
        "rule_id": "ICA_010",
        "name": "Indefinite Exclusivity",
        "reference": "Indian Contract Act 1872, S.27",
        "penalty": 0.50,
        "category": "enforceability",
        "condition": lambda f: f.get("has_exclusivity") and not f.get("exclusivity_term_defined"),
    },
    {
        "rule_id": "ICA_011",
        "name": "Unilateral Price Modification",
        "reference": "Indian Contract Act 1872, S.62 + CPA 2019",
        "penalty": 0.45,
        "category": "financial",
        "condition": lambda f: f.get("unilateral_price_change"),
    },
    {
        "rule_id": "DPDPA_001",
        "name": "Missing Data Retention Clause",
        "reference": "DPDPA 2023, S.8(7)",
        "penalty": 0.50,
        "category": "compliance",
        "condition": lambda f: f.get("processes_personal_data") and not f.get("has_data_retention_clause"),
    },
    {
        "rule_id": "DPDPA_002",
        "name": "Broad Pre-existing IP Assignment",
        "reference": "ICA 1872, S.27 + DPDPA 2023",
        "penalty": 0.40,
        "category": "ip",
        "condition": lambda f: f.get("assigns_all_ip") and f.get("includes_pre_existing_ip"),
    },
    {
        "rule_id": "DPDPA_003",
        "name": "No Data Principal Consent Mechanism",
        "reference": "DPDPA 2023, S.6",
        "penalty": 0.55,
        "category": "compliance",
        "condition": lambda f: f.get("processes_sensitive_data") and not f.get("has_consent_clause"),
    },
    {
        "rule_id": "DPDPA_004",
        "name": "No Data Breach Notification Clause",
        "reference": "DPDPA 2023, S.8(6)",
        "penalty": 0.45,
        "category": "compliance",
        "condition": lambda f: f.get("processes_personal_data") and not f.get("has_breach_notification"),
    },
    {
        "rule_id": "ITA_001",
        "name": "No Cybersecurity Obligation",
        "reference": "IT Act 2000, S.43A",
        "penalty": 0.35,
        "category": "compliance",
        "condition": lambda f: f.get("handles_digital_data") and not f.get("has_security_clause"),
    },
    {
        "rule_id": "CPA_001",
        "name": "Unfair Contract Term (Consumer)",
        "reference": "Consumer Protection Act 2019, S.2(46)",
        "penalty": 0.50,
        "category": "enforceability",
        "condition": lambda f: f.get("is_consumer_contract") and f.get("has_one_sided_clause"),
    },
]
| # ───────────────────────────────────────────────────────────────────────────── | |
| # Model (unchanged) | |
| # ───────────────────────────────────────────────────────────────────────────── | |
class ClauseXplainV5(nn.Module):
    """Longformer encoder with clause, risk-category and fused risk-score heads.

    Attribute names and the layer order inside ``risk_fusion`` are part of the
    checkpoint's state-dict keys and must not be renamed or reordered.
    """

    def __init__(self, num_clause_labels: int, num_risk_labels: int):
        super().__init__()
        self.encoder = LongformerModel.from_pretrained("allenai/longformer-base-4096")
        hidden_dim = self.encoder.config.hidden_size
        # The fusion MLP sees the pooled vector plus both probability vectors.
        fusion_in = hidden_dim + num_risk_labels + num_clause_labels

        self.dropout = nn.Dropout(0.1)
        self.clause_head = nn.Linear(hidden_dim, num_clause_labels)
        self.risk_head = nn.Linear(hidden_dim, num_risk_labels)
        self.risk_fusion = nn.Sequential(
            nn.Linear(fusion_in, 256),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(256, 1),
        )
        self.risk_level_classifier = nn.Linear(1, 3)

    def forward(self, input_ids, attention_mask):
        """Run the encoder and every head.

        Returns:
            Tuple ``(clause_logits, risk_logits, risk_score,
            risk_level_logits, pooled)``. ``risk_score`` has already been
            passed through a sigmoid; the two ``*_logits`` heads have not.
        """
        encoded = self.encoder(input_ids=input_ids, attention_mask=attention_mask)
        # Use the hidden state at sequence position 0 as the pooled vector.
        first_token = encoded.last_hidden_state[:, 0]
        pooled = self.dropout(first_token)

        clause_logits = self.clause_head(pooled)
        risk_logits = self.risk_head(pooled)

        fusion_input = torch.cat(
            [pooled, torch.sigmoid(risk_logits), torch.sigmoid(clause_logits)],
            dim=1,
        )
        risk_score = torch.sigmoid(self.risk_fusion(fusion_input))
        risk_level_logits = self.risk_level_classifier(risk_score)
        return clause_logits, risk_logits, risk_score, risk_level_logits, pooled
| # ───────────────────────────────────────────────────────────────────────────── | |
| # ModelManager | |
| # ───────────────────────────────────────────────────────────────────────────── | |
class ModelManager:
    """Owns the tokenizer, model and feature extractor and runs the analyses.

    The neural model is loaded lazily on first use (see ``ensure_loaded``).
    If loading fails, ``is_ready`` stays False, ``load_error`` holds the
    message, and ``analyze_clause`` falls back to the symbolic score only.
    """

    def __init__(self):
        self.model: ClauseXplainV5 | None = None
        self.tokenizer: LongformerTokenizer | None = None
        self.clause_mlb: MultiLabelBinarizer | None = None
        self.risk_mlb: MultiLabelBinarizer | None = None
        self.feature_extractor = ClauseFeatureExtractor()
        self.is_ready: bool = False
        self.load_error: str = ""
        self._lock = threading.Lock()
        # True once a load attempt has FINISHED (successfully or not).
        self._load_attempted: bool = False

    def _build_mlbs(self):
        """Return ``(clause_mlb, risk_mlb)`` fitted on the fixed label lists."""
        clause_mlb = MultiLabelBinarizer(classes=CLAUSE_CLASSES)
        clause_mlb.fit([[c] for c in CLAUSE_CLASSES])
        risk_mlb = MultiLabelBinarizer(classes=RISK_CLASSES)
        risk_mlb.fit([[r] for r in RISK_CLASSES])
        return clause_mlb, risk_mlb

    @staticmethod
    def _top_labels(probs, classes, k, floor=0.05):
        """Return up to ``k`` ``(label, prob)`` pairs, highest first, with prob > floor."""
        order = probs.argsort()[::-1][:k]
        return [
            (classes[i], round(float(probs[i]), 3))
            for i in order
            if probs[i] > floor
        ]

    def ensure_loaded(self):
        """Load the model once; concurrent callers block until it has finished.

        The flag is set only AFTER ``_do_load`` returns. Setting it first (as
        before) let a second thread pass the unlocked check while the first
        was still loading, so it ran with ``is_ready == False`` and silently
        produced symbolic-only results.
        """
        if self._load_attempted:
            return
        with self._lock:
            if self._load_attempted:
                return
            try:
                self._do_load()
            finally:
                # Never retry, even if the load was interrupted.
                self._load_attempted = True

    def _do_load(self):
        """Load tokenizer, architecture and checkpoint. Never raises.

        Any exception is recorded in ``load_error`` and leaves ``is_ready``
        False.
        """
        try:
            print("[INFO] Loading tokenizer…")
            self.tokenizer = LongformerTokenizer.from_pretrained(
                "allenai/longformer-base-4096"
            )
            self.clause_mlb, self.risk_mlb = self._build_mlbs()

            print("[INFO] Building model architecture…")
            self.model = ClauseXplainV5(
                num_clause_labels=len(self.clause_mlb.classes_),
                num_risk_labels=len(self.risk_mlb.classes_),
            )

            print("[INFO] Downloading checkpoint from HuggingFace Hub…")
            ckpt_path = hf_hub_download(
                repo_id="riyasuryawanshi746/clauseXplain",
                filename="clausexplain_v5_best.pt",
            )
            # SECURITY NOTE(review): weights_only=False unpickles arbitrary
            # objects from the downloaded file. Keep only while the repo above
            # is trusted; prefer weights_only=True if the checkpoint allows it.
            checkpoint = torch.load(
                ckpt_path,
                map_location=torch.device("cpu"),
                weights_only=False,
                mmap=True,
            )

            # The checkpoint may be a wrapper dict or a bare state dict.
            if isinstance(checkpoint, dict) and "model_state" in checkpoint:
                state_dict = checkpoint["model_state"]
            elif isinstance(checkpoint, dict) and "state_dict" in checkpoint:
                state_dict = checkpoint["state_dict"]
            else:
                state_dict = checkpoint

            # Strip a leading "module." (e.g. left by a DataParallel wrapper).
            # Only a true prefix is removed; str.replace would also rewrite a
            # key that merely contains "module." somewhere in the middle.
            prefix = "module."
            cleaned = {
                (k[len(prefix):] if k.startswith(prefix) else k): v
                for k, v in state_dict.items()
            }

            # strict=False tolerates mismatches; report them instead of
            # silently running with randomly initialised layers.
            result = self.model.load_state_dict(cleaned, strict=False)
            missing = list(getattr(result, "missing_keys", []))
            unexpected = list(getattr(result, "unexpected_keys", []))
            if missing or unexpected:
                print(f"[WARN] Checkpoint mismatch: {len(missing)} missing, "
                      f"{len(unexpected)} unexpected keys")

            self.model.eval()
            del checkpoint, state_dict, cleaned
            gc.collect()
            self.is_ready = True
            print("[INFO] ✓ Model loaded and ready (CPU mode)")
        except Exception as e:
            self.load_error = str(e)
            self.is_ready = False
            print(f"[ERROR] Model load failed: {e}")

    def analyze_clause(self, text: str) -> dict:
        """Score one clause with the symbolic rules and, if loaded, the model.

        Returns a dict with the fused ``risk_score``, the separate
        ``neural_score`` / ``symbolic_score``, ``risk_level`` (with emoji) and
        ``risk_level_raw``, ``top_clauses``, ``top_risk_cats``,
        ``triggered_rules``, truthy ``features``, ``evidence``,
        ``score_breakdown`` and ``confidence``.
        """
        from inference import (
            _symbolic_rule_score, _neuro_symbolic_fusion,
            _compute_confidence, IP_CLAUSE_TYPES,
        )

        self.ensure_loaded()
        features, evidence = self.feature_extractor.extract(text)
        sym_result = _symbolic_rule_score(features, SYMBOLIC_RULES)

        # Log a short preview so malformed clause splits are easy to spot.
        word_count = len(text.split())
        print(f"[DEBUG] analyze_clause: {word_count} words | "
              f"preview: {text[:80].replace(chr(10),' ')!r}")

        # Symbolic-only defaults; overwritten only if neural inference succeeds.
        neural_score, top_clauses, top_risks, is_ip = 0.0, [], [], False
        neural_loaded = False

        if self.is_ready and self.model is not None:
            try:
                enc = self.tokenizer(
                    text, padding="max_length", truncation=True,
                    max_length=256, return_tensors="pt",
                )
                with torch.no_grad():
                    clause_logits, risk_logits, risk_score_tensor, _, _ = self.model(
                        enc["input_ids"], enc["attention_mask"],
                    )
                clause_probs = torch.sigmoid(clause_logits).numpy()[0]
                risk_probs = torch.sigmoid(risk_logits).numpy()[0]
                ranked_clauses = self._top_labels(clause_probs, self.clause_mlb.classes_, 3)
                ranked_risks = self._top_labels(risk_probs, self.risk_mlb.classes_, 2)
                score = round(float(risk_score_tensor.item()), 3)
                best_clause = ranked_clauses[0][0] if ranked_clauses else ""

                # Commit all results together so a mid-way failure cannot
                # leave a half-populated mix of neural and default values.
                neural_score = score
                top_clauses, top_risks = ranked_clauses, ranked_risks
                is_ip = best_clause in IP_CLAUSE_TYPES
                neural_loaded = True
            except Exception as e:
                print(f"[WARN] Neural inference failed: {e}")

        fusion = _neuro_symbolic_fusion(neural_score, sym_result["symbolic_score"], is_ip)
        confidence = _compute_confidence(
            neural=neural_score, symbolic=sym_result["symbolic_score"],
            fused=fusion["score"], num_triggered=len(sym_result["triggered_rules"]),
            neural_loaded=neural_loaded,
        )

        # Drop the (non-serialisable) condition lambdas from the rule dicts.
        rule_fields = ("rule_id", "name", "reference", "penalty", "category")
        triggered_clean = [
            {field: rule[field] for field in rule_fields}
            for rule in sym_result["triggered_rules"]
        ]

        return {
            "risk_score": fusion["score"],
            "neural_score": neural_score,
            "symbolic_score": sym_result["symbolic_score"],
            "risk_level": f"{fusion['emoji']} {fusion['level']}",
            "risk_level_raw": fusion["level"],
            "top_clauses": top_clauses,
            "top_risk_cats": top_risks,
            "triggered_rules": triggered_clean,
            "features": {k: v for k, v in features.items() if v},
            "evidence": evidence,
            "score_breakdown": fusion["breakdown"],
            "confidence": confidence,
        }

    def analyze_document(self, text: str, max_clauses: int = 50) -> dict:
        """Split ``text`` into clauses, analyse each, and aggregate the risk.

        At most ``max_clauses`` clauses are analysed. If the splitter returns
        nothing, the first 2000 characters are treated as a single paragraph.
        A clause whose analysis raises is kept with a zeroed "Low" result.

        Returns a dict with ``overall_risk``, ``overall_level``,
        ``num_clauses``, ``top_risks`` (three highest-scoring clause results)
        and ``clauses`` (all results, in document order).
        """
        from pdf_utils import split_into_clauses_with_metadata

        clauses_meta = split_into_clauses_with_metadata(text)[:max_clauses]
        if not clauses_meta:
            clauses_meta = [{"text": text[:2000], "number": None, "kind": "paragraph"}]

        results = []
        for position, meta in enumerate(clauses_meta, start=1):
            clause_text = meta["text"]
            try:
                r = self.analyze_clause(clause_text)
            except Exception as e:
                print(f"[WARN] Clause {position} failed: {e}")
                r = {
                    "risk_score": 0.0, "neural_score": 0.0, "symbolic_score": 0.0,
                    "risk_level": "🟢 Low", "risk_level_raw": "Low",
                    "top_clauses": [], "top_risk_cats": [],
                    "triggered_rules": [], "features": {}, "evidence": {},
                    "score_breakdown": None, "confidence": None,
                }
            r["clause_index"] = position
            r["clause_text"] = clause_text
            r["clause_number"] = meta.get("number")
            r["clause_kind"] = meta.get("kind")
            results.append(r)

        # Worst clause dominates (70 %); the mean contributes the rest.
        scores = [r["risk_score"] for r in results]
        overall = round(0.70 * max(scores) + 0.30 * (sum(scores) / len(scores)), 3)
        # v5.3: single source of truth for thresholds
        level, _ = level_from_score(overall)

        return {
            "overall_risk": overall,
            "overall_level": level,
            "num_clauses": len(results),
            "top_risks": sorted(results, key=lambda x: x["risk_score"], reverse=True)[:3],
            "clauses": results,
        }
# Single shared manager used by every UI callback below.
manager = ModelManager()

# ═══════════════════════════════════════════════════════════════════════════════
# UI helpers (unchanged from v5.2)
# ═══════════════════════════════════════════════════════════════════════════════

# Risk level -> emoji marker used in Markdown tables and dropdown labels.
LEVEL_COLOR = {
    "Low": "🟢",
    "Medium": "🟡",
    "High": "🔴",
}

# Risk level -> accent colour for cards, bars and the gauge.
LEVEL_HEX = {
    "Low": "#10b981",
    "Medium": "#f59e0b",
    "High": "#ef4444",
}

# Confidence level -> badge colour.
CONF_HEX = {
    "Low": "#f87171",
    "Medium": "#fbbf24",
    "High": "#34d399",
}

# Rule / risk category -> icon.
CAT_ICON = {
    "financial": "💰",
    "enforceability": "⚖️",
    "compliance": "🛡️",
    "ip": "🧠",
    "structural": "🏗️",
    "ambiguity": "❓",
}
def _risk_gauge_html(pct: int, level: str) -> str:
    """Render the semicircular SVG gauge for an overall risk percentage.

    ``level`` selects the accent colour (grey for unknown levels) and is also
    printed under the gauge.
    """
    arc_total = 169.6  # dash-array length treated as a completely filled arc
    stroke = LEVEL_HEX.get(level, "#6b7280")
    filled = round(arc_total * pct / 100, 1)
    return f"""
<div style="display:flex;flex-direction:column;align-items:center;gap:4px">
<svg width="140" height="80" viewBox="0 0 140 80">
<path d="M 14 70 A 56 56 0 0 1 126 70"
fill="none" stroke="#1e293b" stroke-width="14" stroke-linecap="round"/>
<path d="M 14 70 A 56 56 0 0 1 126 70"
fill="none" stroke="{stroke}" stroke-width="14" stroke-linecap="round"
stroke-dasharray="{filled} {arc_total}"/>
<text x="70" y="66" text-anchor="middle" font-size="22" font-weight="700"
font-family="'DM Mono',monospace" fill="{stroke}">{pct}%</text>
</svg>
<span style="font-size:12px;font-weight:600;letter-spacing:.08em;
color:{stroke};text-transform:uppercase">{level} RISK</span>
</div>"""
def _mini_bar(pct: int, level: str) -> str:
    """Render a thin horizontal bar filled to ``pct`` percent in the level colour."""
    fill_colour = LEVEL_HEX.get(level, "#6b7280")
    track_open = '<div style="background:#1e293b;border-radius:4px;height:6px;width:100%">'
    fill = f'<div style="background:{fill_colour};width:{pct}%;height:6px;border-radius:4px"></div>'
    return track_open + fill + "</div>"
| def _confidence_badge(confidence) -> str: | |
| if not confidence: | |
| return "" | |
| lvl = confidence.get("level", "Medium") | |
| pct = int(confidence.get("score", 0) * 100) | |
| col = CONF_HEX.get(lvl, "#94a3b8") | |
| return (f'<span class="cx-badge" style="background:{col}22;color:{col}">' | |
| f'CONF · {lvl.upper()} {pct}%</span>') | |
| def _evidence_pills(evidence_dict: dict) -> str: | |
| if not evidence_dict: | |
| return "" | |
| seen, pills = set(), [] | |
| for feat, hits in evidence_dict.items(): | |
| for h in hits: | |
| phrase = h.get("phrase", "").strip() | |
| key = phrase.lower() | |
| if phrase and key not in seen: | |
| seen.add(key) | |
| pills.append(f'<span class="cx-ev-pill">“{phrase}”</span>') | |
| if len(pills) >= 6: break | |
| if len(pills) >= 6: break | |
| return "".join(pills) | |
| def _score_breakdown_html(breakdown) -> str: | |
| if not breakdown: | |
| return "" | |
| w = breakdown["weights"] | |
| return f""" | |
| <div class="cx-breakdown"> | |
| <div class="cx-bd-row"> | |
| <span class="cx-bd-k">Neural</span> | |
| <span class="cx-bd-v">{breakdown['neural_score']:.3f}</span> | |
| <span class="cx-bd-w">× {w['neural']:.2f}</span> | |
| </div> | |
| <div class="cx-bd-row"> | |
| <span class="cx-bd-k">Symbolic</span> | |
| <span class="cx-bd-v">{breakdown['symbolic_score']:.3f}</span> | |
| <span class="cx-bd-w">× {w['symbolic']:.2f}</span> | |
| </div> | |
| <hr class="cx-bd-sep"/> | |
| <div class="cx-bd-formula">{breakdown['formula']}</div> | |
| <div class="cx-bd-final"> | |
| <span class="cx-bd-k">Final</span> | |
| <span class="cx-bd-final-v">{breakdown['final']:.3f}</span> | |
| </div> | |
| </div>""" | |
| # ═══════════════════════════════════════════════════════════════════════════════ | |
| # Analysis flow (unchanged structurally) | |
| # ═══════════════════════════════════════════════════════════════════════════════ | |
| def _run_analysis(text: str): | |
| if not text or len(text.strip()) < 30: | |
| return None, "⚠️ Input too short — please paste at least one full clause." | |
| try: | |
| return manager.analyze_document(text), "" | |
| except Exception as e: | |
| return None, f"❌ Analysis error: {e}" | |
def analyze_pdf(pdf_file):
    """UI callback: extract text from an uploaded PDF and analyse it.

    Returns the same output tuple as ``_build_outputs``; a missing file or an
    extraction error yields the placeholder tuple from ``_empty_outputs``.
    """
    if pdf_file is None:
        return _empty_outputs("No file uploaded.")

    from pdf_utils import extract_text_from_pdf

    try:
        extracted = extract_text_from_pdf(pdf_file)
    except Exception as exc:
        return _empty_outputs(f"❌ PDF read error: {exc}")
    else:
        return _build_outputs(extracted)
def analyze_text(raw_text: str):
    """UI callback: analyse pasted text; delegates straight to ``_build_outputs``."""
    outputs = _build_outputs(raw_text)
    return outputs
def _empty_outputs(msg: str):
    """Build the placeholder output tuple shown when there is nothing to display.

    The tuple has the same six slots as ``_build_outputs``: summary HTML,
    top-risk HTML, breakdown Markdown, clause dropdown update, document state
    and PDF-download update.
    """
    placeholder = f'<div class="cx-empty">{msg}</div>'
    cleared_dropdown = gr.update(choices=[], value=None)
    hidden_pdf = gr.update(visible=False, value=None)
    return placeholder, placeholder, "", cleared_dropdown, None, hidden_pdf
def _build_outputs(text: str):
    """Analyse ``text`` and build every UI output for the results view.

    Returns a 6-tuple: summary HTML, top-risk cards HTML, all-clauses Markdown
    table, dropdown update with one choice per clause, the raw document result
    (kept as UI state) and an update that reveals the PDF-download widget.
    On analysis failure the placeholder tuple from ``_empty_outputs`` is
    returned instead.
    """
    from html import escape  # local import keeps this block self-contained

    def pct_of(score) -> int:
        # Floor to a whole percent AFTER rounding to 0.1 %: a bare
        # int(score * 100) turns 0.57 into 56 (0.57 * 100 == 56.99999999999999).
        return int(round(float(score or 0) * 1000)) // 10

    doc, err = _run_analysis(text)
    if doc is None:
        return _empty_outputs(err)

    clauses = doc["clauses"]
    overall_level = doc["overall_level"]
    num_clauses = doc["num_clauses"]
    pct = pct_of(doc["overall_risk"])

    # Count clauses per level in a single pass.
    level_counts = {"High": 0, "Medium": 0, "Low": 0}
    for r in clauses:
        if r["risk_level_raw"] in level_counts:
            level_counts[r["risk_level_raw"]] += 1
    high_n, med_n, low_n = level_counts["High"], level_counts["Medium"], level_counts["Low"]

    # ── Status notes (model / summarizer availability) ──────────────────────
    model_note = ""
    if not manager.is_ready and manager._load_attempted:
        # Error text is arbitrary (may contain '<' or '&'), so escape it.
        load_error = escape(manager.load_error[:100])
        model_note = (f'<div class="cx-note cx-warn">⚠️ Neural model unavailable — '
                      f'<code>{load_error}</code>. Symbolic only.</div>')
    if nl_summarizer is None or not nl_summarizer.enabled:
        gem_status = nl_summarizer.last_error if nl_summarizer else "module missing"
        gem_status = escape(str(gem_status))
        model_note += (f'<div class="cx-note">ℹ️ Gemini summaries disabled '
                       f'({gem_status}). Template summaries will be used.</div>')

    # ── Summary strip ───────────────────────────────────────────────────────
    gauge = _risk_gauge_html(pct, overall_level)
    summary_html = f"""
<div class="cx-summary-grid">
<div class="cx-card cx-gauge-card">{gauge}</div>
<div class="cx-card cx-stat-card"><div class="cx-stat-label">Clauses</div>
<div class="cx-stat-val">{num_clauses}</div></div>
<div class="cx-card cx-stat-card"><div class="cx-stat-label">🔴 High</div>
<div class="cx-stat-val" style="color:#ef4444">{high_n}</div></div>
<div class="cx-card cx-stat-card"><div class="cx-stat-label">🟡 Medium</div>
<div class="cx-stat-val" style="color:#f59e0b">{med_n}</div></div>
<div class="cx-card cx-stat-card"><div class="cx-stat-label">🟢 Low</div>
<div class="cx-stat-val" style="color:#10b981">{low_n}</div></div>
</div>
{model_note}"""

    # ── Top-risk cards ──────────────────────────────────────────────────────
    top_parts = ['<div class="cx-section-title">🔥 Top Risk Clauses</div>',
                 '<div class="cx-top-grid">']
    for r in doc["top_risks"]:
        lvl = r["risk_level_raw"]
        color = LEVEL_HEX.get(lvl, "#6b7280")
        cpct = pct_of(r["risk_score"])
        bar = _mini_bar(cpct, lvl)
        preview = highlight_keywords(r["clause_text"][:220].replace("\n", " "))
        more = "…" if len(r["clause_text"]) > 220 else ""
        conf_html = _confidence_badge(r.get("confidence"))
        clause_no = r.get("clause_number")
        no_str = f' · {clause_no}' if clause_no else ''
        pills = "".join(
            f'<span class="cx-pill" style="border-color:{color}33;color:{color}">'
            f'{CAT_ICON.get(rule["category"],"⚠️")} {rule["rule_id"]}</span>'
            for rule in r["triggered_rules"][:3]
        ) or '<span class="cx-pill-none">No violations</span>'
        top_parts.append(f"""
<div class="cx-clause-card" style="border-left:3px solid {color}">
<div class="cx-clause-header">
<span class="cx-clause-num">#{r['clause_index']}{no_str}</span>
<span class="cx-badge" style="background:{color}22;color:{color}">{lvl}</span>
{conf_html}
<span class="cx-score-label">{cpct}%</span>
</div>
<div class="cx-bar-wrap">{bar}</div>
<div class="cx-clause-preview">{preview}{more}</div>
<div class="cx-pills">{pills}</div>
</div>""")
    top_parts.append("</div>")
    top_html = "\n".join(top_parts)

    # ── All-clauses Markdown table ──────────────────────────────────────────
    rows = [
        "## 📄 All Clauses\n",
        "| # | Marker | Level | Score | Confidence | Symbolic | Preview |",
        "|---|--------|-------|-------|------------|----------|---------|",
    ]
    for r in clauses:
        # A literal '|' would end the table cell; emit it as an HTML entity.
        # (The previous replace("|", "|") was a no-op.)
        preview = r["clause_text"][:55].replace("\n", " ").replace("|", "&#124;")
        conf = r.get("confidence") or {}
        clvl = conf.get("level", "—")
        cscore = pct_of(conf.get("score", 0)) if conf else 0
        marker = r.get("clause_number") or "—"
        rows.append(
            f"| {r['clause_index']} | {marker} | "
            f"{LEVEL_COLOR.get(r['risk_level_raw'],'⚪')} {r['risk_level_raw']} | "
            f"`{pct_of(r['risk_score'])}%` | `{clvl} {cscore}%` | "
            f"`{r['symbolic_score']}` | {preview}… |"
        )
    breakdown_md = "\n".join(rows)

    # ── Dropdown choices ────────────────────────────────────────────────────
    # show_clause_explanation parses the leading "#<index>" back out of these
    # labels, so that prefix must stay first.
    clause_choices = [
        f"#{r['clause_index']}"
        f"{(' ' + r['clause_number']) if r.get('clause_number') else ''}"
        f" | {LEVEL_COLOR.get(r['risk_level_raw'],'⚪')} "
        f"{r['risk_level_raw']} {pct_of(r['risk_score'])}% | "
        f"{r['clause_text'][:55].replace(chr(10), ' ')}…"
        for r in clauses
    ]

    pdf_update = gr.update(visible=True, value=None)
    return (summary_html, top_html, breakdown_md,
            gr.update(choices=clause_choices, value=None), doc, pdf_update)
def show_clause_explanation(choice: str, doc_state: dict):
    """UI callback: render the full explanation panel for one selected clause.

    ``choice`` is a dropdown label whose first token is ``#<clause_index>``;
    ``doc_state`` is the document result stored by ``_build_outputs``.
    Returns an HTML string. Every optional step (NL summary, LIME, attention)
    is fail-soft: an error there is logged and that section is omitted.
    """
    from html import escape  # local import keeps this block self-contained

    if not choice or not doc_state:
        return '<div class="cx-empty">← Select a clause above to see its full legal analysis.</div>'
    try:
        idx = int(choice.split("|")[0].split()[0].strip().lstrip("#")) - 1
        if idx < 0:
            # "#0" would otherwise wrap around to the LAST clause.
            raise IndexError(idx)
        r = doc_state["clauses"][idx]
    except (ValueError, IndexError, KeyError, TypeError, AttributeError):
        return '<div class="cx-empty">Could not load clause.</div>'

    explanation = generate_explanation(r["clause_text"], r)

    # Natural-language summary: best effort, like the LIME/attention paths.
    nl_text = ""
    if nl_summarizer is not None:
        try:
            nl_text = nl_summarizer.generate_summary(explanation, r["clause_text"]) or ""
        except Exception as e:
            print(f"[WARN] NL summary failed: {e}")
            nl_text = ""
        r["nl_summary"] = nl_text
    explanation["natural_language_summary"] = nl_text

    lime_words = []
    if local_explainer is not None and build_predict_fn_for_manager is not None:
        try:
            manager.ensure_loaded()
            predict_fn = build_predict_fn_for_manager(manager)
            lime_words = local_explainer.explain_with_lime(r["clause_text"], predict_fn)
        except Exception as e:
            print(f"[WARN] LIME path failed: {e}")

    attn_tokens = []
    if local_explainer is not None and manager.is_ready:
        try:
            attn_tokens = local_explainer.get_attention_map(
                r["clause_text"], manager.model, manager.tokenizer,
            )
        except Exception as e:
            print(f"[WARN] Attention path failed: {e}")

    lvl = r["risk_level_raw"]
    color = LEVEL_HEX.get(lvl, "#6b7280")
    # Floor to a whole percent AFTER rounding to 0.1 %: int(score * 100)
    # turns 0.57 into 56 because 0.57 * 100 == 56.99999999999999.
    cpct = int(round(float(r["risk_score"] or 0) * 1000)) // 10
    bar = _mini_bar(cpct, lvl)
    highlighted = highlight_keywords(r["clause_text"])
    sym_note = ('<div class="cx-note">ℹ️ Neural model not loaded — symbolic score only.</div>'
                if not manager.is_ready else "")
    breakdown_html = _score_breakdown_html(r.get("score_breakdown"))
    conf_badge = _confidence_badge(r.get("confidence"))

    evidence_pills = _evidence_pills(r.get("evidence", {}))
    evidence_block = (
        f'<div class="cx-section-label">🔍 Evidence Detected</div>'
        f'<div class="cx-pills">{evidence_pills}</div>'
    ) if evidence_pills else ""

    nl_block = ""
    nl_text = str(nl_text).strip()
    if nl_text:
        gem_tag = ("🤖 Gemini" if (nl_summarizer and nl_summarizer.enabled)
                   else "📝 Template")
        nl_block = (
            f'<div class="cx-section-label">{gem_tag} AI Summary</div>'
            f'<div class="cx-nl">{nl_text}</div>'
        )

    lime_block = ""
    if lime_words:
        lime_block = (f'<div class="cx-section-label">🧪 LIME — Key Legal Terms Driving Risk</div>'
                      f'{lime_html(lime_words)}')

    attn_block = ""
    if attn_tokens:
        attn_block = (f'<div class="cx-section-label">👁️ Attention Heatmap</div>'
                      f'{attention_heatmap_html(attn_tokens)}')

    bd_text_block = ""
    if explanation.get("score_breakdown_text"):
        bd_text_block = f'<div class="cx-bd-text">{explanation["score_breakdown_text"]}</div>'

    # ── Per-rule cards ──────────────────────────────────────────────────────
    rule_cards = []
    for rule_data in explanation.get("rules") or []:
        rid = rule_data["rule_id"]
        icon = CAT_ICON.get(rule_data.get("category", ""), "⚠️")
        ev_html = ""
        if rule_data.get("evidence"):
            # Matched phrases are excerpts of the user's document: escape them.
            matched = "".join(
                f'<span class="cx-ev-pill-sm">“{escape(str(e.get("phrase", "")))}”</span>'
                for e in rule_data["evidence"]
            )
            ev_html = ('<div class="cx-rule-row"><span class="cx-rule-k">Matched</span><span>'
                       + matched + '</span></div>')
        rule_cards.append(f"""
<div class="cx-rule-card" style="border-left:2px solid {color}">
<div class="cx-rule-header">
<span>{icon} <strong>[{rid}]</strong> {rule_data['name']}</span>
<span class="cx-ref">{rule_data['reference']}</span>
</div>
<div class="cx-rule-row"><span class="cx-rule-k">Why flagged</span>
<span>{rule_data.get('why','—')}</span></div>
<div class="cx-rule-row"><span class="cx-rule-k">What it means</span>
<span>{rule_data.get('meaning','—')}</span></div>
{ev_html}
<div class="cx-rule-row cx-suggestion"><span class="cx-rule-k">💡 Fix</span>
<span>{rule_data.get('suggestion','—')}</span></div>
</div>""")
    rules_html = "".join(rule_cards)
    if not rules_html:
        rules_html = '<div class="cx-empty">No specific rule violations detected.</div>'

    overview = explanation.get("overview", "")
    general_tip = explanation.get("general_tip", "")
    tip_block = (f'<div class="cx-section-label">💡 General Guidance</div>'
                 f'<div class="cx-tip">{general_tip}</div>') if general_tip else ""
    clause_no = r.get("clause_number")
    title_note = f" · {clause_no}" if clause_no else ""

    return f"""
<div class="cx-exp-wrap">
<div class="cx-exp-header" style="border-left:4px solid {color}">
<div>
<div class="cx-exp-title">Clause #{r['clause_index']}{title_note}</div>
<div class="cx-badges-row">
<span class="cx-badge" style="background:{color}22;color:{color}">{lvl} RISK</span>
{conf_badge}
<span class="cx-badge cx-badge-sm">Fused {cpct}%</span>
<span class="cx-badge cx-badge-sm">Neural {r['neural_score']}</span>
<span class="cx-badge cx-badge-sm">Symbolic {r['symbolic_score']}</span>
</div>
</div>
<div style="width:180px;padding-top:8px">{bar}</div>
</div>
{sym_note}
{nl_block}
<div class="cx-section-label">📋 Overview</div>
<div class="cx-overview">{overview}</div>
<div class="cx-section-label">🧮 Score Breakdown</div>
{breakdown_html}
{bd_text_block}
{evidence_block}
{lime_block}
{attn_block}
<div class="cx-section-label">🔦 Clause Text</div>
<div class="cx-clause-text">{highlighted}</div>
<div class="cx-section-label">⚖️ Rule Analysis</div>
{rules_html}
{tip_block}
</div>"""
def build_pdf_report(doc_state: dict):
    """UI callback: generate the PDF report and return a download-widget update.

    Returns a hidden update when there is no analysed document, and a visible
    but empty update when the report module is unavailable or generation
    fails. On failure the temporary file created for the report is removed
    so failed attempts do not accumulate files in the temp directory.
    """
    if not doc_state:
        return gr.update(visible=False, value=None)
    if generate_report is None:
        return gr.update(visible=True, value=None)

    tmp_path = None
    try:
        # delete=False: the file must outlive this function so the UI can
        # serve it; we only reserve a unique path here.
        with tempfile.NamedTemporaryFile(prefix="clausexplain_", suffix=".pdf",
                                         delete=False) as tmp:
            tmp_path = tmp.name
        out = generate_report(doc_state, tmp_path)
        return gr.update(visible=True, value=out)
    except Exception as e:
        print(f"[ERROR] PDF report generation failed: {e}")
        if tmp_path is not None:
            try:
                os.remove(tmp_path)
            except OSError:
                # Best-effort cleanup; the file may already be gone.
                pass
        return gr.update(visible=True, value=None)
# Sample contracts as (button label, clause text) pairs. build_ui() turns each
# entry into a button that copies the text into the paste box.
EXAMPLES = [
    (
        "⚡ High Risk",
        "1. Liability Cap\n"
        "The total liability of either party shall not exceed Rs. 50,000 under any circumstances,\n"
        "including gross negligence or wilful misconduct of either party.\n"
        "2. Non-Compete\n"
        "Employee shall not compete with the company in any capacity for 3 years following\n"
        "termination of this agreement, within the territory of India.\n"
        "3. Indemnity\n"
        "The Service Provider shall indemnify and hold harmless the Client against any and all\n"
        "claims, damages, losses, and expenses arising out of or related to this agreement.",
    ),
    (
        "🟡 Medium Risk",
        "1. Auto-Renewal\n"
        "This agreement shall automatically renew for successive one-year terms.\n"
        "2. Arbitration\n"
        "Any dispute arising out of this agreement shall be referred to arbitration with\n"
        "the seat of arbitration in Singapore.\n"
        "3. Pricing\n"
        "The Company may modify the prices and fees charged under this agreement at\n"
        "its sole discretion to modify the terms upon written notice.",
    ),
    (
        "🟢 Low Risk",
        "1. Renewal\n"
        "This agreement renews automatically every year unless either party provides\n"
        "30 days written notice before the renewal date.\n"
        "2. Governing Law\n"
        "This agreement is governed by the laws of India.",
    ),
    (
        "🧪 Benign (M&A-style)",
        "Compensation paid hereunder shall be exclusive of the Company's\n"
        "contributions to statutory benefits. Payment of the closing bonus is\n"
        "contingent on the occurrence of the closing of the merger transaction\n"
        "and continued employment through such date.",
    ),
]
# Stylesheet handed to gr.Blocks(css=...) in build_ui(). It restyles the Gradio
# container and form controls with a dark palette and defines the cx-* classes
# used by the HTML fragments this app generates (hero, summary grid, clause
# cards, score breakdown, rule cards, attention/LIME panels).
# The first rule is an @import that fetches the DM Sans / DM Mono fonts from
# fonts.googleapis.com when the page loads.
CUSTOM_CSS = """
@import url('https://fonts.googleapis.com/css2?family=DM+Sans:ital,opsz,wght@0,9..40,300;0,9..40,400;0,9..40,500;0,9..40,600;0,9..40,700;1,9..40,400&family=DM+Mono:wght@400;500&display=swap');
* { box-sizing: border-box; }
body, .gradio-container { background:#080d1a !important; font-family:'DM Sans',sans-serif !important; color:#e2e8f0 !important; }
footer { display:none !important; }
.gradio-container { max-width:1080px !important; margin:0 auto !important; }
.cx-hero { text-align:center; padding:52px 24px 36px; background:linear-gradient(135deg,#0f172a 0%,#1a1040 60%,#0f172a 100%); border-radius:16px; margin-bottom:8px; position:relative; overflow:hidden; }
.cx-hero::before { content:''; position:absolute; inset:0; background:radial-gradient(ellipse 70% 60% at 50% -10%,#6366f135 0%,transparent 70%); pointer-events:none; }
.cx-hero-icon { font-size:44px; margin-bottom:14px; }
.cx-hero-title { font-size:38px; font-weight:700; letter-spacing:-.025em; background:linear-gradient(135deg,#f1f5f9 20%,#a5b4fc 80%); -webkit-background-clip:text; -webkit-text-fill-color:transparent; margin:0 0 10px; line-height:1.1; }
.cx-hero-sub { font-size:15px; color:#94a3b8; margin:0 0 22px; font-weight:400; }
.cx-badges { display:flex; gap:8px; flex-wrap:wrap; justify-content:center; }
.cx-badge-hero { font-size:11px; font-weight:600; letter-spacing:.07em; text-transform:uppercase; padding:5px 12px; border-radius:20px; border:1px solid #2d3a55; background:#131c30; color:#8b9fc7; }
.cx-model-notice { background:#111827; border:1px solid #1e293b; border-radius:10px; padding:11px 16px; font-size:13px; color:#94a3b8; display:flex; align-items:center; gap:10px; margin-bottom:4px; }
.cx-model-notice strong { color:#a5b4fc; }
.cx-card { background:#111827; border:1px solid #1e293b; border-radius:12px; padding:16px 20px; }
.cx-summary-grid { display:grid; grid-template-columns:180px 1fr 1fr 1fr 1fr; gap:12px; align-items:stretch; margin:4px 0 8px; }
@media(max-width:720px){ .cx-summary-grid { grid-template-columns:1fr 1fr; } }
.cx-gauge-card { display:flex; align-items:center; justify-content:center; padding:20px; }
.cx-stat-card { display:flex; flex-direction:column; justify-content:center; gap:6px; }
.cx-stat-label { font-size:11px; font-weight:600; letter-spacing:.06em; text-transform:uppercase; color:#4b5563; }
.cx-stat-val { font-size:30px; font-weight:700; font-family:'DM Mono',monospace; line-height:1; }
.cx-note { background:#1e293b; border-radius:8px; padding:10px 14px; font-size:13px; color:#94a3b8; margin:6px 0; }
.cx-warn { border-left:3px solid #f59e0b; color:#fcd34d !important; }
.cx-section-title, .cx-section-label { font-size:11px; font-weight:700; letter-spacing:.08em; text-transform:uppercase; color:#4b5563; margin:20px 0 10px; }
.cx-top-grid { display:flex; flex-direction:column; gap:12px; }
.cx-clause-card { background:#111827; border:1px solid #1e293b; border-radius:12px; padding:16px 18px; transition:border-color .15s; }
.cx-clause-card:hover { border-color:#2d3748; }
.cx-clause-header { display:flex; align-items:center; gap:8px; margin-bottom:10px; flex-wrap:wrap; }
.cx-clause-num { font-family:'DM Mono',monospace; font-size:12px; color:#4b5563; min-width:28px; }
.cx-badge { font-size:10px; font-weight:700; letter-spacing:.07em; text-transform:uppercase; padding:3px 9px; border-radius:20px; }
.cx-badge-sm { background:#1e293b !important; color:#64748b !important; }
.cx-score-label { font-family:'DM Mono',monospace; font-size:14px; font-weight:600; color:#e2e8f0; margin-left:auto; }
.cx-bar-wrap { margin-bottom:12px; }
.cx-clause-preview { font-size:13px; color:#94a3b8; line-height:1.65; margin-bottom:12px; }
.cx-clause-preview strong { color:#fca5a5; background:#7f1d1d28; border-radius:3px; padding:0 2px; }
.cx-pills { display:flex; flex-wrap:wrap; gap:6px; }
.cx-pill { font-size:11px; font-weight:500; padding:3px 8px; border-radius:6px; border:1px solid; letter-spacing:.02em; }
.cx-pill-none { font-size:12px; color:#374151; }
.cx-ev-pill { font-size:11px; padding:3px 8px; border-radius:6px; background:#1e293b; color:#a5b4fc; border:1px solid #312e81; font-family:'DM Mono',monospace; }
.cx-ev-pill-sm { font-size:11px; padding:2px 7px; border-radius:5px; background:#1e293b; color:#a5b4fc; border:1px solid #312e81; font-family:'DM Mono',monospace; margin-right:4px; display:inline-block; }
.cx-breakdown { background:#0c1525; border:1px solid #1e293b; border-radius:10px; padding:14px 16px; font-family:'DM Mono',monospace; font-size:13px; color:#cbd5e1; }
.cx-bd-row { display:grid; grid-template-columns:80px 80px 1fr; align-items:center; padding:4px 0; }
.cx-bd-k { color:#6b7280; font-size:11px; text-transform:uppercase; letter-spacing:.06em; }
.cx-bd-v { color:#e2e8f0; font-weight:600; }
.cx-bd-w { color:#94a3b8; }
.cx-bd-sep { border:none; border-top:1px solid #1e293b; margin:8px 0; }
.cx-bd-formula { color:#a5b4fc; font-size:13px; padding:4px 0 8px; }
.cx-bd-final { display:grid; grid-template-columns:80px 1fr; padding-top:4px; }
.cx-bd-final-v { color:#34d399; font-weight:700; font-size:16px; }
.cx-bd-text { font-family:'DM Mono',monospace; font-size:12px; color:#94a3b8; padding:6px 14px; }
.cx-divider { border:none; border-top:1px solid #1a2332; margin:24px 0; }
.cx-empty { color:#374151; font-size:14px; padding:28px 0; text-align:center; }
.cx-exp-wrap { display:flex; flex-direction:column; gap:14px; }
.cx-exp-header { background:#111827; border-radius:12px; padding:16px 20px; display:flex; align-items:flex-start; justify-content:space-between; gap:16px; }
.cx-exp-title { font-size:17px; font-weight:700; margin-bottom:8px; }
.cx-badges-row { display:flex; gap:6px; flex-wrap:wrap; }
.cx-overview, .cx-nl { background:#111827; border-radius:10px; padding:14px 16px; font-size:14px; color:#cbd5e1; line-height:1.75; }
.cx-nl { border-left:3px solid #8b5cf6; }
.cx-clause-text { background:#0c1525; border:1px solid #1e293b; border-radius:10px; padding:16px; font-size:13px; line-height:1.9; color:#94a3b8; font-family:'DM Mono',monospace; white-space:pre-wrap; }
.cx-clause-text strong { color:#fca5a5; background:#7f1d1d2a; border-radius:3px; padding:1px 3px; }
.cx-rule-card { background:#111827; border-radius:10px; padding:14px 16px; display:flex; flex-direction:column; gap:8px; margin-bottom:8px; }
.cx-rule-header { display:flex; align-items:flex-start; justify-content:space-between; gap:12px; font-size:14px; font-weight:600; }
.cx-ref { font-size:11px; color:#6366f1; font-family:'DM Mono',monospace; white-space:nowrap; padding-top:2px; flex-shrink:0; }
.cx-rule-row { display:grid; grid-template-columns:100px 1fr; gap:8px; font-size:13px; color:#94a3b8; align-items:baseline; }
.cx-rule-k { font-size:10px; font-weight:700; letter-spacing:.05em; text-transform:uppercase; color:#374151; }
.cx-suggestion span:last-child { color:#6ee7b7; }
.cx-tip { background:#0a1f16; border:1px solid #064e3b40; border-radius:10px; padding:14px 16px; font-size:13px; color:#6ee7b7; line-height:1.75; }
.cx-attn-wrap, .cx-lime-wrap { background:#0c1525; border:1px solid #1e293b; border-radius:10px; padding:14px 16px; }
.cx-attn-title, .cx-lime-title { font-size:11px; font-weight:700; letter-spacing:.07em; text-transform:uppercase; color:#6b7280; margin-bottom:10px; }
.cx-attn-grid { display:flex; flex-wrap:wrap; gap:4px; }
.cx-attn-chip { display:inline-flex; align-items:center; gap:4px; padding:3px 7px; border-radius:5px; font-family:'DM Mono',monospace; font-size:12px; color:#e2e8f0; }
.cx-attn-w { font-size:9px; opacity:.6; }
.cx-attn-legend, .cx-lime-legend { font-size:11px; color:#4b5563; margin-top:10px; }
.cx-lime-list { display:flex; flex-direction:column; gap:6px; }
.cx-lime-row { display:grid; grid-template-columns:120px 1fr 80px; align-items:center; gap:10px; }
.cx-lime-word { font-family:'DM Mono',monospace; font-size:12px; color:#cbd5e1; overflow:hidden; text-overflow:ellipsis; white-space:nowrap; }
.cx-lime-bar-wrap { background:#1e293b; border-radius:4px; height:6px; }
.cx-lime-bar { height:6px; border-radius:4px; }
.cx-lime-w { font-family:'DM Mono',monospace; font-size:11px; text-align:right; }
.gr-button, button { border-radius:8px !important; font-weight:600 !important; }
button.primary { background:linear-gradient(135deg,#6366f1,#8b5cf6) !important; border:none !important; color:#fff !important; letter-spacing:.02em !important; transition:opacity .15s !important; }
button.secondary { background:#1e293b !important; border:1px solid #334155 !important; color:#e2e8f0 !important; }
button:hover { opacity:.88 !important; }
.gr-box, .gr-form { background:#111827 !important; border-color:#1e293b !important; border-radius:12px !important; }
textarea, input[type=text] { background:#0c1525 !important; border:1px solid #1e293b !important; color:#e2e8f0 !important; border-radius:8px !important; font-family:'DM Sans',sans-serif !important; }
label > span { color:#64748b !important; font-size:13px !important; }
.gr-file { background:#0c1525 !important; border:1px dashed #2d3748 !important; border-radius:10px !important; }
select, .gr-dropdown { background:#0c1525 !important; border-color:#1e293b !important; color:#e2e8f0 !important; border-radius:8px !important; }
.gr-accordion > .label-wrap { background:#111827 !important; border-color:#1e293b !important; border-radius:10px !important; color:#94a3b8 !important; }
"""
def build_ui():
    """Assemble the ClauseXplain dashboard and connect its event handlers.

    Components are created top to bottom in display order: hero banner,
    model-loading notice, the PDF-upload and paste-text inputs, one button per
    entry in ``EXAMPLES``, the summary, the top-risk list, the collapsible
    full breakdown, the PDF report row, the clause explorer and the
    disclaimer footer.

    Returns:
        The ``gr.Blocks`` app; the caller is responsible for launching it.
    """
    # ── Static markup, named up front so the layout below stays readable ──
    hero_badges = (
        "ICA 1872",
        "DPDPA 2023",
        "IT Act 2000",
        "CPA 2019",
        "Arbitration Act 1996",
        "Gemini · LIME · Attention",
    )
    badge_spans = "".join(
        f'<span class="cx-badge-hero">{badge}</span>\n' for badge in hero_badges
    )
    hero_html = (
        "\n"
        '<div class="cx-hero">\n'
        '<div class="cx-hero-icon">⚖️</div>\n'
        '<h1 class="cx-hero-title">ClauseXplain</h1>\n'
        '<p class="cx-hero-sub">International contract neural backbone, '
        "localised via Indian neuro-symbolic legal reasoning</p>\n"
        '<div class="cx-badges">\n'
        + badge_spans +
        "</div>\n"
        "</div>\n"
    )
    notice_html = (
        "\n"
        '<div class="cx-model-notice">\n'
        "⏳ The neural model (~2 GB) loads on your <strong>first analysis request</strong> —\n"
        "expect 60–90 s. Per-clause LIME + attention run lazily when you inspect a clause (~15–25 s).\n"
        "</div>\n"
    )
    footer_html = (
        "\n"
        '<hr class="cx-divider">\n'
        '<div style="text-align:center;padding:12px 0 4px;color:#1e293b;'
        'font-size:12px;letter-spacing:.04em">\n'
        "Built for Indian Contract Intelligence ·\n"
        "Neuro-Symbolic AI ·\n"
        "<em>For informational purposes only — not legal advice. Consult a qualified lawyer.</em>\n"
        "</div>\n"
    )
    # Both input columns use the same heading style; only the caption differs.
    input_heading = (
        '<div style="font-size:11px;font-weight:700;letter-spacing:.08em;'
        'text-transform:uppercase;color:#6366f1;margin-bottom:8px">{}</div>'
    )
    examples_heading = (
        '<div style="font-size:11px;font-weight:600;letter-spacing:.07em;'
        'text-transform:uppercase;color:#374151;margin:16px 0 8px">Try an example</div>'
    )
    explorer_heading = (
        '<div style="font-size:11px;font-weight:700;letter-spacing:.08em;'
        'text-transform:uppercase;color:#4b5563;margin-bottom:12px">🔎 Clause Explorer</div>'
    )
    divider_html = '<hr class="cx-divider">'

    theme = gr.themes.Base(
        primary_hue=gr.themes.colors.indigo,
        neutral_hue=gr.themes.colors.slate,
        font=[gr.themes.GoogleFont("DM Sans"), "sans-serif"],
    )

    with gr.Blocks(
        title="ClauseXplain — AI Legal Risk Dashboard",
        theme=theme,
        css=CUSTOM_CSS,
    ) as demo:
        # Holds the latest analysis so the explorer and the PDF export can
        # reuse it without re-running the model.
        doc_state = gr.State(value=None)

        gr.HTML(hero_html)
        gr.HTML(notice_html)

        # ── Inputs ──
        with gr.Row(equal_height=True):
            with gr.Column():
                gr.HTML(input_heading.format("📂 Upload PDF"))
                pdf_input = gr.File(
                    label="Contract PDF", file_types=[".pdf"], type="filepath"
                )
                pdf_btn = gr.Button("Analyse PDF →", variant="primary")
            with gr.Column():
                gr.HTML(input_heading.format("✏️ Paste Text"))
                text_input = gr.Textbox(
                    label="",
                    placeholder="Paste one or more contract clauses here…",
                    lines=6,
                )
                text_btn = gr.Button("Analyse Text →", variant="secondary")

        # ── Example shortcuts ──
        gr.HTML(examples_heading)
        with gr.Row():
            for button_label, sample_text in EXAMPLES:
                sample_btn = gr.Button(button_label, size="sm")
                # The default argument freezes this iteration's text; a plain
                # closure would make every button paste the last example.
                sample_btn.click(fn=lambda text=sample_text: text,
                                 outputs=text_input)

        # ── Results ──
        gr.HTML(divider_html)
        summary_out = gr.HTML(
            '<div class="cx-empty">Upload a PDF or paste contract text to begin analysis.</div>'
        )
        gr.HTML(divider_html)
        top_risks_out = gr.HTML("")
        with gr.Accordion("📄 Full Clause Breakdown", open=False):
            breakdown_out = gr.Markdown("")
        with gr.Row():
            pdf_dl_btn = gr.Button("📥 Download PDF Report", variant="primary")
            pdf_file_out = gr.File(
                label="Compliance Report", visible=False, interactive=False
            )

        # ── Clause explorer ──
        gr.HTML(divider_html)
        gr.HTML(explorer_heading)
        clause_selector = gr.Dropdown(
            label="Select a clause to inspect", choices=[], interactive=True
        )
        explanation_out = gr.HTML(
            '<div class="cx-empty">← Select a clause above to see its full legal analysis.</div>'
        )

        gr.HTML(footer_html)

        # ── Event wiring ──
        # Both analysis entry points refresh the same set of components.
        analysis_outputs = [
            summary_out,
            top_risks_out,
            breakdown_out,
            clause_selector,
            doc_state,
            pdf_file_out,
        ]
        pdf_btn.click(fn=analyze_pdf, inputs=[pdf_input], outputs=analysis_outputs)
        text_btn.click(fn=analyze_text, inputs=[text_input], outputs=analysis_outputs)
        clause_selector.change(
            fn=show_clause_explanation,
            inputs=[clause_selector, doc_state],
            outputs=[explanation_out],
        )
        pdf_dl_btn.click(fn=build_pdf_report, inputs=[doc_state],
                         outputs=[pdf_file_out])

    return demo
# Script entry point: build the dashboard and serve it.
if __name__ == "__main__":
    demo = build_ui()
    # 0.0.0.0 listens on every network interface (not just localhost), on
    # port 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860)