# utils.py # Keyword highlighting and text formatting helpers. from __future__ import annotations import re # ── High-risk keyword groups ──────────────────────────────────────────────── RISKY_KEYWORDS = [ # Liability / damages "gross negligence", "wilful misconduct", "liquidated damages", "sole discretion", "without liability", "no obligation", "cap on liability", "shall not exceed", "unconditionally", # Data / privacy "personal data", "personal information", "sensitive personal", "collect and process", "data subject", "user data", "customer data", "health data", "financial data", "biometric", "aadhaar", # IP "all intellectual property", "assigns all ip", "belongs exclusively", "vests in", "pre-existing", "background ip", # Termination / enforceability "may terminate", "without cause", "at will", "unilaterally", "irrevocable", "perpetual", "non-terminable", "non-compete", "shall not compete", "no-solicit", "waive right to sue", "shall not bring action", "contingent on", "wagering", "speculative", # Notice "no notice required", "immediate termination", "forthwith", ] HIGHLIGHT_OPEN = "**" # Gradio Markdown bold HIGHLIGHT_CLOSE = "**" def highlight_keywords(text: str) -> str: """ Wrap risky keywords in Markdown bold for Gradio display. Case-insensitive, longest-match-first to avoid partial overlaps. """ sorted_kw = sorted(RISKY_KEYWORDS, key=len, reverse=True) result = text for kw in sorted_kw: pattern = re.compile(re.escape(kw), re.IGNORECASE) # Skip if already wrapped result = pattern.sub( lambda m: f"{HIGHLIGHT_OPEN}{m.group(0)}{HIGHLIGHT_CLOSE}" if HIGHLIGHT_OPEN not in text[max(0, m.start()-2): m.start()] else m.group(0), result, ) return result def format_triggered_rules(rules: list) -> str: """Return a Markdown-formatted string of triggered rules.""" if not rules: return "✅ No Indian-law violations detected." lines = [] for r in rules: lines.append( f"- ⚠️ **[{r['rule_id']}] {r['name']}** \n" f" *{r['reference']}* — penalty weight: `{r['penalty']}`" ) return "\n".join(lines) def format_explanation(explanation: dict) -> str: """Return a Markdown-formatted full explanation block.""" parts = [f"### 📋 Overview\n{explanation['overview']}"] if explanation.get("rules"): parts.append("### ⚖️ Rule-by-Rule Breakdown") for r in explanation["rules"]: parts.append( f"**[{r['rule_id']}] {r['name']}** *(ref: {r['reference']})*\n\n" f"- **Why it's flagged:** {r['why']}\n" f"- **What it means:** {r['meaning']}\n" f"- **Suggestion:** {r['suggestion']}" ) if explanation.get("general_tip"): parts.append(f"### 💡 General Guidance\n{explanation['general_tip']}") return "\n\n".join(parts) def score_to_bar(score: float, width: int = 20) -> str: """Simple ASCII progress bar for risk score display.""" filled = round(score * width) bar = "█" * filled + "░" * (width - filled) pct = int(score * 100) return f"`[{bar}] {pct}%`"