File size: 3,504 Bytes
28db5b3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
547548e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# utils.py
# Keyword highlighting and text formatting helpers.

from __future__ import annotations
import re

# ── High-risk keyword groups ────────────────────────────────────────────────
# Phrases that mark a clause as potentially high-risk. Entries are lowercase
# and grouped by theme; the relative order is kept stable because consumers
# may sort this list with a stable sort.
RISKY_KEYWORDS = [
    # --- Liability / damages ---
    "gross negligence",
    "wilful misconduct",
    "liquidated damages",
    "sole discretion",
    "without liability",
    "no obligation",
    "cap on liability",
    "shall not exceed",
    "unconditionally",
    # --- Data / privacy ---
    "personal data",
    "personal information",
    "sensitive personal",
    "collect and process",
    "data subject",
    "user data",
    "customer data",
    "health data",
    "financial data",
    "biometric",
    "aadhaar",
    # --- Intellectual property ---
    "all intellectual property",
    "assigns all ip",
    "belongs exclusively",
    "vests in",
    "pre-existing",
    "background ip",
    # --- Termination / enforceability ---
    "may terminate",
    "without cause",
    "at will",
    "unilaterally",
    "irrevocable",
    "perpetual",
    "non-terminable",
    "non-compete",
    "shall not compete",
    "no-solicit",
    "waive right to sue",
    "shall not bring action",
    "contingent on",
    "wagering",
    "speculative",
    # --- Notice ---
    "no notice required",
    "immediate termination",
    "forthwith",
]

# Markers placed before and after a highlighted keyword (Markdown bold, as
# rendered by Gradio).
HIGHLIGHT_OPEN = "**"
HIGHLIGHT_CLOSE = "**"


def highlight_keywords(text: str) -> str:
    """Wrap every risky keyword found in *text* in Markdown bold.

    Matching is case-insensitive and purely substring-based (no word
    boundaries are applied), so a keyword such as "at will" also matches
    inside "that will". Keywords are processed longest first, so a longer
    phrase is wrapped before any shorter phrase is tried.

    An occurrence that is directly preceded by ``HIGHLIGHT_OPEN`` is treated
    as already highlighted and is left untouched.

    Args:
        text: The text to scan.

    Returns:
        A copy of *text* in which each matched keyword is surrounded by
        ``HIGHLIGHT_OPEN`` and ``HIGHLIGHT_CLOSE``. The matched text keeps
        its original casing.
    """
    marker_len = len(HIGHLIGHT_OPEN)

    def _wrap(match: re.Match) -> str:
        # Look at the string that is actually being substituted
        # (match.string), not the caller's original text: earlier passes
        # insert markers and shift every later offset, so indexing the
        # original text with match.start() would inspect the wrong characters
        # and wrap an already-bold keyword a second time.
        start = match.start()
        preceding = match.string[max(0, start - marker_len):start]
        if HIGHLIGHT_OPEN in preceding:
            return match.group(0)
        return f"{HIGHLIGHT_OPEN}{match.group(0)}{HIGHLIGHT_CLOSE}"

    result = text
    # Longest keywords first; sorted() is stable, so equal-length keywords
    # keep their order from RISKY_KEYWORDS.
    for kw in sorted(RISKY_KEYWORDS, key=len, reverse=True):
        result = re.sub(re.escape(kw), _wrap, result, flags=re.IGNORECASE)
    return result


def format_triggered_rules(rules: list) -> str:
    """Return a Markdown bullet list describing the triggered rules.

    Args:
        rules: Rule mappings. Each one must provide the keys ``rule_id``,
            ``name``, ``reference`` and ``penalty``. An empty (or otherwise
            falsy) value means no rule was triggered.

    Returns:
        One bullet per rule, joined by newlines, or a single "no violations"
        line when *rules* is empty.

    Raises:
        KeyError: If a rule lacks one of the required keys.
    """
    if not rules:
        return "✅ No Indian-law violations detected."
    # The two spaces before "\n" are a Markdown hard line break, so the
    # reference/penalty line renders beneath the rule title in the same bullet.
    return "\n".join(
        f"- ⚠️ **[{r['rule_id']}] {r['name']}**  \n"
        f"  *{r['reference']}* — penalty weight: `{r['penalty']}`"
        for r in rules
    )


def format_explanation(explanation: dict) -> str:
    """Return a Markdown-formatted full explanation block.

    Args:
        explanation: Mapping with a required ``overview`` entry and two
            optional entries:

            * ``rules`` - iterable of mappings, each providing ``rule_id``,
              ``name``, ``reference``, ``why``, ``meaning`` and
              ``suggestion``.
            * ``general_tip`` - closing guidance text.

            Optional entries that are missing or falsy are skipped.

    Returns:
        The sections (overview, rule-by-rule breakdown, general guidance)
        separated by blank lines.

    Raises:
        KeyError: If ``overview`` is missing, or a rule lacks a required key.
    """
    sections = [f"### 📋 Overview\n{explanation['overview']}"]

    rules = explanation.get("rules")
    if rules:
        sections.append("### ⚖️ Rule-by-Rule Breakdown")
        sections.extend(
            f"**[{r['rule_id']}] {r['name']}** *(ref: {r['reference']})*\n\n"
            f"- **Why it's flagged:** {r['why']}\n"
            f"- **What it means:** {r['meaning']}\n"
            f"- **Suggestion:** {r['suggestion']}"
            for r in rules
        )

    general_tip = explanation.get("general_tip")
    if general_tip:
        sections.append(f"### 💡 General Guidance\n{general_tip}")

    return "\n\n".join(sections)


def score_to_bar(score: float, width: int = 20) -> str:
    """Render *score* as a fixed-width text progress bar with a percentage.

    Args:
        score: Fraction to display; ``score * 100`` is shown as the
            percentage.
        width: Number of cells in the bar.

    Returns:
        A Markdown inline-code string such as ``[█████░░░░░] 50%`` wrapped
        in backticks.
    """
    # Clamp the fill so the bar is always exactly `width` cells: a negative
    # repeat count silently yields "", which would otherwise make the bar
    # longer than `width` for scores outside 0..1.
    filled = min(max(round(score * width), 0), width)
    bar = "█" * filled + "░" * (width - filled)
    # round() rather than int(): int() truncates float error, e.g.
    # 0.29 * 100 == 28.999999999999996 would be shown as 28%.
    pct = round(score * 100)
    return f"`[{bar}] {pct}%`"