# utils.py
# Keyword highlighting and text formatting helpers.
from __future__ import annotations
import re
# ── High-risk keyword groups ────────────────────────────────────────────────
RISKY_KEYWORDS = [
    # Liability / damages
    "gross negligence", "wilful misconduct", "liquidated damages",
    "sole discretion", "without liability", "no obligation",
    "cap on liability", "shall not exceed", "unconditionally",
    # Data / privacy
    "personal data", "personal information", "sensitive personal",
    "collect and process", "data subject", "user data", "customer data",
    "health data", "financial data", "biometric", "aadhaar",
    # IP
    "all intellectual property", "assigns all ip", "belongs exclusively",
    "vests in", "pre-existing", "background ip",
    # Termination / enforceability
    "may terminate", "without cause", "at will", "unilaterally",
    "irrevocable", "perpetual", "non-terminable",
    "non-compete", "shall not compete", "no-solicit",
    "waive right to sue", "shall not bring action",
    "contingent on", "wagering", "speculative",
    # Notice
    "no notice required", "immediate termination", "forthwith",
]

HIGHLIGHT_OPEN = "**"  # Gradio Markdown bold
HIGHLIGHT_CLOSE = "**"


def highlight_keywords(text: str) -> str:
    """Wrap every risky keyword found in *text* in the highlight markers.

    Matching is case-insensitive and purely substring based (the pattern is
    the escaped keyword, with no word-boundary anchors). Keywords are applied
    one pass each, longest first, so a longer phrase is marked before any
    shorter phrase that overlaps it.

    Args:
        text: The text to annotate.

    Returns:
        *text* with each match surrounded by ``HIGHLIGHT_OPEN`` and
        ``HIGHLIGHT_CLOSE``. A match that is directly preceded by
        ``HIGHLIGHT_OPEN`` is treated as already highlighted and left alone.
    """
    marker_len = len(HIGHLIGHT_OPEN)

    def _wrap(match: "re.Match[str]") -> str:
        # Match offsets index the string currently being substituted (the
        # partially highlighted result), which drifts away from the original
        # `text` as markers are inserted. Inspect `match.string` so the
        # "already wrapped" test looks at the characters that really precede
        # this match.
        start = match.start()
        preceding = match.string[max(0, start - marker_len):start]
        if HIGHLIGHT_OPEN in preceding:
            return match.group(0)
        return f"{HIGHLIGHT_OPEN}{match.group(0)}{HIGHLIGHT_CLOSE}"

    result = text
    for keyword in sorted(RISKY_KEYWORDS, key=len, reverse=True):
        result = re.sub(re.escape(keyword), _wrap, result, flags=re.IGNORECASE)
    return result
def format_triggered_rules(rules: list) -> str:
    """Return a Markdown bullet list describing the triggered rules.

    Args:
        rules: Rule mappings. Each one is indexed with the keys ``rule_id``,
            ``name``, ``reference`` and ``penalty``.

    Returns:
        One bullet per rule (title line, then the reference and penalty
        weight), joined by newlines. When *rules* is empty, a fixed
        "no violations" message is returned instead.

    Raises:
        KeyError: If a rule mapping lacks one of the keys listed above.
    """
    if not rules:
        return "✅ No Indian-law violations detected."
    # The two spaces before "\n" form a Markdown hard line break, and the
    # two-space indent keeps the reference line inside the same list item.
    return "\n".join(
        f"- ⚠️ **[{r['rule_id']}] {r['name']}**  \n"
        f"  *{r['reference']}* — penalty weight: `{r['penalty']}`"
        for r in rules
    )
def format_explanation(explanation: dict) -> str:
    """Return a Markdown-formatted full explanation block.

    Args:
        explanation: Mapping with a required ``overview`` entry and optional
            ``rules`` and ``general_tip`` entries. Each item of ``rules`` is
            indexed with the keys ``rule_id``, ``name``, ``reference``,
            ``why``, ``meaning`` and ``suggestion``.

    Returns:
        The overview section, followed by a rule-by-rule breakdown when
        ``rules`` is non-empty and a general-guidance section when
        ``general_tip`` is non-empty, separated by blank lines.

    Raises:
        KeyError: If ``overview`` or any per-rule key is missing.
    """
    parts = [f"### 📋 Overview\n{explanation['overview']}"]

    rules = explanation.get("rules")
    if rules:
        parts.append("### ⚖️ Rule-by-Rule Breakdown")
        parts.extend(
            f"**[{r['rule_id']}] {r['name']}** *(ref: {r['reference']})*\n\n"
            f"- **Why it's flagged:** {r['why']}\n"
            f"- **What it means:** {r['meaning']}\n"
            f"- **Suggestion:** {r['suggestion']}"
            for r in rules
        )

    general_tip = explanation.get("general_tip")
    if general_tip:
        parts.append(f"### 💡 General Guidance\n{general_tip}")

    return "\n\n".join(parts)
def score_to_bar(score: float, width: int = 20) -> str:
    """Render *score* as a fixed-width text progress bar with a percentage.

    Args:
        score: Risk score as a fraction, where 1.0 fills the whole bar.
        width: Number of cells in the bar.

    Returns:
        A Markdown inline-code string such as ``[█████░░░░░] 50%``. The bar
        always has exactly *width* cells; the percentage is the rounded,
        unclamped value of ``score * 100``.
    """
    # Clamp the fill so an out-of-range score cannot produce a bar that is
    # longer than `width` or a negative repeat count.
    filled = min(max(round(score * width), 0), width)
    bar = "█" * filled + "░" * (width - filled)
    # round() rather than int(): 0.29 * 100 is 28.999999999999996 in binary
    # floating point, which truncation would display as 28%.
    pct = round(score * 100)
    return f"`[{bar}] {pct}%`"