# Major_Project / utils.py
# Uploaded by riyasuryawanshi746 - "Updated after checkpoints" (commit 547548e, verified)
# utils.py
# Keyword highlighting and text formatting helpers.
from __future__ import annotations
import re
# ── High-risk keyword groups ────────────────────────────────────────────────
RISKY_KEYWORDS = [
    # Liability / damages
    "gross negligence", "wilful misconduct", "liquidated damages",
    "sole discretion", "without liability", "no obligation",
    "cap on liability", "shall not exceed", "unconditionally",
    # Data / privacy
    "personal data", "personal information", "sensitive personal",
    "collect and process", "data subject", "user data", "customer data",
    "health data", "financial data", "biometric", "aadhaar",
    # IP
    "all intellectual property", "assigns all ip", "belongs exclusively",
    "vests in", "pre-existing", "background ip",
    # Termination / enforceability
    "may terminate", "without cause", "at will", "unilaterally",
    "irrevocable", "perpetual", "non-terminable",
    "non-compete", "shall not compete", "no-solicit",
    "waive right to sue", "shall not bring action",
    "contingent on", "wagering", "speculative",
    # Notice
    "no notice required", "immediate termination", "forthwith",
]

HIGHLIGHT_OPEN = "**"  # Gradio Markdown bold
HIGHLIGHT_CLOSE = "**"


def highlight_keywords(text: str) -> str:
    """
    Wrap risky keywords in Markdown bold for Gradio display.

    Matching is case-insensitive and the matched text keeps its original
    casing. Keywords are considered longest first; once a span of ``text``
    has been claimed by a keyword, shorter keywords overlapping that span are
    ignored, so markers are never nested or interleaved.

    A match that is already immediately preceded by ``HIGHLIGHT_OPEN`` in the
    input is treated as already highlighted and left untouched.

    Args:
        text: The text to scan.

    Returns:
        ``text`` with every selected keyword occurrence wrapped in
        ``HIGHLIGHT_OPEN`` / ``HIGHLIGHT_CLOSE``.
    """
    if not text:
        return text

    # All offsets below refer to the *unmodified* input. Collecting spans
    # first and rebuilding once avoids the index drift that occurs when
    # positions found in a partially rewritten string are checked against
    # the original one.
    claimed = []  # (start, end, wrap) tuples over ``text``
    for kw in sorted(RISKY_KEYWORDS, key=len, reverse=True):
        if not kw:
            # An empty pattern would match at every position.
            continue
        for m in re.finditer(re.escape(kw), text, re.IGNORECASE):
            start, end = m.span()
            overlaps = any(start < c_end and c_start < end
                           for c_start, c_end, _ in claimed)
            if overlaps:
                continue
            # Already-wrapped occurrences still claim their span so that a
            # shorter keyword inside them is not wrapped separately.
            already_wrapped = text.endswith(HIGHLIGHT_OPEN, 0, start)
            claimed.append((start, end, not already_wrapped))

    pieces = []
    cursor = 0
    for start, end, wrap in sorted(claimed):
        pieces.append(text[cursor:start])
        segment = text[start:end]
        if wrap:
            segment = f"{HIGHLIGHT_OPEN}{segment}{HIGHLIGHT_CLOSE}"
        pieces.append(segment)
        cursor = end
    pieces.append(text[cursor:])
    return "".join(pieces)
def format_triggered_rules(rules: list) -> str:
    """Return a Markdown-formatted string of triggered rules.

    Args:
        rules: Sequence of rule records. Each record is subscripted with the
            keys ``rule_id``, ``name``, ``reference`` and ``penalty``.

    Returns:
        A fixed "no violations" message when ``rules`` is empty or falsy;
        otherwise one Markdown bullet per rule, joined with newlines.

    Raises:
        KeyError: If a rule record lacks one of the keys listed above.
    """
    if not rules:
        return "✅ No Indian-law violations detected."
    return "\n".join(
        f"- ⚠️ **[{r['rule_id']}] {r['name']}** \n"
        f" *{r['reference']}* — penalty weight: `{r['penalty']}`"
        for r in rules
    )
def format_explanation(explanation: dict) -> str:
    """Return a Markdown-formatted full explanation block.

    Args:
        explanation: Mapping with a required ``overview`` entry and optional
            ``rules`` and ``general_tip`` entries. Each item of ``rules`` is
            subscripted with the keys ``rule_id``, ``name``, ``reference``,
            ``why``, ``meaning`` and ``suggestion``.

    Returns:
        The sections (overview, optional rule-by-rule breakdown, optional
        general guidance) separated by blank lines.

    Raises:
        KeyError: If ``overview`` or any per-rule key is missing.
    """
    sections = [f"### 📋 Overview\n{explanation['overview']}"]

    rule_entries = explanation.get("rules")
    if rule_entries:
        sections.append("### ⚖️ Rule-by-Rule Breakdown")
        sections.extend(
            f"**[{r['rule_id']}] {r['name']}** *(ref: {r['reference']})*\n\n"
            f"- **Why it's flagged:** {r['why']}\n"
            f"- **What it means:** {r['meaning']}\n"
            f"- **Suggestion:** {r['suggestion']}"
            for r in rule_entries
        )

    tip = explanation.get("general_tip")
    if tip:
        sections.append(f"### 💡 General Guidance\n{tip}")

    return "\n\n".join(sections)
def score_to_bar(score: float, width: int = 20) -> str:
    """Render a risk score as a text progress bar using block characters.

    Args:
        score: Risk score; values outside ``[0, 1]`` are clamped to that
            range so the bar is always exactly ``width`` cells long.
        width: Number of cells in the bar.

    Returns:
        A Markdown inline-code string such as ``[█████░░░░░] 50%`` wrapped
        in backticks.
    """
    clamped = max(0.0, min(1.0, score))
    filled = round(clamped * width)
    bar = "█" * filled + "░" * (width - filled)
    # round() rather than int(): int() truncates, so e.g. 0.29 * 100
    # (28.999999999999996 in floating point) would be shown as 28%.
    pct = round(clamped * 100)
    return f"`[{bar}] {pct}%`"