Spaces:

riyasuryawanshi746
/

Major_Project

Running

App Files Files Community

Major_Project / utils.py

riyasuryawanshi746

Updated after checkpoints

547548e verified 19 days ago

raw

history blame contribute delete

3.41 kB

	# utils.py
	# Keyword highlighting and text formatting helpers.

	from __future__ import annotations
	import re

	# ── High-risk keyword groups ────────────────────────────────────────────────
	# Risky phrases, bucketed by the kind of contractual risk they signal.
	# The bucket names are labels for maintainers only; nothing reads them.
	_KEYWORD_GROUPS = {
	    "liability / damages": (
	        "gross negligence",
	        "wilful misconduct",
	        "liquidated damages",
	        "sole discretion",
	        "without liability",
	        "no obligation",
	        "cap on liability",
	        "shall not exceed",
	        "unconditionally",
	    ),
	    "data / privacy": (
	        "personal data",
	        "personal information",
	        "sensitive personal",
	        "collect and process",
	        "data subject",
	        "user data",
	        "customer data",
	        "health data",
	        "financial data",
	        "biometric",
	        "aadhaar",
	    ),
	    "ip": (
	        "all intellectual property",
	        "assigns all ip",
	        "belongs exclusively",
	        "vests in",
	        "pre-existing",
	        "background ip",
	    ),
	    "termination / enforceability": (
	        "may terminate",
	        "without cause",
	        "at will",
	        "unilaterally",
	        "irrevocable",
	        "perpetual",
	        "non-terminable",
	        "non-compete",
	        "shall not compete",
	        "no-solicit",
	        "waive right to sue",
	        "shall not bring action",
	        "contingent on",
	        "wagering",
	        "speculative",
	    ),
	    "notice": (
	        "no notice required",
	        "immediate termination",
	        "forthwith",
	    ),
	}

	# Flat list of every phrase above, in bucket order (lower-case by convention).
	RISKY_KEYWORDS = [
	    phrase for bucket in _KEYWORD_GROUPS.values() for phrase in bucket
	]

	# Markers emitted before and after a highlighted keyword: "**" is Markdown
	# bold, which is what the Gradio Markdown display renders.
	HIGHLIGHT_OPEN = HIGHLIGHT_CLOSE = "**"


	def highlight_keywords(
	    text: str,
	    keywords: list[str] | None = None,
	    *,
	    open_marker: str | None = None,
	    close_marker: str | None = None,
	) -> str:
	    """
	    Wrap risky keywords in Markdown bold for Gradio display.

	    The text is scanned once, case-insensitively, from left to right. When
	    several keywords could start at the same position the longest one wins,
	    and a keyword is only matched when it is not glued to a preceding word
	    character (so "at will" is not highlighted inside "that will"). A match
	    that is already directly preceded by the opening marker is left as is,
	    which keeps pre-highlighted input from being wrapped twice.

	    Args:
	        text: The text to annotate.
	        keywords: Phrases to highlight; defaults to ``RISKY_KEYWORDS``.
	        open_marker: Text inserted before a match; defaults to
	            ``HIGHLIGHT_OPEN``.
	        close_marker: Text inserted after a match; defaults to
	            ``HIGHLIGHT_CLOSE``.

	    Returns:
	        ``text`` with every matched keyword wrapped in the markers. The
	        original casing of the matched text is preserved.
	    """
	    if keywords is None:
	        keywords = RISKY_KEYWORDS
	    if open_marker is None:
	        open_marker = HIGHLIGHT_OPEN
	    if close_marker is None:
	        close_marker = HIGHLIGHT_CLOSE

	    # Longest first: regex alternation takes the first alternative that
	    # matches, so this makes the longest keyword win at a given position.
	    # Empty keywords are dropped because they would match everywhere.
	    ordered = sorted((kw for kw in keywords if kw), key=len, reverse=True)
	    if not text or not ordered:
	        return text

	    # A single combined pattern means every match offset refers to the one
	    # string being searched; wrapping keyword by keyword shifts offsets and
	    # makes any "already wrapped?" look-back read the wrong characters.
	    pattern = re.compile(
	        r"(?<!\w)(?:" + "|".join(map(re.escape, ordered)) + r")",
	        re.IGNORECASE,
	    )

	    def _wrap(match: re.Match) -> str:
	        """Return the match wrapped in markers unless it already is."""
	        start = match.start()
	        preceding = match.string[max(0, start - len(open_marker)):start]
	        if open_marker and preceding == open_marker:
	            return match.group(0)
	        return f"{open_marker}{match.group(0)}{close_marker}"

	    return pattern.sub(_wrap, text)


	def format_triggered_rules(rules: list) -> str:
	    """
	    Render the triggered rules as a Markdown bullet list.

	    Each rule is a mapping that must provide the keys ``rule_id``, ``name``,
	    ``reference`` and ``penalty``. It becomes one bullet spread over two
	    lines: the id and name first, then the reference and penalty weight.
	    An empty (or otherwise falsy) ``rules`` yields a single "nothing
	    detected" message instead.
	    """
	    if rules:
	        return "\n".join(
	            f"- ⚠️ [{rule['rule_id']}] {rule['name']} \n"
	            f" {rule['reference']} — penalty weight: `{rule['penalty']}`"
	            for rule in rules
	        )
	    return "✅ No Indian-law violations detected."


	def format_explanation(explanation: dict) -> str:
	    """
	    Build the full Markdown explanation from an explanation mapping.

	    ``explanation["overview"]`` is required and always rendered first. If
	    ``"rules"`` is present and non-empty, a breakdown heading follows with
	    one entry per rule; each rule mapping must provide ``rule_id``, ``name``,
	    ``reference``, ``why``, ``meaning`` and ``suggestion``. If
	    ``"general_tip"`` is present and non-empty, it closes the output under
	    its own heading. Sections are separated by a blank line.
	    """
	    sections = [f"### 📋 Overview\n{explanation['overview']}"]

	    rule_entries = explanation.get("rules")
	    if rule_entries:
	        sections.append("### ⚖️ Rule-by-Rule Breakdown")
	        sections.extend(
	            f"[{entry['rule_id']}] {entry['name']} (ref: {entry['reference']})\n\n"
	            f"- Why it's flagged: {entry['why']}\n"
	            f"- What it means: {entry['meaning']}\n"
	            f"- Suggestion: {entry['suggestion']}"
	            for entry in rule_entries
	        )

	    tip = explanation.get("general_tip")
	    if tip:
	        sections.append(f"### 💡 General Guidance\n{tip}")

	    return "\n\n".join(sections)


	def score_to_bar(score: float, width: int = 20) -> str:
	    """
	    Simple ASCII progress bar for risk score display.

	    Args:
	        score: Risk score as a fraction; values outside ``[0, 1]`` are
	            clamped so the bar never overflows or underflows.
	        width: Number of cells in the bar; negative values are treated as 0.

	    Returns:
	        A Markdown inline-code string such as ``[█████░░░░░] 50%`` wrapped
	        in backticks, with exactly ``width`` cells.
	    """
	    width = max(width, 0)
	    # Clamp first: an out-of-range score would otherwise make the filled and
	    # empty cell counts disagree with ``width``.
	    fraction = min(max(score, 0.0), 1.0)
	    filled = round(fraction * width)
	    bar = "█" * filled + "░" * (width - filled)
	    # round(), not int(): 0.29 * 100 is 28.999999999999996 in floating point
	    # and truncation would display it as 28%.
	    pct = round(fraction * 100)
	    return f"`[{bar}] {pct}%`"