# Provenance: Hugging Face Space file view - author "sammoftah",
# commit 96286a4 (verified), commit message "Add no-token fallback".
import gradio as gr
from huggingface_hub import InferenceClient
import PyPDF2
import io
import os
import sys
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
from shared.components import create_method_panel, create_premium_hero
# Shared Hugging Face inference client used by get_ai_analysis().
# HF_TOKEN is read from the environment; os.getenv returns None when it is
# unset, so the client may be constructed without a token. get_ai_analysis()
# checks for the token itself before sending any request through this client.
client = InferenceClient(token=os.getenv("HF_TOKEN"))
# Risk taxonomy consumed by analyze_clause_risk().
# Each entry names a category and two lowercase phrase lists: "keywords"
# (is the clause about this topic?) and "risk_indicators" (is the wording
# one-sided?). A clause is flagged for a category only when it contains at
# least one phrase from each list.
RISKY_PATTERNS = [
    {
        "category": "Termination",
        "keywords": [
            "at-will",
            "without cause",
            "immediate termination",
            "no notice",
        ],
        "risk_indicators": [
            "employer can terminate",
            "company may terminate",
            "dismiss without",
        ],
    },
    {
        "category": "Non-Compete",
        "keywords": [
            "non-compete",
            "covenant not to compete",
            "shall not engage",
            "prohibited from working",
        ],
        "risk_indicators": [
            "any business",
            "similar industry",
            "12 months",
            "24 months",
            "geographic area",
        ],
    },
    {
        "category": "Intellectual Property",
        "keywords": [
            "work product",
            "intellectual property",
            "inventions",
            "copyrights",
            "patents",
        ],
        "risk_indicators": [
            "belong to company",
            "assigned to employer",
            "waive rights",
            "all ideas",
        ],
    },
    {
        "category": "Confidentiality",
        "keywords": [
            "confidential",
            "proprietary",
            "trade secrets",
            "non-disclosure",
        ],
        "risk_indicators": [
            "indefinitely",
            "perpetuity",
            "all information",
            "broadly defined",
        ],
    },
    {
        "category": "Liability",
        "keywords": [
            "indemnify",
            "hold harmless",
            "liability",
            "damages",
        ],
        "risk_indicators": [
            "unlimited liability",
            "personal liability",
            "consequential damages",
            "punitive damages",
        ],
    },
    {
        "category": "Compensation",
        "keywords": [
            "salary",
            "compensation",
            "bonus",
            "payment",
        ],
        "risk_indicators": [
            "discretionary",
            "sole discretion",
            "may adjust",
            "subject to change",
        ],
    },
    {
        "category": "Arbitration",
        "keywords": [
            "arbitration",
            "dispute resolution",
            "waive right to sue",
        ],
        "risk_indicators": [
            "binding arbitration",
            "waive jury trial",
            "class action waiver",
        ],
    },
]
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF given as raw bytes.

    Each page's extracted text is followed by a newline. Failures are
    reported in-band rather than raised: if anything goes wrong while
    opening or reading the document, the return value is a string that
    starts with "Error extracting PDF: " followed by the exception text.
    """
    try:
        reader = PyPDF2.PdfReader(io.BytesIO(pdf_file))
        page_texts = [page.extract_text() + "\n" for page in reader.pages]
    except Exception as exc:
        return f"Error extracting PDF: {str(exc)}"
    return "".join(page_texts)
def split_into_clauses(text):
    """Split contract text into clause-sized chunks for risk scanning.

    The text is first cut at section markers: a newline followed by a
    "1."-style number or an "A."-style capital letter, or the literal
    "Article <n>". Each section is then walked sentence by sentence
    (sentences are delimited by ". "), accumulating sentences until the
    running chunk is longer than 100 characters. Chunks of 50 characters or
    fewer are dropped as noise.

    Args:
        text: Full contract text. Empty or None yields an empty list.

    Returns:
        list[str]: Clause chunks in document order, each stripped of
        surrounding whitespace.
    """
    import re

    if not text:
        return []

    section_pattern = r'\n\s*\d+\.|\n\s*[A-Z]\.|Article \d+'
    clauses = []
    for section in re.split(section_pattern, text):
        sentences = section.split('. ')
        last_index = len(sentences) - 1
        current_clause = ""
        for index, sentence in enumerate(sentences):
            current_clause += sentence
            if index != last_index:
                # split('. ') consumed a delimiter after every sentence
                # except the last one, so only restore it where it actually
                # existed. Re-adding it unconditionally turned a section
                # ending in "." into one ending in "..".
                current_clause += ". "
            if len(current_clause) > 100:  # close the chunk at ~100 chars
                clauses.append(current_clause.strip())
                current_clause = ""
        # Keep whatever is left over at the end of the section.
        if current_clause.strip():
            clauses.append(current_clause.strip())

    # Filter out very short fragments (headings, stray numbering, etc.).
    return [clause for clause in clauses if len(clause) > 50]
def analyze_clause_risk(clause, patterns=None):
    """Flag a clause against the risk taxonomy using substring matching.

    A category is reported only when the lower-cased clause contains at
    least one of that category's "keywords" AND at least one of its
    "risk_indicators".

    Severity:
        HIGH   - two or more risk indicators matched, or the clause
                 contains "unlimited" or "perpetuity".
        MEDIUM - exactly one risk indicator matched.
    LOW is never produced here: a finding requires at least one indicator,
    which already makes it MEDIUM.

    Args:
        clause: Clause text to inspect.
        patterns: Optional list of pattern dicts with "category",
            "keywords" and "risk_indicators" keys (phrases in lowercase).
            Defaults to the module-level RISKY_PATTERNS.

    Returns:
        list[dict]: One dict per matching category with keys "category",
        "risk_level" and "clause" (the clause text, truncated to 300
        characters plus "..." when longer).
    """
    if patterns is None:
        patterns = RISKY_PATTERNS

    clause_lower = clause.lower()
    # These words escalate any finding to HIGH regardless of category.
    has_escalator = "unlimited" in clause_lower or "perpetuity" in clause_lower
    excerpt = clause[:300] + "..." if len(clause) > 300 else clause

    risks = []
    for pattern in patterns:
        if not any(kw in clause_lower for kw in pattern["keywords"]):
            continue
        risk_matches = sum(1 for ri in pattern["risk_indicators"] if ri in clause_lower)
        if risk_matches == 0:
            continue
        risk_level = "HIGH" if risk_matches >= 2 or has_escalator else "MEDIUM"
        risks.append({
            "category": pattern["category"],
            "risk_level": risk_level,
            "clause": excerpt,
        })
    return risks
def _parse_ai_response(response):
    """Split a model reply into (why_risky, suggested_revision) strings."""
    import re

    why_part, _heading, revision_part = response.partition("SUGGESTED REVISION")

    # Strip only the leading "1." / "WHY" label and a trailing line that holds
    # just the "2." list marker. Removing those substrings everywhere would
    # corrupt ordinary text such as "11.5%" or "Section 12.3".
    why_risky = re.sub(r'\A[\s#*]*(?:1\.(?!\d))?[\s#*]*(?:WHY\b)?[\s:*]*', '', why_part)
    why_risky = re.sub(r'(?:\A|\n)[ \t#*]*(?:2\.)?[\s#*]*\Z', '', why_risky).strip()

    # The text after the heading usually starts with ":" or markdown markers.
    suggested = re.sub(r'\A[\s:*#]*(?:2\.(?!\d))?[\s:*#]*', '', revision_part).strip()

    if not why_risky:
        why_risky = "This clause may impose unfair obligations or risks on one party."
    if not suggested:
        suggested = "Negotiate this clause with legal counsel."
    return why_risky, suggested


def get_ai_analysis(clause, category, risk_level):
    """Explain why a flagged clause is risky and suggest a fairer wording.

    Without an HF_TOKEN in the environment no request is made and a
    deterministic explanation built from the category and risk level is
    returned. With a token, the module-level inference client is asked
    (streaming) for an explanation, which is then split at the
    "SUGGESTED REVISION" heading.

    Args:
        clause: Clause text (as shown to the user).
        category: Risk category name.
        risk_level: Risk level label, e.g. "HIGH".

    Returns:
        tuple[str, str]: (why_risky, suggested_revision). If the request or
        parsing fails, a generic pair of messages is returned instead of
        raising.
    """
    if not os.getenv("HF_TOKEN"):
        return (
            f"This clause was flagged by deterministic pattern matching in the {category} category. The risk level is {risk_level}, so the exact wording should be reviewed before signing.",
            "Ask counsel to narrow the clause, define terms precisely, and make obligations mutual where appropriate."
        )

    prompt = f"""You are a legal risk analyst. Analyze this contract clause.
Category: {category}
Risk Level: {risk_level}
Clause: "{clause}"
Provide:
1. WHY this clause is risky (2-3 sentences, be specific)
2. SUGGESTED REVISION (1-2 sentences on how to make it fairer)
Be direct and actionable."""

    try:
        pieces = []
        for message in client.chat_completion(
            model="Qwen/Qwen2.5-72B-Instruct",
            messages=[{"role": "user", "content": prompt}],
            max_tokens=300,
            stream=True
        ):
            # Defensive: skip stream events that carry no choices instead of
            # letting an IndexError throw away the text gathered so far.
            if not message.choices:
                continue
            piece = message.choices[0].delta.content
            if piece:
                pieces.append(piece)
        return _parse_ai_response("".join(pieces))
    except Exception:
        # Best-effort: the report is still useful without the model's
        # explanation, so degrade to a generic message. Catching Exception
        # (not a bare except) lets KeyboardInterrupt/SystemExit propagate.
        return "This clause may impose unfair obligations or risks on one party.", "Consider negotiating more balanced terms with legal counsel."
def analyze_contract(pdf_file, progress=gr.Progress()):
    """Run the full contract review pipeline for the Gradio UI.

    Steps: extract text from the uploaded PDF, split it into clauses, score
    every clause against the risk patterns, ask get_ai_analysis() to explain
    the five highest-ranked findings, and render two Markdown documents.

    Args:
        pdf_file: Raw PDF bytes from the upload component, or None when
            nothing was uploaded.
        progress: Gradio progress tracker; called with a fraction in [0, 1]
            and a description as the pipeline advances.

    Returns:
        tuple[str, str]: (summary_markdown, detailed_markdown). The second
        element is "" when there is nothing to detail (no upload, extraction
        failure, no clauses, or no findings).
    """
    if pdf_file is None:
        return "πŸ” Please upload a contract to investigate.", ""

    progress(0, desc="πŸ” Investigating contract...")

    # Extract text
    text = extract_text_from_pdf(pdf_file)
    # extract_text_from_pdf() reports failure in-band with this exact prefix.
    # Matching the prefix, not the bare word "Error" anywhere in the text,
    # keeps contracts that merely mention errors (e.g. "Errors and Omissions")
    # from being rejected as extraction failures.
    if text.startswith("Error extracting PDF:"):
        return text, ""

    progress(0.2, desc="πŸ“„ Analyzing clauses...")

    # Split into clauses
    clauses = split_into_clauses(text)
    if not clauses:
        return "⚠️ Could not extract clauses from contract. Please ensure it's a text-based PDF.", ""
    clause_count = len(clauses)

    progress(0.4, desc="πŸ”Ž Searching for suspicious clauses...")

    # Score each clause; this stage covers the 0.4 -> 0.8 progress range.
    all_risks = []
    for i, clause in enumerate(clauses):
        all_risks.extend(analyze_clause_risk(clause))
        progress(0.4 + (0.4 * (i / clause_count)), desc=f"πŸ”Ž Analyzing clause {i+1}/{clause_count}...")

    if not all_risks:
        summary = f"""
# βœ… Investigation Complete
**Contract Status:** LOW RISK
πŸ” Analyzed {clause_count} clauses
βœ“ No major red flags detected
**Note:** This is an automated analysis. Always consult a licensed attorney for legal advice.
"""
        return summary, ""

    progress(0.8, desc="🧠 Generating risk analysis...")

    # Most severe findings first (the sort is stable, so document order is
    # kept within a level).
    risk_order = {"HIGH": 0, "MEDIUM": 1, "LOW": 2}
    all_risks.sort(key=lambda x: risk_order[x["risk_level"]])

    # Only the top 5 findings get a model-written explanation; this stage
    # covers the 0.8 -> 0.95 progress range.
    top_risks = all_risks[:5]
    detailed_findings = []
    for i, risk in enumerate(top_risks):
        why, suggested = get_ai_analysis(risk["clause"], risk["category"], risk["risk_level"])
        detailed_findings.append({
            "category": risk["category"],
            "risk_level": risk["risk_level"],
            "clause": risk["clause"],
            "why_risky": why,
            "suggested": suggested
        })
        progress(0.8 + (0.15 * (i / len(top_risks))), desc=f"🧠 Analyzing risk {i+1}...")

    progress(0.95, desc="πŸ“Š Generating report...")

    # Create summary (counts cover every finding, not just the top 5)
    high_count = sum(1 for r in all_risks if r["risk_level"] == "HIGH")
    medium_count = sum(1 for r in all_risks if r["risk_level"] == "MEDIUM")
    low_count = sum(1 for r in all_risks if r["risk_level"] == "LOW")

    if high_count > 0:
        overall_risk = "HIGH"
        risk_emoji = "🚨"
    elif medium_count > 0:
        overall_risk = "MEDIUM"
        risk_emoji = "⚠️"
    else:
        overall_risk = "LOW"
        risk_emoji = "βœ…"

    summary = f"""
# {risk_emoji} Investigation Complete
**Overall Risk Assessment:** {overall_risk}
## πŸ“Š Risk Dashboard
- 🚨 **HIGH Risk Clauses:** {high_count}
- ⚠️ **MEDIUM Risk Clauses:** {medium_count}
- ℹ️ **LOW Risk Clauses:** {low_count}
**Total Clauses Analyzed:** {clause_count}
---
**⚠️ DISCLAIMER:** This is an automated analysis tool, NOT legal advice. Always consult a licensed attorney before signing any contract.
"""

    # Create detailed report
    detailed = "# πŸ” Detailed Findings\n\n"
    for i, finding in enumerate(detailed_findings, 1):
        risk_color = "🚨" if finding["risk_level"] == "HIGH" else ("⚠️" if finding["risk_level"] == "MEDIUM" else "ℹ️")
        detailed += f"""
## {risk_color} Finding #{i}: {finding["category"]}
**Risk Level:** {finding["risk_level"]}
**Suspicious Clause:**
> {finding["clause"]}
**Why This Is Risky:**
{finding["why_risky"]}
**Suggested Revision:**
{finding["suggested"]}
---
"""

    progress(1.0, desc="βœ… Investigation complete!")
    return summary, detailed
# Custom CSS for the detective theme, passed to gr.Blocks(css=...) below.
# It sets a monospace font on the Gradio container, defines colour-coded
# .risk-high / .risk-medium / .risk-low boxes, puts a dark gradient on
# #component-0, and styles the .title-text and .detective-badge accents.
# NOTE(review): no component defined in this file assigns these class names
# explicitly - presumably the shared hero/method-panel helpers use them;
# confirm before pruning.
custom_css = """
.gradio-container {
font-family: 'Courier New', monospace;
}
.risk-high {
background-color: #fee;
border-left: 4px solid #c00;
padding: 10px;
}
.risk-medium {
background-color: #ffeaa7;
border-left: 4px solid #fdcb6e;
padding: 10px;
}
.risk-low {
background-color: #d5f4e6;
border-left: 4px solid #00b894;
padding: 10px;
}
#component-0 {
background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%);
}
.title-text {
color: #f39c12;
text-shadow: 2px 2px 4px rgba(0,0,0,0.5);
}
.detective-badge {
background-color: #2c3e50;
color: #f39c12;
padding: 20px;
border-radius: 10px;
border: 2px solid #f39c12;
}
"""
# Create Gradio interface
# Builds the page in call order: the shared hero and method panel, then a
# two-column row (instructions, PDF upload and the start button on the left;
# the summary and detailed-findings Markdown panes on the right), a
# disclaimer banner, and finally the click handler wiring.
with gr.Blocks(css=custom_css, theme=gr.themes.Monochrome()) as demo:
    # Header components come from the project's shared component module.
    create_premium_hero(
        "Contract Red Team",
        "Upload a contract and inspect risky clauses with retrieval, clause patterning, and model-assisted explanation.",
        "πŸ”",
        badge="Legal AI Triage",
        highlights=["Clause parsing", "Risk taxonomy", "Evidence-first report"],
    )
    create_method_panel({
        "Technique": "PDF parsing β†’ clause chunking β†’ risk-pattern scoring β†’ explanation.",
        "What it proves": "You can build an evidence-first AI assistant for high-stakes document review.",
        "Safety note": "This is triage and education, not legal advice or a substitute for counsel.",
    })
    with gr.Row():
        # Left column: static instructions plus the inputs.
        with gr.Column(scale=1):
            gr.Markdown("""
### πŸ“‹ How It Works
1. **Upload** your contract (PDF)
2. **AI investigates** each clause
3. **Get detailed report** on risky clauses
### βœ… Supported Contracts
- NDAs (Non-Disclosure Agreements)
- Employment Contracts
- Service Agreements
- Consulting Agreements
- Freelance Contracts
### ⚠️ We Check For
- Unfair termination clauses
- Broad non-compete agreements
- IP rights issues
- Liability concerns
- Confidentiality overreach
- Compensation risks
- Arbitration requirements
""")
            # type="binary" hands the handler the file's raw bytes, which is
            # what extract_text_from_pdf() wraps in io.BytesIO.
            pdf_input = gr.File(
                label="πŸ“„ Upload Contract (PDF)",
                file_types=[".pdf"],
                type="binary"
            )
            analyze_btn = gr.Button(
                "πŸ” Start Investigation",
                variant="primary",
                size="lg"
            )
        # Right column: the two outputs filled by analyze_contract().
        with gr.Column(scale=2):
            gr.Markdown("### πŸ“Š Investigation Summary")
            summary_output = gr.Markdown(value="πŸ” Upload a contract to begin investigation...")
            gr.Markdown("### πŸ” Detailed Findings")
            detailed_output = gr.Markdown(value="")
    # NOTE(review): nesting was reconstructed; the banner is assumed to sit
    # below the row at Blocks level - confirm against the deployed layout.
    gr.HTML("""
<div style='background-color: #2c3e50; color: #ecf0f1; padding: 20px; border-radius: 10px; margin-top: 20px; text-align: center;'>
<h3 style='color: #e74c3c; margin-top: 0;'>⚠️ IMPORTANT DISCLAIMER</h3>
<p>This tool provides an automated analysis and is <strong>NOT legal advice</strong>.
Always consult with a licensed attorney before signing any contract.
The AI may miss risks or provide inaccurate information.</p>
</div>
""")
    # Event handlers
    # The button runs analyze_contract on the uploaded bytes; its two return
    # values map, in order, onto the summary and detailed Markdown panes.
    analyze_btn.click(
        fn=analyze_contract,
        inputs=[pdf_input],
        outputs=[summary_output, detailed_output]
    )

# Launch the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()