"""Contract Red Team: a Gradio app that flags risky clauses in a PDF contract.

Pipeline: PDF text extraction -> clause chunking -> keyword / risk-indicator
matching against ``RISKY_PATTERNS`` -> model-written explanation for the
highest-ranked findings (with a canned explanation when no ``HF_TOKEN`` is
configured or the model call fails).
"""

import io
import logging
import os
import re
import sys
from collections import Counter

import gradio as gr
import PyPDF2
from huggingface_hub import InferenceClient

# The shared UI helpers are imported from the directory above this file.
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
from shared.components import create_method_panel, create_premium_hero  # noqa: E402

logger = logging.getLogger(__name__)

# Initialize clients
client = InferenceClient(token=os.getenv("HF_TOKEN"))

# Known risky clause patterns
RISKY_PATTERNS = [
    {
        "category": "Termination",
        "keywords": ["at-will", "without cause", "immediate termination", "no notice"],
        "risk_indicators": ["employer can terminate", "company may terminate", "dismiss without"],
    },
    {
        "category": "Non-Compete",
        "keywords": ["non-compete", "covenant not to compete", "shall not engage", "prohibited from working"],
        "risk_indicators": ["any business", "similar industry", "12 months", "24 months", "geographic area"],
    },
    {
        "category": "Intellectual Property",
        "keywords": ["work product", "intellectual property", "inventions", "copyrights", "patents"],
        "risk_indicators": ["belong to company", "assigned to employer", "waive rights", "all ideas"],
    },
    {
        "category": "Confidentiality",
        "keywords": ["confidential", "proprietary", "trade secrets", "non-disclosure"],
        "risk_indicators": ["indefinitely", "perpetuity", "all information", "broadly defined"],
    },
    {
        "category": "Liability",
        "keywords": ["indemnify", "hold harmless", "liability", "damages"],
        "risk_indicators": ["unlimited liability", "personal liability", "consequential damages", "punitive damages"],
    },
    {
        "category": "Compensation",
        "keywords": ["salary", "compensation", "bonus", "payment"],
        "risk_indicators": ["discretionary", "sole discretion", "may adjust", "subject to change"],
    },
    {
        "category": "Arbitration",
        "keywords": ["arbitration", "dispute resolution", "waive right to sue"],
        "risk_indicators": ["binding arbitration", "waive jury trial", "class action waiver"],
    },
]

# Prefix of the message extract_text_from_pdf() returns on failure; callers
# test for this exact prefix instead of searching the text for "Error".
_PDF_ERROR_PREFIX = "Error extracting PDF:"

# Section boundaries: "\n1.", "\nA." or "Article 3".
_SECTION_SPLIT_RE = re.compile(r'\n\s*\d+\.|\n\s*[A-Z]\.|Article \d+')
_CHUNK_FLUSH_CHARS = 100   # a chunk is emitted once it grows past this length
_MIN_CLAUSE_CHARS = 50     # chunks this short or shorter are dropped

# Words that force a HIGH rating regardless of how many indicators matched.
_SEVERE_TERMS = ("unlimited", "perpetuity")
_MAX_CLAUSE_PREVIEW_CHARS = 300
_MAX_DETAILED_FINDINGS = 5

_RISK_ORDER = {"HIGH": 0, "MEDIUM": 1, "LOW": 2}
_OVERALL_EMOJI = {"HIGH": "🚨", "MEDIUM": "⚠️", "LOW": "✅"}
_FINDING_EMOJI = {"HIGH": "🚨", "MEDIUM": "⚠️", "LOW": "ℹ️"}

_DEFAULT_WHY = "This clause may impose unfair obligations or risks on one party."
_DEFAULT_SUGGESTION = "Negotiate this clause with legal counsel."
_FAILURE_SUGGESTION = "Consider negotiating more balanced terms with legal counsel."

# Helpers for pulling the two answers out of the model's free-form reply.
_REVISION_HEADER_RE = re.compile(r"SUGGESTED\s+REVISION", re.IGNORECASE)
# Optional markdown markers, an optional "1." item number and the "WHY" label
# at the very start of the explanation.
_LEADING_MARKER_RE = re.compile(r"^[\s*#]*(?:1[.)])?[\s*#]*(?:WHY\b)?")
# A final line holding nothing but the "2." item marker (and markdown
# markers) that precedes the "SUGGESTED REVISION" header.
_TRAILING_MARKER_RE = re.compile(r"(?:^|\n)[ \t*#]*(?:2[.)])?[ \t*#]*$")
_HEADER_PUNCTUATION = " \t\r\n*#:-"


def extract_text_from_pdf(pdf_file: bytes) -> str:
    """Extract text from uploaded PDF file.

    Args:
        pdf_file: Raw bytes of the PDF.

    Returns:
        The text of every page, each followed by a newline. On failure, a
        message starting with ``"Error extracting PDF:"`` is returned instead
        of raising.
    """
    try:
        pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_file))
        # extract_text() may yield None for a page without a text layer.
        return "".join((page.extract_text() or "") + "\n" for page in pdf_reader.pages)
    except Exception as e:  # parser errors vary widely; report them to the UI
        return f"{_PDF_ERROR_PREFIX} {e}"


def split_into_clauses(text: str):
    """Split contract text into clause-sized chunks.

    The text is first cut at section markers (numbered or lettered headings,
    "Article N"), then each section is cut at sentence boundaries and the
    sentences are accumulated until a chunk exceeds ~100 characters.

    Returns:
        A list of stripped chunks, each longer than 50 characters.
    """
    clauses = []
    for section in _SECTION_SPLIT_RE.split(text):
        sentences = section.split('. ')
        last_index = len(sentences) - 1
        current_clause = ""
        for index, sentence in enumerate(sentences):
            # split('. ') removed the period from every sentence except the
            # last one, so only those get it back (avoids a trailing "..").
            current_clause += sentence if index == last_index else sentence + ". "
            if len(current_clause) > _CHUNK_FLUSH_CHARS:
                clauses.append(current_clause.strip())
                current_clause = ""
        if current_clause.strip():
            clauses.append(current_clause.strip())

    # Filter out very short clauses
    return [c for c in clauses if len(c) > _MIN_CLAUSE_CHARS]


def analyze_clause_risk(clause: str):
    """Analyze a single clause for risk using pattern matching.

    A category is reported when the clause contains at least one of its
    keywords and at least one of its risk indicators. The finding is HIGH
    when two or more indicators match or the clause mentions "unlimited" or
    "perpetuity"; otherwise it is MEDIUM.

    Returns:
        A list of dicts with keys ``category``, ``risk_level`` and ``clause``
        (the clause text, truncated to 300 characters plus "...").
    """
    clause_lower = clause.lower()
    has_severe_term = any(term in clause_lower for term in _SEVERE_TERMS)
    if len(clause) > _MAX_CLAUSE_PREVIEW_CHARS:
        preview = clause[:_MAX_CLAUSE_PREVIEW_CHARS] + "..."
    else:
        preview = clause

    risks = []
    for pattern in RISKY_PATTERNS:
        if not any(kw in clause_lower for kw in pattern["keywords"]):
            continue
        risk_matches = sum(1 for ri in pattern["risk_indicators"] if ri in clause_lower)
        if risk_matches == 0:
            continue
        risk_level = "HIGH" if risk_matches >= 2 or has_severe_term else "MEDIUM"
        risks.append({
            "category": pattern["category"],
            "risk_level": risk_level,
            "clause": preview,
        })
    return risks


def _parse_ai_response(response: str):
    """Split the model reply into (why_risky, suggested_revision)."""
    head, *rest = _REVISION_HEADER_RE.split(response, maxsplit=1)

    # Drop the dangling "2." that introduces the revision header, then the
    # leading "1." / "WHY" label; the body of the explanation is left intact.
    why_risky = _TRAILING_MARKER_RE.sub("", head.rstrip())
    why_risky = _LEADING_MARKER_RE.sub("", why_risky, count=1).strip()

    suggested = rest[0].lstrip(_HEADER_PUNCTUATION).strip() if rest else ""
    return why_risky or _DEFAULT_WHY, suggested or _DEFAULT_SUGGESTION


def get_ai_analysis(clause, category, risk_level):
    """Use reasoning model to explain the risk.

    Args:
        clause: Clause text to explain.
        category: Risk category the clause was flagged under.
        risk_level: Rating assigned by the pattern matcher.

    Returns:
        A ``(why_risky, suggested_revision)`` tuple of strings. Canned text is
        returned when ``HF_TOKEN`` is not set or the model call fails.
    """
    if not os.getenv("HF_TOKEN"):
        return (
            f"This clause was flagged by deterministic pattern matching in the {category} category. "
            f"The risk level is {risk_level}, so the exact wording should be reviewed before signing.",
            "Ask counsel to narrow the clause, define terms precisely, and make obligations mutual where appropriate."
        )

    prompt = f"""You are a legal risk analyst. Analyze this contract clause.

Category: {category}
Risk Level: {risk_level}
Clause: "{clause}"

Provide:
1. WHY this clause is risky (2-3 sentences, be specific)
2. SUGGESTED REVISION (1-2 sentences on how to make it fairer)

Be direct and actionable."""

    try:
        chunks = []
        for message in client.chat_completion(
            model="Qwen/Qwen2.5-72B-Instruct",
            messages=[{"role": "user", "content": prompt}],
            max_tokens=300,
            stream=True
        ):
            if not message.choices:  # some stream chunks carry no choices
                continue
            piece = message.choices[0].delta.content
            if piece:
                chunks.append(piece)
        return _parse_ai_response("".join(chunks))
    except Exception:
        # Best effort: the report is still useful without the model's prose.
        logger.exception("AI analysis failed for %s clause", category)
        return _DEFAULT_WHY, _FAILURE_SUGGESTION


def _build_clean_summary(clause_count):
    """Return the summary shown when no clause was flagged."""
    return "\n".join([
        "",
        "# ✅ Investigation Complete",
        "",
        "**Contract Status:** LOW RISK",
        "",
        f"🔍 Analyzed {clause_count} clauses",
        "",
        "✓ No major red flags detected",
        "",
        "**Note:** This is an automated analysis. Always consult a licensed attorney for legal advice.",
        "",
    ])


def _build_risk_summary(all_risks, clause_count):
    """Return the risk dashboard for a contract with flagged clauses."""
    counts = Counter(r["risk_level"] for r in all_risks)
    high_count, medium_count, low_count = counts["HIGH"], counts["MEDIUM"], counts["LOW"]
    overall_risk = "HIGH" if high_count > 0 else ("MEDIUM" if medium_count > 0 else "LOW")
    risk_emoji = _OVERALL_EMOJI[overall_risk]
    return "\n".join([
        "",
        f"# {risk_emoji} Investigation Complete",
        "",
        f"**Overall Risk Assessment:** {overall_risk}",
        "",
        "## 📊 Risk Dashboard",
        "",
        f"- 🚨 **HIGH Risk Clauses:** {high_count}",
        f"- ⚠️ **MEDIUM Risk Clauses:** {medium_count}",
        f"- ℹ️ **LOW Risk Clauses:** {low_count}",
        "",
        f"**Total Clauses Analyzed:** {clause_count}",
        "",
        "---",
        "",
        "**⚠️ DISCLAIMER:** This is an automated analysis tool, NOT legal advice. "
        "Always consult a licensed attorney before signing any contract.",
        "",
    ])


def _build_detailed_report(detailed_findings):
    """Render the per-finding Markdown report."""
    sections = ["# 🔍 Detailed Findings\n\n"]
    for i, finding in enumerate(detailed_findings, 1):
        risk_color = _FINDING_EMOJI.get(finding["risk_level"], "ℹ️")
        # Quote every line so a multi-line clause stays inside the blockquote.
        quoted_clause = "\n".join(f"> {line}" for line in finding["clause"].splitlines())
        sections.append("\n".join([
            "",
            f"## {risk_color} Finding #{i}: {finding['category']}",
            "",
            f"**Risk Level:** {finding['risk_level']}",
            "",
            "**Suspicious Clause:**",
            "",
            quoted_clause,
            "",
            "**Why This Is Risky:**",
            "",
            finding["why_risky"],
            "",
            "**Suggested Revision:**",
            "",
            finding["suggested"],
            "",
            "---",
            "",
        ]))
    return "".join(sections)


def analyze_contract(pdf_file, progress=gr.Progress()):
    """Main function to analyze contract.

    Args:
        pdf_file: Raw PDF bytes from the upload component, or ``None``.
        progress: Gradio progress tracker used to report each stage.

    Returns:
        A ``(summary_markdown, detailed_markdown)`` tuple. The second element
        is empty when nothing was uploaded, extraction failed, no clauses
        were found, or no clause was flagged.
    """
    if pdf_file is None:
        return "🔍 Please upload a contract to investigate.", ""

    progress(0, desc="🔍 Investigating contract...")

    # Extract text
    text = extract_text_from_pdf(pdf_file)
    # Match the extractor's own error prefix; a plain substring search for
    # "Error" would misfire on contracts that merely mention the word.
    if text.startswith(_PDF_ERROR_PREFIX):
        return text, ""

    progress(0.2, desc="📄 Analyzing clauses...")

    # Split into clauses
    clauses = split_into_clauses(text)
    if not clauses:
        return "⚠️ Could not extract clauses from contract. Please ensure it's a text-based PDF.", ""

    progress(0.4, desc="🔎 Searching for suspicious clauses...")

    # Analyze each clause
    total_clauses = len(clauses)
    all_risks = []
    for i, clause in enumerate(clauses):
        all_risks.extend(analyze_clause_risk(clause))
        progress(0.4 + (0.4 * (i / total_clauses)), desc=f"🔎 Analyzing clause {i+1}/{total_clauses}...")

    if not all_risks:
        return _build_clean_summary(total_clauses), ""

    progress(0.8, desc="🧠 Generating risk analysis...")

    # Sort by risk level (stable, so document order is kept within a level)
    all_risks.sort(key=lambda risk: _RISK_ORDER[risk["risk_level"]])

    # Generate detailed analysis for top risks only, to bound model calls
    top_risks = all_risks[:_MAX_DETAILED_FINDINGS]
    detailed_findings = []
    for i, risk in enumerate(top_risks):
        why, suggested = get_ai_analysis(risk["clause"], risk["category"], risk["risk_level"])
        detailed_findings.append({
            "category": risk["category"],
            "risk_level": risk["risk_level"],
            "clause": risk["clause"],
            "why_risky": why,
            "suggested": suggested,
        })
        progress(0.8 + (0.15 * (i / len(top_risks))), desc=f"🧠 Analyzing risk {i+1}...")

    progress(0.95, desc="📊 Generating report...")

    summary = _build_risk_summary(all_risks, total_clauses)
    detailed = _build_detailed_report(detailed_findings)

    progress(1.0, desc="✅ Investigation complete!")
    return summary, detailed


# Custom CSS for detective theme
custom_css = """
.gradio-container {
    font-family: 'Courier New', monospace;
}
.risk-high {
    background-color: #fee;
    border-left: 4px solid #c00;
    padding: 10px;
}
.risk-medium {
    background-color: #ffeaa7;
    border-left: 4px solid #fdcb6e;
    padding: 10px;
}
.risk-low {
    background-color: #d5f4e6;
    border-left: 4px solid #00b894;
    padding: 10px;
}
#component-0 {
    background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%);
}
.title-text {
    color: #f39c12;
    text-shadow: 2px 2px 4px rgba(0,0,0,0.5);
}
.detective-badge {
    background-color: #2c3e50;
    color: #f39c12;
    padding: 20px;
    border-radius: 10px;
    border: 2px solid #f39c12;
}
"""

_SIDEBAR_MD = """
### 📋 How It Works
1. **Upload** your contract (PDF)
2. **AI investigates** each clause
3. **Get detailed report** on risky clauses

### ✅ Supported Contracts
- NDAs (Non-Disclosure Agreements)
- Employment Contracts
- Service Agreements
- Consulting Agreements
- Freelance Contracts

### ⚠️ We Check For
- Unfair termination clauses
- Broad non-compete agreements
- IP rights issues
- Liability concerns
- Confidentiality overreach
- Compensation risks
- Arbitration requirements
"""

# NOTE(review): wrapped in the .detective-badge container declared in
# custom_css so the disclaimer renders as a styled box; confirm this matches
# the intended markup.
_DISCLAIMER_HTML = """
<div class="detective-badge">
<p><strong>⚠️ IMPORTANT DISCLAIMER</strong></p>
<p>This tool provides an automated analysis and is NOT legal advice. Always consult with a licensed attorney before signing any contract. The AI may miss risks or provide inaccurate information.</p>
</div>
"""

# Create Gradio interface
with gr.Blocks(css=custom_css, theme=gr.themes.Monochrome()) as demo:
    create_premium_hero(
        "Contract Red Team",
        "Upload a contract and inspect risky clauses with retrieval, clause patterning, and model-assisted explanation.",
        "🔍",
        badge="Legal AI Triage",
        highlights=["Clause parsing", "Risk taxonomy", "Evidence-first report"],
    )
    create_method_panel({
        "Technique": "PDF parsing → clause chunking → risk-pattern scoring → explanation.",
        "What it proves": "You can build an evidence-first AI assistant for high-stakes document review.",
        "Safety note": "This is triage and education, not legal advice or a substitute for counsel.",
    })

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown(_SIDEBAR_MD)

            pdf_input = gr.File(
                label="📄 Upload Contract (PDF)",
                file_types=[".pdf"],
                type="binary"
            )

            analyze_btn = gr.Button(
                "🔍 Start Investigation",
                variant="primary",
                size="lg"
            )

        with gr.Column(scale=2):
            gr.Markdown("### 📊 Investigation Summary")
            summary_output = gr.Markdown(value="🔍 Upload a contract to begin investigation...")

            gr.Markdown("### 🔍 Detailed Findings")
            detailed_output = gr.Markdown(value="")

    gr.HTML(_DISCLAIMER_HTML)

    # Event handlers
    analyze_btn.click(
        fn=analyze_contract,
        inputs=[pdf_input],
        outputs=[summary_output, detailed_output]
    )

if __name__ == "__main__":
    demo.launch()