Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from huggingface_hub import InferenceClient | |
| import PyPDF2 | |
| import io | |
| import os | |
| import sys | |
| sys.path.append(os.path.join(os.path.dirname(__file__), '..')) | |
| from shared.components import create_method_panel, create_premium_hero | |
# Shared Hugging Face inference client used by get_ai_analysis. The access
# token is read from the HF_TOKEN environment variable (None when unset).
client = InferenceClient(token=os.environ.get("HF_TOKEN"))
# Risk taxonomy consulted by analyze_clause_risk.
#
# Each entry describes one contract topic:
#   "category"        - label shown in the report
#   "keywords"        - lowercase phrases that identify the topic
#   "risk_indicators" - lowercase phrases that make a clause on that topic risky
#
# All phrases are lowercase because they are compared against a lowercased
# copy of the clause text.
RISKY_PATTERNS = [
    {
        "category": "Termination",
        "keywords": [
            "at-will",
            "without cause",
            "immediate termination",
            "no notice",
        ],
        "risk_indicators": [
            "employer can terminate",
            "company may terminate",
            "dismiss without",
        ],
    },
    {
        "category": "Non-Compete",
        "keywords": [
            "non-compete",
            "covenant not to compete",
            "shall not engage",
            "prohibited from working",
        ],
        "risk_indicators": [
            "any business",
            "similar industry",
            "12 months",
            "24 months",
            "geographic area",
        ],
    },
    {
        "category": "Intellectual Property",
        "keywords": [
            "work product",
            "intellectual property",
            "inventions",
            "copyrights",
            "patents",
        ],
        "risk_indicators": [
            "belong to company",
            "assigned to employer",
            "waive rights",
            "all ideas",
        ],
    },
    {
        "category": "Confidentiality",
        "keywords": [
            "confidential",
            "proprietary",
            "trade secrets",
            "non-disclosure",
        ],
        "risk_indicators": [
            "indefinitely",
            "perpetuity",
            "all information",
            "broadly defined",
        ],
    },
    {
        "category": "Liability",
        "keywords": [
            "indemnify",
            "hold harmless",
            "liability",
            "damages",
        ],
        "risk_indicators": [
            "unlimited liability",
            "personal liability",
            "consequential damages",
            "punitive damages",
        ],
    },
    {
        "category": "Compensation",
        "keywords": [
            "salary",
            "compensation",
            "bonus",
            "payment",
        ],
        "risk_indicators": [
            "discretionary",
            "sole discretion",
            "may adjust",
            "subject to change",
        ],
    },
    {
        "category": "Arbitration",
        "keywords": [
            "arbitration",
            "dispute resolution",
            "waive right to sue",
        ],
        "risk_indicators": [
            "binding arbitration",
            "waive jury trial",
            "class action waiver",
        ],
    },
]
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page in a PDF.

    Args:
        pdf_file: The PDF content as raw bytes (it is wrapped in io.BytesIO).

    Returns:
        The text of each page followed by a newline, joined in page order.
        Failures are reported in-band rather than raised: on any exception
        the return value is a string beginning with "Error extracting PDF: ".
    """
    try:
        reader = PyPDF2.PdfReader(io.BytesIO(pdf_file))
        return "".join(page.extract_text() + "\n" for page in reader.pages)
    except Exception as exc:
        return f"Error extracting PDF: {str(exc)}"
def split_into_clauses(text, chunk_chars=100, min_clause_chars=50):
    """Split contract text into clause-sized chunks.

    The text is first cut at section markers (a newline followed by "1.",
    "A." and similar, or the literal "Article <n>"). Each section is then cut
    at ". " sentence boundaries, and consecutive sentences are accumulated
    until the chunk grows past ``chunk_chars`` characters.

    Args:
        text: Full contract text. ``None`` or an empty string yields ``[]``.
        chunk_chars: A chunk is emitted as soon as it is longer than this.
        min_clause_chars: Chunks whose stripped length is not greater than
            this are discarded as noise (headings, page numbers, ...).

    Returns:
        A list of stripped clause strings, in document order.
    """
    import re  # imported locally so the function is self-contained

    if not text:
        return []

    section_break = r'\n\s*\d+\.|\n\s*[A-Z]\.|Article \d+'
    clauses = []
    for section in re.split(section_break, text):
        sentences = section.split('. ')
        last_index = len(sentences) - 1
        current_clause = ""
        for index, sentence in enumerate(sentences):
            # str.split() consumed the ". " that followed every sentence
            # except the last one. Restore it only where it was removed, so
            # the final sentence does not end up with a doubled or invented
            # period.
            if index < last_index:
                current_clause += sentence + ". "
            else:
                current_clause += sentence
            if len(current_clause) > chunk_chars:
                clauses.append(current_clause.strip())
                current_clause = ""
        # Flush whatever is left of the section.
        remainder = current_clause.strip()
        if remainder:
            clauses.append(remainder)

    return [clause for clause in clauses if len(clause) > min_clause_chars]
def analyze_clause_risk(clause, patterns=None):
    """Score one clause against the risk taxonomy by phrase matching.

    A pattern flags the clause only when at least one of its "keywords" AND
    at least one of its "risk_indicators" occur in the lowercased clause.

    Severity:
        HIGH   - two or more risk indicators matched, or the clause contains
                 "unlimited" or "perpetuity" anywhere.
        MEDIUM - exactly one risk indicator matched.
    Because a finding requires at least one indicator, no finding is ever
    rated "LOW" by this function.

    Args:
        clause: The clause text.
        patterns: Optional list of pattern dicts with "category", "keywords"
            and "risk_indicators" keys. Defaults to the module-level
            RISKY_PATTERNS. Phrases must be lowercase to match.

    Returns:
        A list of dicts with "category", "risk_level" and "clause" keys, one
        per matching pattern. "clause" is cut to 300 characters plus "..."
        when the original is longer.
    """
    if patterns is None:
        patterns = RISKY_PATTERNS

    clause_lower = clause.lower()
    # These words escalate any finding to HIGH regardless of match count.
    forces_high = "unlimited" in clause_lower or "perpetuity" in clause_lower
    excerpt = clause[:300] + "..." if len(clause) > 300 else clause

    risks = []
    for pattern in patterns:
        if not any(kw in clause_lower for kw in pattern["keywords"]):
            continue
        risk_matches = sum(1 for ri in pattern["risk_indicators"] if ri in clause_lower)
        if risk_matches == 0:
            continue
        risk_level = "HIGH" if risk_matches >= 2 or forces_high else "MEDIUM"
        risks.append({
            "category": pattern["category"],
            "risk_level": risk_level,
            "clause": excerpt,
        })
    return risks
def _parse_ai_response(response):
    """Split the model's raw answer into (why_risky, suggested) strings."""
    import re  # imported locally so the helper is self-contained

    # Split once, so text after a second mention of the heading is kept.
    parts = re.split(r"SUGGESTED REVISION", response, maxsplit=1, flags=re.IGNORECASE)

    # Explanation: drop a leading "1." / markdown decoration / "WHY" heading
    # and the trailing "2." list marker that introduces the second section.
    why_risky = re.sub(
        r'^[\s*#]*(?:1[.)])?[\s*#]*(?:WHY(?: this clause is risky)?)?[\s*#:]*',
        '',
        parts[0],
    )
    why_risky = re.sub(r'[\s*#]*(?:2[.)])?[\s*#]*$', '', why_risky)

    # Suggestion: drop the punctuation/decoration that follows the heading.
    suggested = ""
    if len(parts) > 1:
        suggested = re.sub(r'^[\s*#:]*', '', parts[1]).strip()

    if not why_risky:
        why_risky = "This clause may impose unfair obligations or risks on one party."
    if not suggested:
        suggested = "Negotiate this clause with legal counsel."
    return why_risky, suggested


def get_ai_analysis(clause, category, risk_level):
    """Explain why a flagged clause is risky and suggest a fairer revision.

    Args:
        clause: The (possibly truncated) clause text.
        category: Risk category label the clause was flagged under.
        risk_level: Severity label assigned to the clause.

    Returns:
        A ``(why_risky, suggested_revision)`` tuple of strings.
        * Without an HF_TOKEN environment variable, a deterministic
          explanation is returned and no request is made.
        * If the chat-completion request fails, generic fallback text is
          returned instead of raising.
    """
    if not os.getenv("HF_TOKEN"):
        return (
            f"This clause was flagged by deterministic pattern matching in the {category} category. The risk level is {risk_level}, so the exact wording should be reviewed before signing.",
            "Ask counsel to narrow the clause, define terms precisely, and make obligations mutual where appropriate."
        )

    prompt = f"""You are a legal risk analyst. Analyze this contract clause.
Category: {category}
Risk Level: {risk_level}
Clause: "{clause}"
Provide:
1. WHY this clause is risky (2-3 sentences, be specific)
2. SUGGESTED REVISION (1-2 sentences on how to make it fairer)
Be direct and actionable."""

    try:
        chunks = []
        for message in client.chat_completion(
            model="Qwen/Qwen2.5-72B-Instruct",
            messages=[{"role": "user", "content": prompt}],
            max_tokens=300,
            stream=True
        ):
            # Skip stream events that carry no text.
            if message.choices and message.choices[0].delta.content:
                chunks.append(message.choices[0].delta.content)
    except Exception:
        # Best effort: a failed request must not abort the whole report.
        # (Exception, not a bare except, so Ctrl-C / SystemExit still work.)
        return "This clause may impose unfair obligations or risks on one party.", "Consider negotiating more balanced terms with legal counsel."

    return _parse_ai_response("".join(chunks))
def analyze_contract(pdf_file, progress=gr.Progress()):
    """Run the full contract review and build the two Markdown reports.

    Steps: extract the PDF text, split it into clauses, pattern-match every
    clause, then ask get_ai_analysis to explain the five most severe
    findings.

    Args:
        pdf_file: Uploaded PDF as raw bytes, or None when nothing was uploaded.
        progress: Gradio progress tracker; called with a fraction and a
            ``desc`` label as the analysis advances.

    Returns:
        A ``(summary, detailed)`` tuple of Markdown strings. ``detailed`` is
        empty when there is nothing to report (no upload, extraction
        failure, no clauses, or no findings).
    """
    if pdf_file is None:
        return "π Please upload a contract to investigate.", ""

    progress(0, desc="π Investigating contract...")

    # Extract text
    text = extract_text_from_pdf(pdf_file)
    # extract_text_from_pdf reports failure in-band with this exact prefix.
    # Matching the prefix, not the word "Error" anywhere in the text, keeps
    # contracts that merely mention "Error" from being rejected.
    if text.startswith("Error extracting PDF:"):
        return text, ""

    progress(0.2, desc="π Analyzing clauses...")

    # Split into clauses
    clauses = split_into_clauses(text)
    if not clauses:
        return "β οΈ Could not extract clauses from contract. Please ensure it's a text-based PDF.", ""
    total_clauses = len(clauses)

    progress(0.4, desc="π Searching for suspicious clauses...")

    # Analyze each clause
    all_risks = []
    for i, clause in enumerate(clauses):
        all_risks.extend(analyze_clause_risk(clause))
        progress(0.4 + (0.4 * (i / total_clauses)), desc=f"π Analyzing clause {i+1}/{total_clauses}...")

    if not all_risks:
        summary = f"""
# β Investigation Complete
**Contract Status:** LOW RISK
π Analyzed {total_clauses} clauses
β No major red flags detected
**Note:** This is an automated analysis. Always consult a licensed attorney for legal advice.
"""
        return summary, ""

    progress(0.8, desc="π§ Generating risk analysis...")

    # Most severe findings first; the sort is stable, so document order is
    # kept within each severity.
    risk_order = {"HIGH": 0, "MEDIUM": 1, "LOW": 2}
    all_risks.sort(key=lambda x: risk_order[x["risk_level"]])

    # Only the top five findings get a model-written explanation.
    top_risks = all_risks[:5]
    detailed_findings = []
    for i, risk in enumerate(top_risks):
        why, suggested = get_ai_analysis(risk["clause"], risk["category"], risk["risk_level"])
        detailed_findings.append({
            "category": risk["category"],
            "risk_level": risk["risk_level"],
            "clause": risk["clause"],
            "why_risky": why,
            "suggested": suggested
        })
        progress(0.8 + (0.15 * (i / len(top_risks))), desc=f"π§ Analyzing risk {i+1}...")

    progress(0.95, desc="π Generating report...")

    # Create summary
    high_count = sum(1 for r in all_risks if r["risk_level"] == "HIGH")
    medium_count = sum(1 for r in all_risks if r["risk_level"] == "MEDIUM")
    low_count = sum(1 for r in all_risks if r["risk_level"] == "LOW")

    if high_count > 0:
        overall_risk = "HIGH"
    elif medium_count > 0:
        overall_risk = "MEDIUM"
    else:
        overall_risk = "LOW"
    risk_emoji = {"HIGH": "π¨", "MEDIUM": "β οΈ"}.get(overall_risk, "β ")

    summary = f"""
# {risk_emoji} Investigation Complete
**Overall Risk Assessment:** {overall_risk}
## π Risk Dashboard
- π¨ **HIGH Risk Clauses:** {high_count}
- β οΈ **MEDIUM Risk Clauses:** {medium_count}
- βΉοΈ **LOW Risk Clauses:** {low_count}
**Total Clauses Analyzed:** {total_clauses}
---
**β οΈ DISCLAIMER:** This is an automated analysis tool, NOT legal advice. Always consult a licensed attorney before signing any contract.
"""

    # Create detailed report
    finding_icons = {"HIGH": "π¨", "MEDIUM": "β οΈ"}
    sections = ["# π Detailed Findings\n\n"]
    for i, finding in enumerate(detailed_findings, 1):
        risk_color = finding_icons.get(finding["risk_level"], "βΉοΈ")
        sections.append(f"""
## {risk_color} Finding #{i}: {finding["category"]}
**Risk Level:** {finding["risk_level"]}
**Suspicious Clause:**
> {finding["clause"]}
**Why This Is Risky:**
{finding["why_risky"]}
**Suggested Revision:**
{finding["suggested"]}
---
""")
    detailed = "".join(sections)

    progress(1.0, desc="β Investigation complete!")
    return summary, detailed
# Stylesheet handed to gr.Blocks(css=...): a monospace base font, three
# colour-coded risk call-out classes, a dark gradient on #component-0, and
# the amber title / badge styling of the "detective" theme.
custom_css = """
.gradio-container {
font-family: 'Courier New', monospace;
}
.risk-high {
background-color: #fee;
border-left: 4px solid #c00;
padding: 10px;
}
.risk-medium {
background-color: #ffeaa7;
border-left: 4px solid #fdcb6e;
padding: 10px;
}
.risk-low {
background-color: #d5f4e6;
border-left: 4px solid #00b894;
padding: 10px;
}
#component-0 {
background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%);
}
.title-text {
color: #f39c12;
text-shadow: 2px 2px 4px rgba(0,0,0,0.5);
}
.detective-badge {
background-color: #2c3e50;
color: #f39c12;
padding: 20px;
border-radius: 10px;
border: 2px solid #f39c12;
}
"""
# Gradio UI: hero banner and method panel from the shared component library,
# an upload column beside a results column, a disclaimer footer, and the
# button wiring that runs analyze_contract.
# NOTE(review): the nesting below was reconstructed from a listing whose
# indentation had been stripped - confirm it against the deployed layout.
with gr.Blocks(css=custom_css, theme=gr.themes.Monochrome()) as demo:
    create_premium_hero(
        "Contract Red Team",
        "Upload a contract and inspect risky clauses with retrieval, clause patterning, and model-assisted explanation.",
        "π",
        badge="Legal AI Triage",
        highlights=["Clause parsing", "Risk taxonomy", "Evidence-first report"],
    )
    create_method_panel({
        "Technique": "PDF parsing β clause chunking β risk-pattern scoring β explanation.",
        "What it proves": "You can build an evidence-first AI assistant for high-stakes document review.",
        "Safety note": "This is triage and education, not legal advice or a substitute for counsel.",
    })

    with gr.Row():
        # Left column: instructions, file upload and the trigger button.
        with gr.Column(scale=1):
            gr.Markdown("""
### π How It Works
1. **Upload** your contract (PDF)
2. **AI investigates** each clause
3. **Get detailed report** on risky clauses
### β Supported Contracts
- NDAs (Non-Disclosure Agreements)
- Employment Contracts
- Service Agreements
- Consulting Agreements
- Freelance Contracts
### β οΈ We Check For
- Unfair termination clauses
- Broad non-compete agreements
- IP rights issues
- Liability concerns
- Confidentiality overreach
- Compensation risks
- Arbitration requirements
""")
            # type="binary" delivers the upload to the handler as raw bytes.
            pdf_input = gr.File(
                label="π Upload Contract (PDF)",
                file_types=[".pdf"],
                type="binary"
            )
            analyze_btn = gr.Button(
                "π Start Investigation",
                variant="primary",
                size="lg"
            )

        # Right column: the two Markdown panes filled by analyze_contract.
        with gr.Column(scale=2):
            gr.Markdown("### π Investigation Summary")
            summary_output = gr.Markdown(value="π Upload a contract to begin investigation...")
            gr.Markdown("### π Detailed Findings")
            detailed_output = gr.Markdown(value="")

    gr.HTML("""
<div style='background-color: #2c3e50; color: #ecf0f1; padding: 20px; border-radius: 10px; margin-top: 20px; text-align: center;'>
<h3 style='color: #e74c3c; margin-top: 0;'>β οΈ IMPORTANT DISCLAIMER</h3>
<p>This tool provides an automated analysis and is <strong>NOT legal advice</strong>.
Always consult with a licensed attorney before signing any contract.
The AI may miss risks or provide inaccurate information.</p>
</div>
""")

    # Event handlers: the button feeds the uploaded bytes to analyze_contract
    # and routes its (summary, detailed) result to the two output panes.
    analyze_btn.click(
        fn=analyze_contract,
        inputs=[pdf_input],
        outputs=[summary_output, detailed_output]
    )

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()