File size: 13,179 Bytes
efd1a3b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96286a4
 
 
 
 
 
efd1a3b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0942842
efd1a3b
 
0942842
efd1a3b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
import gradio as gr
from huggingface_hub import InferenceClient
import PyPDF2
import io
import os
import sys

sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
from shared.components import create_method_panel, create_premium_hero

# Shared Hugging Face inference client used by get_ai_analysis().
# The token is read from the HF_TOKEN environment variable; os.getenv returns
# None when it is unset, and get_ai_analysis() then skips the remote call.
client = InferenceClient(token=os.getenv("HF_TOKEN"))

# Known risky clause patterns, consumed by analyze_clause_risk().
# Each entry has:
#   "category"        - label reported for a finding
#   "keywords"        - phrases that place a clause in this category
#   "risk_indicators" - phrases that make a clause of this category risky
# Matching is a plain substring test against the lower-cased clause, so every
# phrase here must be written in lowercase. A clause is flagged for a category
# only when at least one keyword AND at least one risk indicator are present.
RISKY_PATTERNS = [
    {
        "category": "Termination",
        "keywords": ["at-will", "without cause", "immediate termination", "no notice"],
        "risk_indicators": ["employer can terminate", "company may terminate", "dismiss without"]
    },
    {
        "category": "Non-Compete",
        "keywords": ["non-compete", "covenant not to compete", "shall not engage", "prohibited from working"],
        "risk_indicators": ["any business", "similar industry", "12 months", "24 months", "geographic area"]
    },
    {
        "category": "Intellectual Property",
        "keywords": ["work product", "intellectual property", "inventions", "copyrights", "patents"],
        "risk_indicators": ["belong to company", "assigned to employer", "waive rights", "all ideas"]
    },
    {
        "category": "Confidentiality",
        "keywords": ["confidential", "proprietary", "trade secrets", "non-disclosure"],
        "risk_indicators": ["indefinitely", "perpetuity", "all information", "broadly defined"]
    },
    {
        "category": "Liability",
        "keywords": ["indemnify", "hold harmless", "liability", "damages"],
        "risk_indicators": ["unlimited liability", "personal liability", "consequential damages", "punitive damages"]
    },
    {
        "category": "Compensation",
        "keywords": ["salary", "compensation", "bonus", "payment"],
        "risk_indicators": ["discretionary", "sole discretion", "may adjust", "subject to change"]
    },
    {
        "category": "Arbitration",
        "keywords": ["arbitration", "dispute resolution", "waive right to sue"],
        "risk_indicators": ["binding arbitration", "waive jury trial", "class action waiver"]
    }
]

def extract_text_from_pdf(pdf_file):
    """Extract the text of every page from an uploaded PDF.

    Args:
        pdf_file: Raw PDF content as bytes; it is wrapped in ``io.BytesIO``
            before being handed to PyPDF2.

    Returns:
        The text of each page followed by a newline, concatenated in page
        order. On any failure no exception is raised; instead a string
        starting with ``"Error extracting PDF: "`` is returned.
    """
    try:
        pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_file))
        # ``or ""`` keeps a page that yields no text (None) from aborting the
        # whole document with a TypeError.
        page_texts = [(page.extract_text() or "") + "\n" for page in pdf_reader.pages]
        return "".join(page_texts)
    except Exception as e:
        return f"Error extracting PDF: {str(e)}"

def split_into_clauses(text, target_length=100, min_length=50):
    """Split contract text into clause-sized chunks.

    The text is first cut at section markers (a newline followed by ``1.`` /
    ``A.`` style numbering, or an ``Article N`` heading). Each section is then
    cut at ``". "`` and the pieces are re-joined until a chunk grows past
    ``target_length`` characters.

    Args:
        text: Full contract text.
        target_length: A chunk is closed as soon as it is longer than this
            many characters.
        min_length: Chunks that are not longer than this are discarded.

    Returns:
        A list of stripped clause strings, in document order.
    """
    import re

    clauses = []

    # Section boundaries: "\n 1." / "\n A." numbering or "Article N" headings.
    sections = re.split(r'\n\s*\d+\.|\n\s*[A-Z]\.|Article \d+', text)

    for section in sections:
        sentences = section.split('. ')
        last_index = len(sentences) - 1
        current_clause = ""
        for index, sentence in enumerate(sentences):
            # Restore only the ". " that split() actually removed. The final
            # piece never had one, so re-adding it would produce "..".
            current_clause += sentence if index == last_index else sentence + ". "
            if len(current_clause) > target_length:
                clauses.append(current_clause.strip())
                current_clause = ""
        # Keep whatever is left over at the end of the section.
        if current_clause.strip():
            clauses.append(current_clause.strip())

    # Drop fragments too short to be a meaningful clause.
    return [c for c in clauses if len(c) > min_length]

def analyze_clause_risk(clause, patterns=None):
    """Flag a single clause against the risky-pattern table.

    A pattern matches when the lower-cased clause contains at least one of its
    keywords and at least one of its risk indicators. A match is rated
    ``"HIGH"`` when two or more risk indicators are present, or when the
    clause mentions "unlimited" or "perpetuity"; every other match is
    ``"MEDIUM"``. (A match always has at least one indicator, so no ``"LOW"``
    rating is ever produced here.)

    Args:
        clause: The clause text to inspect.
        patterns: Optional list of pattern dicts with ``"category"``,
            ``"keywords"`` and ``"risk_indicators"`` keys. Defaults to the
            module-level ``RISKY_PATTERNS``.

    Returns:
        A list of dicts with ``"category"``, ``"risk_level"`` and ``"clause"``
        (the clause text, cut to 300 characters plus ``"..."`` when longer),
        one per matching pattern, in pattern order.
    """
    if patterns is None:
        patterns = RISKY_PATTERNS

    clause_lower = clause.lower()

    # Both values depend only on the clause, so compute them once.
    excerpt = clause[:300] + "..." if len(clause) > 300 else clause
    has_escalating_term = "unlimited" in clause_lower or "perpetuity" in clause_lower

    risks = []
    for pattern in patterns:
        keyword_matches = sum(1 for kw in pattern["keywords"] if kw in clause_lower)
        risk_matches = sum(1 for ri in pattern["risk_indicators"] if ri in clause_lower)

        # Both a category keyword and a risk indicator are required.
        if keyword_matches == 0 or risk_matches == 0:
            continue

        risk_level = "HIGH" if risk_matches >= 2 or has_escalating_term else "MEDIUM"
        risks.append({
            "category": pattern["category"],
            "risk_level": risk_level,
            "clause": excerpt
        })

    return risks

def get_ai_analysis(clause, category, risk_level):
    """Explain why a flagged clause is risky and suggest a revision.

    When the ``HF_TOKEN`` environment variable is unset or empty, no remote
    call is made and a deterministic explanation is returned. Otherwise the
    module-level ``client`` is asked (streaming chat completion) for an
    explanation, and the reply is split on the ``"SUGGESTED REVISION"``
    marker.

    Args:
        clause: The clause text (already truncated by the caller).
        category: Risk category label for the clause.
        risk_level: Risk rating assigned to the clause.

    Returns:
        A ``(why_risky, suggested_revision)`` tuple of strings. If the remote
        call or the parsing fails, a generic pair is returned instead of
        raising.
    """
    if not os.getenv("HF_TOKEN"):
        return (
            f"This clause was flagged by deterministic pattern matching in the {category} category. The risk level is {risk_level}, so the exact wording should be reviewed before signing.",
            "Ask counsel to narrow the clause, define terms precisely, and make obligations mutual where appropriate."
        )

    prompt = f"""You are a legal risk analyst. Analyze this contract clause.

Category: {category}
Risk Level: {risk_level}
Clause: "{clause}"

Provide:
1. WHY this clause is risky (2-3 sentences, be specific)
2. SUGGESTED REVISION (1-2 sentences on how to make it fairer)

Be direct and actionable."""

    try:
        chunks = []
        for message in client.chat_completion(
            model="Qwen/Qwen2.5-72B-Instruct",
            messages=[{"role": "user", "content": prompt}],
            max_tokens=300,
            stream=True
        ):
            # Skip a streamed chunk with an empty choices list rather than
            # letting an IndexError discard the text collected so far.
            if not message.choices:
                continue
            delta_text = message.choices[0].delta.content
            if delta_text:
                chunks.append(delta_text)
        response = "".join(chunks)

        # Everything before the marker is the explanation, the rest the revision.
        parts = response.split("SUGGESTED REVISION")
        why_risky = parts[0].replace("WHY", "").replace("1.", "").strip()
        suggested = parts[1].replace("2.", "").strip() if len(parts) > 1 else "Negotiate this clause with legal counsel."

        return why_risky, suggested
    except Exception:
        # Deliberate best-effort: any API or parsing failure falls back to a
        # generic explanation. KeyboardInterrupt/SystemExit still propagate.
        return "This clause may impose unfair obligations or risks on one party.", "Consider negotiating more balanced terms with legal counsel."

def analyze_contract(pdf_file, progress=gr.Progress()):
    """Run the full contract analysis and build the two Markdown reports.

    Steps: extract the PDF text, split it into clauses, score every clause
    with analyze_clause_risk(), explain the five highest-rated findings with
    get_ai_analysis(), then format a summary and a detailed report.

    Args:
        pdf_file: Uploaded PDF content, or None when nothing was uploaded.
        progress: Gradio progress tracker, called with a fraction and a
            description as the analysis advances.

    Returns:
        A ``(summary_markdown, detailed_markdown)`` tuple. The second element
        is an empty string when there is nothing to detail (no upload,
        extraction failure, no clauses, or no findings).
    """
    if pdf_file is None:
        return "πŸ” Please upload a contract to investigate.", ""

    progress(0, desc="πŸ” Investigating contract...")

    # Extract text
    text = extract_text_from_pdf(pdf_file)
    # extract_text_from_pdf() reports failure with this exact prefix. Testing
    # for the bare word "Error" anywhere in the text would reject ordinary
    # contracts that merely contain it (e.g. "Errors and Omissions").
    if text.startswith("Error extracting PDF:"):
        return text, ""

    progress(0.2, desc="πŸ“„ Analyzing clauses...")

    # Split into clauses
    clauses = split_into_clauses(text)

    if not clauses:
        return "⚠️ Could not extract clauses from contract. Please ensure it's a text-based PDF.", ""

    progress(0.4, desc="πŸ”Ž Searching for suspicious clauses...")

    # Analyze each clause; this phase covers the 0.4 - 0.8 progress range
    all_risks = []
    for i, clause in enumerate(clauses):
        risks = analyze_clause_risk(clause)
        all_risks.extend(risks)
        progress(0.4 + (0.4 * (i / len(clauses))), desc=f"πŸ”Ž Analyzing clause {i+1}/{len(clauses)}...")

    if not all_risks:
        summary = f"""
# βœ… Investigation Complete

**Contract Status:** LOW RISK

πŸ” Analyzed {len(clauses)} clauses
βœ“ No major red flags detected

**Note:** This is an automated analysis. Always consult a licensed attorney for legal advice.
        """
        return summary, ""

    progress(0.8, desc="🧠 Generating risk analysis...")

    # Sort by risk level (stable, so document order is kept within a level)
    risk_order = {"HIGH": 0, "MEDIUM": 1, "LOW": 2}
    all_risks.sort(key=lambda x: risk_order[x["risk_level"]])

    # Generate detailed analysis for top risks only, to bound the number of
    # model calls; this phase covers the 0.8 - 0.95 progress range
    top_risks = all_risks[:5]
    detailed_findings = []
    for i, risk in enumerate(top_risks):
        why, suggested = get_ai_analysis(risk["clause"], risk["category"], risk["risk_level"])
        detailed_findings.append({
            "category": risk["category"],
            "risk_level": risk["risk_level"],
            "clause": risk["clause"],
            "why_risky": why,
            "suggested": suggested
        })
        progress(0.8 + (0.15 * (i / len(top_risks))), desc=f"🧠 Analyzing risk {i+1}...")

    progress(0.95, desc="πŸ“Š Generating report...")

    # Create summary; counts cover every finding, not just the detailed top five
    high_count = sum(1 for r in all_risks if r["risk_level"] == "HIGH")
    medium_count = sum(1 for r in all_risks if r["risk_level"] == "MEDIUM")
    low_count = sum(1 for r in all_risks if r["risk_level"] == "LOW")

    overall_risk = "HIGH" if high_count > 0 else ("MEDIUM" if medium_count > 0 else "LOW")
    risk_emoji = "🚨" if overall_risk == "HIGH" else ("⚠️" if overall_risk == "MEDIUM" else "βœ…")

    summary = f"""
# {risk_emoji} Investigation Complete

**Overall Risk Assessment:** {overall_risk}

## πŸ“Š Risk Dashboard
- 🚨 **HIGH Risk Clauses:** {high_count}
- ⚠️ **MEDIUM Risk Clauses:** {medium_count}
- ℹ️ **LOW Risk Clauses:** {low_count}

**Total Clauses Analyzed:** {len(clauses)}

---

**⚠️ DISCLAIMER:** This is an automated analysis tool, NOT legal advice. Always consult a licensed attorney before signing any contract.
    """

    # Create detailed report
    detailed = "# πŸ” Detailed Findings\n\n"

    for i, finding in enumerate(detailed_findings, 1):
        risk_color = "🚨" if finding["risk_level"] == "HIGH" else ("⚠️" if finding["risk_level"] == "MEDIUM" else "ℹ️")

        detailed += f"""
## {risk_color} Finding #{i}: {finding["category"]}

**Risk Level:** {finding["risk_level"]}

**Suspicious Clause:**
> {finding["clause"]}

**Why This Is Risky:**
{finding["why_risky"]}

**Suggested Revision:**
{finding["suggested"]}

---

"""

    progress(1.0, desc="βœ… Investigation complete!")

    return summary, detailed

# Custom CSS for detective theme, passed to gr.Blocks(css=...) below.
# NOTE(review): no component created in this file assigns the .risk-*,
# .title-text or .detective-badge classes; they may be used by
# shared.components or be unused - confirm before removing.
# NOTE(review): "#component-0" looks like an auto-generated element id, so the
# rule depends on component creation order - confirm it still targets the
# intended element.
custom_css = """
.gradio-container {
    font-family: 'Courier New', monospace;
}

.risk-high {
    background-color: #fee;
    border-left: 4px solid #c00;
    padding: 10px;
}

.risk-medium {
    background-color: #ffeaa7;
    border-left: 4px solid #fdcb6e;
    padding: 10px;
}

.risk-low {
    background-color: #d5f4e6;
    border-left: 4px solid #00b894;
    padding: 10px;
}

#component-0 {
    background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%);
}

.title-text {
    color: #f39c12;
    text-shadow: 2px 2px 4px rgba(0,0,0,0.5);
}

.detective-badge {
    background-color: #2c3e50;
    color: #f39c12;
    padding: 20px;
    border-radius: 10px;
    border: 2px solid #f39c12;
}
"""

# Create Gradio interface: a hero/method header, a two-column body (upload and
# instructions on the left, reports on the right) and a disclaimer footer.
with gr.Blocks(css=custom_css, theme=gr.themes.Monochrome()) as demo:
    # Header panels come from the project's shared.components module.
    create_premium_hero(
        "Contract Red Team",
        "Upload a contract and inspect risky clauses with retrieval, clause patterning, and model-assisted explanation.",
        "πŸ”",
        badge="Legal AI Triage",
        highlights=["Clause parsing", "Risk taxonomy", "Evidence-first report"],
    )
    create_method_panel({
        "Technique": "PDF parsing β†’ clause chunking β†’ risk-pattern scoring β†’ explanation.",
        "What it proves": "You can build an evidence-first AI assistant for high-stakes document review.",
        "Safety note": "This is triage and education, not legal advice or a substitute for counsel.",
    })

    with gr.Row():
        # Left column: usage notes, file upload and the trigger button.
        with gr.Column(scale=1):
            gr.Markdown("""
            ### πŸ“‹ How It Works
            1. **Upload** your contract (PDF)
            2. **AI investigates** each clause
            3. **Get detailed report** on risky clauses

            ### βœ… Supported Contracts
            - NDAs (Non-Disclosure Agreements)
            - Employment Contracts
            - Service Agreements
            - Consulting Agreements
            - Freelance Contracts

            ### ⚠️ We Check For
            - Unfair termination clauses
            - Broad non-compete agreements
            - IP rights issues
            - Liability concerns
            - Confidentiality overreach
            - Compensation risks
            - Arbitration requirements
            """)

            # type="binary": the handler is given the file content itself,
            # which extract_text_from_pdf() wraps in io.BytesIO.
            pdf_input = gr.File(
                label="πŸ“„ Upload Contract (PDF)",
                file_types=[".pdf"],
                type="binary"
            )

            analyze_btn = gr.Button(
                "πŸ” Start Investigation",
                variant="primary",
                size="lg"
            )

        # Right column (twice as wide): the two Markdown outputs of analyze_contract().
        with gr.Column(scale=2):
            gr.Markdown("### πŸ“Š Investigation Summary")
            summary_output = gr.Markdown(value="πŸ” Upload a contract to begin investigation...")

            gr.Markdown("### πŸ” Detailed Findings")
            detailed_output = gr.Markdown(value="")

    # Static disclaimer footer.
    gr.HTML("""
        <div style='background-color: #2c3e50; color: #ecf0f1; padding: 20px; border-radius: 10px; margin-top: 20px; text-align: center;'>
            <h3 style='color: #e74c3c; margin-top: 0;'>⚠️ IMPORTANT DISCLAIMER</h3>
            <p>This tool provides an automated analysis and is <strong>NOT legal advice</strong>.
            Always consult with a licensed attorney before signing any contract.
            The AI may miss risks or provide inaccurate information.</p>
        </div>
    """)

    # Event handlers: the button runs analyze_contract on the uploaded file and
    # routes its (summary, detailed) return values to the two outputs in order.
    analyze_btn.click(
        fn=analyze_contract,
        inputs=[pdf_input],
        outputs=[summary_output, detailed_output]
    )

# Start the Gradio server only when this file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()