"""
Real-World Contract Analysis Demo

This script shows how to analyze full contract documents (not just individual clauses).

Usage:
    python analyze_document.py --contract path/to/contract.txt
    python analyze_document.py --demo  # Use built-in demo contract
"""
|
|
|
|
|
import argparse
import json
from pathlib import Path
from typing import Any, Dict, Optional, Sequence

from utils import (
    split_into_clauses,
    analyze_full_document,
    print_document_analysis
)
|
|
|
|
|
|
|
|
|
|
|
# Built-in sample service agreement (13 numbered sections plus a preamble and
# a signature line). It is the text the script falls back on in demo mode.
DEMO_CONTRACT = """
SERVICE AGREEMENT

This Service Agreement ("Agreement") is entered into as of January 1, 2024,
by and between TechCorp Inc. ("Provider") and ClientCo LLC ("Client").

1. SERVICES
Provider shall provide software development services as described in Exhibit A
to Client in accordance with the terms and conditions set forth herein.
Provider shall use commercially reasonable efforts to perform the Services.

2. PAYMENT TERMS
Client shall pay Provider the fees specified in Exhibit B within thirty (30) days
of receipt of each invoice. Late payments shall incur a penalty of 1.5% per month
or the maximum rate permitted by law, whichever is less.

3. TERM AND TERMINATION
This Agreement shall commence on the Effective Date and continue for a period of
twelve (12) months unless earlier terminated as provided herein. Either party may
terminate this Agreement upon thirty (30) days written notice to the other party.
Upon termination, Client shall pay all fees due for Services performed up to the
termination date.

4. INTELLECTUAL PROPERTY
All intellectual property rights in the deliverables shall remain the exclusive
property of Provider. Client is granted a non-exclusive, non-transferable license
to use the deliverables solely for Client's internal business purposes.

5. CONFIDENTIALITY
Each party agrees to maintain in confidence all Confidential Information disclosed
by the other party. The receiving party shall not disclose such information to any
third party without prior written consent. This obligation shall survive termination
of this Agreement for a period of three (3) years.

6. LIMITATION OF LIABILITY
In no event shall either party's total liability under this Agreement exceed the
total amount paid by Client to Provider in the twelve (12) months immediately
preceding the claim. Neither party shall be liable for any indirect, incidental,
consequential, or punitive damages, including lost profits or business interruption.

7. INDEMNIFICATION
Each party shall indemnify, defend, and hold harmless the other party from and
against any third-party claims, damages, or expenses arising out of such party's
breach of this Agreement or gross negligence. Provider shall indemnify Client
against any claims that the deliverables infringe any third-party intellectual
property rights.

8. WARRANTY DISCLAIMER
Provider warrants that Services will be performed in a professional and workmanlike
manner. EXCEPT AS EXPRESSLY SET FORTH HEREIN, PROVIDER MAKES NO OTHER WARRANTIES,
EXPRESS OR IMPLIED, INCLUDING WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
PARTICULAR PURPOSE.

9. FORCE MAJEURE
Neither party shall be liable for any failure or delay in performance due to
circumstances beyond its reasonable control, including acts of God, war, terrorism,
pandemic, or natural disasters.

10. ASSIGNMENT
Neither party may assign this Agreement without the prior written consent of the
other party, except that either party may assign this Agreement to a successor in
connection with a merger, acquisition, or sale of substantially all of its assets.

11. DISPUTE RESOLUTION
Any disputes arising out of this Agreement shall first be attempted to be resolved
through good faith negotiations. If negotiations fail, disputes shall be resolved
through binding arbitration in accordance with the rules of the American Arbitration
Association.

12. GOVERNING LAW
This Agreement shall be governed by and construed in accordance with the laws of
the State of Delaware, without regard to its conflict of law provisions.

13. ENTIRE AGREEMENT
This Agreement constitutes the entire agreement between the parties and supersedes
all prior agreements and understandings, whether written or oral, relating to the
subject matter hereof.

IN WITNESS WHEREOF, the parties have executed this Agreement as of the date first
written above.
"""
|
|
|
|
|
|
|
|
def analyze_contract_file(filepath: str, model) -> Dict[str, Any]:
    """
    Read a contract from disk and run the full-document analysis on it.

    Args:
        filepath: Location of the contract text file; it is read as UTF-8.
        model: Model object passed through unchanged to
            ``analyze_full_document``.

    Returns:
        The value returned by ``analyze_full_document`` for the file's text
        (called with ``return_details=True``), or an empty dict when the
        file could not be read.
    """
    print(f"π Loading contract from: {filepath}")

    try:
        with open(filepath, 'r', encoding='utf-8') as source:
            text = source.read()
    except Exception as err:
        # Best effort: report the problem and signal failure with an empty
        # result instead of propagating the exception.
        print(f"β Error reading file: {err}")
        return {}
    else:
        char_count = len(text)
        print(f" Contract length: {char_count} characters")
        return analyze_full_document(text, model, return_details=True)
|
|
|
|
|
|
|
|
def demo_clause_extraction():
    """
    Demo: show how a paragraph is split into clauses.

    Prints a fixed four-sentence sample, splits it with
    ``split_into_clauses(sample, method='sentence')``, prints every resulting
    clause with a 1-based index and finishes with the clause count.
    """
    # NOTE(review): the leading glyphs in these messages look mis-decoded
    # (mojibake); confirm the intended characters against the upstream file.
    print("\n" + "=" * 80)
    print("π§ DEMO: CLAUSE EXTRACTION")
    print("=" * 80)

    print("\nπ Original Paragraph:")
    print("-" * 80)
    sample = """
Provider shall provide software development services as described in Exhibit A.
Client shall pay Provider the fees specified in Exhibit B within thirty days.
Either party may terminate this Agreement upon thirty days written notice.
All intellectual property rights shall remain with Provider.
"""
    print(sample)

    print("\nβοΈ Extracted Clauses:")
    print("-" * 80)
    clauses = split_into_clauses(sample, method='sentence')

    for i, clause in enumerate(clauses, 1):
        print(f"{i}. {clause}")

    # Kept on a single physical line so the string literal is terminated.
    print(f"\nβ Total clauses extracted: {len(clauses)}")
|
|
|
|
|
|
|
|
def demo_full_analysis():
    """
    Demo: print what a full-document analysis result looks like.

    No model is loaded here. The function prints usage hints, assembles a
    hard-coded sample result and renders it with ``print_document_analysis``.
    """
    heavy_rule = "=" * 80
    print("\n" + heavy_rule)
    print("π DEMO: FULL DOCUMENT ANALYSIS")
    print(heavy_rule)

    usage_hint = (
        "\nβ οΈ Note: This demo requires a trained model.",
        " After training, use:",
        " >>> from model import LegalBERTMultiTask",
        " >>> model = LegalBERTMultiTask.load('checkpoints/best_model.pt')",
        " >>> results = analyze_full_document(contract_text, model)",
    )
    for hint_line in usage_hint:
        print(hint_line)

    print("\nπ Sample Output Structure:")
    print("-" * 80)

    # Hard-coded figures; they only illustrate the shape of a result.
    summary = {
        'total_clauses': 47,
        'analyzed_clauses': 47,
        'overall_severity': 6.2,
        'max_severity': 8.5,
        'overall_importance': 7.1,
        'high_risk_clause_count': 8,
        'dominant_risk_type': 'LIABILITY_RISK',
        'dominant_risk_percentage': 23.4,
    }
    distribution = {
        'LIABILITY_RISK': 0.234,
        'TERMINATION_RISK': 0.170,
        'INDEMNITY_RISK': 0.149,
        'IP_RISK': 0.128,
        'CONFIDENTIALITY_RISK': 0.106,
        'OPERATIONAL_RISK': 0.128,
        'COMPLIANCE_RISK': 0.085,
    }
    top_clause = {
        'clause_id': 15,
        'clause_text': "In no event shall either party's total liability...",
        'risk_name': 'LIABILITY_RISK',
        'severity': 8.5,
        'confidence': 0.92,
    }

    print_document_analysis({
        'document_summary': summary,
        'risk_distribution': distribution,
        'high_risk_clauses': [top_clause],
    })
|
|
|
|
|
|
|
|
def main(argv: Optional[Sequence[str]] = None):
    """
    Command-line entry point.

    With ``--demo`` (or when no ``--contract`` is given) the built-in demos
    are printed and nothing else happens. Otherwise the checkpoint at
    ``--model-path`` is loaded, the contract is analyzed in the mode selected
    by ``--hierarchical`` / ``--use-context`` (standard clause-level analysis
    when neither is set), the analysis is printed and the results are written
    as JSON next to the contract file.

    Args:
        argv: Argument list to parse. ``None`` (the default) lets argparse
            read ``sys.argv[1:]``, so calling ``main()`` behaves as before.
    """
    args = _build_arg_parser().parse_args(argv)

    if args.demo or not args.contract:
        _run_demo(args.show_clauses)
        return

    # From here on ``args.contract`` is always set, so no demo fallback is
    # needed when reading the contract.
    # NOTE(review): the leading glyphs in these messages look mis-decoded
    # (mojibake); confirm the intended characters against the upstream file.
    print("=" * 80)
    print("π― LEGAL-BERT: CONTRACT RISK ANALYSIS")
    print("=" * 80)

    print(f"\nπ€ Loading model from: {args.model_path}")
    try:
        model = _load_model(args.model_path, args.hierarchical)
    except Exception as e:
        # Top-level boundary: missing dependencies, a missing checkpoint or a
        # mismatching state dict all end up here and are reported to the user.
        print(f"β Error loading model: {e}")
        print("\nπ‘ Tip: Train the model first using:")
        print(" python train.py")
        return
    else:
        print("β Model loaded successfully")

    # ``--hierarchical`` always makes _load_model build the hierarchical
    # model, so the flag alone decides this branch.
    if args.hierarchical:
        print("\nπ Running hierarchical document-level analysis (with context)...")
        from utils import analyze_with_section_context
        contract_text = _read_contract(args.contract)
        if contract_text is None:
            return
        results = analyze_with_section_context(contract_text, model)
    elif args.use_context:
        print("\nπ Running clause-level analysis (with sliding window context)...")
        contract_text = _read_contract(args.contract)
        if contract_text is None:
            return
        results = analyze_full_document(
            contract_text,
            model,
            use_context=True,
            context_window=2
        )
    else:
        print("\nπ Running standard clause-level analysis...")
        results = analyze_contract_file(args.contract, model)

    if not results:
        return

    print_document_analysis(results)

    # Save the results next to the contract, never on top of it.
    output_path = _analysis_output_path(args.contract)
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2)
    print(f"\nπΎ Full results saved to: {output_path}")


def _build_arg_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the contract analysis script."""
    parser = argparse.ArgumentParser(
        description='Analyze full contract documents for risk'
    )
    parser.add_argument(
        '--contract',
        type=str,
        help='Path to contract text file'
    )
    parser.add_argument(
        '--demo',
        action='store_true',
        help='Run demo with built-in sample contract'
    )
    parser.add_argument(
        '--model-path',
        type=str,
        default='checkpoints/best_model.pt',
        help='Path to trained model checkpoint'
    )
    parser.add_argument(
        '--show-clauses',
        action='store_true',
        help='Show extracted clauses (for debugging)'
    )
    parser.add_argument(
        '--hierarchical',
        action='store_true',
        help='Use hierarchical document-level analysis (with context)'
    )
    parser.add_argument(
        '--use-context',
        action='store_true',
        help='Use sliding window context for clause analysis'
    )
    return parser


def _run_demo(show_clauses: bool) -> None:
    """Print the built-in demos and, if requested, the demo contract's clauses."""
    print("=" * 80)
    print("π― LEGAL-BERT: FULL DOCUMENT ANALYSIS DEMO")
    print("=" * 80)

    demo_clause_extraction()
    demo_full_analysis()

    if not show_clauses:
        return

    print("\n" + "=" * 80)
    print("π DEMO CONTRACT CLAUSES")
    print("=" * 80)
    clauses = split_into_clauses(DEMO_CONTRACT, method='legal')
    for i, clause in enumerate(clauses, 1):
        # Long clauses are shown as a 100-character preview.
        preview = f"{clause[:100]}..." if len(clause) > 100 else clause
        print(f"\n{i}. {preview}")
    print(f"\nβ Total: {len(clauses)} clauses")


def _load_model(model_path: str, force_hierarchical: bool):
    """Rebuild the model stored in the checkpoint at *model_path*, in eval mode."""
    # Imported here rather than at module level, so the demo path never needs
    # torch or the project's model/config modules.
    import torch
    from model import FullyLearningBasedLegalBERT, HierarchicalLegalBERT
    from config import LegalBertConfig

    # NOTE(review): torch.load unpickles the checkpoint, so only trusted files
    # should be loaded. Newer PyTorch releases default to weights_only=True,
    # which may reject a pickled config object -- confirm against the torch
    # version this project pins.
    checkpoint = torch.load(model_path, map_location='cpu')
    config = checkpoint.get('config', LegalBertConfig())
    model_type = checkpoint.get('model_type', 'standard')
    num_risks = len(checkpoint.get('discovered_patterns', {}))

    if model_type == 'hierarchical' or force_hierarchical:
        print("π Loading Hierarchical BERT model (context-aware)")
        model = HierarchicalLegalBERT(
            config,
            num_discovered_risks=num_risks,
            hidden_dim=config.hierarchical_hidden_dim,
            num_lstm_layers=config.hierarchical_num_lstm_layers
        )
    else:
        print("π Loading Standard BERT model")
        model = FullyLearningBasedLegalBERT(config, num_discovered_risks=num_risks)

    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()
    return model


def _read_contract(path: str) -> Optional[str]:
    """Return the UTF-8 text of *path*, or ``None`` after reporting a read error."""
    try:
        with open(path, 'r', encoding='utf-8') as f:
            return f.read()
    except (OSError, ValueError) as e:
        # ValueError also covers UnicodeDecodeError for non-UTF-8 files.
        print(f"β Error reading file: {e}")
        return None


def _analysis_output_path(contract_path: str) -> Path:
    """Return ``<stem>_analysis.json`` in the same directory as *contract_path*."""
    source = Path(contract_path)
    # Built from the stem so the result can never equal the input path,
    # whatever extension (if any) the contract file has.
    return source.with_name(f"{source.stem}_analysis.json")
|
|
|
|
|
|
|
|
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
|
|