# File size: 12,223 bytes (revision 9b1c753)
"""
Real-World Contract Analysis Demo

This script shows how to analyze full contract documents (not just individual clauses).

Usage:
    python analyze_document.py --contract path/to/contract.txt
    python analyze_document.py --demo  # Use built-in demo contract
"""
import argparse
from typing import Dict, Any
from utils import (
split_into_clauses,
analyze_full_document,
print_document_analysis
)
# Demo contract for testing.
# Built-in sample service agreement: a preamble, thirteen numbered sections
# and a closing signature line. main() splits it into clauses in demo mode
# when --show-clauses is passed.
DEMO_CONTRACT = """
SERVICE AGREEMENT
This Service Agreement ("Agreement") is entered into as of January 1, 2024,
by and between TechCorp Inc. ("Provider") and ClientCo LLC ("Client").
1. SERVICES
Provider shall provide software development services as described in Exhibit A
to Client in accordance with the terms and conditions set forth herein.
Provider shall use commercially reasonable efforts to perform the Services.
2. PAYMENT TERMS
Client shall pay Provider the fees specified in Exhibit B within thirty (30) days
of receipt of each invoice. Late payments shall incur a penalty of 1.5% per month
or the maximum rate permitted by law, whichever is less.
3. TERM AND TERMINATION
This Agreement shall commence on the Effective Date and continue for a period of
twelve (12) months unless earlier terminated as provided herein. Either party may
terminate this Agreement upon thirty (30) days written notice to the other party.
Upon termination, Client shall pay all fees due for Services performed up to the
termination date.
4. INTELLECTUAL PROPERTY
All intellectual property rights in the deliverables shall remain the exclusive
property of Provider. Client is granted a non-exclusive, non-transferable license
to use the deliverables solely for Client's internal business purposes.
5. CONFIDENTIALITY
Each party agrees to maintain in confidence all Confidential Information disclosed
by the other party. The receiving party shall not disclose such information to any
third party without prior written consent. This obligation shall survive termination
of this Agreement for a period of three (3) years.
6. LIMITATION OF LIABILITY
In no event shall either party's total liability under this Agreement exceed the
total amount paid by Client to Provider in the twelve (12) months immediately
preceding the claim. Neither party shall be liable for any indirect, incidental,
consequential, or punitive damages, including lost profits or business interruption.
7. INDEMNIFICATION
Each party shall indemnify, defend, and hold harmless the other party from and
against any third-party claims, damages, or expenses arising out of such party's
breach of this Agreement or gross negligence. Provider shall indemnify Client
against any claims that the deliverables infringe any third-party intellectual
property rights.
8. WARRANTY DISCLAIMER
Provider warrants that Services will be performed in a professional and workmanlike
manner. EXCEPT AS EXPRESSLY SET FORTH HEREIN, PROVIDER MAKES NO OTHER WARRANTIES,
EXPRESS OR IMPLIED, INCLUDING WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
PARTICULAR PURPOSE.
9. FORCE MAJEURE
Neither party shall be liable for any failure or delay in performance due to
circumstances beyond its reasonable control, including acts of God, war, terrorism,
pandemic, or natural disasters.
10. ASSIGNMENT
Neither party may assign this Agreement without the prior written consent of the
other party, except that either party may assign this Agreement to a successor in
connection with a merger, acquisition, or sale of substantially all of its assets.
11. DISPUTE RESOLUTION
Any disputes arising out of this Agreement shall first be attempted to be resolved
through good faith negotiations. If negotiations fail, disputes shall be resolved
through binding arbitration in accordance with the rules of the American Arbitration
Association.
12. GOVERNING LAW
This Agreement shall be governed by and construed in accordance with the laws of
the State of Delaware, without regard to its conflict of law provisions.
13. ENTIRE AGREEMENT
This Agreement constitutes the entire agreement between the parties and supersedes
all prior agreements and understandings, whether written or oral, relating to the
subject matter hereof.
IN WITNESS WHEREOF, the parties have executed this Agreement as of the date first
written above.
"""
def analyze_contract_file(filepath: str, model) -> Dict[str, Any]:
    """
    Read a contract from disk and run the full-document analysis on it.

    Args:
        filepath: Path to the contract text file (read as UTF-8).
        model: Trained Legal-BERT model, handed unchanged to
            analyze_full_document.

    Returns:
        The value returned by analyze_full_document (called with
        return_details=True), or an empty dict when the file could not
        be read.
    """
    print(f"π Loading contract from: {filepath}")

    try:
        with open(filepath, 'r', encoding='utf-8') as source:
            text = source.read()
    except Exception as err:
        # Any read failure is reported and signalled with an empty result.
        print(f"β Error reading file: {err}")
        return {}
    else:
        print(f" Contract length: {len(text)} characters")
        # Analyze the full document
        return analyze_full_document(text, model, return_details=True)
def demo_clause_extraction():
    """
    Demo: Show how paragraph splitting works.

    Prints a fixed four-sentence sample paragraph, splits it with
    split_into_clauses(method='sentence'), then prints every returned
    clause with a 1-based index followed by the total count.
    """
    print("\n" + "=" * 80)
    print("π§ DEMO: CLAUSE EXTRACTION")
    print("=" * 80)

    print("\nπ Original Paragraph:")
    print("-" * 80)
    sample = """
Provider shall provide software development services as described in Exhibit A.
Client shall pay Provider the fees specified in Exhibit B within thirty days.
Either party may terminate this Agreement upon thirty days written notice.
All intellectual property rights shall remain with Provider.
"""
    print(sample)

    print("\nβοΈ Extracted Clauses:")
    print("-" * 80)
    clauses = split_into_clauses(sample, method='sentence')
    for i, clause in enumerate(clauses, 1):
        print(f"{i}. {clause}")

    # The summary must be a single-line literal: a raw line break inside a
    # single-quoted f-string is a syntax error.
    print(f"\n✅ Total clauses extracted: {len(clauses)}")
def demo_full_analysis():
    """
    Demo: Show how full document analysis works
    (Note: Requires trained model - this is a mockup)

    Prints usage instructions for running the analysis with a trained
    model, then renders a hard-coded sample result through
    print_document_analysis to illustrate the output structure.
    """
    rule = "=" * 80
    for banner_line in ("\n" + rule, "π DEMO: FULL DOCUMENT ANALYSIS", rule):
        print(banner_line)

    usage_lines = (
        "\nβ οΈ Note: This demo requires a trained model.",
        " After training, use:",
        " >>> from model import LegalBERTMultiTask",
        " >>> model = LegalBERTMultiTask.load('checkpoints/best_model.pt')",
        " >>> results = analyze_full_document(contract_text, model)",
    )
    for usage_line in usage_lines:
        print(usage_line)

    # For now, just show what the output would look like
    print("\nπ Sample Output Structure:")
    print("-" * 80)

    summary = {
        'total_clauses': 47,
        'analyzed_clauses': 47,
        'overall_severity': 6.2,
        'max_severity': 8.5,
        'overall_importance': 7.1,
        'high_risk_clause_count': 8,
        'dominant_risk_type': 'LIABILITY_RISK',
        'dominant_risk_percentage': 23.4,
    }
    distribution = {
        'LIABILITY_RISK': 0.234,
        'TERMINATION_RISK': 0.170,
        'INDEMNITY_RISK': 0.149,
        'IP_RISK': 0.128,
        'CONFIDENTIALITY_RISK': 0.106,
        'OPERATIONAL_RISK': 0.128,
        'COMPLIANCE_RISK': 0.085,
    }
    flagged_clause = {
        'clause_id': 15,
        'clause_text': "In no event shall either party's total liability...",
        'risk_name': 'LIABILITY_RISK',
        'severity': 8.5,
        'confidence': 0.92,
    }

    print_document_analysis({
        'document_summary': summary,
        'risk_distribution': distribution,
        'high_risk_clauses': [flagged_clause],
    })
def main():
    """
    Main execution: parse the command line and run the demo or a real analysis.

    Demo mode (``--demo`` given, or no ``--contract`` path) runs the two
    built-in demos and, with ``--show-clauses``, lists the clauses extracted
    from DEMO_CONTRACT.

    Otherwise a model checkpoint is loaded from ``--model-path``, the contract
    file is analyzed (hierarchical, sliding-window context, or standard
    clause-level analysis), the results are printed, and they are saved as
    JSON to ``<contract path without extension>_analysis.json``.
    """
    parser = argparse.ArgumentParser(
        description='Analyze full contract documents for risk'
    )
    parser.add_argument(
        '--contract',
        type=str,
        help='Path to contract text file'
    )
    parser.add_argument(
        '--demo',
        action='store_true',
        help='Run demo with built-in sample contract'
    )
    parser.add_argument(
        '--model-path',
        type=str,
        default='checkpoints/best_model.pt',
        help='Path to trained model checkpoint'
    )
    parser.add_argument(
        '--show-clauses',
        action='store_true',
        help='Show extracted clauses (for debugging)'
    )
    parser.add_argument(
        '--hierarchical',
        action='store_true',
        help='Use hierarchical document-level analysis (with context)'
    )
    parser.add_argument(
        '--use-context',
        action='store_true',
        help='Use sliding window context for clause analysis'
    )
    args = parser.parse_args()

    # Demo mode
    if args.demo or (not args.contract):
        print("=" * 80)
        print("π― LEGAL-BERT: FULL DOCUMENT ANALYSIS DEMO")
        print("=" * 80)

        # Demo 1: Clause extraction
        demo_clause_extraction()

        # Demo 2: Full analysis
        demo_full_analysis()

        # Show clause extraction for demo contract
        if args.show_clauses:
            print("\n" + "=" * 80)
            print("π DEMO CONTRACT CLAUSES")
            print("=" * 80)
            clauses = split_into_clauses(DEMO_CONTRACT, method='legal')
            for i, clause in enumerate(clauses, 1):
                print(f"\n{i}. {clause[:100]}..." if len(clause) > 100 else f"\n{i}. {clause}")
            print(f"\n✅ Total: {len(clauses)} clauses")
        return

    # Real analysis mode. args.contract is guaranteed to be set from here on,
    # because the demo branch above returns whenever it is missing.
    print("=" * 80)
    print("π― LEGAL-BERT: CONTRACT RISK ANALYSIS")
    print("=" * 80)

    # Load model
    print(f"\nπ€ Loading model from: {args.model_path}")
    try:
        # Heavy / project-local imports are deferred so demo mode works
        # without them being installed.
        import torch
        from model import FullyLearningBasedLegalBERT, HierarchicalLegalBERT
        from config import LegalBertConfig

        # NOTE(review): recent PyTorch releases default torch.load to
        # weights_only=True, which may reject a checkpoint that stores a
        # pickled config object - confirm against the training script.
        checkpoint = torch.load(args.model_path, map_location='cpu')
        config = checkpoint.get('config', LegalBertConfig())
        model_type = checkpoint.get('model_type', 'standard')
        num_risks = len(checkpoint.get('discovered_patterns', {}))

        if model_type == 'hierarchical' or args.hierarchical:
            print("π Loading Hierarchical BERT model (context-aware)")
            model = HierarchicalLegalBERT(
                config,
                num_discovered_risks=num_risks,
                hidden_dim=config.hierarchical_hidden_dim,
                num_lstm_layers=config.hierarchical_num_lstm_layers
            )
        else:
            print("π Loading Standard BERT model")
            model = FullyLearningBasedLegalBERT(config, num_discovered_risks=num_risks)

        model.load_state_dict(checkpoint['model_state_dict'])
        model.eval()
        print("✅ Model loaded successfully")
    except Exception as e:
        # Top-level boundary: report any load failure and stop.
        print(f"β Error loading model: {e}")
        print("\nπ‘ Tip: Train the model first using:")
        print(" python train.py")
        return

    # Analyze contract
    if args.hierarchical and isinstance(model, HierarchicalLegalBERT):
        print("\nπ Running hierarchical document-level analysis (with context)...")
        from utils import analyze_with_section_context
        # Read as UTF-8 inside a context manager so the handle is closed and
        # the encoding matches analyze_contract_file.
        with open(args.contract, 'r', encoding='utf-8') as f:
            contract_text = f.read()
        results = analyze_with_section_context(contract_text, model)
    elif args.use_context:
        print("\nπ Running clause-level analysis (with sliding window context)...")
        with open(args.contract, 'r', encoding='utf-8') as f:
            contract_text = f.read()
        results = analyze_full_document(
            contract_text,
            model,
            use_context=True,
            context_window=2
        )
    else:
        print("\nπ Running standard clause-level analysis...")
        results = analyze_contract_file(args.contract, model)

    if results:
        print_document_analysis(results)

        # Save results
        import json
        import os

        # Swap only the final extension. A plain str.replace('.txt', ...)
        # leaves a non-.txt path unchanged, so the JSON would overwrite the
        # contract itself, and it also rewrites ".txt" inside directory names.
        path_root, _ = os.path.splitext(args.contract)
        output_path = path_root + '_analysis.json'
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(results, f, indent=2)
        print(f"\nπΎ Full results saved to: {output_path}")
# Run the command-line interface only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()