# code2-repo / analyze_document.py
# Uploaded by Deepu1965 using huggingface_hub (commit 9b1c753, verified).
"""
Real-World Contract Analysis Demo
This script shows how to analyze full contract documents (not just individual clauses).
Usage:
python analyze_document.py --contract path/to/contract.txt
python analyze_document.py --demo # Use built-in demo contract
"""
import argparse
from typing import Dict, Any
from utils import (
split_into_clauses,
analyze_full_document,
print_document_analysis
)
# Demo contract for testing: a built-in sample service agreement with thirteen
# numbered sections. main() passes it to split_into_clauses(..., method='legal')
# when the demo is run with --show-clauses.
DEMO_CONTRACT = """
SERVICE AGREEMENT
This Service Agreement ("Agreement") is entered into as of January 1, 2024,
by and between TechCorp Inc. ("Provider") and ClientCo LLC ("Client").
1. SERVICES
Provider shall provide software development services as described in Exhibit A
to Client in accordance with the terms and conditions set forth herein.
Provider shall use commercially reasonable efforts to perform the Services.
2. PAYMENT TERMS
Client shall pay Provider the fees specified in Exhibit B within thirty (30) days
of receipt of each invoice. Late payments shall incur a penalty of 1.5% per month
or the maximum rate permitted by law, whichever is less.
3. TERM AND TERMINATION
This Agreement shall commence on the Effective Date and continue for a period of
twelve (12) months unless earlier terminated as provided herein. Either party may
terminate this Agreement upon thirty (30) days written notice to the other party.
Upon termination, Client shall pay all fees due for Services performed up to the
termination date.
4. INTELLECTUAL PROPERTY
All intellectual property rights in the deliverables shall remain the exclusive
property of Provider. Client is granted a non-exclusive, non-transferable license
to use the deliverables solely for Client's internal business purposes.
5. CONFIDENTIALITY
Each party agrees to maintain in confidence all Confidential Information disclosed
by the other party. The receiving party shall not disclose such information to any
third party without prior written consent. This obligation shall survive termination
of this Agreement for a period of three (3) years.
6. LIMITATION OF LIABILITY
In no event shall either party's total liability under this Agreement exceed the
total amount paid by Client to Provider in the twelve (12) months immediately
preceding the claim. Neither party shall be liable for any indirect, incidental,
consequential, or punitive damages, including lost profits or business interruption.
7. INDEMNIFICATION
Each party shall indemnify, defend, and hold harmless the other party from and
against any third-party claims, damages, or expenses arising out of such party's
breach of this Agreement or gross negligence. Provider shall indemnify Client
against any claims that the deliverables infringe any third-party intellectual
property rights.
8. WARRANTY DISCLAIMER
Provider warrants that Services will be performed in a professional and workmanlike
manner. EXCEPT AS EXPRESSLY SET FORTH HEREIN, PROVIDER MAKES NO OTHER WARRANTIES,
EXPRESS OR IMPLIED, INCLUDING WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
PARTICULAR PURPOSE.
9. FORCE MAJEURE
Neither party shall be liable for any failure or delay in performance due to
circumstances beyond its reasonable control, including acts of God, war, terrorism,
pandemic, or natural disasters.
10. ASSIGNMENT
Neither party may assign this Agreement without the prior written consent of the
other party, except that either party may assign this Agreement to a successor in
connection with a merger, acquisition, or sale of substantially all of its assets.
11. DISPUTE RESOLUTION
Any disputes arising out of this Agreement shall first be attempted to be resolved
through good faith negotiations. If negotiations fail, disputes shall be resolved
through binding arbitration in accordance with the rules of the American Arbitration
Association.
12. GOVERNING LAW
This Agreement shall be governed by and construed in accordance with the laws of
the State of Delaware, without regard to its conflict of law provisions.
13. ENTIRE AGREEMENT
This Agreement constitutes the entire agreement between the parties and supersedes
all prior agreements and understandings, whether written or oral, relating to the
subject matter hereof.
IN WITNESS WHEREOF, the parties have executed this Agreement as of the date first
written above.
"""
def analyze_contract_file(filepath: str, model) -> Dict[str, Any]:
    """
    Analyze a contract stored in a UTF-8 text file.

    Args:
        filepath: Path to contract text file
        model: Trained Legal-BERT model, forwarded unchanged to
            ``analyze_full_document``

    Returns:
        The result of ``analyze_full_document`` for the file's text (called
        with ``return_details=True``), or an empty dict when the file cannot
        be read or decoded.
    """
    print(f"πŸ“„ Loading contract from: {filepath}")

    # Only expected read failures are handled here: OSError covers missing or
    # unreadable paths, ValueError covers invalid UTF-8 (UnicodeDecodeError)
    # and malformed path strings. Anything else is a programming error and is
    # allowed to propagate instead of being reported as a file problem.
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            contract_text = f.read()
    except (OSError, ValueError) as e:
        print(f"❌ Error reading file: {e}")
        return {}

    print(f" Contract length: {len(contract_text)} characters")

    # Analyze the full document
    return analyze_full_document(contract_text, model, return_details=True)
def demo_clause_extraction():
    """
    Demo: print a short sample paragraph, then the clauses that
    ``split_into_clauses`` extracts from it with ``method='sentence'``.
    """
    heavy_rule = "=" * 80
    light_rule = "-" * 80

    # Same text as a triple-quoted block: a leading newline followed by one
    # sentence per line, each terminated by a newline.
    sample = (
        "\n"
        "Provider shall provide software development services as described in Exhibit A.\n"
        "Client shall pay Provider the fees specified in Exhibit B within thirty days.\n"
        "Either party may terminate this Agreement upon thirty days written notice.\n"
        "All intellectual property rights shall remain with Provider.\n"
    )

    print("\n" + heavy_rule)
    print("πŸ”§ DEMO: CLAUSE EXTRACTION")
    print(heavy_rule)

    print("\nπŸ“ Original Paragraph:")
    print(light_rule)
    print(sample)

    print("\nβœ‚οΈ Extracted Clauses:")
    print(light_rule)

    clauses = split_into_clauses(sample, method='sentence')
    # Clauses are listed with 1-based numbering.
    for position, clause_text in enumerate(clauses, start=1):
        print(f"{position}. {clause_text}")

    print(f"\nβœ… Total clauses extracted: {len(clauses)}")
def demo_full_analysis():
    """
    Demo: show what a full document analysis report looks like.

    No model is loaded or run here. A hard-coded sample result (a mockup) is
    handed to ``print_document_analysis`` to illustrate the output structure.
    """
    heavy_rule = "=" * 80

    print("\n" + heavy_rule)
    print("πŸ“Š DEMO: FULL DOCUMENT ANALYSIS")
    print(heavy_rule)

    # Instructions for running the real analysis once a model is trained.
    usage_note = (
        "\n⚠️ Note: This demo requires a trained model.",
        " After training, use:",
        " >>> from model import LegalBERTMultiTask",
        " >>> model = LegalBERTMultiTask.load('checkpoints/best_model.pt')",
        " >>> results = analyze_full_document(contract_text, model)",
    )
    for note_line in usage_note:
        print(note_line)

    # For now, just show what the output would look like
    print("\nπŸ“„ Sample Output Structure:")
    print("-" * 80)

    summary = {
        'total_clauses': 47,
        'analyzed_clauses': 47,
        'overall_severity': 6.2,
        'max_severity': 8.5,
        'overall_importance': 7.1,
        'high_risk_clause_count': 8,
        'dominant_risk_type': 'LIABILITY_RISK',
        'dominant_risk_percentage': 23.4,
    }
    distribution = {
        'LIABILITY_RISK': 0.234,
        'TERMINATION_RISK': 0.170,
        'INDEMNITY_RISK': 0.149,
        'IP_RISK': 0.128,
        'CONFIDENTIALITY_RISK': 0.106,
        'OPERATIONAL_RISK': 0.128,
        'COMPLIANCE_RISK': 0.085,
    }
    top_clause = {
        'clause_id': 15,
        'clause_text': "In no event shall either party's total liability...",
        'risk_name': 'LIABILITY_RISK',
        'severity': 8.5,
        'confidence': 0.92,
    }

    sample_result = {
        'document_summary': summary,
        'risk_distribution': distribution,
        'high_risk_clauses': [top_clause],
    }
    print_document_analysis(sample_result)
def _build_arg_parser():
    """Create the command-line parser for this script."""
    parser = argparse.ArgumentParser(
        description='Analyze full contract documents for risk'
    )
    parser.add_argument(
        '--contract',
        type=str,
        help='Path to contract text file'
    )
    parser.add_argument(
        '--demo',
        action='store_true',
        help='Run demo with built-in sample contract'
    )
    parser.add_argument(
        '--model-path',
        type=str,
        default='checkpoints/best_model.pt',
        help='Path to trained model checkpoint'
    )
    parser.add_argument(
        '--show-clauses',
        action='store_true',
        help='Show extracted clauses (for debugging)'
    )
    parser.add_argument(
        '--hierarchical',
        action='store_true',
        help='Use hierarchical document-level analysis (with context)'
    )
    parser.add_argument(
        '--use-context',
        action='store_true',
        help='Use sliding window context for clause analysis'
    )
    return parser


def _run_demo(show_clauses):
    """Run the built-in demos; optionally list the demo contract's clauses."""
    print("=" * 80)
    print("🎯 LEGAL-BERT: FULL DOCUMENT ANALYSIS DEMO")
    print("=" * 80)

    # Demo 1: Clause extraction
    demo_clause_extraction()

    # Demo 2: Full analysis
    demo_full_analysis()

    if not show_clauses:
        return

    # Show clause extraction for demo contract
    print("\n" + "=" * 80)
    print("πŸ“‹ DEMO CONTRACT CLAUSES")
    print("=" * 80)
    clauses = split_into_clauses(DEMO_CONTRACT, method='legal')
    for i, clause in enumerate(clauses, 1):
        # Clauses longer than 100 characters are previewed, not printed in full.
        print(f"\n{i}. {clause[:100]}..." if len(clause) > 100 else f"\n{i}. {clause}")
    print(f"\nβœ… Total: {len(clauses)} clauses")


def _load_model(model_path, force_hierarchical):
    """
    Rebuild the model from a checkpoint and put it in eval mode.

    Args:
        model_path: Path to the checkpoint file.
        force_hierarchical: Build the hierarchical model even if the
            checkpoint's 'model_type' is not 'hierarchical'.

    Returns:
        The loaded model, or None after printing the error and a training tip
        when any step of the loading sequence fails.
    """
    print(f"\nπŸ€– Loading model from: {model_path}")
    try:
        # These imports stay inside the try so that a missing dependency is
        # reported like any other loading failure.
        import torch
        from model import FullyLearningBasedLegalBERT, HierarchicalLegalBERT
        from config import LegalBertConfig

        # NOTE(review): torch.load deserializes with pickle unless restricted
        # via weights_only; only load checkpoints from a trusted source.
        checkpoint = torch.load(model_path, map_location='cpu')
        config = checkpoint.get('config', LegalBertConfig())
        model_type = checkpoint.get('model_type', 'standard')
        num_risks = len(checkpoint.get('discovered_patterns', {}))

        if model_type == 'hierarchical' or force_hierarchical:
            print("πŸ“Š Loading Hierarchical BERT model (context-aware)")
            model = HierarchicalLegalBERT(
                config,
                num_discovered_risks=num_risks,
                hidden_dim=config.hierarchical_hidden_dim,
                num_lstm_layers=config.hierarchical_num_lstm_layers
            )
        else:
            print("πŸ“Š Loading Standard BERT model")
            model = FullyLearningBasedLegalBERT(config, num_discovered_risks=num_risks)

        model.load_state_dict(checkpoint['model_state_dict'])
        model.eval()
        print("βœ… Model loaded successfully")
    except Exception as e:
        # Top-level CLI boundary: report the failure and let the caller stop.
        print(f"❌ Error loading model: {e}")
        print("\nπŸ’‘ Tip: Train the model first using:")
        print(" python train.py")
        return None
    return model


def _read_contract_text(path):
    """Return the UTF-8 text of *path*, or None after printing the read error."""
    try:
        with open(path, 'r', encoding='utf-8') as f:
            return f.read()
    except (OSError, ValueError) as e:
        print(f"❌ Error reading file: {e}")
        return None


def _analysis_output_path(contract_path):
    """Return the contract path with its extension replaced by '_analysis.json'."""
    import os

    # splitext only strips the final extension, so a contract that is not a
    # '.txt' file can never map back onto its own path and be overwritten.
    root, _ext = os.path.splitext(contract_path)
    return root + '_analysis.json'


def main():
    """
    Main execution.

    Parses the command line, then either runs the built-in demos (``--demo`` or
    no ``--contract``) or loads the model from ``--model-path``, analyzes the
    contract, prints the report and saves the results as JSON next to the
    contract file. Failures to load the model or read the contract are printed
    and end the run without raising.
    """
    import json

    args = _build_arg_parser().parse_args()

    # Demo mode
    if args.demo or not args.contract:
        _run_demo(args.show_clauses)
        return

    # Real analysis mode
    print("=" * 80)
    print("🎯 LEGAL-BERT: CONTRACT RISK ANALYSIS")
    print("=" * 80)

    # Load model
    model = _load_model(args.model_path, args.hierarchical)
    if model is None:
        return

    # Analyze contract. With --hierarchical the loader always builds the
    # hierarchical model, so the flag alone selects this branch.
    if args.hierarchical:
        print("\nπŸ” Running hierarchical document-level analysis (with context)...")
        from utils import analyze_with_section_context
        contract_text = _read_contract_text(args.contract)
        if contract_text is None:
            return
        results = analyze_with_section_context(contract_text, model)
    elif args.use_context:
        print("\nπŸ” Running clause-level analysis (with sliding window context)...")
        contract_text = _read_contract_text(args.contract)
        if contract_text is None:
            return
        results = analyze_full_document(
            contract_text,
            model,
            use_context=True,
            context_window=2
        )
    else:
        print("\nπŸ” Running standard clause-level analysis...")
        results = analyze_contract_file(args.contract, model)

    if results:
        print_document_analysis(results)

        # Save results
        output_path = _analysis_output_path(args.contract)
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(results, f, indent=2)
        print(f"\nπŸ’Ύ Full results saved to: {output_path}")
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()