Spaces:
Sleeping
Sleeping
Commit ·
de8f1bc
1
Parent(s): d15efc9
code refactor
Browse files- app.py +290 -98
- config/model_config.py +21 -1
- launch.py +3 -3
- model_manager/llm_manager.py +19 -7
- reporter/pdf_generator.py +1 -50
- services/clause_extractor.py +2 -1
- services/contract_classifier.py +116 -150
- services/risk_analyzer.py +279 -222
- services/summary_generator.py +570 -0
- static/index.html +307 -112
- utils/logger.py +3 -12
app.py
CHANGED
|
@@ -3,19 +3,24 @@ FastAPI Application for AI Contract Risk Analyzer
|
|
| 3 |
Complete pre-loading approach: All models loaded at startup
|
| 4 |
Direct synchronous flow: Upload → Analyze → Return Results + PDF
|
| 5 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
from fastapi.responses import JSONResponse, FileResponse, Response
|
| 7 |
-
from fastapi import FastAPI, File, UploadFile, HTTPException, Form
|
| 8 |
from fastapi.middleware.cors import CORSMiddleware
|
| 9 |
from fastapi.staticfiles import StaticFiles
|
| 10 |
from pydantic import BaseModel, Field
|
| 11 |
-
from typing import List, Optional, Dict, Any
|
| 12 |
-
import uuid
|
| 13 |
-
import os
|
| 14 |
-
from datetime import datetime
|
| 15 |
-
from pathlib import Path
|
| 16 |
import sys
|
| 17 |
-
import tempfile
|
| 18 |
-
import io
|
| 19 |
|
| 20 |
# Add parent directory to path
|
| 21 |
sys.path.append(str(Path(__file__).parent))
|
|
@@ -37,19 +42,140 @@ from services.protection_checker import ProtectionChecker
|
|
| 37 |
from services.llm_interpreter import LLMClauseInterpreter
|
| 38 |
from services.negotiation_engine import NegotiationEngine
|
| 39 |
from services.market_comparator import MarketComparator
|
|
|
|
| 40 |
|
| 41 |
# Import PDF generator
|
| 42 |
from reporter.pdf_generator import generate_pdf_report
|
| 43 |
|
| 44 |
-
#
|
| 45 |
-
|
| 46 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
|
| 48 |
# ============================================================================
|
| 49 |
# PYDANTIC SCHEMAS
|
| 50 |
# ============================================================================
|
| 51 |
|
| 52 |
-
class
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
"""Health check response"""
|
| 54 |
status: str
|
| 55 |
version: str
|
|
@@ -58,14 +184,16 @@ class HealthResponse(BaseModel):
|
|
| 58 |
services_loaded: int
|
| 59 |
memory_usage_mb: float
|
| 60 |
|
| 61 |
-
|
|
|
|
| 62 |
"""Analysis options"""
|
| 63 |
max_clauses: int = Field(default=15, ge=5, le=30)
|
| 64 |
interpret_clauses: bool = Field(default=True)
|
| 65 |
generate_negotiation_points: bool = Field(default=True)
|
| 66 |
compare_to_market: bool = Field(default=True)
|
| 67 |
|
| 68 |
-
|
|
|
|
| 69 |
"""Complete analysis result"""
|
| 70 |
analysis_id: str
|
| 71 |
timestamp: str
|
|
@@ -81,12 +209,22 @@ class AnalysisResult(BaseModel):
|
|
| 81 |
metadata: Dict[str, Any]
|
| 82 |
pdf_available: bool = True
|
| 83 |
|
| 84 |
-
|
|
|
|
| 85 |
"""Error response"""
|
| 86 |
error: str
|
| 87 |
detail: str
|
| 88 |
timestamp: str
|
| 89 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
# ============================================================================
|
| 91 |
# SERVICE INITIALIZATION WITH FULL PRE-LOADING
|
| 92 |
# ============================================================================
|
|
@@ -234,7 +372,7 @@ class PreloadedAnalysisService:
|
|
| 234 |
|
| 235 |
# Step 1: Classify contract
|
| 236 |
classification = self.services["classifier"].classify_contract(contract_text)
|
| 237 |
-
classification_dict = classification.to_dict()
|
| 238 |
actual_category = classification.category
|
| 239 |
|
| 240 |
log_info(f"Contract classified as: {actual_category}")
|
|
@@ -255,7 +393,7 @@ class PreloadedAnalysisService:
|
|
| 255 |
|
| 256 |
# Extract clauses
|
| 257 |
clauses = extractor.extract_clauses(contract_text, options.max_clauses)
|
| 258 |
-
clauses_dict = [clause.to_dict() for clause in clauses]
|
| 259 |
log_info(f"Extracted {len(clauses)} clauses")
|
| 260 |
|
| 261 |
# Step 3: Map to ContractType and get appropriate risk analyzer
|
|
@@ -279,17 +417,17 @@ class PreloadedAnalysisService:
|
|
| 279 |
|
| 280 |
# Analyze risk
|
| 281 |
risk_score = risk_analyzer.analyze_risk(contract_text, clauses)
|
| 282 |
-
risk_dict = risk_score.to_dict()
|
| 283 |
log_info(f"Risk analysis completed: {risk_dict['overall_score']}/100")
|
| 284 |
|
| 285 |
# Step 4: Find unfavorable terms
|
| 286 |
unfavorable_terms = self.services["term_analyzer"].analyze_unfavorable_terms(contract_text, clauses)
|
| 287 |
-
unfavorable_dict = [term.to_dict() for term in unfavorable_terms]
|
| 288 |
log_info(f"Found {len(unfavorable_terms)} unfavorable terms")
|
| 289 |
|
| 290 |
# Step 5: Check missing protections
|
| 291 |
missing_protections = self.services["protection_checker"].check_missing_protections(contract_text, clauses)
|
| 292 |
-
missing_dict = [prot.to_dict() for prot in missing_protections]
|
| 293 |
log_info(f"Found {len(missing_protections)} missing protections")
|
| 294 |
|
| 295 |
# Optional steps
|
|
@@ -302,7 +440,7 @@ class PreloadedAnalysisService:
|
|
| 302 |
interpretations = self.services["interpreter"].interpret_clauses(
|
| 303 |
clauses, min(10, options.max_clauses)
|
| 304 |
)
|
| 305 |
-
interpretations_dict = [interp.to_dict() for interp in interpretations]
|
| 306 |
log_info(f"Interpreted {len(interpretations)} clauses")
|
| 307 |
except Exception as e:
|
| 308 |
log_error(f"Clause interpretation failed: {e}")
|
|
@@ -313,7 +451,7 @@ class PreloadedAnalysisService:
|
|
| 313 |
negotiation_points = self.services["negotiation_engine"].generate_negotiation_points(
|
| 314 |
risk_score, unfavorable_terms, missing_protections, clauses, 7
|
| 315 |
)
|
| 316 |
-
negotiation_dict = [point.to_dict() for point in negotiation_points]
|
| 317 |
log_info(f"Generated {len(negotiation_points)} negotiation points")
|
| 318 |
except Exception as e:
|
| 319 |
log_error(f"Negotiation points generation failed: {e}")
|
|
@@ -322,7 +460,7 @@ class PreloadedAnalysisService:
|
|
| 322 |
if options.compare_to_market:
|
| 323 |
try:
|
| 324 |
market_comparisons = self.services["market_comparator"].compare_to_market(clauses)
|
| 325 |
-
market_dict = [comp.to_dict() for comp in market_comparisons]
|
| 326 |
log_info(f"Compared {len(market_comparisons)} clauses to market")
|
| 327 |
except Exception as e:
|
| 328 |
log_error(f"Market comparison failed: {e}")
|
|
@@ -330,7 +468,7 @@ class PreloadedAnalysisService:
|
|
| 330 |
|
| 331 |
# Generate executive summary
|
| 332 |
executive_summary = self._generate_executive_summary(
|
| 333 |
-
classification_dict, risk_dict, unfavorable_dict, missing_dict
|
| 334 |
)
|
| 335 |
|
| 336 |
# Build result
|
|
@@ -365,53 +503,87 @@ class PreloadedAnalysisService:
|
|
| 365 |
raise
|
| 366 |
|
| 367 |
def _generate_executive_summary(self, classification: Dict, risk_score: Dict,
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
risk_msg = "SIGNIFICANT CONCERNS"
|
| 381 |
-
elif score >= 40:
|
| 382 |
-
risk_msg = "MODERATE RISK"
|
| 383 |
-
else:
|
| 384 |
-
risk_msg = "LOW RISK"
|
| 385 |
-
|
| 386 |
-
return f"This {category} contract scored {score}/100 ({risk_level.upper()} risk). {risk_msg}. Found {len(unfavorable_terms)} unfavorable terms ({critical_terms} critical) and {len(missing_protections)} missing protections ({critical_protections} critical). Review detailed analysis below."
|
| 387 |
|
| 388 |
# ============================================================================
|
| 389 |
-
# FASTAPI
|
| 390 |
# ============================================================================
|
| 391 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 392 |
app = FastAPI(
|
| 393 |
title=settings.APP_NAME,
|
| 394 |
version=settings.APP_VERSION,
|
| 395 |
description="AI-powered contract risk analysis with complete model pre-loading",
|
| 396 |
docs_url="/api/docs",
|
| 397 |
-
redoc_url="/api/redoc"
|
|
|
|
|
|
|
| 398 |
)
|
| 399 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 400 |
# Serve static files
|
| 401 |
-
app.mount("/static", StaticFiles(directory=
|
| 402 |
|
| 403 |
-
# CORS middleware
|
| 404 |
app.add_middleware(
|
| 405 |
CORSMiddleware,
|
| 406 |
-
allow_origins=
|
| 407 |
-
allow_credentials=
|
| 408 |
-
allow_methods=
|
| 409 |
-
allow_headers=
|
| 410 |
)
|
| 411 |
|
| 412 |
-
# Initialize pre-loaded analysis service
|
| 413 |
-
analysis_service = PreloadedAnalysisService()
|
| 414 |
-
|
| 415 |
# ============================================================================
|
| 416 |
# HELPER FUNCTIONS
|
| 417 |
# ============================================================================
|
|
@@ -468,11 +640,14 @@ def validate_contract_text(text: str) -> tuple[bool, str]:
|
|
| 468 |
@app.get("/")
|
| 469 |
async def serve_frontend():
|
| 470 |
"""Serve the frontend"""
|
| 471 |
-
return FileResponse(
|
| 472 |
|
| 473 |
@app.get("/api/v1/health", response_model=HealthResponse)
|
| 474 |
async def health_check():
|
| 475 |
"""Health check endpoint with service status"""
|
|
|
|
|
|
|
|
|
|
| 476 |
service_status = analysis_service.get_service_status()
|
| 477 |
|
| 478 |
return HealthResponse(
|
|
@@ -487,6 +662,8 @@ async def health_check():
|
|
| 487 |
@app.get("/api/v1/status")
|
| 488 |
async def get_detailed_status():
|
| 489 |
"""Get detailed service status"""
|
|
|
|
|
|
|
| 490 |
return analysis_service.get_service_status()
|
| 491 |
|
| 492 |
@app.post("/api/v1/analyze/file", response_model=AnalysisResult)
|
|
@@ -498,6 +675,9 @@ async def analyze_contract_file(
|
|
| 498 |
compare_to_market: bool = Form(True)
|
| 499 |
):
|
| 500 |
"""Analyze uploaded contract file - DIRECT SYNC FLOW"""
|
|
|
|
|
|
|
|
|
|
| 501 |
try:
|
| 502 |
# Validate file
|
| 503 |
is_valid, message = validate_file(file)
|
|
@@ -552,6 +732,9 @@ async def analyze_contract_text(
|
|
| 552 |
compare_to_market: bool = Form(True)
|
| 553 |
):
|
| 554 |
"""Analyze pasted contract text - DIRECT SYNC FLOW"""
|
|
|
|
|
|
|
|
|
|
| 555 |
try:
|
| 556 |
# Validate contract text
|
| 557 |
is_valid, message = validate_contract_text(contract_text)
|
|
@@ -609,6 +792,9 @@ async def generate_pdf_from_analysis(analysis_result: Dict[str, Any]):
|
|
| 609 |
@app.get("/api/v1/categories")
|
| 610 |
async def get_contract_categories():
|
| 611 |
"""Get list of supported contract categories"""
|
|
|
|
|
|
|
|
|
|
| 612 |
try:
|
| 613 |
categories = analysis_service.services["classifier"].get_all_categories()
|
| 614 |
return {"categories": categories}
|
|
@@ -616,55 +802,55 @@ async def get_contract_categories():
|
|
| 616 |
log_error(f"Categories fetch failed: {e}")
|
| 617 |
raise HTTPException(status_code=500, detail=f"Failed to get categories: {str(e)}")
|
| 618 |
|
| 619 |
-
@app.post("/api/v1/validate/file")
|
| 620 |
async def validate_contract_file(file: UploadFile = File(...)):
|
| 621 |
"""Quick validation endpoint"""
|
| 622 |
try:
|
| 623 |
is_valid, message = validate_file(file)
|
| 624 |
if not is_valid:
|
| 625 |
-
return
|
| 626 |
|
| 627 |
contract_text = read_contract_file(file)
|
| 628 |
|
| 629 |
# Validate text length
|
| 630 |
is_valid_text, text_message = validate_contract_text(contract_text)
|
| 631 |
if not is_valid_text:
|
| 632 |
-
return
|
| 633 |
|
| 634 |
# Validate contract structure using ContractValidator
|
| 635 |
validator = ContractValidator()
|
| 636 |
report = validator.get_validation_report(contract_text)
|
| 637 |
|
| 638 |
-
return
|
| 639 |
-
|
| 640 |
-
|
| 641 |
-
|
| 642 |
-
|
| 643 |
-
|
| 644 |
|
| 645 |
except Exception as e:
|
| 646 |
log_error(f"File validation failed: {e}")
|
| 647 |
raise HTTPException(status_code=400, detail=f"Validation failed: {str(e)}")
|
| 648 |
|
| 649 |
-
@app.post("/api/v1/validate/text")
|
| 650 |
async def validate_contract_text_endpoint(contract_text: str = Form(...)):
|
| 651 |
"""Validate pasted contract text"""
|
| 652 |
try:
|
| 653 |
# Validate text length
|
| 654 |
is_valid, message = validate_contract_text(contract_text)
|
| 655 |
if not is_valid:
|
| 656 |
-
return
|
| 657 |
|
| 658 |
# Validate contract structure using ContractValidator
|
| 659 |
validator = ContractValidator()
|
| 660 |
report = validator.get_validation_report(contract_text)
|
| 661 |
|
| 662 |
-
return
|
| 663 |
-
|
| 664 |
-
|
| 665 |
-
|
| 666 |
-
|
| 667 |
-
|
| 668 |
|
| 669 |
except Exception as e:
|
| 670 |
log_error(f"Text validation failed: {e}")
|
|
@@ -677,7 +863,7 @@ async def validate_contract_text_endpoint(contract_text: str = Form(...)):
|
|
| 677 |
@app.exception_handler(HTTPException)
|
| 678 |
async def http_exception_handler(request, exc):
|
| 679 |
"""Handle HTTP exceptions"""
|
| 680 |
-
return
|
| 681 |
status_code=exc.status_code,
|
| 682 |
content=ErrorResponse(
|
| 683 |
error=exc.detail,
|
|
@@ -690,7 +876,7 @@ async def http_exception_handler(request, exc):
|
|
| 690 |
async def general_exception_handler(request, exc):
|
| 691 |
"""Handle general exceptions"""
|
| 692 |
log_error(f"Unhandled exception: {exc}")
|
| 693 |
-
return
|
| 694 |
status_code=500,
|
| 695 |
content=ErrorResponse(
|
| 696 |
error="Internal server error",
|
|
@@ -700,34 +886,40 @@ async def general_exception_handler(request, exc):
|
|
| 700 |
)
|
| 701 |
|
| 702 |
# ============================================================================
|
| 703 |
-
#
|
| 704 |
# ============================================================================
|
| 705 |
|
| 706 |
-
@app.
|
| 707 |
-
async def
|
| 708 |
-
|
| 709 |
-
|
| 710 |
-
|
| 711 |
-
|
| 712 |
-
|
| 713 |
-
|
| 714 |
-
|
| 715 |
-
"""Shutdown event"""
|
| 716 |
-
log_info("🛑 Shutting down server...")
|
| 717 |
-
log_info("✅ Server shutdown complete")
|
| 718 |
|
| 719 |
# ============================================================================
|
| 720 |
# MAIN
|
| 721 |
# ============================================================================
|
| 722 |
-
|
| 723 |
if __name__ == "__main__":
|
| 724 |
-
|
| 725 |
-
|
| 726 |
-
|
| 727 |
-
|
| 728 |
-
|
| 729 |
-
|
| 730 |
-
|
| 731 |
-
|
| 732 |
-
|
| 733 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
Complete pre-loading approach: All models loaded at startup
|
| 4 |
Direct synchronous flow: Upload → Analyze → Return Results + PDF
|
| 5 |
"""
|
| 6 |
+
import signal
|
| 7 |
+
import os
|
| 8 |
+
import time
|
| 9 |
+
import json
|
| 10 |
+
import uuid
|
| 11 |
+
from typing import Any, List, Dict, Optional
|
| 12 |
+
from pathlib import Path
|
| 13 |
+
from datetime import datetime
|
| 14 |
+
from contextlib import asynccontextmanager
|
| 15 |
+
|
| 16 |
+
import uvicorn
|
| 17 |
+
import numpy as np
|
| 18 |
+
from fastapi import FastAPI, File, UploadFile, HTTPException, Form, Request
|
| 19 |
from fastapi.responses import JSONResponse, FileResponse, Response
|
|
|
|
| 20 |
from fastapi.middleware.cors import CORSMiddleware
|
| 21 |
from fastapi.staticfiles import StaticFiles
|
| 22 |
from pydantic import BaseModel, Field
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
import sys
|
|
|
|
|
|
|
| 24 |
|
| 25 |
# Add parent directory to path
|
| 26 |
sys.path.append(str(Path(__file__).parent))
|
|
|
|
| 42 |
from services.llm_interpreter import LLMClauseInterpreter
|
| 43 |
from services.negotiation_engine import NegotiationEngine
|
| 44 |
from services.market_comparator import MarketComparator
|
| 45 |
+
from services.summary_generator import SummaryGenerator
|
| 46 |
|
| 47 |
# Import PDF generator
|
| 48 |
from reporter.pdf_generator import generate_pdf_report
|
| 49 |
|
| 50 |
+
# ============================================================================
|
| 51 |
+
# CUSTOM SERIALIZATION
|
| 52 |
+
# ============================================================================
|
| 53 |
+
|
| 54 |
+
class NumpyJSONEncoder(json.JSONEncoder):
|
| 55 |
+
"""
|
| 56 |
+
Custom JSON encoder that handles NumPy types and custom objects
|
| 57 |
+
"""
|
| 58 |
+
def default(self, obj: Any) -> Any:
|
| 59 |
+
"""
|
| 60 |
+
Convert non-serializable objects to JSON-serializable types
|
| 61 |
+
"""
|
| 62 |
+
# NumPy types
|
| 63 |
+
if isinstance(obj, (np.float32, np.float64)):
|
| 64 |
+
return float(obj)
|
| 65 |
+
elif isinstance(obj, (np.int32, np.int64, np.int8, np.uint8)):
|
| 66 |
+
return int(obj)
|
| 67 |
+
elif isinstance(obj, np.ndarray):
|
| 68 |
+
return obj.tolist()
|
| 69 |
+
elif isinstance(obj, np.bool_):
|
| 70 |
+
return bool(obj)
|
| 71 |
+
elif hasattr(obj, 'item'):
|
| 72 |
+
# numpy scalar types
|
| 73 |
+
return obj.item()
|
| 74 |
+
|
| 75 |
+
# Custom objects with to_dict method
|
| 76 |
+
elif hasattr(obj, 'to_dict'):
|
| 77 |
+
return obj.to_dict()
|
| 78 |
+
|
| 79 |
+
# Pydantic models
|
| 80 |
+
elif hasattr(obj, 'dict'):
|
| 81 |
+
return obj.dict()
|
| 82 |
+
|
| 83 |
+
# Handle other types
|
| 84 |
+
elif isinstance(obj, (set, tuple)):
|
| 85 |
+
return list(obj)
|
| 86 |
+
|
| 87 |
+
return super().default(obj)
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
class NumpyJSONResponse(JSONResponse):
|
| 91 |
+
"""
|
| 92 |
+
Custom JSON response that handles NumPy types
|
| 93 |
+
"""
|
| 94 |
+
def render(self, content: Any) -> bytes:
|
| 95 |
+
"""
|
| 96 |
+
Render content with NumPy type handling
|
| 97 |
+
"""
|
| 98 |
+
return json.dumps(
|
| 99 |
+
content,
|
| 100 |
+
ensure_ascii=False,
|
| 101 |
+
allow_nan=False,
|
| 102 |
+
indent=None,
|
| 103 |
+
separators=(",", ":"),
|
| 104 |
+
cls=NumpyJSONEncoder,
|
| 105 |
+
).encode("utf-8")
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
def convert_numpy_types(obj: Any) -> Any:
|
| 109 |
+
"""
|
| 110 |
+
Recursively convert numpy types to Python native types
|
| 111 |
+
"""
|
| 112 |
+
if obj is None:
|
| 113 |
+
return None
|
| 114 |
+
|
| 115 |
+
# Handle dictionaries
|
| 116 |
+
if isinstance(obj, dict):
|
| 117 |
+
return {key: convert_numpy_types(value) for key, value in obj.items()}
|
| 118 |
+
|
| 119 |
+
# Handle lists, tuples, sets
|
| 120 |
+
elif isinstance(obj, (list, tuple, set)):
|
| 121 |
+
return [convert_numpy_types(item) for item in obj]
|
| 122 |
+
|
| 123 |
+
# Handle NumPy types
|
| 124 |
+
elif isinstance(obj, (np.float32, np.float64)):
|
| 125 |
+
return float(obj)
|
| 126 |
+
elif isinstance(obj, (np.int32, np.int64, np.int8, np.uint8)):
|
| 127 |
+
return int(obj)
|
| 128 |
+
elif isinstance(obj, np.ndarray):
|
| 129 |
+
return obj.tolist()
|
| 130 |
+
elif isinstance(obj, np.bool_):
|
| 131 |
+
return bool(obj)
|
| 132 |
+
elif hasattr(obj, 'item'):
|
| 133 |
+
return obj.item()
|
| 134 |
+
|
| 135 |
+
# Handle custom objects with to_dict method
|
| 136 |
+
elif hasattr(obj, 'to_dict'):
|
| 137 |
+
return convert_numpy_types(obj.to_dict())
|
| 138 |
+
|
| 139 |
+
# Handle Pydantic models
|
| 140 |
+
elif hasattr(obj, 'dict'):
|
| 141 |
+
return convert_numpy_types(obj.dict())
|
| 142 |
+
|
| 143 |
+
# Return as-is for other types
|
| 144 |
+
else:
|
| 145 |
+
return obj
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
def safe_serialize_response(data: Any) -> Any:
|
| 149 |
+
"""
|
| 150 |
+
Safely serialize response data ensuring all types are JSON-compatible
|
| 151 |
+
"""
|
| 152 |
+
return convert_numpy_types(data)
|
| 153 |
+
|
| 154 |
|
| 155 |
# ============================================================================
|
| 156 |
# PYDANTIC SCHEMAS
|
| 157 |
# ============================================================================
|
| 158 |
|
| 159 |
+
class SerializableBaseModel(BaseModel):
|
| 160 |
+
"""
|
| 161 |
+
Base model with enhanced serialization for NumPy types
|
| 162 |
+
"""
|
| 163 |
+
def dict(self, *args, **kwargs) -> Dict[str, Any]:
|
| 164 |
+
"""
|
| 165 |
+
Override dict method to handle NumPy types
|
| 166 |
+
"""
|
| 167 |
+
data = super().dict(*args, **kwargs)
|
| 168 |
+
return convert_numpy_types(data)
|
| 169 |
+
|
| 170 |
+
def json(self, *args, **kwargs) -> str:
|
| 171 |
+
"""
|
| 172 |
+
Override json method to handle NumPy types
|
| 173 |
+
"""
|
| 174 |
+
data = self.dict(*args, **kwargs)
|
| 175 |
+
return json.dumps(data, cls=NumpyJSONEncoder, *args, **kwargs)
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
class HealthResponse(SerializableBaseModel):
|
| 179 |
"""Health check response"""
|
| 180 |
status: str
|
| 181 |
version: str
|
|
|
|
| 184 |
services_loaded: int
|
| 185 |
memory_usage_mb: float
|
| 186 |
|
| 187 |
+
|
| 188 |
+
class AnalysisOptions(SerializableBaseModel):
|
| 189 |
"""Analysis options"""
|
| 190 |
max_clauses: int = Field(default=15, ge=5, le=30)
|
| 191 |
interpret_clauses: bool = Field(default=True)
|
| 192 |
generate_negotiation_points: bool = Field(default=True)
|
| 193 |
compare_to_market: bool = Field(default=True)
|
| 194 |
|
| 195 |
+
|
| 196 |
+
class AnalysisResult(SerializableBaseModel):
|
| 197 |
"""Complete analysis result"""
|
| 198 |
analysis_id: str
|
| 199 |
timestamp: str
|
|
|
|
| 209 |
metadata: Dict[str, Any]
|
| 210 |
pdf_available: bool = True
|
| 211 |
|
| 212 |
+
|
| 213 |
+
class ErrorResponse(SerializableBaseModel):
|
| 214 |
"""Error response"""
|
| 215 |
error: str
|
| 216 |
detail: str
|
| 217 |
timestamp: str
|
| 218 |
|
| 219 |
+
|
| 220 |
+
class FileValidationResponse(SerializableBaseModel):
|
| 221 |
+
"""File validation response"""
|
| 222 |
+
valid: bool
|
| 223 |
+
message: str
|
| 224 |
+
confidence: Optional[float] = None
|
| 225 |
+
report: Optional[Dict[str, Any]] = None
|
| 226 |
+
|
| 227 |
+
|
| 228 |
# ============================================================================
|
| 229 |
# SERVICE INITIALIZATION WITH FULL PRE-LOADING
|
| 230 |
# ============================================================================
|
|
|
|
| 372 |
|
| 373 |
# Step 1: Classify contract
|
| 374 |
classification = self.services["classifier"].classify_contract(contract_text)
|
| 375 |
+
classification_dict = safe_serialize_response(classification.to_dict())
|
| 376 |
actual_category = classification.category
|
| 377 |
|
| 378 |
log_info(f"Contract classified as: {actual_category}")
|
|
|
|
| 393 |
|
| 394 |
# Extract clauses
|
| 395 |
clauses = extractor.extract_clauses(contract_text, options.max_clauses)
|
| 396 |
+
clauses_dict = [safe_serialize_response(clause.to_dict()) for clause in clauses]
|
| 397 |
log_info(f"Extracted {len(clauses)} clauses")
|
| 398 |
|
| 399 |
# Step 3: Map to ContractType and get appropriate risk analyzer
|
|
|
|
| 417 |
|
| 418 |
# Analyze risk
|
| 419 |
risk_score = risk_analyzer.analyze_risk(contract_text, clauses)
|
| 420 |
+
risk_dict = safe_serialize_response(risk_score.to_dict())
|
| 421 |
log_info(f"Risk analysis completed: {risk_dict['overall_score']}/100")
|
| 422 |
|
| 423 |
# Step 4: Find unfavorable terms
|
| 424 |
unfavorable_terms = self.services["term_analyzer"].analyze_unfavorable_terms(contract_text, clauses)
|
| 425 |
+
unfavorable_dict = [safe_serialize_response(term.to_dict()) for term in unfavorable_terms]
|
| 426 |
log_info(f"Found {len(unfavorable_terms)} unfavorable terms")
|
| 427 |
|
| 428 |
# Step 5: Check missing protections
|
| 429 |
missing_protections = self.services["protection_checker"].check_missing_protections(contract_text, clauses)
|
| 430 |
+
missing_dict = [safe_serialize_response(prot.to_dict()) for prot in missing_protections]
|
| 431 |
log_info(f"Found {len(missing_protections)} missing protections")
|
| 432 |
|
| 433 |
# Optional steps
|
|
|
|
| 440 |
interpretations = self.services["interpreter"].interpret_clauses(
|
| 441 |
clauses, min(10, options.max_clauses)
|
| 442 |
)
|
| 443 |
+
interpretations_dict = [safe_serialize_response(interp.to_dict()) for interp in interpretations]
|
| 444 |
log_info(f"Interpreted {len(interpretations)} clauses")
|
| 445 |
except Exception as e:
|
| 446 |
log_error(f"Clause interpretation failed: {e}")
|
|
|
|
| 451 |
negotiation_points = self.services["negotiation_engine"].generate_negotiation_points(
|
| 452 |
risk_score, unfavorable_terms, missing_protections, clauses, 7
|
| 453 |
)
|
| 454 |
+
negotiation_dict = [safe_serialize_response(point.to_dict()) for point in negotiation_points]
|
| 455 |
log_info(f"Generated {len(negotiation_points)} negotiation points")
|
| 456 |
except Exception as e:
|
| 457 |
log_error(f"Negotiation points generation failed: {e}")
|
|
|
|
| 460 |
if options.compare_to_market:
|
| 461 |
try:
|
| 462 |
market_comparisons = self.services["market_comparator"].compare_to_market(clauses)
|
| 463 |
+
market_dict = [safe_serialize_response(comp.to_dict()) for comp in market_comparisons]
|
| 464 |
log_info(f"Compared {len(market_comparisons)} clauses to market")
|
| 465 |
except Exception as e:
|
| 466 |
log_error(f"Market comparison failed: {e}")
|
|
|
|
| 468 |
|
| 469 |
# Generate executive summary
|
| 470 |
executive_summary = self._generate_executive_summary(
|
| 471 |
+
classification_dict, risk_dict, unfavorable_dict, missing_dict, clauses,
|
| 472 |
)
|
| 473 |
|
| 474 |
# Build result
|
|
|
|
| 503 |
raise
|
| 504 |
|
| 505 |
def _generate_executive_summary(self, classification: Dict, risk_score: Dict,
|
| 506 |
+
unfavorable_terms: List, missing_protections: List,
|
| 507 |
+
clauses: List[Dict]) -> str:
|
| 508 |
+
"""Generate executive summary using LLM"""
|
| 509 |
+
summary_generator = SummaryGenerator()
|
| 510 |
+
|
| 511 |
+
return summary_generator.generate_executive_summary(
|
| 512 |
+
classification=classification,
|
| 513 |
+
risk_analysis=risk_score,
|
| 514 |
+
unfavorable_terms=unfavorable_terms,
|
| 515 |
+
missing_protections=missing_protections,
|
| 516 |
+
clauses=clauses
|
| 517 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 518 |
|
| 519 |
# ============================================================================
|
| 520 |
+
# FASTAPI APPLICATION
|
| 521 |
# ============================================================================
|
| 522 |
|
| 523 |
+
# Global instances
|
| 524 |
+
analysis_service: Optional[PreloadedAnalysisService] = None
|
| 525 |
+
app_start_time = time.time()
|
| 526 |
+
|
| 527 |
+
# Initialize logger
|
| 528 |
+
ContractAnalyzerLogger.setup(log_dir="logs", app_name="contract_analyzer")
|
| 529 |
+
logger = ContractAnalyzerLogger.get_logger()
|
| 530 |
+
|
| 531 |
+
@asynccontextmanager
|
| 532 |
+
async def lifespan(app: FastAPI):
|
| 533 |
+
"""Lifespan events for startup and shutdown"""
|
| 534 |
+
global analysis_service
|
| 535 |
+
|
| 536 |
+
# Startup
|
| 537 |
+
log_info(f"🚀 {settings.APP_NAME} v{settings.APP_VERSION} STARTING UP...")
|
| 538 |
+
log_info("=" * 80)
|
| 539 |
+
|
| 540 |
+
try:
|
| 541 |
+
# Initialize analysis service
|
| 542 |
+
analysis_service = PreloadedAnalysisService()
|
| 543 |
+
log_info("✅ All services initialized successfully")
|
| 544 |
+
|
| 545 |
+
except Exception as e:
|
| 546 |
+
log_error(f"Startup failed: {e}")
|
| 547 |
+
raise
|
| 548 |
+
|
| 549 |
+
log_info(f"📍 Server: {settings.HOST}:{settings.PORT}")
|
| 550 |
+
log_info("=" * 80)
|
| 551 |
+
log_info("✅ AI Contract Risk Analyzer Ready!")
|
| 552 |
+
|
| 553 |
+
try:
|
| 554 |
+
yield
|
| 555 |
+
finally:
|
| 556 |
+
# Shutdown - This runs on normal shutdown and KeyboardInterrupt
|
| 557 |
+
log_info("🛑 Shutting down server...")
|
| 558 |
+
log_info("✅ Server shutdown complete")
|
| 559 |
+
|
| 560 |
+
|
| 561 |
app = FastAPI(
|
| 562 |
title=settings.APP_NAME,
|
| 563 |
version=settings.APP_VERSION,
|
| 564 |
description="AI-powered contract risk analysis with complete model pre-loading",
|
| 565 |
docs_url="/api/docs",
|
| 566 |
+
redoc_url="/api/redoc",
|
| 567 |
+
default_response_class=NumpyJSONResponse,
|
| 568 |
+
lifespan=lifespan
|
| 569 |
)
|
| 570 |
|
| 571 |
+
# Get absolute paths
|
| 572 |
+
BASE_DIR = Path(__file__).parent
|
| 573 |
+
STATIC_DIR = BASE_DIR / "static"
|
| 574 |
+
|
| 575 |
# Serve static files
|
| 576 |
+
app.mount("/static", StaticFiles(directory=str(STATIC_DIR)), name="static")
|
| 577 |
|
| 578 |
+
# Enhanced CORS middleware
|
| 579 |
app.add_middleware(
|
| 580 |
CORSMiddleware,
|
| 581 |
+
allow_origins=["*"], # For development - restrict in production
|
| 582 |
+
allow_credentials=True,
|
| 583 |
+
allow_methods=["*"],
|
| 584 |
+
allow_headers=["*"],
|
| 585 |
)
|
| 586 |
|
|
|
|
|
|
|
|
|
|
| 587 |
# ============================================================================
|
| 588 |
# HELPER FUNCTIONS
|
| 589 |
# ============================================================================
|
|
|
|
| 640 |
@app.get("/")
|
| 641 |
async def serve_frontend():
|
| 642 |
"""Serve the frontend"""
|
| 643 |
+
return FileResponse(str(STATIC_DIR / "index.html"))
|
| 644 |
|
| 645 |
@app.get("/api/v1/health", response_model=HealthResponse)
|
| 646 |
async def health_check():
|
| 647 |
"""Health check endpoint with service status"""
|
| 648 |
+
if not analysis_service:
|
| 649 |
+
raise HTTPException(status_code=503, detail="Service not initialized")
|
| 650 |
+
|
| 651 |
service_status = analysis_service.get_service_status()
|
| 652 |
|
| 653 |
return HealthResponse(
|
|
|
|
| 662 |
@app.get("/api/v1/status")
|
| 663 |
async def get_detailed_status():
|
| 664 |
"""Get detailed service status"""
|
| 665 |
+
if not analysis_service:
|
| 666 |
+
raise HTTPException(status_code=503, detail="Service not initialized")
|
| 667 |
return analysis_service.get_service_status()
|
| 668 |
|
| 669 |
@app.post("/api/v1/analyze/file", response_model=AnalysisResult)
|
|
|
|
| 675 |
compare_to_market: bool = Form(True)
|
| 676 |
):
|
| 677 |
"""Analyze uploaded contract file - DIRECT SYNC FLOW"""
|
| 678 |
+
if not analysis_service:
|
| 679 |
+
raise HTTPException(status_code=503, detail="Service not initialized")
|
| 680 |
+
|
| 681 |
try:
|
| 682 |
# Validate file
|
| 683 |
is_valid, message = validate_file(file)
|
|
|
|
| 732 |
compare_to_market: bool = Form(True)
|
| 733 |
):
|
| 734 |
"""Analyze pasted contract text - DIRECT SYNC FLOW"""
|
| 735 |
+
if not analysis_service:
|
| 736 |
+
raise HTTPException(status_code=503, detail="Service not initialized")
|
| 737 |
+
|
| 738 |
try:
|
| 739 |
# Validate contract text
|
| 740 |
is_valid, message = validate_contract_text(contract_text)
|
|
|
|
| 792 |
@app.get("/api/v1/categories")
|
| 793 |
async def get_contract_categories():
|
| 794 |
"""Get list of supported contract categories"""
|
| 795 |
+
if not analysis_service:
|
| 796 |
+
raise HTTPException(status_code=503, detail="Service not initialized")
|
| 797 |
+
|
| 798 |
try:
|
| 799 |
categories = analysis_service.services["classifier"].get_all_categories()
|
| 800 |
return {"categories": categories}
|
|
|
|
| 802 |
log_error(f"Categories fetch failed: {e}")
|
| 803 |
raise HTTPException(status_code=500, detail=f"Failed to get categories: {str(e)}")
|
| 804 |
|
| 805 |
+
@app.post("/api/v1/validate/file", response_model=FileValidationResponse)
|
| 806 |
async def validate_contract_file(file: UploadFile = File(...)):
|
| 807 |
"""Quick validation endpoint"""
|
| 808 |
try:
|
| 809 |
is_valid, message = validate_file(file)
|
| 810 |
if not is_valid:
|
| 811 |
+
return FileValidationResponse(valid=False, message=message)
|
| 812 |
|
| 813 |
contract_text = read_contract_file(file)
|
| 814 |
|
| 815 |
# Validate text length
|
| 816 |
is_valid_text, text_message = validate_contract_text(contract_text)
|
| 817 |
if not is_valid_text:
|
| 818 |
+
return FileValidationResponse(valid=False, message=text_message)
|
| 819 |
|
| 820 |
# Validate contract structure using ContractValidator
|
| 821 |
validator = ContractValidator()
|
| 822 |
report = validator.get_validation_report(contract_text)
|
| 823 |
|
| 824 |
+
return FileValidationResponse(
|
| 825 |
+
valid=report["scores"]["total"] > 50 and is_valid_text,
|
| 826 |
+
message="Contract appears valid" if report["scores"]["total"] > 50 else "May not be a valid contract",
|
| 827 |
+
confidence=report["scores"]["total"],
|
| 828 |
+
report=report
|
| 829 |
+
)
|
| 830 |
|
| 831 |
except Exception as e:
|
| 832 |
log_error(f"File validation failed: {e}")
|
| 833 |
raise HTTPException(status_code=400, detail=f"Validation failed: {str(e)}")
|
| 834 |
|
| 835 |
+
@app.post("/api/v1/validate/text", response_model=FileValidationResponse)
|
| 836 |
async def validate_contract_text_endpoint(contract_text: str = Form(...)):
|
| 837 |
"""Validate pasted contract text"""
|
| 838 |
try:
|
| 839 |
# Validate text length
|
| 840 |
is_valid, message = validate_contract_text(contract_text)
|
| 841 |
if not is_valid:
|
| 842 |
+
return FileValidationResponse(valid=False, message=message)
|
| 843 |
|
| 844 |
# Validate contract structure using ContractValidator
|
| 845 |
validator = ContractValidator()
|
| 846 |
report = validator.get_validation_report(contract_text)
|
| 847 |
|
| 848 |
+
return FileValidationResponse(
|
| 849 |
+
valid=report["scores"]["total"] > 50 and is_valid,
|
| 850 |
+
message="Contract appears valid" if report["scores"]["total"] > 50 else "May not be a valid contract",
|
| 851 |
+
confidence=report["scores"]["total"],
|
| 852 |
+
report=report
|
| 853 |
+
)
|
| 854 |
|
| 855 |
except Exception as e:
|
| 856 |
log_error(f"Text validation failed: {e}")
|
|
|
|
| 863 |
@app.exception_handler(HTTPException)
|
| 864 |
async def http_exception_handler(request, exc):
|
| 865 |
"""Handle HTTP exceptions"""
|
| 866 |
+
return NumpyJSONResponse(
|
| 867 |
status_code=exc.status_code,
|
| 868 |
content=ErrorResponse(
|
| 869 |
error=exc.detail,
|
|
|
|
| 876 |
async def general_exception_handler(request, exc):
|
| 877 |
"""Handle general exceptions"""
|
| 878 |
log_error(f"Unhandled exception: {exc}")
|
| 879 |
+
return NumpyJSONResponse(
|
| 880 |
status_code=500,
|
| 881 |
content=ErrorResponse(
|
| 882 |
error="Internal server error",
|
|
|
|
| 886 |
)
|
| 887 |
|
| 888 |
# ============================================================================
|
| 889 |
+
# REQUEST LOGGING MIDDLEWARE
|
| 890 |
# ============================================================================
|
| 891 |
|
| 892 |
+
@app.middleware("http")
|
| 893 |
+
async def log_requests(request: Request, call_next):
|
| 894 |
+
start_time = time.time()
|
| 895 |
+
response = await call_next(request)
|
| 896 |
+
process_time = time.time() - start_time
|
| 897 |
+
|
| 898 |
+
log_info(f"API Request: {request.method} {request.url.path} - Status: {response.status_code} - Duration: {process_time:.3f}s")
|
| 899 |
+
|
| 900 |
+
return response
|
|
|
|
|
|
|
|
|
|
| 901 |
|
| 902 |
# ============================================================================
|
| 903 |
# MAIN
|
| 904 |
# ============================================================================
|
|
|
|
| 905 |
if __name__ == "__main__":
|
| 906 |
+
def signal_handler(sig, frame):
|
| 907 |
+
print("\n👋 Received Ctrl+C, shutting down gracefully...")
|
| 908 |
+
sys.exit(0)
|
| 909 |
+
|
| 910 |
+
signal.signal(signal.SIGINT, signal_handler)
|
| 911 |
+
|
| 912 |
+
try:
|
| 913 |
+
uvicorn.run(
|
| 914 |
+
"app:app",
|
| 915 |
+
host=settings.HOST,
|
| 916 |
+
port=settings.PORT,
|
| 917 |
+
reload=settings.RELOAD,
|
| 918 |
+
workers=1,
|
| 919 |
+
log_level=settings.LOG_LEVEL.lower()
|
| 920 |
+
)
|
| 921 |
+
except KeyboardInterrupt:
|
| 922 |
+
print("\n🎯 Server stopped by user")
|
| 923 |
+
except Exception as e:
|
| 924 |
+
log_error(f"Server error: {e}")
|
| 925 |
+
sys.exit(1)
|
config/model_config.py
CHANGED
|
@@ -6,8 +6,13 @@ class ModelConfig:
|
|
| 6 |
"""
|
| 7 |
Model-specific configurations - FOR AI MODEL SETTINGS ONLY
|
| 8 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
# Model Architecture Settings
|
| 10 |
LEGAL_BERT = {"model_name" : "nlpaueb/legal-bert-base-uncased",
|
|
|
|
| 11 |
"task" : "clause-extraction",
|
| 12 |
"max_length" : 512,
|
| 13 |
"batch_size" : 16,
|
|
@@ -18,6 +23,7 @@ class ModelConfig:
|
|
| 18 |
|
| 19 |
# Embedding Model Settings
|
| 20 |
EMBEDDING_MODEL = {"model_name" : "sentence-transformers/all-MiniLM-L6-v2",
|
|
|
|
| 21 |
"dimension" : 384,
|
| 22 |
"pooling" : "mean",
|
| 23 |
"normalize" : True,
|
|
@@ -75,6 +81,20 @@ class ModelConfig:
|
|
| 75 |
"entity_confidence" : 0.8,
|
| 76 |
}
|
| 77 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
|
| 79 |
@classmethod
|
| 80 |
def get_model_config(cls, model_type: str) -> dict:
|
|
@@ -91,4 +111,4 @@ class ModelConfig:
|
|
| 91 |
"text_processing" : cls.TEXT_PROCESSING,
|
| 92 |
}
|
| 93 |
|
| 94 |
-
return config_map.get(model_type, {})
|
|
|
|
| 6 |
"""
|
| 7 |
Model-specific configurations - FOR AI MODEL SETTINGS ONLY
|
| 8 |
"""
|
| 9 |
+
# Directory Settings
|
| 10 |
+
MODEL_DIR = Path("models")
|
| 11 |
+
CACHE_DIR = Path("cache/models")
|
| 12 |
+
|
| 13 |
# Model Architecture Settings
|
| 14 |
LEGAL_BERT = {"model_name" : "nlpaueb/legal-bert-base-uncased",
|
| 15 |
+
"local_path" : MODEL_DIR / "legal-bert",
|
| 16 |
"task" : "clause-extraction",
|
| 17 |
"max_length" : 512,
|
| 18 |
"batch_size" : 16,
|
|
|
|
| 23 |
|
| 24 |
# Embedding Model Settings
|
| 25 |
EMBEDDING_MODEL = {"model_name" : "sentence-transformers/all-MiniLM-L6-v2",
|
| 26 |
+
"local_path" : MODEL_DIR / "embeddings",
|
| 27 |
"dimension" : 384,
|
| 28 |
"pooling" : "mean",
|
| 29 |
"normalize" : True,
|
|
|
|
| 81 |
"entity_confidence" : 0.8,
|
| 82 |
}
|
| 83 |
|
| 84 |
+
@classmethod
|
| 85 |
+
def ensure_directories(cls):
|
| 86 |
+
"""
|
| 87 |
+
Ensure all required directories exist
|
| 88 |
+
"""
|
| 89 |
+
directories = [cls.MODEL_DIR,
|
| 90 |
+
cls.CACHE_DIR,
|
| 91 |
+
cls.MODEL_DIR / "legal-bert",
|
| 92 |
+
cls.MODEL_DIR / "embeddings",
|
| 93 |
+
]
|
| 94 |
+
|
| 95 |
+
for directory in directories:
|
| 96 |
+
directory.mkdir(parents = True, exist_ok = True)
|
| 97 |
+
|
| 98 |
|
| 99 |
@classmethod
|
| 100 |
def get_model_config(cls, model_type: str) -> dict:
|
|
|
|
| 111 |
"text_processing" : cls.TEXT_PROCESSING,
|
| 112 |
}
|
| 113 |
|
| 114 |
+
return config_map.get(model_type, {})
|
launch.py
CHANGED
|
@@ -57,10 +57,10 @@ def start_api():
|
|
| 57 |
time.sleep(3)
|
| 58 |
|
| 59 |
try:
|
| 60 |
-
response = requests.get("http://localhost:
|
| 61 |
if response.status_code == 200:
|
| 62 |
-
print("✓ API Server running at: http://localhost:
|
| 63 |
-
print("✓ Documentation at: http://localhost:
|
| 64 |
return True
|
| 65 |
except:
|
| 66 |
pass
|
|
|
|
| 57 |
time.sleep(3)
|
| 58 |
|
| 59 |
try:
|
| 60 |
+
response = requests.get("http://localhost:8005/api/v1/health", timeout=5)
|
| 61 |
if response.status_code == 200:
|
| 62 |
+
print("✓ API Server running at: http://localhost:8005")
|
| 63 |
+
print("✓ Documentation at: http://localhost:8005/api/docs")
|
| 64 |
return True
|
| 65 |
except:
|
| 66 |
pass
|
model_manager/llm_manager.py
CHANGED
|
@@ -11,6 +11,7 @@ from pathlib import Path
|
|
| 11 |
from typing import Literal
|
| 12 |
from typing import Optional
|
| 13 |
from dataclasses import dataclass
|
|
|
|
| 14 |
|
| 15 |
# Add parent directory to path for imports
|
| 16 |
sys.path.append(str(Path(__file__).parent.parent))
|
|
@@ -80,14 +81,14 @@ class LLMManager:
|
|
| 80 |
Unified LLM manager for multiple providers : handles Ollama (local), OpenAI API, and Anthropic API
|
| 81 |
"""
|
| 82 |
def __init__(self, default_provider: LLMProvider = LLMProvider.OLLAMA, ollama_base_url: Optional[str] = None,
|
| 83 |
-
|
| 84 |
"""
|
| 85 |
Initialize LLM Manager
|
| 86 |
|
| 87 |
Arguments:
|
| 88 |
----------
|
| 89 |
default_provider : Default LLM provider to use
|
| 90 |
-
|
| 91 |
ollama_base_url : Ollama server URL (default: http://localhost:11434)
|
| 92 |
|
| 93 |
openai_api_key : OpenAI API key (or set OPENAI_API_KEY env var)
|
|
@@ -101,9 +102,20 @@ class LLMManager:
|
|
| 101 |
self.config = ModelConfig()
|
| 102 |
|
| 103 |
# Ollama configuration
|
| 104 |
-
self.ollama_base_url = ollama_base_url or
|
| 105 |
-
self.ollama_model =
|
| 106 |
-
self.ollama_timeout =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
|
| 108 |
# OpenAI configuration
|
| 109 |
self.openai_api_key = openai_api_key
|
|
@@ -116,7 +128,7 @@ class LLMManager:
|
|
| 116 |
|
| 117 |
if (ANTHROPIC_AVAILABLE and self.anthropic_api_key):
|
| 118 |
self.anthropic_client = anthropic.Anthropic(api_key = self.anthropic_api_key)
|
| 119 |
-
|
| 120 |
else:
|
| 121 |
self.anthropic_client = None
|
| 122 |
|
|
@@ -133,7 +145,7 @@ class LLMManager:
|
|
| 133 |
openai_available = OPENAI_AVAILABLE and bool(self.openai_api_key),
|
| 134 |
anthropic_available = ANTHROPIC_AVAILABLE and bool(self.anthropic_api_key),
|
| 135 |
)
|
| 136 |
-
|
| 137 |
|
| 138 |
# PROVIDER AVAILABILITY CHECKS
|
| 139 |
def _check_ollama_available(self) -> bool:
|
|
|
|
| 11 |
from typing import Literal
|
| 12 |
from typing import Optional
|
| 13 |
from dataclasses import dataclass
|
| 14 |
+
from config.settings import settings
|
| 15 |
|
| 16 |
# Add parent directory to path for imports
|
| 17 |
sys.path.append(str(Path(__file__).parent.parent))
|
|
|
|
| 81 |
Unified LLM manager for multiple providers : handles Ollama (local), OpenAI API, and Anthropic API
|
| 82 |
"""
|
| 83 |
def __init__(self, default_provider: LLMProvider = LLMProvider.OLLAMA, ollama_base_url: Optional[str] = None,
|
| 84 |
+
openai_api_key: Optional[str] = None, anthropic_api_key: Optional[str] = None):
|
| 85 |
"""
|
| 86 |
Initialize LLM Manager
|
| 87 |
|
| 88 |
Arguments:
|
| 89 |
----------
|
| 90 |
default_provider : Default LLM provider to use
|
| 91 |
+
|
| 92 |
ollama_base_url : Ollama server URL (default: http://localhost:11434)
|
| 93 |
|
| 94 |
openai_api_key : OpenAI API key (or set OPENAI_API_KEY env var)
|
|
|
|
| 102 |
self.config = ModelConfig()
|
| 103 |
|
| 104 |
# Ollama configuration
|
| 105 |
+
self.ollama_base_url = ollama_base_url or "http://localhost:11434" # Default Ollama URL
|
| 106 |
+
self.ollama_model = "mistral:7b" # Default model
|
| 107 |
+
self.ollama_timeout = 300 # Default timeout
|
| 108 |
+
|
| 109 |
+
# Get settings from environment or use defaults
|
| 110 |
+
try:
|
| 111 |
+
|
| 112 |
+
self.ollama_base_url = ollama_base_url or settings.OLLAMA_BASE_URL
|
| 113 |
+
self.ollama_model = settings.OLLAMA_MODEL
|
| 114 |
+
self.ollama_timeout = settings.OLLAMA_TIMEOUT
|
| 115 |
+
|
| 116 |
+
except ImportError:
|
| 117 |
+
# Fallback to defaults if settings not available
|
| 118 |
+
pass
|
| 119 |
|
| 120 |
# OpenAI configuration
|
| 121 |
self.openai_api_key = openai_api_key
|
|
|
|
| 128 |
|
| 129 |
if (ANTHROPIC_AVAILABLE and self.anthropic_api_key):
|
| 130 |
self.anthropic_client = anthropic.Anthropic(api_key = self.anthropic_api_key)
|
| 131 |
+
|
| 132 |
else:
|
| 133 |
self.anthropic_client = None
|
| 134 |
|
|
|
|
| 145 |
openai_available = OPENAI_AVAILABLE and bool(self.openai_api_key),
|
| 146 |
anthropic_available = ANTHROPIC_AVAILABLE and bool(self.anthropic_api_key),
|
| 147 |
)
|
| 148 |
+
|
| 149 |
|
| 150 |
# PROVIDER AVAILABILITY CHECKS
|
| 151 |
def _check_ollama_available(self) -> bool:
|
reporter/pdf_generator.py
CHANGED
|
@@ -67,7 +67,7 @@ class PDFReportGenerator:
|
|
| 67 |
|
| 68 |
# Body text
|
| 69 |
self.styles.add(ParagraphStyle(
|
| 70 |
-
name='
|
| 71 |
parent=self.styles['Normal'],
|
| 72 |
fontSize=10,
|
| 73 |
leading=14,
|
|
@@ -445,52 +445,3 @@ def generate_pdf_report(analysis_result: Dict[str, Any],
|
|
| 445 |
generator = PDFReportGenerator()
|
| 446 |
return generator.generate_report(analysis_result, output_path)
|
| 447 |
|
| 448 |
-
|
| 449 |
-
if __name__ == "__main__":
|
| 450 |
-
# Test with sample data
|
| 451 |
-
sample_result = {
|
| 452 |
-
"analysis_id": "test-123",
|
| 453 |
-
"timestamp": datetime.now().isoformat(),
|
| 454 |
-
"risk_analysis": {
|
| 455 |
-
"overall_score": 85,
|
| 456 |
-
"risk_level": "CRITICAL",
|
| 457 |
-
"risk_breakdown": [
|
| 458 |
-
{
|
| 459 |
-
"category": "Restrictive Covenants",
|
| 460 |
-
"score": 95,
|
| 461 |
-
"summary": "The agreement contains exceptionally broad and long-lasting non-compete (24 months) and non-solicitation (5 years) clauses."
|
| 462 |
-
},
|
| 463 |
-
{
|
| 464 |
-
"category": "Penalties & Termination",
|
| 465 |
-
"score": 90,
|
| 466 |
-
"summary": "The contract includes severe penalties for breach, including forfeiture of earned salary."
|
| 467 |
-
}
|
| 468 |
-
]
|
| 469 |
-
},
|
| 470 |
-
"executive_summary": "This employment agreement is heavily skewed in favor of the Employer, presenting a very high risk.",
|
| 471 |
-
"unfavorable_terms": [
|
| 472 |
-
{
|
| 473 |
-
"term": "Undefined Post-Probation Salary",
|
| 474 |
-
"clause_reference": "Clause 8.2",
|
| 475 |
-
"severity": "critical",
|
| 476 |
-
"explanation": "Post-probation salary is undefined ('as discussed').",
|
| 477 |
-
"suggested_fix": "Insist that the exact salary be explicitly stated."
|
| 478 |
-
}
|
| 479 |
-
],
|
| 480 |
-
"missing_protections": [
|
| 481 |
-
{
|
| 482 |
-
"protection": "Defined Post-Probation Salary",
|
| 483 |
-
"importance": "critical",
|
| 484 |
-
"explanation": "The contract lacks a specific, written salary commitment."
|
| 485 |
-
}
|
| 486 |
-
],
|
| 487 |
-
"negotiation_points": [
|
| 488 |
-
{
|
| 489 |
-
"issue": "Post-probation salary",
|
| 490 |
-
"rationale": "Must be explicitly defined in writing before signing."
|
| 491 |
-
}
|
| 492 |
-
]
|
| 493 |
-
}
|
| 494 |
-
|
| 495 |
-
buffer = generate_pdf_report(sample_result, "test_report.pdf")
|
| 496 |
-
print("Test PDF generated successfully!")
|
|
|
|
| 67 |
|
| 68 |
# Body text
|
| 69 |
self.styles.add(ParagraphStyle(
|
| 70 |
+
name='CustomBodyText',
|
| 71 |
parent=self.styles['Normal'],
|
| 72 |
fontSize=10,
|
| 73 |
leading=14,
|
|
|
|
| 445 |
generator = PDFReportGenerator()
|
| 446 |
return generator.generate_report(analysis_result, output_path)
|
| 447 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
services/clause_extractor.py
CHANGED
|
@@ -21,6 +21,7 @@ from utils.logger import log_info
|
|
| 21 |
from utils.logger import log_error
|
| 22 |
from utils.text_processor import TextProcessor
|
| 23 |
from utils.logger import ContractAnalyzerLogger
|
|
|
|
| 24 |
|
| 25 |
|
| 26 |
@dataclass
|
|
@@ -604,7 +605,7 @@ class ClauseExtractor:
|
|
| 604 |
Extract risk indicator keywords from clause text
|
| 605 |
"""
|
| 606 |
text_lower = text.lower()
|
| 607 |
-
found_indicators =
|
| 608 |
|
| 609 |
for severity, indicators in self.RISK_INDICATORS.items():
|
| 610 |
for indicator in indicators:
|
|
|
|
| 21 |
from utils.logger import log_error
|
| 22 |
from utils.text_processor import TextProcessor
|
| 23 |
from utils.logger import ContractAnalyzerLogger
|
| 24 |
+
from model_manager.model_loader import ModelLoader
|
| 25 |
|
| 26 |
|
| 27 |
@dataclass
|
|
|
|
| 605 |
Extract risk indicator keywords from clause text
|
| 606 |
"""
|
| 607 |
text_lower = text.lower()
|
| 608 |
+
found_indicators = list()
|
| 609 |
|
| 610 |
for severity, indicators in self.RISK_INDICATORS.items():
|
| 611 |
for indicator in indicators:
|
services/contract_classifier.py
CHANGED
|
@@ -55,127 +55,55 @@ class ContractClassifier:
|
|
| 55 |
4. Confidence scoring with explanations
|
| 56 |
"""
|
| 57 |
# CATEGORY HIERARCHY WITH KEYWORDS
|
| 58 |
-
CATEGORY_HIERARCHY
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
'subcategories': ['
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
},
|
| 108 |
-
|
| 109 |
-
'real_estate': {
|
| 110 |
-
'subcategories': ['residential_lease', 'commercial_lease', 'sublease', 'purchase_agreement'],
|
| 111 |
-
'keywords': [
|
| 112 |
-
'landlord', 'tenant', 'lease', 'premises', 'rent', 'property',
|
| 113 |
-
'security deposit', 'utilities', 'maintenance', 'repairs',
|
| 114 |
-
'eviction', 'lease term', 'renewal', 'square footage'
|
| 115 |
-
],
|
| 116 |
-
'weight': 1.0
|
| 117 |
-
},
|
| 118 |
-
|
| 119 |
-
'financial': {
|
| 120 |
-
'subcategories': ['loan', 'mortgage', 'credit', 'investment', 'promissory_note'],
|
| 121 |
-
'keywords': [
|
| 122 |
-
'loan', 'borrower', 'lender', 'principal', 'interest rate',
|
| 123 |
-
'collateral', 'default', 'repayment', 'amortization',
|
| 124 |
-
'promissory note', 'security interest', 'mortgage'
|
| 125 |
-
],
|
| 126 |
-
'weight': 1.0
|
| 127 |
-
},
|
| 128 |
-
|
| 129 |
-
'business': {
|
| 130 |
-
'subcategories': ['partnership', 'joint_venture', 'shareholders', 'llc_operating', 'merger'],
|
| 131 |
-
'keywords': [
|
| 132 |
-
'partnership', 'joint venture', 'equity', 'shares', 'profit sharing',
|
| 133 |
-
'loss allocation', 'management', 'governance', 'voting rights',
|
| 134 |
-
'dissolution', 'capital contribution', 'distribution'
|
| 135 |
-
],
|
| 136 |
-
'weight': 1.0
|
| 137 |
-
},
|
| 138 |
-
|
| 139 |
-
'sales': {
|
| 140 |
-
'subcategories': ['purchase_order', 'sales_agreement', 'distribution', 'supply_agreement'],
|
| 141 |
-
'keywords': [
|
| 142 |
-
'purchase', 'sale', 'buyer', 'seller', 'goods', 'products',
|
| 143 |
-
'delivery', 'shipment', 'payment terms', 'invoice',
|
| 144 |
-
'purchase price', 'quantity', 'specifications'
|
| 145 |
-
],
|
| 146 |
-
'weight': 1.0
|
| 147 |
-
},
|
| 148 |
-
|
| 149 |
-
'service_agreement': {
|
| 150 |
-
'subcategories': ['master_services', 'maintenance', 'support', 'subscription'],
|
| 151 |
-
'keywords': [
|
| 152 |
-
'service provider', 'services', 'sla', 'service level agreement',
|
| 153 |
-
'uptime', 'response time', 'support', 'maintenance',
|
| 154 |
-
'service credits', 'performance metrics', 'implementation'
|
| 155 |
-
],
|
| 156 |
-
'weight': 1.0
|
| 157 |
-
},
|
| 158 |
-
|
| 159 |
-
'vendor': {
|
| 160 |
-
'subcategories': ['supplier_agreement', 'procurement', 'master_vendor'],
|
| 161 |
-
'keywords': [
|
| 162 |
-
'vendor', 'supplier', 'procurement', 'supply chain',
|
| 163 |
-
'purchase order', 'fulfillment', 'vendor management',
|
| 164 |
-
'pricing', 'terms of supply'
|
| 165 |
-
],
|
| 166 |
-
'weight': 1.0
|
| 167 |
-
},
|
| 168 |
-
|
| 169 |
-
'agency': {
|
| 170 |
-
'subcategories': ['marketing_agency', 'recruiting', 'representation'],
|
| 171 |
-
'keywords': [
|
| 172 |
-
'agent', 'agency', 'principal', 'commission', 'representation',
|
| 173 |
-
'authority', 'scope of authority', 'compensation',
|
| 174 |
-
'exclusive rights', 'territory'
|
| 175 |
-
],
|
| 176 |
-
'weight': 1.0
|
| 177 |
-
}
|
| 178 |
-
}
|
| 179 |
|
| 180 |
# SUBCATEGORY DETECTION PATTERNS
|
| 181 |
SUBCATEGORY_PATTERNS = {'full_time' : ['full-time', 'full time', 'permanent', 'regular employee', '40 hours', 'exempt employee'],
|
|
@@ -325,7 +253,7 @@ class ContractClassifier:
|
|
| 325 |
raise ValueError("Contract text too short for classification")
|
| 326 |
|
| 327 |
# Preprocess text (use first 3000 chars for efficiency)
|
| 328 |
-
text_excerpt = contract_text
|
| 329 |
|
| 330 |
log_info("Starting contract classification",
|
| 331 |
text_length = len(contract_text),
|
|
@@ -338,8 +266,8 @@ class ContractClassifier:
|
|
| 338 |
# Step 2: Semantic similarity
|
| 339 |
semantic_scores = self._semantic_similarity(text_excerpt)
|
| 340 |
|
| 341 |
-
# Step 3: Legal-BERT
|
| 342 |
-
legal_bert_scores = self.
|
| 343 |
|
| 344 |
# Step 4: Combine scores (weighted average)
|
| 345 |
combined_scores = self._combine_scores(keyword_scores = keyword_scores,
|
|
@@ -369,6 +297,7 @@ class ContractClassifier:
|
|
| 369 |
subcategory = subcategory,
|
| 370 |
keyword_scores = keyword_scores,
|
| 371 |
semantic_scores = semantic_scores,
|
|
|
|
| 372 |
combined_scores = combined_scores,
|
| 373 |
)
|
| 374 |
|
|
@@ -452,9 +381,9 @@ class ContractClassifier:
|
|
| 452 |
return similarities
|
| 453 |
|
| 454 |
|
| 455 |
-
def
|
| 456 |
"""
|
| 457 |
-
Use Legal-BERT for
|
| 458 |
|
| 459 |
Arguments:
|
| 460 |
----------
|
|
@@ -462,7 +391,42 @@ class ContractClassifier:
|
|
| 462 |
|
| 463 |
Returns:
|
| 464 |
--------
|
| 465 |
-
{ dict } : Dictionary of {category:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 466 |
"""
|
| 467 |
# Tokenize
|
| 468 |
inputs = self.legal_bert_tokenizer(text,
|
|
@@ -475,9 +439,10 @@ class ContractClassifier:
|
|
| 475 |
# Get embeddings
|
| 476 |
with torch.no_grad():
|
| 477 |
outputs = self.legal_bert_model(**inputs)
|
|
|
|
| 478 |
cls_embedding = outputs.last_hidden_state[:, 0, :].cpu().numpy()[0]
|
| 479 |
|
| 480 |
-
return
|
| 481 |
|
| 482 |
|
| 483 |
def _combine_scores(self, keyword_scores: Dict[str, float], semantic_scores: Dict[str, float], legal_bert_scores: Dict[str, float] = None) -> Dict[str, float]:
|
|
@@ -490,7 +455,7 @@ class ContractClassifier:
|
|
| 490 |
|
| 491 |
semantic_scores { dict } : Semantic similarity scores
|
| 492 |
|
| 493 |
-
legal_bert_scores { dict } : Legal-BERT scores (optional)
|
| 494 |
|
| 495 |
Returns:
|
| 496 |
--------
|
|
@@ -499,22 +464,14 @@ class ContractClassifier:
|
|
| 499 |
combined = dict()
|
| 500 |
|
| 501 |
# Weights for each method
|
| 502 |
-
keyword_weight = 0.
|
| 503 |
-
semantic_weight = 0.
|
| 504 |
-
legal_bert_weight = 0.
|
| 505 |
-
|
| 506 |
-
if legal_bert_scores:
|
| 507 |
-
# Normalize weights
|
| 508 |
-
total_weight = keyword_weight + semantic_weight + legal_bert_weight
|
| 509 |
-
keyword_weight /= total_weight
|
| 510 |
-
semantic_weight /= total_weight
|
| 511 |
-
legal_bert_weight /= total_weight
|
| 512 |
|
| 513 |
for category in self.CATEGORY_HIERARCHY.keys():
|
| 514 |
-
score = (keyword_scores.get(category, 0) * keyword_weight +
|
| 515 |
-
|
| 516 |
-
|
| 517 |
-
score += legal_bert_scores.get(category, 0) * legal_bert_weight
|
| 518 |
|
| 519 |
combined[category] = score
|
| 520 |
|
|
@@ -562,8 +519,9 @@ class ContractClassifier:
|
|
| 562 |
return None
|
| 563 |
|
| 564 |
|
| 565 |
-
def _generate_reasoning(self, contract_text: str, primary_category: str, subcategory: Optional[str],
|
| 566 |
-
|
|
|
|
| 567 |
"""
|
| 568 |
Generate human-readable reasoning for classification
|
| 569 |
|
|
@@ -576,6 +534,7 @@ class ContractClassifier:
|
|
| 576 |
# Primary category reasoning
|
| 577 |
keyword_match = keyword_scores.get(primary_category, 0)
|
| 578 |
semantic_match = semantic_scores.get(primary_category, 0)
|
|
|
|
| 579 |
|
| 580 |
if (keyword_match > 0.5):
|
| 581 |
reasoning.append(f"Strong keyword indicators for {primary_category.replace('_', ' ')} category "
|
|
@@ -597,6 +556,11 @@ class ContractClassifier:
|
|
| 597 |
f"(similarity: {semantic_match:.2f})"
|
| 598 |
)
|
| 599 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 600 |
# Subcategory reasoning
|
| 601 |
if subcategory:
|
| 602 |
reasoning.append(f"Specific subcategory identified: {subcategory.replace('_', ' ')}")
|
|
@@ -659,9 +623,10 @@ class ContractClassifier:
|
|
| 659 |
log_info("Starting multi-label classification", threshold = threshold)
|
| 660 |
|
| 661 |
# Get scores
|
| 662 |
-
keyword_scores
|
| 663 |
-
semantic_scores
|
| 664 |
-
|
|
|
|
| 665 |
|
| 666 |
# Get all categories above threshold
|
| 667 |
matches = list()
|
|
@@ -669,7 +634,8 @@ class ContractClassifier:
|
|
| 669 |
for category, score in combined_scores.items():
|
| 670 |
if (score >= threshold):
|
| 671 |
subcategory = self._detect_subcategory(text, category)
|
| 672 |
-
reasoning = self._generate_reasoning(text, category, subcategory, keyword_scores,
|
|
|
|
| 673 |
keywords = self._extract_detected_keywords(text, category)
|
| 674 |
|
| 675 |
matches.append(ContractCategory(category = category,
|
|
@@ -720,4 +686,4 @@ class ContractClassifier:
|
|
| 720 |
"""
|
| 721 |
Get subcategories for a specific category
|
| 722 |
"""
|
| 723 |
-
return self.CATEGORY_HIERARCHY.get(category, {}).get('subcategories', [])
|
|
|
|
| 55 |
4. Confidence scoring with explanations
|
| 56 |
"""
|
| 57 |
# CATEGORY HIERARCHY WITH KEYWORDS
|
| 58 |
+
CATEGORY_HIERARCHY = {'employment' : {'subcategories' : ['full_time', 'part_time', 'contract_worker', 'internship', 'executive'],
|
| 59 |
+
'keywords' : ['employee', 'employment', 'job', 'position', 'salary', 'benefits', 'annual leave', 'sick leave', 'probation', 'job description', 'work hours', 'overtime', 'performance review', 'bonus structure'],
|
| 60 |
+
'weight' : 1.0,
|
| 61 |
+
},
|
| 62 |
+
'consulting' : {'subcategories' : ['independent_contractor', 'advisory', 'professional_services', 'freelance'],
|
| 63 |
+
'keywords' : ['consultant', 'consulting', 'independent contractor', 'statement of work', 'deliverables', 'professional services', 'hourly rate', 'project scope', 'milestone', 'acceptance criteria', 'work product'],
|
| 64 |
+
'weight' : 1.0,
|
| 65 |
+
},
|
| 66 |
+
'nda' : {'subcategories' : ['mutual_nda', 'unilateral_nda', 'confidentiality_agreement'],
|
| 67 |
+
'keywords' : ['non-disclosure', 'confidentiality', 'proprietary information', 'nda', 'disclosure agreement', 'trade secret', 'confidential information', 'receiving party', 'disclosing party', 'confidentiality obligation'],
|
| 68 |
+
'weight' : 1.2,
|
| 69 |
+
},
|
| 70 |
+
'technology' : {'subcategories' : ['software_license', 'saas', 'cloud_services', 'development', 'api_access'],
|
| 71 |
+
'keywords' : ['software', 'license', 'saas', 'subscription', 'source code', 'object code', 'api', 'cloud', 'hosting', 'maintenance', 'updates', 'support', 'uptime', 'service level'],
|
| 72 |
+
'weight' : 1.0,
|
| 73 |
+
},
|
| 74 |
+
'intellectual_property' : {'subcategories' : ['ip_assignment', 'licensing', 'patent', 'trademark', 'copyright'],
|
| 75 |
+
'keywords' : ['intellectual property', 'ip', 'copyright', 'patent', 'trademark', 'work product', 'inventions', 'ip rights', 'ownership', 'assignment of rights', 'license grant', 'royalty'],
|
| 76 |
+
'weight' : 1.1,
|
| 77 |
+
},
|
| 78 |
+
'real_estate' : {'subcategories' : ['residential_lease', 'commercial_lease', 'sublease', 'purchase_agreement'],
|
| 79 |
+
'keywords' : ['landlord', 'tenant', 'lease', 'premises', 'rent', 'property', 'security deposit', 'utilities', 'maintenance', 'repairs', 'eviction', 'lease term', 'renewal', 'square footage'],
|
| 80 |
+
'weight' : 1.0,
|
| 81 |
+
},
|
| 82 |
+
'financial' : {'subcategories' : ['loan', 'mortgage', 'credit', 'investment', 'promissory_note'],
|
| 83 |
+
'keywords' : ['loan', 'borrower', 'lender', 'principal', 'interest rate', 'collateral', 'default', 'repayment', 'amortization', 'promissory note', 'security interest', 'mortgage'],
|
| 84 |
+
'weight' : 1.0,
|
| 85 |
+
},
|
| 86 |
+
'business' : {'subcategories' : ['partnership', 'joint_venture', 'shareholders', 'llc_operating', 'merger'],
|
| 87 |
+
'keywords' : ['partnership', 'joint venture', 'equity', 'shares', 'profit sharing', 'loss allocation', 'management', 'governance', 'voting rights', 'dissolution', 'capital contribution', 'distribution'],
|
| 88 |
+
'weight' : 1.0,
|
| 89 |
+
},
|
| 90 |
+
'sales' : {'subcategories' : ['purchase_order', 'sales_agreement', 'distribution', 'supply_agreement'],
|
| 91 |
+
'keywords' : ['purchase', 'sale', 'buyer', 'seller', 'goods', 'products', 'delivery', 'shipment', 'payment terms', 'invoice', 'purchase price', 'quantity', 'specifications'],
|
| 92 |
+
'weight' : 1.0,
|
| 93 |
+
},
|
| 94 |
+
'service_agreement' : {'subcategories' : ['master_services', 'maintenance', 'support', 'subscription'],
|
| 95 |
+
'keywords' : ['service provider', 'services', 'sla', 'service level agreement', 'uptime', 'response time', 'support', 'maintenance', 'service credits', 'performance metrics', 'implementation'],
|
| 96 |
+
'weight' : 1.0,
|
| 97 |
+
},
|
| 98 |
+
'vendor' : {'subcategories' : ['supplier_agreement', 'procurement', 'master_vendor'],
|
| 99 |
+
'keywords' : ['vendor', 'supplier', 'procurement', 'supply chain', 'purchase order', 'fulfillment', 'vendor management', 'pricing', 'terms of supply'],
|
| 100 |
+
'weight' : 1.0,
|
| 101 |
+
},
|
| 102 |
+
'agency' : {'subcategories' : ['marketing_agency', 'recruiting', 'representation'],
|
| 103 |
+
'keywords' : ['agent', 'agency', 'principal', 'commission', 'representation', 'authority', 'scope of authority', 'compensation', 'exclusive rights', 'territory'],
|
| 104 |
+
'weight' : 1.0,
|
| 105 |
+
},
|
| 106 |
+
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
|
| 108 |
# SUBCATEGORY DETECTION PATTERNS
|
| 109 |
SUBCATEGORY_PATTERNS = {'full_time' : ['full-time', 'full time', 'permanent', 'regular employee', '40 hours', 'exempt employee'],
|
|
|
|
| 253 |
raise ValueError("Contract text too short for classification")
|
| 254 |
|
| 255 |
# Preprocess text (use first 3000 chars for efficiency)
|
| 256 |
+
text_excerpt = contract_text[:3000]
|
| 257 |
|
| 258 |
log_info("Starting contract classification",
|
| 259 |
text_length = len(contract_text),
|
|
|
|
| 266 |
# Step 2: Semantic similarity
|
| 267 |
semantic_scores = self._semantic_similarity(text_excerpt)
|
| 268 |
|
| 269 |
+
# Step 3: Legal-BERT semantic similarity (enhanced)
|
| 270 |
+
legal_bert_scores = self._legal_bert_similarity(text_excerpt)
|
| 271 |
|
| 272 |
# Step 4: Combine scores (weighted average)
|
| 273 |
combined_scores = self._combine_scores(keyword_scores = keyword_scores,
|
|
|
|
| 297 |
subcategory = subcategory,
|
| 298 |
keyword_scores = keyword_scores,
|
| 299 |
semantic_scores = semantic_scores,
|
| 300 |
+
legal_bert_scores = legal_bert_scores,
|
| 301 |
combined_scores = combined_scores,
|
| 302 |
)
|
| 303 |
|
|
|
|
| 381 |
return similarities
|
| 382 |
|
| 383 |
|
| 384 |
+
def _legal_bert_similarity(self, text: str) -> Dict[str, float]:
|
| 385 |
"""
|
| 386 |
+
Use Legal-BERT for semantic similarity calculation
|
| 387 |
|
| 388 |
Arguments:
|
| 389 |
----------
|
|
|
|
| 391 |
|
| 392 |
Returns:
|
| 393 |
--------
|
| 394 |
+
{ dict } : Dictionary of {category: similarity_score} using Legal-BERT embeddings
|
| 395 |
+
"""
|
| 396 |
+
# Get Legal-BERT embedding for the text
|
| 397 |
+
text_embedding = self._get_legal_bert_embedding(text)
|
| 398 |
+
|
| 399 |
+
# Calculate similarity to each category's Legal-BERT embedding
|
| 400 |
+
similarities = dict()
|
| 401 |
+
|
| 402 |
+
for category in self.CATEGORY_HIERARCHY.keys():
|
| 403 |
+
# Get pre-computed category embedding
|
| 404 |
+
cat_embedding = self._get_legal_bert_embedding(
|
| 405 |
+
f"This is a {category.replace('_', ' ')} contract agreement"
|
| 406 |
+
)
|
| 407 |
+
|
| 408 |
+
# Calculate cosine similarity
|
| 409 |
+
similarity = torch.nn.functional.cosine_similarity(
|
| 410 |
+
torch.tensor(text_embedding).unsqueeze(0),
|
| 411 |
+
torch.tensor(cat_embedding).unsqueeze(0)
|
| 412 |
+
).item()
|
| 413 |
+
|
| 414 |
+
similarities[category] = similarity
|
| 415 |
+
|
| 416 |
+
return similarities
|
| 417 |
+
|
| 418 |
+
|
| 419 |
+
def _get_legal_bert_embedding(self, text: str) -> np.ndarray:
|
| 420 |
+
"""
|
| 421 |
+
Get Legal-BERT embedding for text using [CLS] token
|
| 422 |
+
|
| 423 |
+
Arguments:
|
| 424 |
+
----------
|
| 425 |
+
text { str } : Input text
|
| 426 |
+
|
| 427 |
+
Returns:
|
| 428 |
+
--------
|
| 429 |
+
{ np.ndarray } : Embedding vector
|
| 430 |
"""
|
| 431 |
# Tokenize
|
| 432 |
inputs = self.legal_bert_tokenizer(text,
|
|
|
|
| 439 |
# Get embeddings
|
| 440 |
with torch.no_grad():
|
| 441 |
outputs = self.legal_bert_model(**inputs)
|
| 442 |
+
# Use [CLS] token embedding (first token)
|
| 443 |
cls_embedding = outputs.last_hidden_state[:, 0, :].cpu().numpy()[0]
|
| 444 |
|
| 445 |
+
return cls_embedding
|
| 446 |
|
| 447 |
|
| 448 |
def _combine_scores(self, keyword_scores: Dict[str, float], semantic_scores: Dict[str, float], legal_bert_scores: Dict[str, float] = None) -> Dict[str, float]:
|
|
|
|
| 455 |
|
| 456 |
semantic_scores { dict } : Semantic similarity scores
|
| 457 |
|
| 458 |
+
legal_bert_scores { dict } : Legal-BERT similarity scores (optional)
|
| 459 |
|
| 460 |
Returns:
|
| 461 |
--------
|
|
|
|
| 464 |
combined = dict()
|
| 465 |
|
| 466 |
# Weights for each method
|
| 467 |
+
keyword_weight = 0.30
|
| 468 |
+
semantic_weight = 0.40
|
| 469 |
+
legal_bert_weight = 0.30
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 470 |
|
| 471 |
for category in self.CATEGORY_HIERARCHY.keys():
|
| 472 |
+
score = (keyword_scores.get(category, 0) * keyword_weight +
|
| 473 |
+
semantic_scores.get(category, 0) * semantic_weight +
|
| 474 |
+
legal_bert_scores.get(category, 0) * legal_bert_weight)
|
|
|
|
| 475 |
|
| 476 |
combined[category] = score
|
| 477 |
|
|
|
|
| 519 |
return None
|
| 520 |
|
| 521 |
|
| 522 |
+
def _generate_reasoning(self, contract_text: str, primary_category: str, subcategory: Optional[str],
|
| 523 |
+
keyword_scores: Dict[str, float], semantic_scores: Dict[str, float],
|
| 524 |
+
legal_bert_scores: Dict[str, float], combined_scores: Dict[str, float]) -> List[str]:
|
| 525 |
"""
|
| 526 |
Generate human-readable reasoning for classification
|
| 527 |
|
|
|
|
| 534 |
# Primary category reasoning
|
| 535 |
keyword_match = keyword_scores.get(primary_category, 0)
|
| 536 |
semantic_match = semantic_scores.get(primary_category, 0)
|
| 537 |
+
legal_bert_match = legal_bert_scores.get(primary_category, 0)
|
| 538 |
|
| 539 |
if (keyword_match > 0.5):
|
| 540 |
reasoning.append(f"Strong keyword indicators for {primary_category.replace('_', ' ')} category "
|
|
|
|
| 556 |
f"(similarity: {semantic_match:.2f})"
|
| 557 |
)
|
| 558 |
|
| 559 |
+
if (legal_bert_match > 0.60):
|
| 560 |
+
reasoning.append(f"Legal-BERT semantic analysis confirms {primary_category.replace('_', ' ')} classification "
|
| 561 |
+
f"(similarity: {legal_bert_match:.2f})"
|
| 562 |
+
)
|
| 563 |
+
|
| 564 |
# Subcategory reasoning
|
| 565 |
if subcategory:
|
| 566 |
reasoning.append(f"Specific subcategory identified: {subcategory.replace('_', ' ')}")
|
|
|
|
| 623 |
log_info("Starting multi-label classification", threshold = threshold)
|
| 624 |
|
| 625 |
# Get scores
|
| 626 |
+
keyword_scores = self._score_keywords(text.lower())
|
| 627 |
+
semantic_scores = self._semantic_similarity(text[:3000])
|
| 628 |
+
legal_bert_scores = self._legal_bert_similarity(text[:3000])
|
| 629 |
+
combined_scores = self._combine_scores(keyword_scores, semantic_scores, legal_bert_scores)
|
| 630 |
|
| 631 |
# Get all categories above threshold
|
| 632 |
matches = list()
|
|
|
|
| 634 |
for category, score in combined_scores.items():
|
| 635 |
if (score >= threshold):
|
| 636 |
subcategory = self._detect_subcategory(text, category)
|
| 637 |
+
reasoning = self._generate_reasoning(text, category, subcategory, keyword_scores,
|
| 638 |
+
semantic_scores, legal_bert_scores, combined_scores)
|
| 639 |
keywords = self._extract_detected_keywords(text, category)
|
| 640 |
|
| 641 |
matches.append(ContractCategory(category = category,
|
|
|
|
| 686 |
"""
|
| 687 |
Get subcategories for a specific category
|
| 688 |
"""
|
| 689 |
+
return self.CATEGORY_HIERARCHY.get(category, {}).get('subcategories', [])
|
services/risk_analyzer.py
CHANGED
|
@@ -1,70 +1,82 @@
|
|
| 1 |
-
|
| 2 |
-
Multi-Factor Risk Analyzer with sophisticated rule-based scoring
|
| 3 |
-
Analyzes contracts using keyword severity, structural patterns, clause-level analysis,
|
| 4 |
-
industry benchmarks, and missing protections detection
|
| 5 |
-
"""
|
| 6 |
-
|
| 7 |
import re
|
| 8 |
-
from typing import Dict, List, Tuple, Optional, Any
|
| 9 |
-
from dataclasses import dataclass, field
|
| 10 |
-
from collections import defaultdict
|
| 11 |
import sys
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
from pathlib import Path
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
# Add parent directory to path for imports
|
| 15 |
sys.path.append(str(Path(__file__).parent.parent))
|
| 16 |
|
| 17 |
-
from
|
| 18 |
-
from
|
| 19 |
-
from
|
|
|
|
| 20 |
from utils.text_processor import TextProcessor
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
|
| 23 |
@dataclass
|
| 24 |
class RiskBreakdownItem:
|
| 25 |
-
"""
|
| 26 |
-
category
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
|
|
|
|
|
|
| 30 |
|
|
|
|
| 31 |
def to_dict(self) -> Dict[str, Any]:
|
| 32 |
-
"""
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
|
|
|
| 39 |
|
| 40 |
|
| 41 |
@dataclass
|
| 42 |
class RiskScore:
|
| 43 |
-
"""
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
|
|
|
|
|
|
| 51 |
|
|
|
|
| 52 |
def to_dict(self) -> Dict[str, Any]:
|
| 53 |
-
"""
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
|
|
|
| 63 |
|
| 64 |
|
| 65 |
class MultiFactorRiskAnalyzer:
|
| 66 |
"""
|
| 67 |
-
|
| 68 |
|
| 69 |
Analysis Factors:
|
| 70 |
1. Keyword severity scoring (critical/high/medium keywords)
|
|
@@ -74,353 +86,386 @@ class MultiFactorRiskAnalyzer:
|
|
| 74 |
5. Missing protections check
|
| 75 |
6. Contract type-specific weight adjustments
|
| 76 |
"""
|
| 77 |
-
|
| 78 |
def __init__(self, contract_type: ContractType = ContractType.GENERAL):
|
| 79 |
"""
|
| 80 |
Initialize risk analyzer
|
| 81 |
|
| 82 |
-
|
| 83 |
-
|
|
|
|
| 84 |
"""
|
| 85 |
-
self.contract_type
|
| 86 |
-
self.rules
|
| 87 |
self.adjusted_weights = self.rules.get_adjusted_weights(contract_type)
|
| 88 |
-
self.text_processor
|
| 89 |
-
self.logger
|
| 90 |
|
| 91 |
log_info("MultiFactorRiskAnalyzer initialized",
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
# =========================================================================
|
| 96 |
-
# MAIN ANALYSIS METHOD
|
| 97 |
-
# =========================================================================
|
| 98 |
|
|
|
|
| 99 |
@ContractAnalyzerLogger.log_execution_time("analyze_risk")
|
| 100 |
-
def analyze_risk(self, contract_text: str,
|
| 101 |
-
clauses: List[ExtractedClause]) -> RiskScore:
|
| 102 |
"""
|
| 103 |
Comprehensive multi-factor risk analysis
|
| 104 |
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
|
|
|
|
|
|
| 108 |
|
| 109 |
Returns:
|
| 110 |
-
|
|
|
|
| 111 |
"""
|
| 112 |
|
| 113 |
-
log_info("Starting risk analysis",
|
| 114 |
-
text_length=len(contract_text),
|
| 115 |
-
num_clauses=len(clauses),
|
| 116 |
-
contract_type=self.contract_type.value)
|
| 117 |
|
| 118 |
# Initialize scoring containers
|
| 119 |
-
category_scores
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
#
|
| 124 |
-
keyword_risks
|
| 125 |
-
log_info("Keyword analysis complete",
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
# Factor 2: Structural Pattern Analysis
|
| 130 |
-
pattern_risks = self._analyze_patterns(contract_text)
|
| 131 |
log_info(f"Pattern analysis found {len(pattern_risks)} risky patterns")
|
| 132 |
|
| 133 |
-
#
|
| 134 |
-
clause_risks
|
| 135 |
log_info(f"Clause analysis complete for {len(clause_risks)} categories")
|
| 136 |
|
| 137 |
-
#
|
| 138 |
-
missing_risks
|
| 139 |
log_info(f"Missing protections analysis complete")
|
| 140 |
|
| 141 |
-
#
|
| 142 |
benchmark_comparison = self._compare_to_benchmarks(contract_text, clauses)
|
| 143 |
log_info(f"Benchmark comparison complete")
|
| 144 |
|
| 145 |
# Aggregate scores by category
|
| 146 |
for category in self.adjusted_weights.keys():
|
| 147 |
-
category_risk
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
category_scores[category] = category_risk["score"]
|
| 156 |
detailed_findings[category] = category_risk["findings"]
|
| 157 |
|
| 158 |
-
if category_risk["score"] >= self.rules.RISK_THRESHOLDS["high"]:
|
| 159 |
risk_factors.append(category)
|
| 160 |
|
| 161 |
# Calculate weighted overall score
|
| 162 |
-
overall_score
|
| 163 |
-
risk_level
|
| 164 |
|
| 165 |
# Create risk breakdown items
|
| 166 |
-
risk_breakdown = self._create_risk_breakdown(
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
risk_breakdown=risk_breakdown
|
| 179 |
-
)
|
| 180 |
-
|
| 181 |
-
log_info("Risk analysis complete",
|
| 182 |
-
overall_score=overall_score,
|
| 183 |
-
risk_level=risk_level,
|
| 184 |
-
high_risk_categories=len(risk_factors))
|
| 185 |
|
| 186 |
return result
|
| 187 |
|
| 188 |
-
# =========================================================================
|
| 189 |
-
# FACTOR 1: KEYWORD SEVERITY SCORING
|
| 190 |
-
# =========================================================================
|
| 191 |
|
| 192 |
def _score_keywords(self, text: str) -> Dict[str, int]:
|
| 193 |
"""
|
| 194 |
Score text based on keyword severity tiers
|
| 195 |
|
| 196 |
Returns:
|
| 197 |
-
|
|
|
|
| 198 |
"""
|
| 199 |
text_lower = text.lower()
|
| 200 |
-
scores
|
| 201 |
|
| 202 |
# Critical keywords (Tier 1)
|
| 203 |
for keyword, weight in self.rules.CRITICAL_KEYWORDS.items():
|
| 204 |
if keyword in text_lower:
|
| 205 |
-
count
|
| 206 |
-
|
|
|
|
|
|
|
| 207 |
|
| 208 |
# High-risk keywords (Tier 2)
|
| 209 |
for keyword, weight in self.rules.HIGH_RISK_KEYWORDS.items():
|
| 210 |
if keyword in text_lower:
|
| 211 |
-
count
|
| 212 |
scores["high"] += weight * min(count, 2)
|
| 213 |
|
| 214 |
# Medium-risk keywords (Tier 3)
|
| 215 |
for keyword, weight in self.rules.MEDIUM_RISK_KEYWORDS.items():
|
| 216 |
if keyword in text_lower:
|
| 217 |
-
count
|
| 218 |
scores["medium"] += weight * min(count, 2)
|
| 219 |
|
| 220 |
return dict(scores)
|
| 221 |
|
| 222 |
-
# =========================================================================
|
| 223 |
-
# FACTOR 2: STRUCTURAL PATTERN ANALYSIS
|
| 224 |
-
# =========================================================================
|
| 225 |
|
| 226 |
def _analyze_patterns(self, text: str) -> List[Dict]:
|
| 227 |
"""
|
| 228 |
Detect risky structural patterns in contract
|
| 229 |
|
| 230 |
Returns:
|
| 231 |
-
|
|
|
|
| 232 |
"""
|
| 233 |
-
findings =
|
| 234 |
|
| 235 |
for pattern, risk_points, description in self.rules.RISKY_PATTERNS:
|
| 236 |
matches = re.finditer(pattern, text, re.IGNORECASE)
|
| 237 |
for match in matches:
|
| 238 |
-
findings.append({
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
})
|
| 244 |
|
| 245 |
return findings
|
| 246 |
|
| 247 |
-
# =========================================================================
|
| 248 |
-
# FACTOR 3: CLAUSE-LEVEL DETAILED ANALYSIS
|
| 249 |
-
# =========================================================================
|
| 250 |
|
| 251 |
def _analyze_clauses(self, clauses: List[ExtractedClause]) -> Dict[str, List[Dict]]:
|
| 252 |
"""
|
| 253 |
Deep dive into each clause with specific risk factors
|
| 254 |
|
| 255 |
Returns:
|
| 256 |
-
|
|
|
|
| 257 |
"""
|
| 258 |
clause_analysis = defaultdict(list)
|
| 259 |
|
| 260 |
for clause in clauses:
|
| 261 |
# Get risk factors for this clause category
|
| 262 |
-
if clause.category in self.rules.CLAUSE_RISK_FACTORS:
|
| 263 |
analysis = self._analyze_single_clause(clause)
|
| 264 |
clause_analysis[clause.category].append(analysis)
|
| 265 |
|
| 266 |
return dict(clause_analysis)
|
| 267 |
|
|
|
|
| 268 |
def _analyze_single_clause(self, clause: ExtractedClause) -> Dict:
|
| 269 |
"""
|
| 270 |
Analyze a single clause with detailed risk factors
|
| 271 |
|
| 272 |
Returns:
|
| 273 |
-
|
|
|
|
| 274 |
"""
|
| 275 |
risk_config = self.rules.CLAUSE_RISK_FACTORS.get(clause.category, {})
|
| 276 |
-
base_risk
|
| 277 |
|
| 278 |
-
risk_score
|
| 279 |
-
findings
|
| 280 |
|
| 281 |
-
text_lower
|
| 282 |
|
| 283 |
# Check red flags
|
| 284 |
-
if "red_flags" in risk_config:
|
| 285 |
for flag, adjustment in risk_config["red_flags"].items():
|
| 286 |
if flag in text_lower:
|
| 287 |
risk_score += adjustment
|
| 288 |
-
severity
|
|
|
|
| 289 |
findings.append(f"Found '{flag}' ({severity} risk by {abs(adjustment)} points)")
|
| 290 |
|
| 291 |
# Special handling for specific clause types
|
| 292 |
-
if clause.category == "non_compete":
|
| 293 |
duration_risk = self._analyze_noncompete_duration(clause.text)
|
| 294 |
-
risk_score
|
|
|
|
| 295 |
findings.extend(duration_risk["findings"])
|
| 296 |
|
| 297 |
-
scope_risk
|
| 298 |
-
risk_score
|
|
|
|
| 299 |
findings.extend(scope_risk["findings"])
|
| 300 |
|
| 301 |
-
elif clause.category == "termination":
|
| 302 |
notice_risk = self._analyze_notice_period(clause.text)
|
| 303 |
risk_score += notice_risk["adjustment"]
|
|
|
|
| 304 |
findings.extend(notice_risk["findings"])
|
| 305 |
|
| 306 |
-
elif clause.category == "indemnification":
|
| 307 |
mutual_risk = self._analyze_indemnification_mutuality(clause.text)
|
| 308 |
risk_score += mutual_risk["adjustment"]
|
|
|
|
| 309 |
findings.extend(mutual_risk["findings"])
|
| 310 |
|
| 311 |
-
elif clause.category == "compensation":
|
| 312 |
clarity_risk = self._analyze_compensation_clarity(clause.text)
|
| 313 |
-
risk_score
|
|
|
|
| 314 |
findings.extend(clarity_risk["findings"])
|
| 315 |
|
| 316 |
-
elif clause.category == "intellectual_property":
|
| 317 |
-
scope_risk
|
| 318 |
risk_score += scope_risk["adjustment"]
|
|
|
|
| 319 |
findings.extend(scope_risk["findings"])
|
| 320 |
|
| 321 |
# Cap score between 0 and 100
|
| 322 |
risk_score = max(0, min(100, risk_score))
|
| 323 |
|
| 324 |
-
return {
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
}
|
| 330 |
|
|
|
|
| 331 |
def _analyze_noncompete_duration(self, text: str) -> Dict:
|
| 332 |
-
"""
|
|
|
|
|
|
|
| 333 |
duration_pattern = r'(\d+)\s*(year|yr|month|mo)s?'
|
| 334 |
-
matches
|
| 335 |
|
| 336 |
if not matches:
|
| 337 |
-
return {"adjustment": 0,
|
|
|
|
|
|
|
| 338 |
|
| 339 |
# Convert to months
|
| 340 |
duration_months = 0
|
|
|
|
| 341 |
for num, unit in matches:
|
| 342 |
-
months
|
| 343 |
duration_months = max(duration_months, months)
|
| 344 |
|
| 345 |
# Get benchmark
|
| 346 |
-
industry
|
| 347 |
benchmark = self.rules.INDUSTRY_BENCHMARKS["non_compete_duration"][industry]
|
| 348 |
|
| 349 |
-
if duration_months <= benchmark["reasonable"]:
|
| 350 |
return {"adjustment": -10, "findings": [f"Duration of {duration_months} months is reasonable"]}
|
| 351 |
-
|
|
|
|
| 352 |
return {"adjustment": 0, "findings": [f"Duration of {duration_months} months is standard"]}
|
| 353 |
-
|
|
|
|
| 354 |
return {"adjustment": +15, "findings": [f"Duration of {duration_months} months is lengthy"]}
|
|
|
|
| 355 |
else:
|
| 356 |
return {"adjustment": +30, "findings": [f"Duration of {duration_months} months is excessive"]}
|
| 357 |
|
|
|
|
| 358 |
def _analyze_noncompete_scope(self, text: str) -> Dict:
|
| 359 |
-
"""
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
|
|
|
|
|
|
|
| 363 |
|
| 364 |
scope_config = self.rules.CLAUSE_RISK_FACTORS["non-compete"]["scope_keywords"]
|
| 365 |
|
| 366 |
for keyword, adj in scope_config.items():
|
| 367 |
if keyword in text_lower:
|
| 368 |
adjustment += adj
|
| 369 |
-
severity
|
|
|
|
| 370 |
findings.append(f"Scope includes '{keyword}' ({severity})")
|
| 371 |
|
| 372 |
return {"adjustment": adjustment, "findings": findings}
|
| 373 |
|
|
|
|
| 374 |
def _analyze_notice_period(self, text: str) -> Dict:
|
| 375 |
-
"""
|
|
|
|
|
|
|
| 376 |
notice_pattern = r'(\d+)\s*days?\s*(?:notice|prior\s+notice)'
|
| 377 |
-
matches
|
| 378 |
|
| 379 |
-
if len(matches) < 2:
|
| 380 |
-
return {"adjustment": 0,
|
|
|
|
|
|
|
| 381 |
|
| 382 |
periods = [int(m) for m in matches]
|
| 383 |
|
| 384 |
-
if len(periods) >= 2:
|
| 385 |
ratio = max(periods) / min(periods)
|
| 386 |
|
| 387 |
-
if ratio >= 4:
|
| 388 |
-
return {"adjustment": +25, "findings": [f"Notice periods highly imbalanced ({max(periods)} vs {min(periods)} days)"]}
|
| 389 |
-
|
| 390 |
-
|
| 391 |
-
|
| 392 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 393 |
else:
|
| 394 |
-
return {"adjustment": -5,
|
|
|
|
|
|
|
| 395 |
|
| 396 |
-
return {"adjustment": 0,
|
|
|
|
|
|
|
| 397 |
|
|
|
|
| 398 |
def _analyze_indemnification_mutuality(self, text: str) -> Dict:
|
| 399 |
-
"""
|
| 400 |
-
|
|
|
|
|
|
|
| 401 |
|
| 402 |
-
mutual_indicators
|
| 403 |
-
one_sided_indicators = ["employee shall indemnify", "consultant shall indemnify",
|
| 404 |
-
"contractor shall indemnify", "you shall indemnify"]
|
| 405 |
|
| 406 |
-
has_mutual
|
| 407 |
-
has_one_sided
|
| 408 |
|
| 409 |
-
if has_mutual and not has_one_sided:
|
| 410 |
-
return {"adjustment": -15,
|
|
|
|
|
|
|
|
|
|
| 411 |
elif has_one_sided:
|
| 412 |
-
return {"adjustment": +20,
|
|
|
|
|
|
|
|
|
|
| 413 |
else:
|
| 414 |
-
return {"adjustment": 0,
|
|
|
|
|
|
|
|
|
|
| 415 |
|
| 416 |
def _analyze_compensation_clarity(self, text: str) -> Dict:
|
| 417 |
-
"""
|
| 418 |
-
|
| 419 |
-
|
| 420 |
-
|
|
|
|
|
|
|
| 421 |
|
| 422 |
# Check for vague terms
|
| 423 |
vague_terms = ["to be determined", "tbd", "subject to review", "discretionary"]
|
|
|
|
| 424 |
for term in vague_terms:
|
| 425 |
if term in text_lower:
|
| 426 |
adjustment += 10
|
|
@@ -431,16 +476,22 @@ class MultiFactorRiskAnalyzer:
|
|
| 431 |
adjustment -= 10
|
| 432 |
findings.append("Specific monetary amount provided (good)")
|
| 433 |
|
| 434 |
-
return {"adjustment": adjustment,
|
|
|
|
|
|
|
|
|
|
| 435 |
|
| 436 |
def _analyze_ip_scope(self, text: str) -> Dict:
|
| 437 |
-
"""
|
| 438 |
-
|
| 439 |
-
|
| 440 |
-
|
|
|
|
|
|
|
| 441 |
|
| 442 |
# Overly broad indicators
|
| 443 |
broad_terms = ["all work product", "anything created", "whether or not related"]
|
|
|
|
| 444 |
for term in broad_terms:
|
| 445 |
if term in text_lower:
|
| 446 |
adjustment += 15
|
|
@@ -449,25 +500,31 @@ class MultiFactorRiskAnalyzer:
|
|
| 449 |
# Protective terms
|
| 450 |
protective_terms = ["prior ip excluded", "personal projects excluded"]
|
| 451 |
for term in protective_terms:
|
| 452 |
-
|
|
|
|
| 453 |
adjustment -= 15
|
| 454 |
findings.append(f"Protective IP term present: '{term}'")
|
| 455 |
|
| 456 |
-
return {"adjustment": adjustment,
|
|
|
|
|
|
|
| 457 |
|
| 458 |
-
# =========================================================================
|
| 459 |
-
# FACTOR 4: MISSING PROTECTIONS CHECK
|
| 460 |
-
# =========================================================================
|
| 461 |
|
| 462 |
-
def _check_missing_protections(self, text: str,
|
| 463 |
-
clauses: List[ExtractedClause]) -> Dict[str, int]:
|
| 464 |
"""
|
| 465 |
Check for missing critical protections
|
| 466 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 467 |
Returns:
|
| 468 |
-
|
|
|
|
| 469 |
"""
|
| 470 |
-
text_lower
|
| 471 |
missing_risks = defaultdict(int)
|
| 472 |
|
| 473 |
for protection_id, config in self.rules.PROTECTION_CHECKLIST.items():
|
|
@@ -480,11 +537,11 @@ class MultiFactorRiskAnalyzer:
|
|
| 480 |
|
| 481 |
return dict(missing_risks)
|
| 482 |
|
| 483 |
-
|
| 484 |
-
|
| 485 |
-
|
| 486 |
-
|
| 487 |
-
|
| 488 |
protection_indicators = {
|
| 489 |
"for_cause_definition": ["for cause", "cause defined", "grounds for termination"],
|
| 490 |
"severance_provision": ["severance", "severance pay", "separation pay"],
|
|
|
|
| 1 |
+
# DEPENDENCIES
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
import re
|
|
|
|
|
|
|
|
|
|
| 3 |
import sys
|
| 4 |
+
from typing import Any
|
| 5 |
+
from typing import List
|
| 6 |
+
from typing import Dict
|
| 7 |
+
from typing import Tuple
|
| 8 |
from pathlib import Path
|
| 9 |
+
from typing import Optional
|
| 10 |
+
from dataclasses import field
|
| 11 |
+
from dataclasses import dataclass
|
| 12 |
+
from collections import defaultdict
|
| 13 |
|
| 14 |
# Add parent directory to path for imports
|
| 15 |
sys.path.append(str(Path(__file__).parent.parent))
|
| 16 |
|
| 17 |
+
from utils.logger import log_info
|
| 18 |
+
from utils.logger import log_error
|
| 19 |
+
from config.risk_rules import RiskRules
|
| 20 |
+
from config.risk_rules import ContractType
|
| 21 |
from utils.text_processor import TextProcessor
|
| 22 |
+
from utils.logger import ContractAnalyzerLogger
|
| 23 |
+
from services.clause_extractor import ExtractedClause
|
| 24 |
+
|
| 25 |
|
| 26 |
|
| 27 |
@dataclass
|
| 28 |
class RiskBreakdownItem:
|
| 29 |
+
"""
|
| 30 |
+
Individual risk category breakdown
|
| 31 |
+
"""
|
| 32 |
+
category : str
|
| 33 |
+
score : int # 0-100
|
| 34 |
+
summary : str
|
| 35 |
+
findings : List[str] = field(default_factory=list)
|
| 36 |
|
| 37 |
+
|
| 38 |
def to_dict(self) -> Dict[str, Any]:
|
| 39 |
+
"""
|
| 40 |
+
Convert to dictionary
|
| 41 |
+
"""
|
| 42 |
+
return {"category" : self.category,
|
| 43 |
+
"score" : self.score,
|
| 44 |
+
"summary" : self.summary,
|
| 45 |
+
"findings" : self.findings,
|
| 46 |
+
}
|
| 47 |
|
| 48 |
|
| 49 |
@dataclass
|
| 50 |
class RiskScore:
|
| 51 |
+
"""
|
| 52 |
+
Comprehensive risk score with detailed breakdown
|
| 53 |
+
"""
|
| 54 |
+
overall_score : int # 0-100
|
| 55 |
+
risk_level : str # "CRITICAL", "HIGH", "MEDIUM", "LOW"
|
| 56 |
+
category_scores : Dict[str, int]
|
| 57 |
+
risk_factors : List[str]
|
| 58 |
+
detailed_findings : Dict[str, List[str]]
|
| 59 |
+
benchmark_comparison : Dict[str, str]
|
| 60 |
+
risk_breakdown : List[RiskBreakdownItem]
|
| 61 |
|
| 62 |
+
|
| 63 |
def to_dict(self) -> Dict[str, Any]:
|
| 64 |
+
"""
|
| 65 |
+
Convert to dictionary for serialization
|
| 66 |
+
"""
|
| 67 |
+
return {"overall_score" : self.overall_score,
|
| 68 |
+
"risk_level" : self.risk_level,
|
| 69 |
+
"category_scores" : self.category_scores,
|
| 70 |
+
"risk_factors" : self.risk_factors,
|
| 71 |
+
"detailed_findings" : self.detailed_findings,
|
| 72 |
+
"benchmark_comparison" : self.benchmark_comparison,
|
| 73 |
+
"risk_breakdown" : [item.to_dict() for item in self.risk_breakdown],
|
| 74 |
+
}
|
| 75 |
|
| 76 |
|
| 77 |
class MultiFactorRiskAnalyzer:
|
| 78 |
"""
|
| 79 |
+
Multi-factor risk analysis engine
|
| 80 |
|
| 81 |
Analysis Factors:
|
| 82 |
1. Keyword severity scoring (critical/high/medium keywords)
|
|
|
|
| 86 |
5. Missing protections check
|
| 87 |
6. Contract type-specific weight adjustments
|
| 88 |
"""
|
|
|
|
| 89 |
def __init__(self, contract_type: ContractType = ContractType.GENERAL):
|
| 90 |
"""
|
| 91 |
Initialize risk analyzer
|
| 92 |
|
| 93 |
+
Arguments:
|
| 94 |
+
----------
|
| 95 |
+
contract_type { ContractType } : Type of contract for specialized analysis
|
| 96 |
"""
|
| 97 |
+
self.contract_type = contract_type
|
| 98 |
+
self.rules = RiskRules()
|
| 99 |
self.adjusted_weights = self.rules.get_adjusted_weights(contract_type)
|
| 100 |
+
self.text_processor = TextProcessor(use_spacy = True)
|
| 101 |
+
self.logger = ContractAnalyzerLogger.get_logger()
|
| 102 |
|
| 103 |
log_info("MultiFactorRiskAnalyzer initialized",
|
| 104 |
+
contract_type = contract_type.value,
|
| 105 |
+
adjusted_weights = self.adjusted_weights,
|
| 106 |
+
)
|
|
|
|
|
|
|
|
|
|
| 107 |
|
| 108 |
+
|
| 109 |
@ContractAnalyzerLogger.log_execution_time("analyze_risk")
|
| 110 |
+
def analyze_risk(self, contract_text: str, clauses: List[ExtractedClause]) -> RiskScore:
|
|
|
|
| 111 |
"""
|
| 112 |
Comprehensive multi-factor risk analysis
|
| 113 |
|
| 114 |
+
Arguments:
|
| 115 |
+
----------
|
| 116 |
+
contract_text { str } : Full contract text
|
| 117 |
+
|
| 118 |
+
clauses { list } : Extracted clauses from ClauseExtractor
|
| 119 |
|
| 120 |
Returns:
|
| 121 |
+
--------
|
| 122 |
+
{ RiskScore } : RiskScore object with detailed analysis
|
| 123 |
"""
|
| 124 |
|
| 125 |
+
log_info("Starting risk analysis", text_length = len(contract_text), num_clauses = len(clauses), contract_type = self.contract_type.value)
|
|
|
|
|
|
|
|
|
|
| 126 |
|
| 127 |
# Initialize scoring containers
|
| 128 |
+
category_scores = defaultdict(list)
|
| 129 |
+
detailed_findings = defaultdict(list)
|
| 130 |
+
risk_factors = list()
|
| 131 |
+
|
| 132 |
+
# Keyword Severity Scoring
|
| 133 |
+
keyword_risks = self._score_keywords(contract_text)
|
| 134 |
+
log_info("Keyword analysis complete", critical_score = keyword_risks.get('critical', 0), high_score = keyword_risks.get('high', 0))
|
| 135 |
+
|
| 136 |
+
# Structural Pattern Analysis
|
| 137 |
+
pattern_risks = self._analyze_patterns(contract_text)
|
|
|
|
|
|
|
| 138 |
log_info(f"Pattern analysis found {len(pattern_risks)} risky patterns")
|
| 139 |
|
| 140 |
+
# Clause-Level Analysis
|
| 141 |
+
clause_risks = self._analyze_clauses(clauses)
|
| 142 |
log_info(f"Clause analysis complete for {len(clause_risks)} categories")
|
| 143 |
|
| 144 |
+
# Missing Protections
|
| 145 |
+
missing_risks = self._check_missing_protections(contract_text, clauses)
|
| 146 |
log_info(f"Missing protections analysis complete")
|
| 147 |
|
| 148 |
+
# Industry Benchmark Comparison
|
| 149 |
benchmark_comparison = self._compare_to_benchmarks(contract_text, clauses)
|
| 150 |
log_info(f"Benchmark comparison complete")
|
| 151 |
|
| 152 |
# Aggregate scores by category
|
| 153 |
for category in self.adjusted_weights.keys():
|
| 154 |
+
category_risk = self._calculate_category_risk(category = category,
|
| 155 |
+
keyword_risks = keyword_risks,
|
| 156 |
+
pattern_risks = pattern_risks,
|
| 157 |
+
clause_risks = clause_risks,
|
| 158 |
+
missing_risks = missing_risks,
|
| 159 |
+
benchmark_comparison = benchmark_comparison,
|
| 160 |
+
)
|
| 161 |
+
category_scores[category] = category_risk["score"]
|
|
|
|
| 162 |
detailed_findings[category] = category_risk["findings"]
|
| 163 |
|
| 164 |
+
if (category_risk["score"] >= self.rules.RISK_THRESHOLDS["high"]):
|
| 165 |
risk_factors.append(category)
|
| 166 |
|
| 167 |
# Calculate weighted overall score
|
| 168 |
+
overall_score = self._calculate_weighted_score(category_scores)
|
| 169 |
+
risk_level = self._get_risk_level(overall_score)
|
| 170 |
|
| 171 |
# Create risk breakdown items
|
| 172 |
+
risk_breakdown = self._create_risk_breakdown(category_scores, detailed_findings)
|
| 173 |
+
|
| 174 |
+
result = RiskScore(overall_score = overall_score,
|
| 175 |
+
risk_level = risk_level,
|
| 176 |
+
category_scores = dict(category_scores),
|
| 177 |
+
risk_factors = risk_factors,
|
| 178 |
+
detailed_findings = dict(detailed_findings),
|
| 179 |
+
benchmark_comparison = benchmark_comparison,
|
| 180 |
+
risk_breakdown = risk_breakdown,
|
| 181 |
+
)
|
| 182 |
+
|
| 183 |
+
log_info("Risk analysis complete", overall_score = overall_score, risk_level = risk_level, high_risk_categories = len(risk_factors))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
|
| 185 |
return result
|
| 186 |
|
|
|
|
|
|
|
|
|
|
| 187 |
|
| 188 |
def _score_keywords(self, text: str) -> Dict[str, int]:
|
| 189 |
"""
|
| 190 |
Score text based on keyword severity tiers
|
| 191 |
|
| 192 |
Returns:
|
| 193 |
+
--------
|
| 194 |
+
{ dict } : Dictionary with 'critical', 'high', 'medium' scores
|
| 195 |
"""
|
| 196 |
text_lower = text.lower()
|
| 197 |
+
scores = defaultdict(int)
|
| 198 |
|
| 199 |
# Critical keywords (Tier 1)
|
| 200 |
for keyword, weight in self.rules.CRITICAL_KEYWORDS.items():
|
| 201 |
if keyword in text_lower:
|
| 202 |
+
count = text_lower.count(keyword)
|
| 203 |
+
|
| 204 |
+
# Cap at 3 occurrences
|
| 205 |
+
scores["critical"] += weight * min(count, 3)
|
| 206 |
|
| 207 |
# High-risk keywords (Tier 2)
|
| 208 |
for keyword, weight in self.rules.HIGH_RISK_KEYWORDS.items():
|
| 209 |
if keyword in text_lower:
|
| 210 |
+
count = text_lower.count(keyword)
|
| 211 |
scores["high"] += weight * min(count, 2)
|
| 212 |
|
| 213 |
# Medium-risk keywords (Tier 3)
|
| 214 |
for keyword, weight in self.rules.MEDIUM_RISK_KEYWORDS.items():
|
| 215 |
if keyword in text_lower:
|
| 216 |
+
count = text_lower.count(keyword)
|
| 217 |
scores["medium"] += weight * min(count, 2)
|
| 218 |
|
| 219 |
return dict(scores)
|
| 220 |
|
|
|
|
|
|
|
|
|
|
| 221 |
|
| 222 |
def _analyze_patterns(self, text: str) -> List[Dict]:
|
| 223 |
"""
|
| 224 |
Detect risky structural patterns in contract
|
| 225 |
|
| 226 |
Returns:
|
| 227 |
+
--------
|
| 228 |
+
{ list } : List of detected pattern dictionaries
|
| 229 |
"""
|
| 230 |
+
findings = list()
|
| 231 |
|
| 232 |
for pattern, risk_points, description in self.rules.RISKY_PATTERNS:
|
| 233 |
matches = re.finditer(pattern, text, re.IGNORECASE)
|
| 234 |
for match in matches:
|
| 235 |
+
findings.append({"pattern" : description,
|
| 236 |
+
"risk_points" : risk_points,
|
| 237 |
+
"match" : match.group(0)[:100], # First 100 chars
|
| 238 |
+
"position" : match.start(),
|
| 239 |
+
})
|
|
|
|
| 240 |
|
| 241 |
return findings
|
| 242 |
|
|
|
|
|
|
|
|
|
|
| 243 |
|
| 244 |
def _analyze_clauses(self, clauses: List[ExtractedClause]) -> Dict[str, List[Dict]]:
|
| 245 |
"""
|
| 246 |
Deep dive into each clause with specific risk factors
|
| 247 |
|
| 248 |
Returns:
|
| 249 |
+
--------
|
| 250 |
+
{ dict } : Dictionary mapping categories to clause analysis results
|
| 251 |
"""
|
| 252 |
clause_analysis = defaultdict(list)
|
| 253 |
|
| 254 |
for clause in clauses:
|
| 255 |
# Get risk factors for this clause category
|
| 256 |
+
if (clause.category in self.rules.CLAUSE_RISK_FACTORS):
|
| 257 |
analysis = self._analyze_single_clause(clause)
|
| 258 |
clause_analysis[clause.category].append(analysis)
|
| 259 |
|
| 260 |
return dict(clause_analysis)
|
| 261 |
|
| 262 |
+
|
| 263 |
def _analyze_single_clause(self, clause: ExtractedClause) -> Dict:
|
| 264 |
"""
|
| 265 |
Analyze a single clause with detailed risk factors
|
| 266 |
|
| 267 |
Returns:
|
| 268 |
+
--------
|
| 269 |
+
{ dict } : Dictionary with risk_score and findings
|
| 270 |
"""
|
| 271 |
risk_config = self.rules.CLAUSE_RISK_FACTORS.get(clause.category, {})
|
| 272 |
+
base_risk = risk_config.get("base_risk", 50)
|
| 273 |
|
| 274 |
+
risk_score = base_risk
|
| 275 |
+
findings = list()
|
| 276 |
|
| 277 |
+
text_lower = clause.text.lower()
|
| 278 |
|
| 279 |
# Check red flags
|
| 280 |
+
if ("red_flags" in risk_config):
|
| 281 |
for flag, adjustment in risk_config["red_flags"].items():
|
| 282 |
if flag in text_lower:
|
| 283 |
risk_score += adjustment
|
| 284 |
+
severity = "increases" if (adjustment > 0) else "decreases"
|
| 285 |
+
|
| 286 |
findings.append(f"Found '{flag}' ({severity} risk by {abs(adjustment)} points)")
|
| 287 |
|
| 288 |
# Special handling for specific clause types
|
| 289 |
+
if (clause.category == "non_compete"):
|
| 290 |
duration_risk = self._analyze_noncompete_duration(clause.text)
|
| 291 |
+
risk_score += duration_risk["adjustment"]
|
| 292 |
+
|
| 293 |
findings.extend(duration_risk["findings"])
|
| 294 |
|
| 295 |
+
scope_risk = self._analyze_noncompete_scope(clause.text)
|
| 296 |
+
risk_score += scope_risk["adjustment"]
|
| 297 |
+
|
| 298 |
findings.extend(scope_risk["findings"])
|
| 299 |
|
| 300 |
+
elif (clause.category == "termination"):
|
| 301 |
notice_risk = self._analyze_notice_period(clause.text)
|
| 302 |
risk_score += notice_risk["adjustment"]
|
| 303 |
+
|
| 304 |
findings.extend(notice_risk["findings"])
|
| 305 |
|
| 306 |
+
elif (clause.category == "indemnification"):
|
| 307 |
mutual_risk = self._analyze_indemnification_mutuality(clause.text)
|
| 308 |
risk_score += mutual_risk["adjustment"]
|
| 309 |
+
|
| 310 |
findings.extend(mutual_risk["findings"])
|
| 311 |
|
| 312 |
+
elif (clause.category == "compensation"):
|
| 313 |
clarity_risk = self._analyze_compensation_clarity(clause.text)
|
| 314 |
+
risk_score += clarity_risk["adjustment"]
|
| 315 |
+
|
| 316 |
findings.extend(clarity_risk["findings"])
|
| 317 |
|
| 318 |
+
elif (clause.category == "intellectual_property"):
|
| 319 |
+
scope_risk = self._analyze_ip_scope(clause.text)
|
| 320 |
risk_score += scope_risk["adjustment"]
|
| 321 |
+
|
| 322 |
findings.extend(scope_risk["findings"])
|
| 323 |
|
| 324 |
# Cap score between 0 and 100
|
| 325 |
risk_score = max(0, min(100, risk_score))
|
| 326 |
|
| 327 |
+
return {"clause_reference" : clause.reference,
|
| 328 |
+
"risk_score" : risk_score,
|
| 329 |
+
"findings" : findings,
|
| 330 |
+
"confidence" : clause.confidence,
|
| 331 |
+
}
|
|
|
|
| 332 |
|
| 333 |
+
|
| 334 |
def _analyze_noncompete_duration(self, text: str) -> Dict:
|
| 335 |
+
"""
|
| 336 |
+
Analyze non-compete duration reasonableness
|
| 337 |
+
"""
|
| 338 |
duration_pattern = r'(\d+)\s*(year|yr|month|mo)s?'
|
| 339 |
+
matches = re.findall(duration_pattern, text, re.IGNORECASE)
|
| 340 |
|
| 341 |
if not matches:
|
| 342 |
+
return {"adjustment" : 0,
|
| 343 |
+
"findings" : ["No specific duration found"],
|
| 344 |
+
}
|
| 345 |
|
| 346 |
# Convert to months
|
| 347 |
duration_months = 0
|
| 348 |
+
|
| 349 |
for num, unit in matches:
|
| 350 |
+
months = int(num) * (12 if 'year' in unit.lower() or 'yr' in unit.lower() else 1)
|
| 351 |
duration_months = max(duration_months, months)
|
| 352 |
|
| 353 |
# Get benchmark
|
| 354 |
+
industry = self._detect_industry()
|
| 355 |
benchmark = self.rules.INDUSTRY_BENCHMARKS["non_compete_duration"][industry]
|
| 356 |
|
| 357 |
+
if (duration_months <= benchmark["reasonable"]):
|
| 358 |
return {"adjustment": -10, "findings": [f"Duration of {duration_months} months is reasonable"]}
|
| 359 |
+
|
| 360 |
+
elif (duration_months <= benchmark["standard"]):
|
| 361 |
return {"adjustment": 0, "findings": [f"Duration of {duration_months} months is standard"]}
|
| 362 |
+
|
| 363 |
+
elif (duration_months <= benchmark["excessive"]):
|
| 364 |
return {"adjustment": +15, "findings": [f"Duration of {duration_months} months is lengthy"]}
|
| 365 |
+
|
| 366 |
else:
|
| 367 |
return {"adjustment": +30, "findings": [f"Duration of {duration_months} months is excessive"]}
|
| 368 |
|
| 369 |
+
|
| 370 |
def _analyze_noncompete_scope(self, text: str) -> Dict:
|
| 371 |
+
"""
|
| 372 |
+
Analyze non-compete scope reasonableness
|
| 373 |
+
"""
|
| 374 |
+
text_lower = text.lower()
|
| 375 |
+
adjustment = 0
|
| 376 |
+
findings = list()
|
| 377 |
|
| 378 |
scope_config = self.rules.CLAUSE_RISK_FACTORS["non-compete"]["scope_keywords"]
|
| 379 |
|
| 380 |
for keyword, adj in scope_config.items():
|
| 381 |
if keyword in text_lower:
|
| 382 |
adjustment += adj
|
| 383 |
+
severity = "reasonable" if adj < 0 else "concerning"
|
| 384 |
+
|
| 385 |
findings.append(f"Scope includes '{keyword}' ({severity})")
|
| 386 |
|
| 387 |
return {"adjustment": adjustment, "findings": findings}
|
| 388 |
|
| 389 |
+
|
| 390 |
def _analyze_notice_period(self, text: str) -> Dict:
|
| 391 |
+
"""
|
| 392 |
+
Analyze termination notice period balance
|
| 393 |
+
"""
|
| 394 |
notice_pattern = r'(\d+)\s*days?\s*(?:notice|prior\s+notice)'
|
| 395 |
+
matches = re.findall(notice_pattern, text, re.IGNORECASE)
|
| 396 |
|
| 397 |
+
if (len(matches) < 2):
|
| 398 |
+
return {"adjustment" : 0,
|
| 399 |
+
"findings" : ["Notice period analysis inconclusive"],
|
| 400 |
+
}
|
| 401 |
|
| 402 |
periods = [int(m) for m in matches]
|
| 403 |
|
| 404 |
+
if (len(periods) >= 2):
|
| 405 |
ratio = max(periods) / min(periods)
|
| 406 |
|
| 407 |
+
if (ratio >= 4):
|
| 408 |
+
return {"adjustment" : +25, "findings": [f"Notice periods highly imbalanced ({max(periods)} vs {min(periods)} days)"]}
|
| 409 |
+
|
| 410 |
+
elif (ratio >= 3):
|
| 411 |
+
return {"adjustment" : +18,
|
| 412 |
+
"findings" : [f"Notice periods significantly imbalanced ({max(periods)} vs {min(periods)} days)"],
|
| 413 |
+
}
|
| 414 |
+
|
| 415 |
+
elif (ratio >= 2):
|
| 416 |
+
return {"adjustment" : +10,
|
| 417 |
+
"findings" : [f"Notice periods moderately imbalanced ({max(periods)} vs {min(periods)} days)"],
|
| 418 |
+
}
|
| 419 |
+
|
| 420 |
else:
|
| 421 |
+
return {"adjustment" : -5,
|
| 422 |
+
"findings" : [f"Notice periods balanced ({max(periods)} vs {min(periods)} days)"],
|
| 423 |
+
}
|
| 424 |
|
| 425 |
+
return {"adjustment" : 0,
|
| 426 |
+
"findings" : ["Could not determine notice period balance"],
|
| 427 |
+
}
|
| 428 |
|
| 429 |
+
|
| 430 |
def _analyze_indemnification_mutuality(self, text: str) -> Dict:
|
| 431 |
+
"""
|
| 432 |
+
Check if indemnification is mutual or one-sided
|
| 433 |
+
"""
|
| 434 |
+
text_lower = text.lower()
|
| 435 |
|
| 436 |
+
mutual_indicators = ["mutual", "both parties", "each party", "reciprocal"]
|
| 437 |
+
one_sided_indicators = ["employee shall indemnify", "consultant shall indemnify", "contractor shall indemnify", "you shall indemnify"]
|
|
|
|
| 438 |
|
| 439 |
+
has_mutual = any(ind in text_lower for ind in mutual_indicators)
|
| 440 |
+
has_one_sided = any(ind in text_lower for ind in one_sided_indicators)
|
| 441 |
|
| 442 |
+
if (has_mutual and not has_one_sided):
|
| 443 |
+
return {"adjustment" : -15,
|
| 444 |
+
"findings" : ["Mutual indemnification (balanced)"],
|
| 445 |
+
}
|
| 446 |
+
|
| 447 |
elif has_one_sided:
|
| 448 |
+
return {"adjustment" : +20,
|
| 449 |
+
"findings" : ["One-sided indemnification (unfavorable)"],
|
| 450 |
+
}
|
| 451 |
+
|
| 452 |
else:
|
| 453 |
+
return {"adjustment" : 0,
|
| 454 |
+
"findings" : ["Indemnification mutuality unclear"],
|
| 455 |
+
}
|
| 456 |
+
|
| 457 |
|
| 458 |
def _analyze_compensation_clarity(self, text: str) -> Dict:
|
| 459 |
+
"""
|
| 460 |
+
Analyze clarity of compensation terms
|
| 461 |
+
"""
|
| 462 |
+
text_lower = text.lower()
|
| 463 |
+
adjustment = 0
|
| 464 |
+
findings = list()
|
| 465 |
|
| 466 |
# Check for vague terms
|
| 467 |
vague_terms = ["to be determined", "tbd", "subject to review", "discretionary"]
|
| 468 |
+
|
| 469 |
for term in vague_terms:
|
| 470 |
if term in text_lower:
|
| 471 |
adjustment += 10
|
|
|
|
| 476 |
adjustment -= 10
|
| 477 |
findings.append("Specific monetary amount provided (good)")
|
| 478 |
|
| 479 |
+
return {"adjustment" : adjustment,
|
| 480 |
+
"findings" : findings,
|
| 481 |
+
}
|
| 482 |
+
|
| 483 |
|
| 484 |
def _analyze_ip_scope(self, text: str) -> Dict:
|
| 485 |
+
"""
|
| 486 |
+
Analyze IP assignment scope
|
| 487 |
+
"""
|
| 488 |
+
text_lower = text.lower()
|
| 489 |
+
adjustment = 0
|
| 490 |
+
findings = list()
|
| 491 |
|
| 492 |
# Overly broad indicators
|
| 493 |
broad_terms = ["all work product", "anything created", "whether or not related"]
|
| 494 |
+
|
| 495 |
for term in broad_terms:
|
| 496 |
if term in text_lower:
|
| 497 |
adjustment += 15
|
|
|
|
| 500 |
# Protective terms
|
| 501 |
protective_terms = ["prior ip excluded", "personal projects excluded"]
|
| 502 |
for term in protective_terms:
|
| 503 |
+
|
| 504 |
+
if (term in text_lower):
|
| 505 |
adjustment -= 15
|
| 506 |
findings.append(f"Protective IP term present: '{term}'")
|
| 507 |
|
| 508 |
+
return {"adjustment" : adjustment,
|
| 509 |
+
"findings" : findings,
|
| 510 |
+
}
|
| 511 |
|
|
|
|
|
|
|
|
|
|
| 512 |
|
| 513 |
+
def _check_missing_protections(self, text: str, clauses: List[ExtractedClause]) -> Dict[str, int]:
|
|
|
|
| 514 |
"""
|
| 515 |
Check for missing critical protections
|
| 516 |
|
| 517 |
+
Arguments:
|
| 518 |
+
----------
|
| 519 |
+
text { str } :
|
| 520 |
+
|
| 521 |
+
clauses { list } :
|
| 522 |
+
|
| 523 |
Returns:
|
| 524 |
+
--------
|
| 525 |
+
{ dict } : Dictionary mapping categories to risk scores for missing items
|
| 526 |
"""
|
| 527 |
+
text_lower = text.lower()
|
| 528 |
missing_risks = defaultdict(int)
|
| 529 |
|
| 530 |
for protection_id, config in self.rules.PROTECTION_CHECKLIST.items():
|
|
|
|
| 537 |
|
| 538 |
return dict(missing_risks)
|
| 539 |
|
| 540 |
+
|
| 541 |
+
def _check_protection_present(self, protection_id: str, text_lower: str, clauses: List[ExtractedClause]) -> bool:
|
| 542 |
+
"""
|
| 543 |
+
Check if a specific protection is present
|
| 544 |
+
"""
|
| 545 |
protection_indicators = {
|
| 546 |
"for_cause_definition": ["for cause", "cause defined", "grounds for termination"],
|
| 547 |
"severance_provision": ["severance", "severance pay", "separation pay"],
|
services/summary_generator.py
ADDED
|
@@ -0,0 +1,570 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# services/summary_generator.py
|
| 2 |
+
|
| 3 |
+
import logging
|
| 4 |
+
from typing import Dict, List, Optional
|
| 5 |
+
from dataclasses import dataclass
|
| 6 |
+
|
| 7 |
+
from utils.logger import ContractAnalyzerLogger
|
| 8 |
+
from model_manager.llm_manager import LLMManager, LLMProvider
|
| 9 |
+
|
| 10 |
+
logger = ContractAnalyzerLogger.get_logger()
|
| 11 |
+
|
| 12 |
+
@dataclass
class SummaryContext:
    """Context data for summary generation"""
    contract_type: str                 # classifier category, e.g. "employment_agreement"
    risk_score: int                    # overall 0-100 risk score
    risk_level: str                    # e.g. "low" / "medium" / "high" / "critical"
    category_scores: Dict[str, int]    # per-category 0-100 risk scores
    unfavorable_terms: List[Dict]      # detected unfavorable terms (dicts or objects)
    missing_protections: List[Dict]    # absent protections (dicts or objects)
    clauses: List                      # extracted clauses (objects or dicts)
    key_findings: List[str]            # pre-digested headline findings
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
class SummaryGenerator:
|
| 26 |
+
"""
|
| 27 |
+
LLM-powered executive summary generator for contract analysis
|
| 28 |
+
Generates professional, detailed executive summaries like legal professionals
|
| 29 |
+
"""
|
| 30 |
+
|
| 31 |
+
def __init__(self, llm_manager: Optional[LLMManager] = None):
|
| 32 |
+
"""
|
| 33 |
+
Initialize the summary generator
|
| 34 |
+
|
| 35 |
+
Args:
|
| 36 |
+
llm_manager: LLM manager instance (if None, creates one with default settings)
|
| 37 |
+
"""
|
| 38 |
+
self.llm_manager = llm_manager or LLMManager()
|
| 39 |
+
self.logger = ContractAnalyzerLogger.get_logger()
|
| 40 |
+
|
| 41 |
+
# Use proper logging syntax without keyword arguments
|
| 42 |
+
logger.info("Summary generator initialized")
|
| 43 |
+
|
| 44 |
+
def generate_executive_summary(self,
|
| 45 |
+
classification: Dict,
|
| 46 |
+
risk_analysis: Dict,
|
| 47 |
+
unfavorable_terms: List[Dict],
|
| 48 |
+
missing_protections: List[Dict],
|
| 49 |
+
clauses: List) -> str:
|
| 50 |
+
"""
|
| 51 |
+
Generate a comprehensive executive summary using LLM
|
| 52 |
+
|
| 53 |
+
Args:
|
| 54 |
+
classification: Contract classification data
|
| 55 |
+
risk_analysis: Risk analysis results
|
| 56 |
+
unfavorable_terms: List of unfavorable terms
|
| 57 |
+
missing_protections: List of missing protections
|
| 58 |
+
clauses: List of analyzed clauses (ExtractedClause objects)
|
| 59 |
+
|
| 60 |
+
Returns:
|
| 61 |
+
Generated executive summary string
|
| 62 |
+
"""
|
| 63 |
+
try:
|
| 64 |
+
# Prepare context for the LLM
|
| 65 |
+
context = self._prepare_summary_context(
|
| 66 |
+
classification, risk_analysis, unfavorable_terms,
|
| 67 |
+
missing_protections, clauses
|
| 68 |
+
)
|
| 69 |
+
|
| 70 |
+
# Generate summary using LLM
|
| 71 |
+
summary = self._generate_with_llm(context)
|
| 72 |
+
|
| 73 |
+
# Use proper logging syntax
|
| 74 |
+
logger.info(f"Executive summary generated successfully - Risk score: {context.risk_score}, Risk level: {context.risk_level}")
|
| 75 |
+
|
| 76 |
+
return summary
|
| 77 |
+
|
| 78 |
+
except Exception as e:
|
| 79 |
+
logger.error(f"Failed to generate executive summary: {e}")
|
| 80 |
+
|
| 81 |
+
# Create fallback context if preparation failed
|
| 82 |
+
fallback_context = SummaryContext(
|
| 83 |
+
contract_type=classification.get("category", "contract"),
|
| 84 |
+
risk_score=risk_analysis.get("overall_score", 0),
|
| 85 |
+
risk_level=risk_analysis.get("risk_level", "unknown"),
|
| 86 |
+
category_scores=risk_analysis.get("category_scores", {}),
|
| 87 |
+
unfavorable_terms=unfavorable_terms,
|
| 88 |
+
missing_protections=missing_protections,
|
| 89 |
+
clauses=clauses,
|
| 90 |
+
key_findings=[]
|
| 91 |
+
)
|
| 92 |
+
|
| 93 |
+
# Fallback to simple summary
|
| 94 |
+
return self._generate_fallback_summary(fallback_context)
|
| 95 |
+
|
| 96 |
+
def _prepare_summary_context(self,
|
| 97 |
+
classification: Dict,
|
| 98 |
+
risk_analysis: Dict,
|
| 99 |
+
unfavorable_terms: List[Dict],
|
| 100 |
+
missing_protections: List[Dict],
|
| 101 |
+
clauses: List) -> SummaryContext:
|
| 102 |
+
"""Prepare structured context for summary generation"""
|
| 103 |
+
|
| 104 |
+
contract_type = classification.get("category", "contract")
|
| 105 |
+
risk_score = risk_analysis.get("overall_score", 0)
|
| 106 |
+
risk_level = risk_analysis.get("risk_level", "unknown")
|
| 107 |
+
category_scores = risk_analysis.get("category_scores", {})
|
| 108 |
+
|
| 109 |
+
# Extract key findings
|
| 110 |
+
key_findings = self._extract_key_findings(
|
| 111 |
+
unfavorable_terms, missing_protections, clauses, risk_score
|
| 112 |
+
)
|
| 113 |
+
|
| 114 |
+
return SummaryContext(
|
| 115 |
+
contract_type=contract_type,
|
| 116 |
+
risk_score=risk_score,
|
| 117 |
+
risk_level=risk_level,
|
| 118 |
+
category_scores=category_scores,
|
| 119 |
+
unfavorable_terms=unfavorable_terms,
|
| 120 |
+
missing_protections=missing_protections,
|
| 121 |
+
clauses=clauses,
|
| 122 |
+
key_findings=key_findings
|
| 123 |
+
)
|
| 124 |
+
|
| 125 |
+
def _extract_key_findings(self,
|
| 126 |
+
unfavorable_terms: List[Dict],
|
| 127 |
+
missing_protections: List[Dict],
|
| 128 |
+
clauses: List,
|
| 129 |
+
risk_score: int) -> List[str]:
|
| 130 |
+
"""Extract the most important findings for the summary"""
|
| 131 |
+
|
| 132 |
+
findings = []
|
| 133 |
+
|
| 134 |
+
# High-risk clauses - handle both dict and object clauses
|
| 135 |
+
high_risk_clauses = []
|
| 136 |
+
for clause in clauses:
|
| 137 |
+
try:
|
| 138 |
+
# Try to access as object first, then as dict
|
| 139 |
+
if hasattr(clause, 'confidence'):
|
| 140 |
+
confidence = clause.confidence
|
| 141 |
+
risk_level = getattr(clause, 'risk_level', None)
|
| 142 |
+
category = getattr(clause, 'category', 'clause')
|
| 143 |
+
text = getattr(clause, 'text', '')
|
| 144 |
+
else:
|
| 145 |
+
# Fallback to dict access
|
| 146 |
+
confidence = clause.get('confidence', 0)
|
| 147 |
+
risk_level = clause.get('risk_level')
|
| 148 |
+
category = clause.get('category', 'clause')
|
| 149 |
+
text = clause.get('text', '')
|
| 150 |
+
|
| 151 |
+
if confidence > 0.7 and risk_level in ['high', 'critical']:
|
| 152 |
+
high_risk_clauses.append({
|
| 153 |
+
'category': category,
|
| 154 |
+
'text': text,
|
| 155 |
+
'confidence': confidence,
|
| 156 |
+
'risk_level': risk_level
|
| 157 |
+
})
|
| 158 |
+
except (AttributeError, KeyError, TypeError):
|
| 159 |
+
# Skip clauses that can't be processed
|
| 160 |
+
continue
|
| 161 |
+
|
| 162 |
+
for clause in high_risk_clauses[:3]: # Top 3 high-risk clauses
|
| 163 |
+
clause_text = clause['text'][:100] + '...' if len(clause['text']) > 100 else clause['text']
|
| 164 |
+
findings.append(f"High-risk {clause['category']}: {clause_text}")
|
| 165 |
+
|
| 166 |
+
# Critical unfavorable terms
|
| 167 |
+
critical_terms = []
|
| 168 |
+
for term in unfavorable_terms:
|
| 169 |
+
try:
|
| 170 |
+
if hasattr(term, 'severity'):
|
| 171 |
+
severity = term.severity
|
| 172 |
+
term_name = getattr(term, 'term', 'Unknown')
|
| 173 |
+
explanation = getattr(term, 'explanation', '')
|
| 174 |
+
else:
|
| 175 |
+
severity = term.get('severity')
|
| 176 |
+
term_name = term.get('term', 'Unknown')
|
| 177 |
+
explanation = term.get('explanation', '')
|
| 178 |
+
|
| 179 |
+
if severity == 'critical':
|
| 180 |
+
critical_terms.append({
|
| 181 |
+
'term': term_name,
|
| 182 |
+
'explanation': explanation
|
| 183 |
+
})
|
| 184 |
+
except (AttributeError, KeyError, TypeError):
|
| 185 |
+
continue
|
| 186 |
+
|
| 187 |
+
for term in critical_terms[:2]:
|
| 188 |
+
findings.append(f"Critical term: {term['term']} - {term['explanation']}")
|
| 189 |
+
|
| 190 |
+
# Important missing protections
|
| 191 |
+
critical_protections = []
|
| 192 |
+
for prot in missing_protections:
|
| 193 |
+
try:
|
| 194 |
+
if hasattr(prot, 'importance'):
|
| 195 |
+
importance = prot.importance
|
| 196 |
+
protection_name = getattr(prot, 'protection', 'Unknown')
|
| 197 |
+
explanation = getattr(prot, 'explanation', '')
|
| 198 |
+
else:
|
| 199 |
+
importance = prot.get('importance')
|
| 200 |
+
protection_name = prot.get('protection', 'Unknown')
|
| 201 |
+
explanation = prot.get('explanation', '')
|
| 202 |
+
|
| 203 |
+
if importance == 'critical':
|
| 204 |
+
critical_protections.append({
|
| 205 |
+
'protection': protection_name,
|
| 206 |
+
'explanation': explanation
|
| 207 |
+
})
|
| 208 |
+
except (AttributeError, KeyError, TypeError):
|
| 209 |
+
continue
|
| 210 |
+
|
| 211 |
+
for prot in critical_protections[:2]:
|
| 212 |
+
findings.append(f"Missing protection: {prot['protection']}")
|
| 213 |
+
|
| 214 |
+
# Overall risk context
|
| 215 |
+
if risk_score >= 80:
|
| 216 |
+
findings.append("Contract presents critical level of risk requiring immediate attention")
|
| 217 |
+
elif risk_score >= 60:
|
| 218 |
+
findings.append("Significant concerns identified requiring careful review")
|
| 219 |
+
|
| 220 |
+
return findings
|
| 221 |
+
|
| 222 |
+
def _generate_with_llm(self, context: SummaryContext) -> str:
|
| 223 |
+
"""Generate summary using LLM"""
|
| 224 |
+
|
| 225 |
+
prompt = self._build_summary_prompt(context)
|
| 226 |
+
system_prompt = self._build_system_prompt()
|
| 227 |
+
|
| 228 |
+
try:
|
| 229 |
+
response = self.llm_manager.complete(
|
| 230 |
+
prompt=prompt,
|
| 231 |
+
system_prompt=system_prompt,
|
| 232 |
+
temperature=0.3, # Lower temperature for more consistent, professional output
|
| 233 |
+
max_tokens=800, # Limit summary length
|
| 234 |
+
json_mode=False
|
| 235 |
+
)
|
| 236 |
+
|
| 237 |
+
if response.success and response.text.strip():
|
| 238 |
+
return self._clean_summary_response(response.text)
|
| 239 |
+
else:
|
| 240 |
+
raise ValueError(f"LLM generation failed: {response.error_message}")
|
| 241 |
+
|
| 242 |
+
except Exception as e:
|
| 243 |
+
logger.error(f"LLM summary generation failed: {e}")
|
| 244 |
+
raise
|
| 245 |
+
|
| 246 |
+
    def _build_system_prompt(self) -> str:
        """Build system prompt for professional summary generation.

        Returns the static instruction block sent as the LLM system
        message; the per-contract analysis data goes in the user prompt.
        """

        return """You are a senior legal analyst specializing in contract risk assessment. Your task is to generate concise, professional executive summaries that:

KEY REQUIREMENTS:
1. Write in formal, professional business language
2. Focus on the most critical risks and implications
3. Be specific about contractual provisions and their impact
4. Maintain objective, factual tone
5. Keep summary length between 100-200 words
6. Structure: Start with overall risk assessment, then key findings, then implications

WRITING STYLE:
- Use precise legal/business terminology
- Avoid markdown formatting
- Be direct and actionable
- Highlight asymmetrical terms and missing protections
- Focus on practical consequences for the signing party

OUTPUT FORMAT:
Return only the executive summary text, no headings, no bullet points, just clean paragraph text."""
|
| 268 |
+
|
| 269 |
+
def _build_summary_prompt(self, context: SummaryContext) -> str:
|
| 270 |
+
"""Build detailed prompt for summary generation"""
|
| 271 |
+
|
| 272 |
+
# Build risk context
|
| 273 |
+
risk_context = self._build_risk_context(context)
|
| 274 |
+
|
| 275 |
+
# Build key provisions section
|
| 276 |
+
key_provisions = self._build_key_provisions_context(context)
|
| 277 |
+
|
| 278 |
+
# Build missing protections section
|
| 279 |
+
missing_protections_text = self._build_missing_protections_context(context)
|
| 280 |
+
|
| 281 |
+
prompt = f"""
|
| 282 |
+
CONTRACT ANALYSIS DATA:
|
| 283 |
+
|
| 284 |
+
{risk_context}
|
| 285 |
+
|
| 286 |
+
{key_provisions}
|
| 287 |
+
|
| 288 |
+
{missing_protections_text}
|
| 289 |
+
|
| 290 |
+
GENERATION INSTRUCTIONS:
|
| 291 |
+
Based on the analysis above, write a professional executive summary that:
|
| 292 |
+
1. Starts with the overall risk assessment for the {context.contract_type}
|
| 293 |
+
2. Highlights the 2-3 most critical issues
|
| 294 |
+
3. Explains the practical implications for the signing party
|
| 295 |
+
4. Mentions any severely imbalanced or punitive clauses
|
| 296 |
+
5. Notes significant missing protections
|
| 297 |
+
|
| 298 |
+
Focus on clarity, specificity, and actionable insights.
|
| 299 |
+
"""
|
| 300 |
+
return prompt
|
| 301 |
+
|
| 302 |
+
def _build_risk_context(self, context: SummaryContext) -> str:
|
| 303 |
+
"""Build risk assessment context"""
|
| 304 |
+
|
| 305 |
+
risk_level_descriptions = {
|
| 306 |
+
"critical": "CRITICAL level of risk requiring immediate attention",
|
| 307 |
+
"high": "HIGH level of risk requiring significant review",
|
| 308 |
+
"medium": "MODERATE level of risk with some concerns",
|
| 309 |
+
"low": "LOW level of risk, generally favorable"
|
| 310 |
+
}
|
| 311 |
+
|
| 312 |
+
risk_desc = risk_level_descriptions.get(context.risk_level.lower(), "UNKNOWN level of risk")
|
| 313 |
+
|
| 314 |
+
text = f"RISK ASSESSMENT:\n"
|
| 315 |
+
text += f"- Overall Score: {context.risk_score}/100 ({risk_desc})\n"
|
| 316 |
+
text += f"- Contract Type: {context.contract_type.replace('_', ' ').title()}\n"
|
| 317 |
+
|
| 318 |
+
# Add category scores
|
| 319 |
+
if context.category_scores:
|
| 320 |
+
text += "- Risk by Category:\n"
|
| 321 |
+
for category, score in context.category_scores.items():
|
| 322 |
+
category_name = category.replace('_', ' ').title()
|
| 323 |
+
text += f" * {category_name}: {score}/100\n"
|
| 324 |
+
|
| 325 |
+
return text
|
| 326 |
+
|
| 327 |
+
def _build_key_provisions_context(self, context: SummaryContext) -> str:
|
| 328 |
+
"""Build context about key provisions and unfavorable terms"""
|
| 329 |
+
|
| 330 |
+
text = "KEY PROVISIONS & UNFAVORABLE TERMS:\n"
|
| 331 |
+
|
| 332 |
+
# Critical terms first
|
| 333 |
+
critical_terms = []
|
| 334 |
+
for term in context.unfavorable_terms:
|
| 335 |
+
try:
|
| 336 |
+
if hasattr(term, 'severity'):
|
| 337 |
+
severity = term.severity
|
| 338 |
+
else:
|
| 339 |
+
severity = term.get('severity')
|
| 340 |
+
|
| 341 |
+
if severity == 'critical':
|
| 342 |
+
critical_terms.append(term)
|
| 343 |
+
except (AttributeError, KeyError):
|
| 344 |
+
continue
|
| 345 |
+
|
| 346 |
+
high_terms = []
|
| 347 |
+
for term in context.unfavorable_terms:
|
| 348 |
+
try:
|
| 349 |
+
if hasattr(term, 'severity'):
|
| 350 |
+
severity = term.severity
|
| 351 |
+
else:
|
| 352 |
+
severity = term.get('severity')
|
| 353 |
+
|
| 354 |
+
if severity == 'high':
|
| 355 |
+
high_terms.append(term)
|
| 356 |
+
except (AttributeError, KeyError):
|
| 357 |
+
continue
|
| 358 |
+
|
| 359 |
+
if critical_terms:
|
| 360 |
+
text += f"- Critical Issues Found: {len(critical_terms)}\n"
|
| 361 |
+
for term in critical_terms[:3]:
|
| 362 |
+
try:
|
| 363 |
+
if hasattr(term, 'term'):
|
| 364 |
+
term_name = term.term
|
| 365 |
+
explanation = getattr(term, 'explanation', '')
|
| 366 |
+
else:
|
| 367 |
+
term_name = term.get('term', 'Unknown')
|
| 368 |
+
explanation = term.get('explanation', '')
|
| 369 |
+
text += f" * {term_name}: {explanation}\n"
|
| 370 |
+
except (AttributeError, KeyError):
|
| 371 |
+
continue
|
| 372 |
+
|
| 373 |
+
if high_terms:
|
| 374 |
+
text += f"- Significant Concerns: {len(high_terms)}\n"
|
| 375 |
+
for term in high_terms[:2]:
|
| 376 |
+
try:
|
| 377 |
+
if hasattr(term, 'term'):
|
| 378 |
+
term_name = term.term
|
| 379 |
+
explanation = getattr(term, 'explanation', '')
|
| 380 |
+
else:
|
| 381 |
+
term_name = term.get('term', 'Unknown')
|
| 382 |
+
explanation = term.get('explanation', '')
|
| 383 |
+
text += f" * {term_name}: {explanation}\n"
|
| 384 |
+
except (AttributeError, KeyError):
|
| 385 |
+
continue
|
| 386 |
+
|
| 387 |
+
# High-risk clauses
|
| 388 |
+
high_risk_clauses = []
|
| 389 |
+
for clause in context.clauses:
|
| 390 |
+
try:
|
| 391 |
+
if hasattr(clause, 'confidence'):
|
| 392 |
+
confidence = clause.confidence
|
| 393 |
+
risk_level = getattr(clause, 'risk_level', None)
|
| 394 |
+
else:
|
| 395 |
+
confidence = clause.get('confidence', 0)
|
| 396 |
+
risk_level = clause.get('risk_level')
|
| 397 |
+
|
| 398 |
+
if confidence > 0.7 and risk_level in ['high', 'critical']:
|
| 399 |
+
high_risk_clauses.append(clause)
|
| 400 |
+
except (AttributeError, KeyError, TypeError):
|
| 401 |
+
continue
|
| 402 |
+
|
| 403 |
+
if high_risk_clauses:
|
| 404 |
+
text += f"- High-Risk Clauses Identified: {len(high_risk_clauses)}\n"
|
| 405 |
+
for clause in high_risk_clauses[:2]:
|
| 406 |
+
try:
|
| 407 |
+
if hasattr(clause, 'category'):
|
| 408 |
+
category = clause.category
|
| 409 |
+
clause_text = getattr(clause, 'text', '')
|
| 410 |
+
else:
|
| 411 |
+
category = clause.get('category', 'Unknown')
|
| 412 |
+
clause_text = clause.get('text', '')
|
| 413 |
+
|
| 414 |
+
display_text = clause_text[:80] + '...' if len(clause_text) > 80 else clause_text
|
| 415 |
+
text += f" * {category}: {display_text}\n"
|
| 416 |
+
except (AttributeError, KeyError):
|
| 417 |
+
continue
|
| 418 |
+
|
| 419 |
+
return text
|
| 420 |
+
|
| 421 |
+
def _build_missing_protections_context(self, context: SummaryContext) -> str:
|
| 422 |
+
"""Build context about missing protections"""
|
| 423 |
+
|
| 424 |
+
text = "MISSING PROTECTIONS:\n"
|
| 425 |
+
|
| 426 |
+
critical_protections = []
|
| 427 |
+
for prot in context.missing_protections:
|
| 428 |
+
try:
|
| 429 |
+
if hasattr(prot, 'importance'):
|
| 430 |
+
importance = prot.importance
|
| 431 |
+
else:
|
| 432 |
+
importance = prot.get('importance')
|
| 433 |
+
|
| 434 |
+
if importance == 'critical':
|
| 435 |
+
critical_protections.append(prot)
|
| 436 |
+
except (AttributeError, KeyError):
|
| 437 |
+
continue
|
| 438 |
+
|
| 439 |
+
important_protections = []
|
| 440 |
+
for prot in context.missing_protections:
|
| 441 |
+
try:
|
| 442 |
+
if hasattr(prot, 'importance'):
|
| 443 |
+
importance = prot.importance
|
| 444 |
+
else:
|
| 445 |
+
importance = prot.get('importance')
|
| 446 |
+
|
| 447 |
+
if importance == 'high':
|
| 448 |
+
important_protections.append(prot)
|
| 449 |
+
except (AttributeError, KeyError):
|
| 450 |
+
continue
|
| 451 |
+
|
| 452 |
+
if critical_protections:
|
| 453 |
+
text += f"- Critical Protections Missing: {len(critical_protections)}\n"
|
| 454 |
+
for prot in critical_protections[:3]:
|
| 455 |
+
try:
|
| 456 |
+
if hasattr(prot, 'protection'):
|
| 457 |
+
protection_name = prot.protection
|
| 458 |
+
explanation = getattr(prot, 'explanation', '')
|
| 459 |
+
else:
|
| 460 |
+
protection_name = prot.get('protection', 'Unknown')
|
| 461 |
+
explanation = prot.get('explanation', '')
|
| 462 |
+
text += f" * {protection_name}: {explanation}\n"
|
| 463 |
+
except (AttributeError, KeyError):
|
| 464 |
+
continue
|
| 465 |
+
|
| 466 |
+
if important_protections:
|
| 467 |
+
text += f"- Important Protections Missing: {len(important_protections)}\n"
|
| 468 |
+
for prot in important_protections[:2]:
|
| 469 |
+
try:
|
| 470 |
+
if hasattr(prot, 'protection'):
|
| 471 |
+
protection_name = prot.protection
|
| 472 |
+
explanation = getattr(prot, 'explanation', '')
|
| 473 |
+
else:
|
| 474 |
+
protection_name = prot.get('protection', 'Unknown')
|
| 475 |
+
explanation = prot.get('explanation', '')
|
| 476 |
+
text += f" * {protection_name}: {explanation}\n"
|
| 477 |
+
except (AttributeError, KeyError):
|
| 478 |
+
continue
|
| 479 |
+
|
| 480 |
+
if not critical_protections and not important_protections:
|
| 481 |
+
text += "- No critical protections missing\n"
|
| 482 |
+
|
| 483 |
+
return text
|
| 484 |
+
|
| 485 |
+
def _clean_summary_response(self, text: str) -> str:
|
| 486 |
+
"""Clean and format the LLM response"""
|
| 487 |
+
|
| 488 |
+
# Remove any markdown formatting
|
| 489 |
+
text = text.replace('**', '').replace('*', '').replace('#', '')
|
| 490 |
+
|
| 491 |
+
# Remove common LLM artifacts
|
| 492 |
+
lines = text.split('\n')
|
| 493 |
+
cleaned_lines = []
|
| 494 |
+
|
| 495 |
+
for line in lines:
|
| 496 |
+
line = line.strip()
|
| 497 |
+
if line and not line.lower().startswith(('executive summary', 'summary:', 'here is', 'based on')):
|
| 498 |
+
cleaned_lines.append(line)
|
| 499 |
+
|
| 500 |
+
# Join into coherent paragraph
|
| 501 |
+
summary = ' '.join(cleaned_lines)
|
| 502 |
+
|
| 503 |
+
# Ensure proper sentence structure
|
| 504 |
+
if summary and not summary[0].isupper():
|
| 505 |
+
summary = summary[0].upper() + summary[1:]
|
| 506 |
+
|
| 507 |
+
if summary and not summary.endswith(('.', '!', '?')):
|
| 508 |
+
summary += '.'
|
| 509 |
+
|
| 510 |
+
return summary
|
| 511 |
+
|
| 512 |
+
def _generate_fallback_summary(self, context: SummaryContext) -> str:
|
| 513 |
+
"""Generate a fallback summary when LLM is not available"""
|
| 514 |
+
|
| 515 |
+
contract_type_display = context.contract_type.replace('_', ' ').title()
|
| 516 |
+
|
| 517 |
+
# Count critical items
|
| 518 |
+
critical_terms = 0
|
| 519 |
+
for term in context.unfavorable_terms:
|
| 520 |
+
try:
|
| 521 |
+
if hasattr(term, 'severity'):
|
| 522 |
+
if term.severity == 'critical':
|
| 523 |
+
critical_terms += 1
|
| 524 |
+
else:
|
| 525 |
+
if term.get('severity') == 'critical':
|
| 526 |
+
critical_terms += 1
|
| 527 |
+
except (AttributeError, KeyError):
|
| 528 |
+
continue
|
| 529 |
+
|
| 530 |
+
critical_protections = 0
|
| 531 |
+
for prot in context.missing_protections:
|
| 532 |
+
try:
|
| 533 |
+
if hasattr(prot, 'importance'):
|
| 534 |
+
if prot.importance == 'critical':
|
| 535 |
+
critical_protections += 1
|
| 536 |
+
else:
|
| 537 |
+
if prot.get('importance') == 'critical':
|
| 538 |
+
critical_protections += 1
|
| 539 |
+
except (AttributeError, KeyError):
|
| 540 |
+
continue
|
| 541 |
+
|
| 542 |
+
if context.risk_score >= 80:
|
| 543 |
+
risk_assessment = f"This {contract_type_display} presents a CRITICAL level of risk"
|
| 544 |
+
action = "requires immediate attention and significant revision"
|
| 545 |
+
elif context.risk_score >= 60:
|
| 546 |
+
risk_assessment = f"This {contract_type_display} presents a HIGH level of risk"
|
| 547 |
+
action = "requires careful review and substantial negotiation"
|
| 548 |
+
elif context.risk_score >= 40:
|
| 549 |
+
risk_assessment = f"This {contract_type_display} presents a MODERATE level of risk"
|
| 550 |
+
action = "requires review and selective negotiation"
|
| 551 |
+
else:
|
| 552 |
+
risk_assessment = f"This {contract_type_display} presents a LOW level of risk"
|
| 553 |
+
action = "appears generally reasonable but should be reviewed"
|
| 554 |
+
|
| 555 |
+
summary = f"{risk_assessment} with a score of {context.risk_score}/100. "
|
| 556 |
+
summary += f"The agreement {action}. "
|
| 557 |
+
|
| 558 |
+
if critical_terms > 0:
|
| 559 |
+
summary += f"Found {critical_terms} critical unfavorable terms and "
|
| 560 |
+
else:
|
| 561 |
+
summary += f"Found {len(context.unfavorable_terms)} unfavorable terms and "
|
| 562 |
+
|
| 563 |
+
if critical_protections > 0:
|
| 564 |
+
summary += f"{critical_protections} critical missing protections. "
|
| 565 |
+
else:
|
| 566 |
+
summary += f"{len(context.missing_protections)} missing protections. "
|
| 567 |
+
|
| 568 |
+
summary += "Review the detailed analysis below for specific clauses and recommendations."
|
| 569 |
+
|
| 570 |
+
return summary
|
static/index.html
CHANGED
|
@@ -15,14 +15,14 @@
|
|
| 15 |
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
|
| 16 |
background: #ffffff;
|
| 17 |
color: #333;
|
| 18 |
-
line-height: 1.
|
| 19 |
}
|
| 20 |
|
| 21 |
/* Header */
|
| 22 |
.header {
|
| 23 |
background: white;
|
| 24 |
-
border-bottom:
|
| 25 |
-
padding:
|
| 26 |
display: flex;
|
| 27 |
justify-content: space-between;
|
| 28 |
align-items: center;
|
|
@@ -49,32 +49,32 @@
|
|
| 49 |
align-items: center;
|
| 50 |
justify-content: center;
|
| 51 |
color: white;
|
| 52 |
-
font-size:
|
| 53 |
}
|
| 54 |
|
| 55 |
.subtitle {
|
| 56 |
color: #666;
|
| 57 |
-
font-size:
|
| 58 |
-
font-weight:
|
| 59 |
}
|
| 60 |
|
| 61 |
.container {
|
| 62 |
max-width: 1200px;
|
| 63 |
margin: 0 auto;
|
| 64 |
-
padding: 0 2rem;
|
| 65 |
}
|
| 66 |
|
| 67 |
-
/* Landing Page Styles
|
| 68 |
.landing-screen {
|
| 69 |
-
padding-top:
|
| 70 |
}
|
| 71 |
|
| 72 |
.hero-section {
|
| 73 |
text-align: center;
|
| 74 |
-
padding:
|
| 75 |
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
|
| 76 |
color: white;
|
| 77 |
-
margin-bottom:
|
| 78 |
}
|
| 79 |
|
| 80 |
.hero-title {
|
|
@@ -88,7 +88,7 @@
|
|
| 88 |
font-size: 1.3rem;
|
| 89 |
margin-bottom: 2.5rem;
|
| 90 |
opacity: 0.95;
|
| 91 |
-
max-width:
|
| 92 |
margin-left: auto;
|
| 93 |
margin-right: auto;
|
| 94 |
}
|
|
@@ -112,42 +112,42 @@
|
|
| 112 |
}
|
| 113 |
|
| 114 |
.section {
|
| 115 |
-
padding:
|
| 116 |
text-align: center;
|
| 117 |
}
|
| 118 |
|
| 119 |
.section-title {
|
| 120 |
font-size: 2.2rem;
|
| 121 |
font-weight: 600;
|
| 122 |
-
margin-bottom:
|
| 123 |
color: #333;
|
| 124 |
}
|
| 125 |
|
| 126 |
.section-subtitle {
|
| 127 |
font-size: 1.2rem;
|
| 128 |
color: #666;
|
| 129 |
-
margin-bottom:
|
| 130 |
-
max-width:
|
| 131 |
margin-left: auto;
|
| 132 |
margin-right: auto;
|
| 133 |
-
line-height:
|
| 134 |
}
|
| 135 |
|
| 136 |
.features-grid {
|
| 137 |
display: grid;
|
| 138 |
grid-template-columns: repeat(3, 1fr);
|
| 139 |
gap: 3rem;
|
| 140 |
-
margin-bottom:
|
| 141 |
}
|
| 142 |
|
| 143 |
.feature-card {
|
| 144 |
text-align: center;
|
| 145 |
-
padding:
|
| 146 |
}
|
| 147 |
|
| 148 |
.feature-icon {
|
| 149 |
font-size: 3rem;
|
| 150 |
-
margin-bottom: 1.
|
| 151 |
}
|
| 152 |
|
| 153 |
.feature-title {
|
|
@@ -159,25 +159,26 @@
|
|
| 159 |
|
| 160 |
.feature-description {
|
| 161 |
color: #666;
|
| 162 |
-
line-height: 1.
|
| 163 |
font-size: 1rem;
|
| 164 |
}
|
| 165 |
|
| 166 |
.steps-section {
|
| 167 |
-
background:
|
| 168 |
-
padding:
|
|
|
|
| 169 |
}
|
| 170 |
|
| 171 |
.steps-grid {
|
| 172 |
display: grid;
|
| 173 |
grid-template-columns: repeat(3, 1fr);
|
| 174 |
gap: 3rem;
|
| 175 |
-
margin-top:
|
| 176 |
}
|
| 177 |
|
| 178 |
.step-card {
|
| 179 |
text-align: center;
|
| 180 |
-
padding:
|
| 181 |
}
|
| 182 |
|
| 183 |
.step-number {
|
|
@@ -191,7 +192,7 @@
|
|
| 191 |
justify-content: center;
|
| 192 |
font-size: 1.5rem;
|
| 193 |
font-weight: 700;
|
| 194 |
-
margin: 0 auto 1.
|
| 195 |
}
|
| 196 |
|
| 197 |
.step-title {
|
|
@@ -203,16 +204,16 @@
|
|
| 203 |
|
| 204 |
.step-description {
|
| 205 |
color: #666;
|
| 206 |
-
line-height: 1.
|
| 207 |
}
|
| 208 |
|
| 209 |
.footer {
|
| 210 |
text-align: center;
|
| 211 |
-
padding:
|
| 212 |
color: #999;
|
| 213 |
font-size: 0.9rem;
|
| 214 |
-
border-top:
|
| 215 |
-
background:
|
| 216 |
}
|
| 217 |
|
| 218 |
/* Analyzer Styles */
|
|
@@ -240,6 +241,14 @@
|
|
| 240 |
margin-bottom: 2rem;
|
| 241 |
}
|
| 242 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 243 |
.upload-card {
|
| 244 |
background: white;
|
| 245 |
border-radius: 12px;
|
|
@@ -446,11 +455,11 @@
|
|
| 446 |
font-size: 1rem;
|
| 447 |
}
|
| 448 |
|
| 449 |
-
.results-
|
| 450 |
display: none;
|
| 451 |
}
|
| 452 |
|
| 453 |
-
.results-
|
| 454 |
display: block;
|
| 455 |
}
|
| 456 |
|
|
@@ -493,22 +502,26 @@
|
|
| 493 |
border: 1px solid #fecaca;
|
| 494 |
}
|
| 495 |
|
| 496 |
-
/* Results screen styles */
|
| 497 |
.results-header {
|
| 498 |
display: flex;
|
| 499 |
justify-content: space-between;
|
| 500 |
-
align-items:
|
| 501 |
margin-bottom: 2rem;
|
|
|
|
| 502 |
}
|
| 503 |
|
| 504 |
.results-title {
|
| 505 |
font-size: 2rem;
|
| 506 |
font-weight: 700;
|
|
|
|
| 507 |
}
|
| 508 |
|
| 509 |
.results-actions {
|
| 510 |
display: flex;
|
| 511 |
gap: 1rem;
|
|
|
|
|
|
|
| 512 |
}
|
| 513 |
|
| 514 |
.btn {
|
|
@@ -519,6 +532,10 @@
|
|
| 519 |
cursor: pointer;
|
| 520 |
border: none;
|
| 521 |
transition: all 0.2s;
|
|
|
|
|
|
|
|
|
|
|
|
|
| 522 |
}
|
| 523 |
|
| 524 |
.btn-primary {
|
|
@@ -582,7 +599,6 @@
|
|
| 582 |
transform: translate(-50%, -50%);
|
| 583 |
font-size: 3rem;
|
| 584 |
font-weight: 700;
|
| 585 |
-
color: #dc2626;
|
| 586 |
}
|
| 587 |
|
| 588 |
.risk-level {
|
|
@@ -864,9 +880,10 @@
|
|
| 864 |
<!-- Hero Section -->
|
| 865 |
<section class="hero-section">
|
| 866 |
<div class="container">
|
| 867 |
-
<h1 class="hero-title">Unlock Legal Intelligence
|
| 868 |
<p class="hero-subtitle">
|
| 869 |
Instantly identify risks, uncover unfavorable terms, and gain actionable negotiation points.
|
|
|
|
| 870 |
Our AI-powered platform gives you the clarity and confidence to sign better contracts.
|
| 871 |
</p>
|
| 872 |
<button class="cta-button" id="getStartedBtn">Try Now for Free</button>
|
|
@@ -954,48 +971,46 @@
|
|
| 954 |
← Back to Overview
|
| 955 |
</button>
|
| 956 |
|
| 957 |
-
<
|
| 958 |
-
|
| 959 |
-
<
|
| 960 |
-
|
| 961 |
-
|
| 962 |
-
<!-- API Status Indicator -->
|
| 963 |
-
<div id="apiStatus" class="api-status" style="display: none;">
|
| 964 |
-
Checking backend connection...
|
| 965 |
-
</div>
|
| 966 |
-
|
| 967 |
-
<div class="upload-card">
|
| 968 |
-
<div class="tabs">
|
| 969 |
-
<button class="tab active" data-tab="paste">Paste Text</button>
|
| 970 |
-
<button class="tab" data-tab="upload">Upload File</button>
|
| 971 |
</div>
|
| 972 |
|
| 973 |
-
<div
|
| 974 |
-
<
|
| 975 |
-
|
|
|
|
|
|
|
| 976 |
|
| 977 |
-
|
| 978 |
-
|
| 979 |
-
<input type="file" id="fileInput" class="file-input" accept=".pdf,.docx,.txt">
|
| 980 |
-
<div class="upload-icon">📄</div>
|
| 981 |
-
<div class="upload-text">Click to upload or drag and drop</div>
|
| 982 |
-
<div class="upload-hint">PDF, DOCX, or TXT files (Max 10MB)</div>
|
| 983 |
</div>
|
| 984 |
-
|
| 985 |
-
|
| 986 |
-
<div class="file-
|
| 987 |
-
<
|
| 988 |
-
<div class="
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 989 |
</div>
|
| 990 |
-
<button class="remove-file" id="removeFile">×</button>
|
| 991 |
</div>
|
| 992 |
-
</div>
|
| 993 |
|
| 994 |
-
|
| 995 |
-
|
| 996 |
-
|
| 997 |
-
|
| 998 |
-
|
|
|
|
| 999 |
</div>
|
| 1000 |
</div>
|
| 1001 |
|
|
@@ -1006,8 +1021,8 @@
|
|
| 1006 |
<p class="loading-text">This may take a moment for large documents.</p>
|
| 1007 |
</div>
|
| 1008 |
|
| 1009 |
-
<!-- Results
|
| 1010 |
-
<div id="
|
| 1011 |
<div class="results-header">
|
| 1012 |
<h1 class="results-title">Analysis Report</h1>
|
| 1013 |
<div class="results-actions">
|
|
@@ -1098,20 +1113,19 @@
|
|
| 1098 |
</div>
|
| 1099 |
|
| 1100 |
<script>
|
| 1101 |
-
|
| 1102 |
-
|
| 1103 |
-
: '/api/v1';
|
| 1104 |
|
| 1105 |
let selectedFile = null;
|
| 1106 |
-
let
|
| 1107 |
-
let pollInterval = null;
|
| 1108 |
|
| 1109 |
// Screen management
|
| 1110 |
function showScreen(screenName) {
|
| 1111 |
document.getElementById('landingScreen').style.display = 'none';
|
| 1112 |
document.getElementById('analyzerScreen').style.display = 'none';
|
| 1113 |
document.getElementById('loadingScreen').classList.remove('active');
|
| 1114 |
-
document.getElementById('
|
|
|
|
| 1115 |
|
| 1116 |
if (screenName === 'landing') {
|
| 1117 |
document.getElementById('landingScreen').style.display = 'block';
|
|
@@ -1121,9 +1135,11 @@
|
|
| 1121 |
} else if (screenName === 'loading') {
|
| 1122 |
document.getElementById('analyzerScreen').style.display = 'block';
|
| 1123 |
document.getElementById('loadingScreen').classList.add('active');
|
|
|
|
| 1124 |
} else if (screenName === 'results') {
|
| 1125 |
document.getElementById('analyzerScreen').style.display = 'block';
|
| 1126 |
-
document.getElementById('
|
|
|
|
| 1127 |
}
|
| 1128 |
}
|
| 1129 |
|
|
@@ -1143,14 +1159,15 @@
|
|
| 1143 |
});
|
| 1144 |
|
| 1145 |
if (response.ok) {
|
| 1146 |
-
|
|
|
|
| 1147 |
statusElement.className = 'api-status connected';
|
| 1148 |
} else {
|
| 1149 |
throw new Error('Backend not responding properly');
|
| 1150 |
}
|
| 1151 |
} catch (error) {
|
| 1152 |
console.error('Backend connection failed:', error);
|
| 1153 |
-
statusElement.textContent = '✗ Cannot connect to backend. Make sure the server is running
|
| 1154 |
statusElement.className = 'api-status disconnected';
|
| 1155 |
|
| 1156 |
setTimeout(() => {
|
|
@@ -1168,6 +1185,18 @@
|
|
| 1168 |
showScreen('landing');
|
| 1169 |
});
|
| 1170 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1171 |
// Tab switching
|
| 1172 |
document.querySelectorAll('.tab').forEach(tab => {
|
| 1173 |
tab.addEventListener('click', (e) => {
|
|
@@ -1271,15 +1300,13 @@
|
|
| 1271 |
alert('Please paste contract text');
|
| 1272 |
return;
|
| 1273 |
}
|
| 1274 |
-
|
| 1275 |
-
const file = new File([blob], 'contract.txt', { type: 'text/plain' });
|
| 1276 |
-
await analyzeContract(file);
|
| 1277 |
} else {
|
| 1278 |
if (!selectedFile) {
|
| 1279 |
alert('Please select a file');
|
| 1280 |
return;
|
| 1281 |
}
|
| 1282 |
-
await
|
| 1283 |
}
|
| 1284 |
} catch (error) {
|
| 1285 |
console.error('Analysis error:', error);
|
|
@@ -1290,7 +1317,8 @@
|
|
| 1290 |
}
|
| 1291 |
});
|
| 1292 |
|
| 1293 |
-
|
|
|
|
| 1294 |
try {
|
| 1295 |
showScreen('loading');
|
| 1296 |
|
|
@@ -1300,9 +1328,8 @@
|
|
| 1300 |
formData.append('interpret_clauses', 'true');
|
| 1301 |
formData.append('generate_negotiation_points', 'true');
|
| 1302 |
formData.append('compare_to_market', 'true');
|
| 1303 |
-
formData.append('llm_provider', 'ollama');
|
| 1304 |
|
| 1305 |
-
const response = await fetch(`${API_BASE_URL}/analyze`, {
|
| 1306 |
method: 'POST',
|
| 1307 |
body: formData
|
| 1308 |
});
|
|
@@ -1318,10 +1345,10 @@
|
|
| 1318 |
throw new Error(errorDetail);
|
| 1319 |
}
|
| 1320 |
|
| 1321 |
-
const
|
| 1322 |
-
|
| 1323 |
-
|
| 1324 |
-
|
| 1325 |
|
| 1326 |
} catch (error) {
|
| 1327 |
console.error('Error:', error);
|
|
@@ -1330,44 +1357,110 @@
|
|
| 1330 |
}
|
| 1331 |
}
|
| 1332 |
|
| 1333 |
-
|
|
|
|
| 1334 |
try {
|
| 1335 |
-
|
| 1336 |
-
|
| 1337 |
-
|
| 1338 |
-
|
| 1339 |
-
|
| 1340 |
-
|
| 1341 |
-
|
| 1342 |
-
|
| 1343 |
-
|
| 1344 |
-
|
| 1345 |
-
|
| 1346 |
-
|
| 1347 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1348 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1349 |
} catch (error) {
|
| 1350 |
-
console.error('
|
|
|
|
|
|
|
| 1351 |
}
|
| 1352 |
}
|
| 1353 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1354 |
function displayResults(result) {
|
| 1355 |
const score = result.risk_analysis.overall_score;
|
| 1356 |
const riskLevel = result.risk_analysis.risk_level;
|
| 1357 |
|
|
|
|
| 1358 |
document.getElementById('riskScoreValue').textContent = score;
|
| 1359 |
document.getElementById('riskLevel').textContent = riskLevel.toUpperCase();
|
| 1360 |
document.getElementById('riskLevel').className = 'risk-level risk-' + getRiskClass(score);
|
| 1361 |
|
|
|
|
| 1362 |
const circumference = 534;
|
| 1363 |
const offset = circumference - (score / 100) * circumference;
|
| 1364 |
const circle = document.getElementById('riskCircle');
|
| 1365 |
circle.style.strokeDashoffset = offset;
|
| 1366 |
-
|
| 1367 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1368 |
document.getElementById('executiveSummary').textContent = result.executive_summary;
|
| 1369 |
|
| 1370 |
-
// Update
|
| 1371 |
const unfavorableList = document.getElementById('unfavorableTermsList');
|
| 1372 |
unfavorableList.innerHTML = '';
|
| 1373 |
if (result.unfavorable_terms && result.unfavorable_terms.length > 0) {
|
|
@@ -1380,7 +1473,91 @@
|
|
| 1380 |
unfavorableList.innerHTML = '<li>No unfavorable terms detected</li>';
|
| 1381 |
}
|
| 1382 |
|
| 1383 |
-
//
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1384 |
}
|
| 1385 |
|
| 1386 |
function getRiskClass(score) {
|
|
@@ -1397,6 +1574,24 @@
|
|
| 1397 |
return '#16a34a';
|
| 1398 |
}
|
| 1399 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1400 |
// Initialize
|
| 1401 |
showScreen('landing');
|
| 1402 |
</script>
|
|
|
|
| 15 |
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
|
| 16 |
background: #ffffff;
|
| 17 |
color: #333;
|
| 18 |
+
line-height: 1.5;
|
| 19 |
}
|
| 20 |
|
| 21 |
/* Header */
|
| 22 |
.header {
|
| 23 |
background: white;
|
| 24 |
+
border-bottom: 0.2px solid #e5e5e5;
|
| 25 |
+
padding: 0.3rem 2.0rem;
|
| 26 |
display: flex;
|
| 27 |
justify-content: space-between;
|
| 28 |
align-items: center;
|
|
|
|
| 49 |
align-items: center;
|
| 50 |
justify-content: center;
|
| 51 |
color: white;
|
| 52 |
+
font-size: 20px;
|
| 53 |
}
|
| 54 |
|
| 55 |
.subtitle {
|
| 56 |
color: #666;
|
| 57 |
+
font-size: 1.0rem;
|
| 58 |
+
font-weight: 500;
|
| 59 |
}
|
| 60 |
|
| 61 |
.container {
|
| 62 |
max-width: 1200px;
|
| 63 |
margin: 0 auto;
|
| 64 |
+
padding: 0 0.2rem;
|
| 65 |
}
|
| 66 |
|
| 67 |
+
/* Landing Page Styles */
|
| 68 |
.landing-screen {
|
| 69 |
+
padding-top: 50px;
|
| 70 |
}
|
| 71 |
|
| 72 |
.hero-section {
|
| 73 |
text-align: center;
|
| 74 |
+
padding: 1rem 0 1rem;
|
| 75 |
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
|
| 76 |
color: white;
|
| 77 |
+
margin-bottom: 2rem;
|
| 78 |
}
|
| 79 |
|
| 80 |
.hero-title {
|
|
|
|
| 88 |
font-size: 1.3rem;
|
| 89 |
margin-bottom: 2.5rem;
|
| 90 |
opacity: 0.95;
|
| 91 |
+
max-width: 1000px;
|
| 92 |
margin-left: auto;
|
| 93 |
margin-right: auto;
|
| 94 |
}
|
|
|
|
| 112 |
}
|
| 113 |
|
| 114 |
.section {
|
| 115 |
+
padding: 0.5rem 0;
|
| 116 |
text-align: center;
|
| 117 |
}
|
| 118 |
|
| 119 |
.section-title {
|
| 120 |
font-size: 2.2rem;
|
| 121 |
font-weight: 600;
|
| 122 |
+
margin-bottom: 1rem;
|
| 123 |
color: #333;
|
| 124 |
}
|
| 125 |
|
| 126 |
.section-subtitle {
|
| 127 |
font-size: 1.2rem;
|
| 128 |
color: #666;
|
| 129 |
+
margin-bottom: 1rem;
|
| 130 |
+
max-width: 1000px;
|
| 131 |
margin-left: auto;
|
| 132 |
margin-right: auto;
|
| 133 |
+
line-height: 0.5;
|
| 134 |
}
|
| 135 |
|
| 136 |
.features-grid {
|
| 137 |
display: grid;
|
| 138 |
grid-template-columns: repeat(3, 1fr);
|
| 139 |
gap: 3rem;
|
| 140 |
+
margin-bottom: 0.1rem;
|
| 141 |
}
|
| 142 |
|
| 143 |
.feature-card {
|
| 144 |
text-align: center;
|
| 145 |
+
padding: 1rem;
|
| 146 |
}
|
| 147 |
|
| 148 |
.feature-icon {
|
| 149 |
font-size: 3rem;
|
| 150 |
+
margin-bottom: 1.0rem;
|
| 151 |
}
|
| 152 |
|
| 153 |
.feature-title {
|
|
|
|
| 159 |
|
| 160 |
.feature-description {
|
| 161 |
color: #666;
|
| 162 |
+
line-height: 1.5;
|
| 163 |
font-size: 1rem;
|
| 164 |
}
|
| 165 |
|
| 166 |
.steps-section {
|
| 167 |
+
background: white;
|
| 168 |
+
padding: 1rem 0;
|
| 169 |
+
text-align: center;
|
| 170 |
}
|
| 171 |
|
| 172 |
.steps-grid {
|
| 173 |
display: grid;
|
| 174 |
grid-template-columns: repeat(3, 1fr);
|
| 175 |
gap: 3rem;
|
| 176 |
+
margin-top: 0.5rem;
|
| 177 |
}
|
| 178 |
|
| 179 |
.step-card {
|
| 180 |
text-align: center;
|
| 181 |
+
padding: 0.5rem;
|
| 182 |
}
|
| 183 |
|
| 184 |
.step-number {
|
|
|
|
| 192 |
justify-content: center;
|
| 193 |
font-size: 1.5rem;
|
| 194 |
font-weight: 700;
|
| 195 |
+
margin: 0 auto 1.2rem;
|
| 196 |
}
|
| 197 |
|
| 198 |
.step-title {
|
|
|
|
| 204 |
|
| 205 |
.step-description {
|
| 206 |
color: #666;
|
| 207 |
+
line-height: 1.5;
|
| 208 |
}
|
| 209 |
|
| 210 |
.footer {
|
| 211 |
text-align: center;
|
| 212 |
+
padding: 1rem 0.5rem;
|
| 213 |
color: #999;
|
| 214 |
font-size: 0.9rem;
|
| 215 |
+
border-top: 0.5px solid #e5e5e5;
|
| 216 |
+
background: white;
|
| 217 |
}
|
| 218 |
|
| 219 |
/* Analyzer Styles */
|
|
|
|
| 241 |
margin-bottom: 2rem;
|
| 242 |
}
|
| 243 |
|
| 244 |
+
.upload-section {
|
| 245 |
+
transition: all 0.3s ease;
|
| 246 |
+
}
|
| 247 |
+
|
| 248 |
+
.upload-section.hidden {
|
| 249 |
+
display: none !important;
|
| 250 |
+
}
|
| 251 |
+
|
| 252 |
.upload-card {
|
| 253 |
background: white;
|
| 254 |
border-radius: 12px;
|
|
|
|
| 455 |
font-size: 1rem;
|
| 456 |
}
|
| 457 |
|
| 458 |
+
.results-content {
|
| 459 |
display: none;
|
| 460 |
}
|
| 461 |
|
| 462 |
+
.results-content.active {
|
| 463 |
display: block;
|
| 464 |
}
|
| 465 |
|
|
|
|
| 502 |
border: 1px solid #fecaca;
|
| 503 |
}
|
| 504 |
|
| 505 |
+
/* Results screen styles - UPDATED BUTTON POSITIONING */
|
| 506 |
.results-header {
|
| 507 |
display: flex;
|
| 508 |
justify-content: space-between;
|
| 509 |
+
align-items: flex-start;
|
| 510 |
margin-bottom: 2rem;
|
| 511 |
+
gap: 2rem;
|
| 512 |
}
|
| 513 |
|
| 514 |
.results-title {
|
| 515 |
font-size: 2rem;
|
| 516 |
font-weight: 700;
|
| 517 |
+
flex: 1;
|
| 518 |
}
|
| 519 |
|
| 520 |
.results-actions {
|
| 521 |
display: flex;
|
| 522 |
gap: 1rem;
|
| 523 |
+
align-items: center;
|
| 524 |
+
justify-content: flex-end;
|
| 525 |
}
|
| 526 |
|
| 527 |
.btn {
|
|
|
|
| 532 |
cursor: pointer;
|
| 533 |
border: none;
|
| 534 |
transition: all 0.2s;
|
| 535 |
+
display: flex;
|
| 536 |
+
align-items: center;
|
| 537 |
+
gap: 0.5rem;
|
| 538 |
+
white-space: nowrap;
|
| 539 |
}
|
| 540 |
|
| 541 |
.btn-primary {
|
|
|
|
| 599 |
transform: translate(-50%, -50%);
|
| 600 |
font-size: 3rem;
|
| 601 |
font-weight: 700;
|
|
|
|
| 602 |
}
|
| 603 |
|
| 604 |
.risk-level {
|
|
|
|
| 880 |
<!-- Hero Section -->
|
| 881 |
<section class="hero-section">
|
| 882 |
<div class="container">
|
| 883 |
+
<h1 class="hero-title">Unlock Legal Intelligence : Analyze Contracts with AI</h1>
|
| 884 |
<p class="hero-subtitle">
|
| 885 |
Instantly identify risks, uncover unfavorable terms, and gain actionable negotiation points.
|
| 886 |
+
<br>
|
| 887 |
Our AI-powered platform gives you the clarity and confidence to sign better contracts.
|
| 888 |
</p>
|
| 889 |
<button class="cta-button" id="getStartedBtn">Try Now for Free</button>
|
|
|
|
| 971 |
← Back to Overview
|
| 972 |
</button>
|
| 973 |
|
| 974 |
+
<!-- Upload Section - Shown by default -->
|
| 975 |
+
<div id="uploadSection" class="upload-section">
|
| 976 |
+
<div class="hero-section-analyzer">
|
| 977 |
+
<h1 class="hero-title-analyzer">Analyze Your Contract in Seconds</h1>
|
| 978 |
+
<p class="hero-description">Paste your contract or upload a file to get an instant, AI-powered risk assessment.</p>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 979 |
</div>
|
| 980 |
|
| 981 |
+
<div class="upload-card">
|
| 982 |
+
<div class="tabs">
|
| 983 |
+
<button class="tab active" data-tab="paste">Paste Text</button>
|
| 984 |
+
<button class="tab" data-tab="upload">Upload File</button>
|
| 985 |
+
</div>
|
| 986 |
|
| 987 |
+
<div id="pasteTab" class="tab-content active">
|
| 988 |
+
<textarea class="textarea" id="contractText" placeholder="Paste your full contract text here..."></textarea>
|
|
|
|
|
|
|
|
|
|
|
|
|
| 989 |
</div>
|
| 990 |
+
|
| 991 |
+
<div id="uploadTab" class="tab-content">
|
| 992 |
+
<div class="file-upload-area" id="fileUploadArea">
|
| 993 |
+
<input type="file" id="fileInput" class="file-input" accept=".pdf,.docx,.txt">
|
| 994 |
+
<div class="upload-icon">📄</div>
|
| 995 |
+
<div class="upload-text">Click to upload or drag and drop</div>
|
| 996 |
+
<div class="upload-hint">PDF, DOCX, or TXT files (Max 10MB)</div>
|
| 997 |
+
</div>
|
| 998 |
+
<div id="selectedFile" class="selected-file" style="display: none;">
|
| 999 |
+
<div class="file-icon">📄</div>
|
| 1000 |
+
<div class="file-info">
|
| 1001 |
+
<div class="file-name" id="fileName"></div>
|
| 1002 |
+
<div class="file-size" id="fileSize"></div>
|
| 1003 |
+
</div>
|
| 1004 |
+
<button class="remove-file" id="removeFile">×</button>
|
| 1005 |
</div>
|
|
|
|
| 1006 |
</div>
|
|
|
|
| 1007 |
|
| 1008 |
+
<div class="analyze-btn-container">
|
| 1009 |
+
<button class="analyze-btn" id="analyzeBtn">
|
| 1010 |
+
<span>🔍</span>
|
| 1011 |
+
<span>Analyze Contract</span>
|
| 1012 |
+
</button>
|
| 1013 |
+
</div>
|
| 1014 |
</div>
|
| 1015 |
</div>
|
| 1016 |
|
|
|
|
| 1021 |
<p class="loading-text">This may take a moment for large documents.</p>
|
| 1022 |
</div>
|
| 1023 |
|
| 1024 |
+
<!-- Results Content - Hidden by default -->
|
| 1025 |
+
<div id="resultsContent" class="results-content">
|
| 1026 |
<div class="results-header">
|
| 1027 |
<h1 class="results-title">Analysis Report</h1>
|
| 1028 |
<div class="results-actions">
|
|
|
|
| 1113 |
</div>
|
| 1114 |
|
| 1115 |
<script>
|
| 1116 |
+
// DYNAMIC API BASE URL - Automatically detects current port
|
| 1117 |
+
const API_BASE_URL = `${window.location.protocol}//${window.location.host}/api/v1`;
|
|
|
|
| 1118 |
|
| 1119 |
let selectedFile = null;
|
| 1120 |
+
let currentAnalysisResult = null;
|
|
|
|
| 1121 |
|
| 1122 |
// Screen management
|
| 1123 |
function showScreen(screenName) {
|
| 1124 |
document.getElementById('landingScreen').style.display = 'none';
|
| 1125 |
document.getElementById('analyzerScreen').style.display = 'none';
|
| 1126 |
document.getElementById('loadingScreen').classList.remove('active');
|
| 1127 |
+
document.getElementById('resultsContent').classList.remove('active');
|
| 1128 |
+
document.getElementById('uploadSection').classList.remove('hidden');
|
| 1129 |
|
| 1130 |
if (screenName === 'landing') {
|
| 1131 |
document.getElementById('landingScreen').style.display = 'block';
|
|
|
|
| 1135 |
} else if (screenName === 'loading') {
|
| 1136 |
document.getElementById('analyzerScreen').style.display = 'block';
|
| 1137 |
document.getElementById('loadingScreen').classList.add('active');
|
| 1138 |
+
document.getElementById('uploadSection').classList.add('hidden');
|
| 1139 |
} else if (screenName === 'results') {
|
| 1140 |
document.getElementById('analyzerScreen').style.display = 'block';
|
| 1141 |
+
document.getElementById('resultsContent').classList.add('active');
|
| 1142 |
+
document.getElementById('uploadSection').classList.add('hidden');
|
| 1143 |
}
|
| 1144 |
}
|
| 1145 |
|
|
|
|
| 1159 |
});
|
| 1160 |
|
| 1161 |
if (response.ok) {
|
| 1162 |
+
const data = await response.json();
|
| 1163 |
+
statusElement.textContent = `✓ Backend connected (${data.models_loaded} models, ${data.services_loaded} services)`;
|
| 1164 |
statusElement.className = 'api-status connected';
|
| 1165 |
} else {
|
| 1166 |
throw new Error('Backend not responding properly');
|
| 1167 |
}
|
| 1168 |
} catch (error) {
|
| 1169 |
console.error('Backend connection failed:', error);
|
| 1170 |
+
statusElement.textContent = '✗ Cannot connect to backend. Make sure the server is running.';
|
| 1171 |
statusElement.className = 'api-status disconnected';
|
| 1172 |
|
| 1173 |
setTimeout(() => {
|
|
|
|
| 1185 |
showScreen('landing');
|
| 1186 |
});
|
| 1187 |
|
| 1188 |
+
document.getElementById('analyzeAnotherBtn').addEventListener('click', () => {
|
| 1189 |
+
// Reset form
|
| 1190 |
+
document.getElementById('contractText').value = '';
|
| 1191 |
+
selectedFile = null;
|
| 1192 |
+
fileInput.value = '';
|
| 1193 |
+
selectedFileDiv.style.display = 'none';
|
| 1194 |
+
fileUploadArea.style.display = 'block';
|
| 1195 |
+
|
| 1196 |
+
// Show upload section again
|
| 1197 |
+
showScreen('analyzer');
|
| 1198 |
+
});
|
| 1199 |
+
|
| 1200 |
// Tab switching
|
| 1201 |
document.querySelectorAll('.tab').forEach(tab => {
|
| 1202 |
tab.addEventListener('click', (e) => {
|
|
|
|
| 1300 |
alert('Please paste contract text');
|
| 1301 |
return;
|
| 1302 |
}
|
| 1303 |
+
await analyzeContractText(text);
|
|
|
|
|
|
|
| 1304 |
} else {
|
| 1305 |
if (!selectedFile) {
|
| 1306 |
alert('Please select a file');
|
| 1307 |
return;
|
| 1308 |
}
|
| 1309 |
+
await analyzeContractFile(selectedFile);
|
| 1310 |
}
|
| 1311 |
} catch (error) {
|
| 1312 |
console.error('Analysis error:', error);
|
|
|
|
| 1317 |
}
|
| 1318 |
});
|
| 1319 |
|
| 1320 |
+
// Direct file analysis (synchronous)
|
| 1321 |
+
async function analyzeContractFile(file) {
|
| 1322 |
try {
|
| 1323 |
showScreen('loading');
|
| 1324 |
|
|
|
|
| 1328 |
formData.append('interpret_clauses', 'true');
|
| 1329 |
formData.append('generate_negotiation_points', 'true');
|
| 1330 |
formData.append('compare_to_market', 'true');
|
|
|
|
| 1331 |
|
| 1332 |
+
const response = await fetch(`${API_BASE_URL}/analyze/file`, {
|
| 1333 |
method: 'POST',
|
| 1334 |
body: formData
|
| 1335 |
});
|
|
|
|
| 1345 |
throw new Error(errorDetail);
|
| 1346 |
}
|
| 1347 |
|
| 1348 |
+
const result = await response.json();
|
| 1349 |
+
currentAnalysisResult = result;
|
| 1350 |
+
displayResults(result);
|
| 1351 |
+
showScreen('results');
|
| 1352 |
|
| 1353 |
} catch (error) {
|
| 1354 |
console.error('Error:', error);
|
|
|
|
| 1357 |
}
|
| 1358 |
}
|
| 1359 |
|
| 1360 |
+
// Direct text analysis (synchronous)
|
| 1361 |
+
async function analyzeContractText(text) {
|
| 1362 |
try {
|
| 1363 |
+
showScreen('loading');
|
| 1364 |
+
|
| 1365 |
+
const formData = new FormData();
|
| 1366 |
+
formData.append('contract_text', text);
|
| 1367 |
+
formData.append('max_clauses', '15');
|
| 1368 |
+
formData.append('interpret_clauses', 'true');
|
| 1369 |
+
formData.append('generate_negotiation_points', 'true');
|
| 1370 |
+
formData.append('compare_to_market', 'true');
|
| 1371 |
+
|
| 1372 |
+
const response = await fetch(`${API_BASE_URL}/analyze/text`, {
|
| 1373 |
+
method: 'POST',
|
| 1374 |
+
body: formData
|
| 1375 |
+
});
|
| 1376 |
+
|
| 1377 |
+
if (!response.ok) {
|
| 1378 |
+
let errorDetail = 'Analysis failed';
|
| 1379 |
+
try {
|
| 1380 |
+
const errorData = await response.json();
|
| 1381 |
+
errorDetail = errorData.detail || errorData.error || errorDetail;
|
| 1382 |
+
} catch (e) {
|
| 1383 |
+
errorDetail = `Server error: ${response.status} ${response.statusText}`;
|
| 1384 |
+
}
|
| 1385 |
+
throw new Error(errorDetail);
|
| 1386 |
}
|
| 1387 |
+
|
| 1388 |
+
const result = await response.json();
|
| 1389 |
+
currentAnalysisResult = result;
|
| 1390 |
+
displayResults(result);
|
| 1391 |
+
showScreen('results');
|
| 1392 |
+
|
| 1393 |
} catch (error) {
|
| 1394 |
+
console.error('Error:', error);
|
| 1395 |
+
alert('Error analyzing contract: ' + error.message);
|
| 1396 |
+
showScreen('analyzer');
|
| 1397 |
}
|
| 1398 |
}
|
| 1399 |
|
| 1400 |
+
// Download PDF
|
| 1401 |
+
document.getElementById('downloadBtn').addEventListener('click', async () => {
|
| 1402 |
+
if (!currentAnalysisResult) {
|
| 1403 |
+
alert('No analysis results available to download');
|
| 1404 |
+
return;
|
| 1405 |
+
}
|
| 1406 |
+
|
| 1407 |
+
try {
|
| 1408 |
+
const response = await fetch(`${API_BASE_URL}/generate-pdf`, {
|
| 1409 |
+
method: 'POST',
|
| 1410 |
+
headers: {
|
| 1411 |
+
'Content-Type': 'application/json',
|
| 1412 |
+
},
|
| 1413 |
+
body: JSON.stringify(currentAnalysisResult)
|
| 1414 |
+
});
|
| 1415 |
+
|
| 1416 |
+
if (!response.ok) {
|
| 1417 |
+
throw new Error('Failed to generate PDF');
|
| 1418 |
+
}
|
| 1419 |
+
|
| 1420 |
+
const blob = await response.blob();
|
| 1421 |
+
const url = window.URL.createObjectURL(blob);
|
| 1422 |
+
const a = document.createElement('a');
|
| 1423 |
+
a.style.display = 'none';
|
| 1424 |
+
a.href = url;
|
| 1425 |
+
a.download = `contract_analysis_${currentAnalysisResult.analysis_id}.pdf`;
|
| 1426 |
+
document.body.appendChild(a);
|
| 1427 |
+
a.click();
|
| 1428 |
+
window.URL.revokeObjectURL(url);
|
| 1429 |
+
document.body.removeChild(a);
|
| 1430 |
+
|
| 1431 |
+
} catch (error) {
|
| 1432 |
+
console.error('PDF download error:', error);
|
| 1433 |
+
alert('Error downloading PDF: ' + error.message);
|
| 1434 |
+
}
|
| 1435 |
+
});
|
| 1436 |
+
|
| 1437 |
function displayResults(result) {
|
| 1438 |
const score = result.risk_analysis.overall_score;
|
| 1439 |
const riskLevel = result.risk_analysis.risk_level;
|
| 1440 |
|
| 1441 |
+
// Update risk score
|
| 1442 |
document.getElementById('riskScoreValue').textContent = score;
|
| 1443 |
document.getElementById('riskLevel').textContent = riskLevel.toUpperCase();
|
| 1444 |
document.getElementById('riskLevel').className = 'risk-level risk-' + getRiskClass(score);
|
| 1445 |
|
| 1446 |
+
// Update risk circle
|
| 1447 |
const circumference = 534;
|
| 1448 |
const offset = circumference - (score / 100) * circumference;
|
| 1449 |
const circle = document.getElementById('riskCircle');
|
| 1450 |
circle.style.strokeDashoffset = offset;
|
| 1451 |
+
|
| 1452 |
+
// Get risk color and apply to both circle and text
|
| 1453 |
+
const riskColor = getRiskColor(score);
|
| 1454 |
+
circle.style.stroke = riskColor;
|
| 1455 |
+
|
| 1456 |
+
// Update text color in the middle of the circle
|
| 1457 |
+
const riskScoreValue = document.getElementById('riskScoreValue');
|
| 1458 |
+
riskScoreValue.style.color = riskColor;
|
| 1459 |
+
|
| 1460 |
+
// Update executive summary
|
| 1461 |
document.getElementById('executiveSummary').textContent = result.executive_summary;
|
| 1462 |
|
| 1463 |
+
// Update unfavorable terms
|
| 1464 |
const unfavorableList = document.getElementById('unfavorableTermsList');
|
| 1465 |
unfavorableList.innerHTML = '';
|
| 1466 |
if (result.unfavorable_terms && result.unfavorable_terms.length > 0) {
|
|
|
|
| 1473 |
unfavorableList.innerHTML = '<li>No unfavorable terms detected</li>';
|
| 1474 |
}
|
| 1475 |
|
| 1476 |
+
// Update missing protections
|
| 1477 |
+
const missingList = document.getElementById('missingProtectionsList');
|
| 1478 |
+
missingList.innerHTML = '';
|
| 1479 |
+
if (result.missing_protections && result.missing_protections.length > 0) {
|
| 1480 |
+
result.missing_protections.slice(0, 8).forEach(protection => {
|
| 1481 |
+
const li = document.createElement('li');
|
| 1482 |
+
li.innerHTML = `<span class="item-icon">›</span><span class="item-text"><strong>${protection.protection}:</strong> ${protection.explanation}</span>`;
|
| 1483 |
+
missingList.appendChild(li);
|
| 1484 |
+
});
|
| 1485 |
+
} else {
|
| 1486 |
+
missingList.innerHTML = '<li>No missing protections detected</li>';
|
| 1487 |
+
}
|
| 1488 |
+
|
| 1489 |
+
// Update negotiation points
|
| 1490 |
+
const negotiationList = document.getElementById('negotiationPointsList');
|
| 1491 |
+
negotiationList.innerHTML = '';
|
| 1492 |
+
if (result.negotiation_points && result.negotiation_points.length > 0) {
|
| 1493 |
+
result.negotiation_points.slice(0, 8).forEach(point => {
|
| 1494 |
+
const li = document.createElement('li');
|
| 1495 |
+
li.innerHTML = `<span class="item-icon">›</span><span class="item-text"><strong>${point.point}:</strong> ${point.explanation}</span>`;
|
| 1496 |
+
negotiationList.appendChild(li);
|
| 1497 |
+
});
|
| 1498 |
+
} else {
|
| 1499 |
+
negotiationList.innerHTML = '<li>No negotiation points generated</li>';
|
| 1500 |
+
}
|
| 1501 |
+
|
| 1502 |
+
// Update category breakdown
|
| 1503 |
+
const categoryBreakdown = document.getElementById('categoryBreakdown');
|
| 1504 |
+
categoryBreakdown.innerHTML = '';
|
| 1505 |
+
|
| 1506 |
+
if (result.risk_analysis.category_scores) {
|
| 1507 |
+
Object.entries(result.risk_analysis.category_scores).forEach(([category, score]) => {
|
| 1508 |
+
const categoryItem = document.createElement('div');
|
| 1509 |
+
categoryItem.className = 'category-item';
|
| 1510 |
+
|
| 1511 |
+
const riskClass = getRiskClass(score);
|
| 1512 |
+
const riskColor = getRiskColor(score);
|
| 1513 |
+
|
| 1514 |
+
categoryItem.innerHTML = `
|
| 1515 |
+
<div class="category-header">
|
| 1516 |
+
<span class="category-name">${formatCategoryName(category)}</span>
|
| 1517 |
+
<span class="category-score score-${riskClass}">${score}/100</span>
|
| 1518 |
+
</div>
|
| 1519 |
+
<div class="progress-bar">
|
| 1520 |
+
<div class="progress-fill progress-${riskClass}" style="width: ${score}%"></div>
|
| 1521 |
+
</div>
|
| 1522 |
+
<div class="category-description">
|
| 1523 |
+
${getCategoryDescription(category, score)}
|
| 1524 |
+
</div>
|
| 1525 |
+
`;
|
| 1526 |
+
|
| 1527 |
+
categoryBreakdown.appendChild(categoryItem);
|
| 1528 |
+
});
|
| 1529 |
+
}
|
| 1530 |
+
|
| 1531 |
+
// Update clause analysis
|
| 1532 |
+
const clauseAnalysis = document.getElementById('clauseAnalysis');
|
| 1533 |
+
clauseAnalysis.innerHTML = '';
|
| 1534 |
+
|
| 1535 |
+
if (result.clauses && result.clauses.length > 0) {
|
| 1536 |
+
result.clauses.slice(0, 10).forEach(clause => {
|
| 1537 |
+
const clauseItem = document.createElement('div');
|
| 1538 |
+
clauseItem.className = `clause-item ${getRiskClass(clause.confidence * 100)}`;
|
| 1539 |
+
|
| 1540 |
+
clauseItem.innerHTML = `
|
| 1541 |
+
<div class="clause-header">
|
| 1542 |
+
<div>
|
| 1543 |
+
<div class="clause-label">${clause.reference} • ${clause.category}</div>
|
| 1544 |
+
<div class="clause-text">${clause.text.substring(0, 200)}${clause.text.length > 200 ? '...' : ''}</div>
|
| 1545 |
+
</div>
|
| 1546 |
+
<div class="severity-badge badge-${getRiskClass(clause.confidence * 100)}">
|
| 1547 |
+
${Math.round(clause.confidence * 100)}% confidence
|
| 1548 |
+
</div>
|
| 1549 |
+
</div>
|
| 1550 |
+
${clause.risk_indicators && clause.risk_indicators.length > 0 ? `
|
| 1551 |
+
<div class="clause-section">
|
| 1552 |
+
<div class="clause-section-title">Risk Indicators</div>
|
| 1553 |
+
<div class="clause-section-text">${clause.risk_indicators.join(', ')}</div>
|
| 1554 |
+
</div>
|
| 1555 |
+
` : ''}
|
| 1556 |
+
`;
|
| 1557 |
+
|
| 1558 |
+
clauseAnalysis.appendChild(clauseItem);
|
| 1559 |
+
});
|
| 1560 |
+
}
|
| 1561 |
}
|
| 1562 |
|
| 1563 |
function getRiskClass(score) {
|
|
|
|
| 1574 |
return '#16a34a';
|
| 1575 |
}
|
| 1576 |
|
| 1577 |
+
function formatCategoryName(category) {
|
| 1578 |
+
return category.split('_').map(word =>
|
| 1579 |
+
word.charAt(0).toUpperCase() + word.slice(1)
|
| 1580 |
+
).join(' ');
|
| 1581 |
+
}
|
| 1582 |
+
|
| 1583 |
+
function getCategoryDescription(category, score) {
|
| 1584 |
+
const descriptions = {
|
| 1585 |
+
'termination': score > 60 ? 'High termination risk detected' : 'Termination terms appear reasonable',
|
| 1586 |
+
'compensation': score > 60 ? 'Compensation structure needs review' : 'Compensation terms are clear',
|
| 1587 |
+
'confidentiality': score > 60 ? 'Confidentiality terms may be overly broad' : 'Confidentiality terms are balanced',
|
| 1588 |
+
'liability': score > 60 ? 'Liability allocation needs attention' : 'Liability terms are reasonable',
|
| 1589 |
+
'intellectual_property': score > 60 ? 'IP rights allocation requires review' : 'IP terms are well-defined'
|
| 1590 |
+
};
|
| 1591 |
+
|
| 1592 |
+
return descriptions[category] || 'Review recommended based on risk score';
|
| 1593 |
+
}
|
| 1594 |
+
|
| 1595 |
// Initialize
|
| 1596 |
showScreen('landing');
|
| 1597 |
</script>
|
utils/logger.py
CHANGED
|
@@ -114,7 +114,7 @@ class ContractAnalyzerLogger:
|
|
| 114 |
|
| 115 |
|
| 116 |
@classmethod
|
| 117 |
-
def log_structured(cls, level: int, message: str,
|
| 118 |
"""
|
| 119 |
Log structured data as JSON
|
| 120 |
|
|
@@ -124,15 +124,12 @@ class ContractAnalyzerLogger:
|
|
| 124 |
|
| 125 |
message { str } : Log message
|
| 126 |
|
| 127 |
-
request_id { str } : Optional request ID for tracking
|
| 128 |
-
|
| 129 |
**kwargs : Additional structured data
|
| 130 |
"""
|
| 131 |
logger = cls.get_logger()
|
| 132 |
|
| 133 |
log_data = {"timestamp" : datetime.now().isoformat(),
|
| 134 |
"message" : message,
|
| 135 |
-
"request_id" : request_id,
|
| 136 |
**kwargs
|
| 137 |
}
|
| 138 |
|
|
@@ -140,7 +137,7 @@ class ContractAnalyzerLogger:
|
|
| 140 |
|
| 141 |
|
| 142 |
@classmethod
|
| 143 |
-
def log_error(cls, error: Exception, context: Dict[str, Any] = None
|
| 144 |
"""
|
| 145 |
Log error with full traceback and context
|
| 146 |
|
|
@@ -149,8 +146,6 @@ class ContractAnalyzerLogger:
|
|
| 149 |
error { Exception } : Exception object
|
| 150 |
|
| 151 |
context { dict } : Additional context dictionary
|
| 152 |
-
|
| 153 |
-
request_id { str } : Request ID for tracking
|
| 154 |
"""
|
| 155 |
error_logger = cls._loggers.get("contract_analyzer.error")
|
| 156 |
|
|
@@ -158,7 +153,6 @@ class ContractAnalyzerLogger:
|
|
| 158 |
error_logger = cls.get_logger()
|
| 159 |
|
| 160 |
error_data = {"timestamp" : datetime.now().isoformat(),
|
| 161 |
-
"request_id" : request_id,
|
| 162 |
"error_type" : type(error).__name__,
|
| 163 |
"error_message" : str(error),
|
| 164 |
"traceback" : traceback.format_exc(),
|
|
@@ -169,7 +163,7 @@ class ContractAnalyzerLogger:
|
|
| 169 |
|
| 170 |
|
| 171 |
@classmethod
|
| 172 |
-
def log_performance(cls, operation: str, duration: float,
|
| 173 |
"""
|
| 174 |
Log performance metrics
|
| 175 |
|
|
@@ -179,8 +173,6 @@ class ContractAnalyzerLogger:
|
|
| 179 |
|
| 180 |
duration { float } : Duration in seconds
|
| 181 |
|
| 182 |
-
request_id { str } : Request ID
|
| 183 |
-
|
| 184 |
**metrics : Additional metrics
|
| 185 |
"""
|
| 186 |
perf_logger = cls._loggers.get("contract_analyzer.performance")
|
|
@@ -188,7 +180,6 @@ class ContractAnalyzerLogger:
|
|
| 188 |
perf_logger = cls.get_logger()
|
| 189 |
|
| 190 |
perf_data = {"timestamp" : datetime.now().isoformat(),
|
| 191 |
-
"request_id" : request_id,
|
| 192 |
"operation" : operation,
|
| 193 |
"duration_seconds" : round(duration, 3),
|
| 194 |
**metrics
|
|
|
|
| 114 |
|
| 115 |
|
| 116 |
@classmethod
|
| 117 |
+
def log_structured(cls, level: int, message: str, **kwargs):
|
| 118 |
"""
|
| 119 |
Log structured data as JSON
|
| 120 |
|
|
|
|
| 124 |
|
| 125 |
message { str } : Log message
|
| 126 |
|
|
|
|
|
|
|
| 127 |
**kwargs : Additional structured data
|
| 128 |
"""
|
| 129 |
logger = cls.get_logger()
|
| 130 |
|
| 131 |
log_data = {"timestamp" : datetime.now().isoformat(),
|
| 132 |
"message" : message,
|
|
|
|
| 133 |
**kwargs
|
| 134 |
}
|
| 135 |
|
|
|
|
| 137 |
|
| 138 |
|
| 139 |
@classmethod
|
| 140 |
+
def log_error(cls, error: Exception, context: Dict[str, Any] = None):
|
| 141 |
"""
|
| 142 |
Log error with full traceback and context
|
| 143 |
|
|
|
|
| 146 |
error { Exception } : Exception object
|
| 147 |
|
| 148 |
context { dict } : Additional context dictionary
|
|
|
|
|
|
|
| 149 |
"""
|
| 150 |
error_logger = cls._loggers.get("contract_analyzer.error")
|
| 151 |
|
|
|
|
| 153 |
error_logger = cls.get_logger()
|
| 154 |
|
| 155 |
error_data = {"timestamp" : datetime.now().isoformat(),
|
|
|
|
| 156 |
"error_type" : type(error).__name__,
|
| 157 |
"error_message" : str(error),
|
| 158 |
"traceback" : traceback.format_exc(),
|
|
|
|
| 163 |
|
| 164 |
|
| 165 |
@classmethod
|
| 166 |
+
def log_performance(cls, operation: str, duration: float, **metrics):
|
| 167 |
"""
|
| 168 |
Log performance metrics
|
| 169 |
|
|
|
|
| 173 |
|
| 174 |
duration { float } : Duration in seconds
|
| 175 |
|
|
|
|
|
|
|
| 176 |
**metrics : Additional metrics
|
| 177 |
"""
|
| 178 |
perf_logger = cls._loggers.get("contract_analyzer.performance")
|
|
|
|
| 180 |
perf_logger = cls.get_logger()
|
| 181 |
|
| 182 |
perf_data = {"timestamp" : datetime.now().isoformat(),
|
|
|
|
| 183 |
"operation" : operation,
|
| 184 |
"duration_seconds" : round(duration, 3),
|
| 185 |
**metrics
|