satyaki-mitra commited on
Commit
de8f1bc
·
1 Parent(s): d15efc9

code refactor

Browse files
app.py CHANGED
@@ -3,19 +3,24 @@ FastAPI Application for AI Contract Risk Analyzer
3
  Complete pre-loading approach: All models loaded at startup
4
  Direct synchronous flow: Upload → Analyze → Return Results + PDF
5
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  from fastapi.responses import JSONResponse, FileResponse, Response
7
- from fastapi import FastAPI, File, UploadFile, HTTPException, Form
8
  from fastapi.middleware.cors import CORSMiddleware
9
  from fastapi.staticfiles import StaticFiles
10
  from pydantic import BaseModel, Field
11
- from typing import List, Optional, Dict, Any
12
- import uuid
13
- import os
14
- from datetime import datetime
15
- from pathlib import Path
16
  import sys
17
- import tempfile
18
- import io
19
 
20
  # Add parent directory to path
21
  sys.path.append(str(Path(__file__).parent))
@@ -37,19 +42,140 @@ from services.protection_checker import ProtectionChecker
37
  from services.llm_interpreter import LLMClauseInterpreter
38
  from services.negotiation_engine import NegotiationEngine
39
  from services.market_comparator import MarketComparator
 
40
 
41
  # Import PDF generator
42
  from reporter.pdf_generator import generate_pdf_report
43
 
44
- # Initialize logger
45
- ContractAnalyzerLogger.setup(log_dir="logs", app_name="contract_analyzer")
46
- logger = ContractAnalyzerLogger.get_logger()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
  # ============================================================================
49
  # PYDANTIC SCHEMAS
50
  # ============================================================================
51
 
52
- class HealthResponse(BaseModel):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  """Health check response"""
54
  status: str
55
  version: str
@@ -58,14 +184,16 @@ class HealthResponse(BaseModel):
58
  services_loaded: int
59
  memory_usage_mb: float
60
 
61
- class AnalysisOptions(BaseModel):
 
62
  """Analysis options"""
63
  max_clauses: int = Field(default=15, ge=5, le=30)
64
  interpret_clauses: bool = Field(default=True)
65
  generate_negotiation_points: bool = Field(default=True)
66
  compare_to_market: bool = Field(default=True)
67
 
68
- class AnalysisResult(BaseModel):
 
69
  """Complete analysis result"""
70
  analysis_id: str
71
  timestamp: str
@@ -81,12 +209,22 @@ class AnalysisResult(BaseModel):
81
  metadata: Dict[str, Any]
82
  pdf_available: bool = True
83
 
84
- class ErrorResponse(BaseModel):
 
85
  """Error response"""
86
  error: str
87
  detail: str
88
  timestamp: str
89
 
 
 
 
 
 
 
 
 
 
90
  # ============================================================================
91
  # SERVICE INITIALIZATION WITH FULL PRE-LOADING
92
  # ============================================================================
@@ -234,7 +372,7 @@ class PreloadedAnalysisService:
234
 
235
  # Step 1: Classify contract
236
  classification = self.services["classifier"].classify_contract(contract_text)
237
- classification_dict = classification.to_dict()
238
  actual_category = classification.category
239
 
240
  log_info(f"Contract classified as: {actual_category}")
@@ -255,7 +393,7 @@ class PreloadedAnalysisService:
255
 
256
  # Extract clauses
257
  clauses = extractor.extract_clauses(contract_text, options.max_clauses)
258
- clauses_dict = [clause.to_dict() for clause in clauses]
259
  log_info(f"Extracted {len(clauses)} clauses")
260
 
261
  # Step 3: Map to ContractType and get appropriate risk analyzer
@@ -279,17 +417,17 @@ class PreloadedAnalysisService:
279
 
280
  # Analyze risk
281
  risk_score = risk_analyzer.analyze_risk(contract_text, clauses)
282
- risk_dict = risk_score.to_dict()
283
  log_info(f"Risk analysis completed: {risk_dict['overall_score']}/100")
284
 
285
  # Step 4: Find unfavorable terms
286
  unfavorable_terms = self.services["term_analyzer"].analyze_unfavorable_terms(contract_text, clauses)
287
- unfavorable_dict = [term.to_dict() for term in unfavorable_terms]
288
  log_info(f"Found {len(unfavorable_terms)} unfavorable terms")
289
 
290
  # Step 5: Check missing protections
291
  missing_protections = self.services["protection_checker"].check_missing_protections(contract_text, clauses)
292
- missing_dict = [prot.to_dict() for prot in missing_protections]
293
  log_info(f"Found {len(missing_protections)} missing protections")
294
 
295
  # Optional steps
@@ -302,7 +440,7 @@ class PreloadedAnalysisService:
302
  interpretations = self.services["interpreter"].interpret_clauses(
303
  clauses, min(10, options.max_clauses)
304
  )
305
- interpretations_dict = [interp.to_dict() for interp in interpretations]
306
  log_info(f"Interpreted {len(interpretations)} clauses")
307
  except Exception as e:
308
  log_error(f"Clause interpretation failed: {e}")
@@ -313,7 +451,7 @@ class PreloadedAnalysisService:
313
  negotiation_points = self.services["negotiation_engine"].generate_negotiation_points(
314
  risk_score, unfavorable_terms, missing_protections, clauses, 7
315
  )
316
- negotiation_dict = [point.to_dict() for point in negotiation_points]
317
  log_info(f"Generated {len(negotiation_points)} negotiation points")
318
  except Exception as e:
319
  log_error(f"Negotiation points generation failed: {e}")
@@ -322,7 +460,7 @@ class PreloadedAnalysisService:
322
  if options.compare_to_market:
323
  try:
324
  market_comparisons = self.services["market_comparator"].compare_to_market(clauses)
325
- market_dict = [comp.to_dict() for comp in market_comparisons]
326
  log_info(f"Compared {len(market_comparisons)} clauses to market")
327
  except Exception as e:
328
  log_error(f"Market comparison failed: {e}")
@@ -330,7 +468,7 @@ class PreloadedAnalysisService:
330
 
331
  # Generate executive summary
332
  executive_summary = self._generate_executive_summary(
333
- classification_dict, risk_dict, unfavorable_dict, missing_dict
334
  )
335
 
336
  # Build result
@@ -365,53 +503,87 @@ class PreloadedAnalysisService:
365
  raise
366
 
367
  def _generate_executive_summary(self, classification: Dict, risk_score: Dict,
368
- unfavorable_terms: List, missing_protections: List) -> str:
369
- """Generate executive summary"""
370
- category = classification.get("category", "Unknown")
371
- score = risk_score.get("overall_score", 0)
372
- risk_level = risk_score.get("risk_level", "UNKNOWN")
373
-
374
- critical_terms = sum(1 for t in unfavorable_terms if t.get('severity') == 'critical')
375
- critical_protections = sum(1 for p in missing_protections if p.get('importance') == 'critical')
376
-
377
- if score >= 80:
378
- risk_msg = "CRITICAL ATTENTION REQUIRED"
379
- elif score >= 60:
380
- risk_msg = "SIGNIFICANT CONCERNS"
381
- elif score >= 40:
382
- risk_msg = "MODERATE RISK"
383
- else:
384
- risk_msg = "LOW RISK"
385
-
386
- return f"This {category} contract scored {score}/100 ({risk_level.upper()} risk). {risk_msg}. Found {len(unfavorable_terms)} unfavorable terms ({critical_terms} critical) and {len(missing_protections)} missing protections ({critical_protections} critical). Review detailed analysis below."
387
 
388
  # ============================================================================
389
- # FASTAPI APP
390
  # ============================================================================
391
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
392
  app = FastAPI(
393
  title=settings.APP_NAME,
394
  version=settings.APP_VERSION,
395
  description="AI-powered contract risk analysis with complete model pre-loading",
396
  docs_url="/api/docs",
397
- redoc_url="/api/redoc"
 
 
398
  )
399
 
 
 
 
 
400
  # Serve static files
401
- app.mount("/static", StaticFiles(directory="static"), name="static")
402
 
403
- # CORS middleware
404
  app.add_middleware(
405
  CORSMiddleware,
406
- allow_origins=settings.CORS_ORIGINS,
407
- allow_credentials=settings.CORS_ALLOW_CREDENTIALS,
408
- allow_methods=settings.CORS_ALLOW_METHODS,
409
- allow_headers=settings.CORS_ALLOW_HEADERS
410
  )
411
 
412
- # Initialize pre-loaded analysis service
413
- analysis_service = PreloadedAnalysisService()
414
-
415
  # ============================================================================
416
  # HELPER FUNCTIONS
417
  # ============================================================================
@@ -468,11 +640,14 @@ def validate_contract_text(text: str) -> tuple[bool, str]:
468
  @app.get("/")
469
  async def serve_frontend():
470
  """Serve the frontend"""
471
- return FileResponse("static/index.html")
472
 
473
  @app.get("/api/v1/health", response_model=HealthResponse)
474
  async def health_check():
475
  """Health check endpoint with service status"""
 
 
 
476
  service_status = analysis_service.get_service_status()
477
 
478
  return HealthResponse(
@@ -487,6 +662,8 @@ async def health_check():
487
  @app.get("/api/v1/status")
488
  async def get_detailed_status():
489
  """Get detailed service status"""
 
 
490
  return analysis_service.get_service_status()
491
 
492
  @app.post("/api/v1/analyze/file", response_model=AnalysisResult)
@@ -498,6 +675,9 @@ async def analyze_contract_file(
498
  compare_to_market: bool = Form(True)
499
  ):
500
  """Analyze uploaded contract file - DIRECT SYNC FLOW"""
 
 
 
501
  try:
502
  # Validate file
503
  is_valid, message = validate_file(file)
@@ -552,6 +732,9 @@ async def analyze_contract_text(
552
  compare_to_market: bool = Form(True)
553
  ):
554
  """Analyze pasted contract text - DIRECT SYNC FLOW"""
 
 
 
555
  try:
556
  # Validate contract text
557
  is_valid, message = validate_contract_text(contract_text)
@@ -609,6 +792,9 @@ async def generate_pdf_from_analysis(analysis_result: Dict[str, Any]):
609
  @app.get("/api/v1/categories")
610
  async def get_contract_categories():
611
  """Get list of supported contract categories"""
 
 
 
612
  try:
613
  categories = analysis_service.services["classifier"].get_all_categories()
614
  return {"categories": categories}
@@ -616,55 +802,55 @@ async def get_contract_categories():
616
  log_error(f"Categories fetch failed: {e}")
617
  raise HTTPException(status_code=500, detail=f"Failed to get categories: {str(e)}")
618
 
619
- @app.post("/api/v1/validate/file")
620
  async def validate_contract_file(file: UploadFile = File(...)):
621
  """Quick validation endpoint"""
622
  try:
623
  is_valid, message = validate_file(file)
624
  if not is_valid:
625
- return {"valid": False, "message": message}
626
 
627
  contract_text = read_contract_file(file)
628
 
629
  # Validate text length
630
  is_valid_text, text_message = validate_contract_text(contract_text)
631
  if not is_valid_text:
632
- return {"valid": False, "message": text_message}
633
 
634
  # Validate contract structure using ContractValidator
635
  validator = ContractValidator()
636
  report = validator.get_validation_report(contract_text)
637
 
638
- return {
639
- "valid": report["scores"]["total"] > 50 and is_valid_text,
640
- "message": "Contract appears valid" if report["scores"]["total"] > 50 else "May not be a valid contract",
641
- "confidence": report["scores"]["total"],
642
- "report": report
643
- }
644
 
645
  except Exception as e:
646
  log_error(f"File validation failed: {e}")
647
  raise HTTPException(status_code=400, detail=f"Validation failed: {str(e)}")
648
 
649
- @app.post("/api/v1/validate/text")
650
  async def validate_contract_text_endpoint(contract_text: str = Form(...)):
651
  """Validate pasted contract text"""
652
  try:
653
  # Validate text length
654
  is_valid, message = validate_contract_text(contract_text)
655
  if not is_valid:
656
- return {"valid": False, "message": message}
657
 
658
  # Validate contract structure using ContractValidator
659
  validator = ContractValidator()
660
  report = validator.get_validation_report(contract_text)
661
 
662
- return {
663
- "valid": report["scores"]["total"] > 50 and is_valid,
664
- "message": "Contract appears valid" if report["scores"]["total"] > 50 else "May not be a valid contract",
665
- "confidence": report["scores"]["total"],
666
- "report": report
667
- }
668
 
669
  except Exception as e:
670
  log_error(f"Text validation failed: {e}")
@@ -677,7 +863,7 @@ async def validate_contract_text_endpoint(contract_text: str = Form(...)):
677
  @app.exception_handler(HTTPException)
678
  async def http_exception_handler(request, exc):
679
  """Handle HTTP exceptions"""
680
- return JSONResponse(
681
  status_code=exc.status_code,
682
  content=ErrorResponse(
683
  error=exc.detail,
@@ -690,7 +876,7 @@ async def http_exception_handler(request, exc):
690
  async def general_exception_handler(request, exc):
691
  """Handle general exceptions"""
692
  log_error(f"Unhandled exception: {exc}")
693
- return JSONResponse(
694
  status_code=500,
695
  content=ErrorResponse(
696
  error="Internal server error",
@@ -700,34 +886,40 @@ async def general_exception_handler(request, exc):
700
  )
701
 
702
  # ============================================================================
703
- # STARTUP & SHUTDOWN
704
  # ============================================================================
705
 
706
- @app.on_event("startup")
707
- async def startup_event():
708
- """Startup event - Services are already pre-loaded"""
709
- log_info(f"🚀 {settings.APP_NAME} v{settings.APP_VERSION} STARTED")
710
- log_info(f"📍 Server: {settings.HOST}:{settings.PORT}")
711
- log_info(f"🔧 All models and services pre-loaded")
712
-
713
- @app.on_event("shutdown")
714
- async def shutdown_event():
715
- """Shutdown event"""
716
- log_info("🛑 Shutting down server...")
717
- log_info("✅ Server shutdown complete")
718
 
719
  # ============================================================================
720
  # MAIN
721
  # ============================================================================
722
-
723
  if __name__ == "__main__":
724
- import uvicorn
725
-
726
- uvicorn.run(
727
- "app:app",
728
- host=settings.HOST,
729
- port=settings.PORT,
730
- reload=settings.RELOAD,
731
- workers=1, # Single worker for synchronous flow
732
- log_level=settings.LOG_LEVEL.lower()
733
- )
 
 
 
 
 
 
 
 
 
 
 
3
  Complete pre-loading approach: All models loaded at startup
4
  Direct synchronous flow: Upload → Analyze → Return Results + PDF
5
  """
6
+ import signal
7
+ import os
8
+ import time
9
+ import json
10
+ import uuid
11
+ from typing import Any, List, Dict, Optional
12
+ from pathlib import Path
13
+ from datetime import datetime
14
+ from contextlib import asynccontextmanager
15
+
16
+ import uvicorn
17
+ import numpy as np
18
+ from fastapi import FastAPI, File, UploadFile, HTTPException, Form, Request
19
  from fastapi.responses import JSONResponse, FileResponse, Response
 
20
  from fastapi.middleware.cors import CORSMiddleware
21
  from fastapi.staticfiles import StaticFiles
22
  from pydantic import BaseModel, Field
 
 
 
 
 
23
  import sys
 
 
24
 
25
  # Add parent directory to path
26
  sys.path.append(str(Path(__file__).parent))
 
42
  from services.llm_interpreter import LLMClauseInterpreter
43
  from services.negotiation_engine import NegotiationEngine
44
  from services.market_comparator import MarketComparator
45
+ from services.summary_generator import SummaryGenerator
46
 
47
  # Import PDF generator
48
  from reporter.pdf_generator import generate_pdf_report
49
 
50
+ # ============================================================================
51
+ # CUSTOM SERIALIZATION
52
+ # ============================================================================
53
+
54
class NumpyJSONEncoder(json.JSONEncoder):
    """
    JSON encoder that also understands NumPy values and project objects.

    The stock encoder only calls ``default`` for objects it cannot serialize
    itself, so this class never sees plain ``int``/``float``/``str``/``list``/
    ``dict``/``tuple`` values.
    """

    def default(self, obj: Any) -> Any:
        """
        Convert an otherwise non-serializable object to a JSON-friendly value.

        Arguments:
            obj: the object the base encoder could not serialize.

        Returns:
            A value built from native Python types (it may still contain
            nested objects, which the encoder will pass back to ``default``).

        Raises:
            TypeError: from the base class when no conversion applies.
        """
        # Arrays must be tested before the generic ``item`` fallback below:
        # ndarray.item() only works for single-element arrays.
        if isinstance(obj, np.ndarray):
            return obj.tolist()

        # np.generic is the base class of every NumPy scalar (all integer and
        # float widths, bool_, ...), so no per-width list is needed.
        if isinstance(obj, np.generic):
            return obj.item()

        # Other scalar wrappers exposing a callable item(). The callable check
        # matters: an object that merely has a *data attribute* named ``item``
        # must fall through to the to_dict/dict handling instead of crashing.
        item = getattr(obj, "item", None)
        if callable(item):
            return item()

        # Custom objects with a to_dict method
        to_dict = getattr(obj, "to_dict", None)
        if callable(to_dict):
            return to_dict()

        # Pydantic models
        as_dict = getattr(obj, "dict", None)
        if callable(as_dict):
            return as_dict()

        # Unordered collections have no JSON counterpart; emit them as arrays.
        if isinstance(obj, (set, frozenset, tuple)):
            return list(obj)

        return super().default(obj)
88
+
89
+
90
class NumpyJSONResponse(JSONResponse):
    """
    JSON response whose body is encoded with NumpyJSONEncoder
    """

    def render(self, content: Any) -> bytes:
        """
        Serialize ``content`` to compact UTF-8 JSON bytes.

        NaN/Infinity are rejected (``allow_nan=False``), so json.dumps raises
        ValueError when the payload contains them.
        """
        body = json.dumps(
            content,
            cls=NumpyJSONEncoder,
            separators=(",", ":"),
            indent=None,
            allow_nan=False,
            ensure_ascii=False,
        )
        return body.encode("utf-8")
106
+
107
+
108
def convert_numpy_types(obj: Any) -> Any:
    """
    Recursively convert NumPy values (and objects wrapping them) to native
    Python types.

    Arguments:
        obj: any value; containers are walked recursively.

    Returns:
        ``None`` for ``None``; a new ``dict`` for dicts (NumPy scalar keys are
        converted too); a new ``list`` for lists, tuples, sets and frozensets;
        native scalars/lists for NumPy scalars/arrays; the converted result of
        ``to_dict()`` / ``dict()`` for objects offering them; anything else is
        returned unchanged.
    """
    if obj is None:
        return None

    # Handle dictionaries. Keys are converted as well: json.dumps rejects
    # NumPy integer keys because they are not ``int`` subclasses.
    if isinstance(obj, dict):
        return {
            (key.item() if isinstance(key, np.generic) else key): convert_numpy_types(value)
            for key, value in obj.items()
        }

    # Handle lists, tuples, sets (all become lists)
    if isinstance(obj, (list, tuple, set, frozenset)):
        return [convert_numpy_types(member) for member in obj]

    # Arrays before the generic ``item`` fallback: ndarray.item() only works
    # for single-element arrays.
    if isinstance(obj, np.ndarray):
        return obj.tolist()

    # np.generic covers every NumPy scalar type and width.
    if isinstance(obj, np.generic):
        return obj.item()

    # Other scalar wrappers with a callable item(); a plain data attribute
    # called ``item`` must not be invoked.
    item = getattr(obj, "item", None)
    if callable(item):
        return item()

    # Handle custom objects with to_dict method
    to_dict = getattr(obj, "to_dict", None)
    if callable(to_dict):
        return convert_numpy_types(to_dict())

    # Handle Pydantic models
    as_dict = getattr(obj, "dict", None)
    if callable(as_dict):
        return convert_numpy_types(as_dict())

    # Return as-is for other types
    return obj


def safe_serialize_response(data: Any) -> Any:
    """
    Return ``data`` with every NumPy value replaced by a native Python type.

    Thin alias of convert_numpy_types.
    """
    return convert_numpy_types(data)
153
+
154
 
155
  # ============================================================================
156
  # PYDANTIC SCHEMAS
157
  # ============================================================================
158
 
159
class SerializableBaseModel(BaseModel):
    """
    Base model with enhanced serialization for NumPy types
    """
    # NOTE(review): these overrides hook the pydantic v1-style dict()/json()
    # API; confirm the pinned pydantic/FastAPI versions actually route response
    # serialization through them.

    def dict(self, *args, **kwargs) -> Dict[str, Any]:
        """
        Return the model as a dict with all NumPy values converted to native
        Python types. Arguments are forwarded unchanged to BaseModel.dict().
        """
        data = super().dict(*args, **kwargs)
        return convert_numpy_types(data)

    def json(self, *args, **kwargs) -> str:
        """
        Return the model as a JSON string.

        Export options understood by dict() (include, exclude, by_alias, ...)
        are routed to dict(); every other keyword is passed to json.dumps.
        Forwarding the same keywords to both calls, as was done before, made
        any option fail in whichever callee did not know it.
        """
        dict_option_names = (
            "include",
            "exclude",
            "by_alias",
            "skip_defaults",
            "exclude_unset",
            "exclude_defaults",
            "exclude_none",
        )
        dict_kwargs = {name: kwargs.pop(name) for name in dict_option_names if name in kwargs}

        data = self.dict(*args, **dict_kwargs)

        # setdefault lets a caller supply its own encoder class without
        # triggering a duplicate-keyword error.
        kwargs.setdefault("cls", NumpyJSONEncoder)
        return json.dumps(data, **kwargs)
176
+
177
+
178
+ class HealthResponse(SerializableBaseModel):
179
  """Health check response"""
180
  status: str
181
  version: str
 
184
  services_loaded: int
185
  memory_usage_mb: float
186
 
187
+
188
+ class AnalysisOptions(SerializableBaseModel):
189
  """Analysis options"""
190
  max_clauses: int = Field(default=15, ge=5, le=30)
191
  interpret_clauses: bool = Field(default=True)
192
  generate_negotiation_points: bool = Field(default=True)
193
  compare_to_market: bool = Field(default=True)
194
 
195
+
196
+ class AnalysisResult(SerializableBaseModel):
197
  """Complete analysis result"""
198
  analysis_id: str
199
  timestamp: str
 
209
  metadata: Dict[str, Any]
210
  pdf_available: bool = True
211
 
212
+
213
+ class ErrorResponse(SerializableBaseModel):
214
  """Error response"""
215
  error: str
216
  detail: str
217
  timestamp: str
218
 
219
+
220
+ class FileValidationResponse(SerializableBaseModel):
221
+ """File validation response"""
222
+ valid: bool
223
+ message: str
224
+ confidence: Optional[float] = None
225
+ report: Optional[Dict[str, Any]] = None
226
+
227
+
228
  # ============================================================================
229
  # SERVICE INITIALIZATION WITH FULL PRE-LOADING
230
  # ============================================================================
 
372
 
373
  # Step 1: Classify contract
374
  classification = self.services["classifier"].classify_contract(contract_text)
375
+ classification_dict = safe_serialize_response(classification.to_dict())
376
  actual_category = classification.category
377
 
378
  log_info(f"Contract classified as: {actual_category}")
 
393
 
394
  # Extract clauses
395
  clauses = extractor.extract_clauses(contract_text, options.max_clauses)
396
+ clauses_dict = [safe_serialize_response(clause.to_dict()) for clause in clauses]
397
  log_info(f"Extracted {len(clauses)} clauses")
398
 
399
  # Step 3: Map to ContractType and get appropriate risk analyzer
 
417
 
418
  # Analyze risk
419
  risk_score = risk_analyzer.analyze_risk(contract_text, clauses)
420
+ risk_dict = safe_serialize_response(risk_score.to_dict())
421
  log_info(f"Risk analysis completed: {risk_dict['overall_score']}/100")
422
 
423
  # Step 4: Find unfavorable terms
424
  unfavorable_terms = self.services["term_analyzer"].analyze_unfavorable_terms(contract_text, clauses)
425
+ unfavorable_dict = [safe_serialize_response(term.to_dict()) for term in unfavorable_terms]
426
  log_info(f"Found {len(unfavorable_terms)} unfavorable terms")
427
 
428
  # Step 5: Check missing protections
429
  missing_protections = self.services["protection_checker"].check_missing_protections(contract_text, clauses)
430
+ missing_dict = [safe_serialize_response(prot.to_dict()) for prot in missing_protections]
431
  log_info(f"Found {len(missing_protections)} missing protections")
432
 
433
  # Optional steps
 
440
  interpretations = self.services["interpreter"].interpret_clauses(
441
  clauses, min(10, options.max_clauses)
442
  )
443
+ interpretations_dict = [safe_serialize_response(interp.to_dict()) for interp in interpretations]
444
  log_info(f"Interpreted {len(interpretations)} clauses")
445
  except Exception as e:
446
  log_error(f"Clause interpretation failed: {e}")
 
451
  negotiation_points = self.services["negotiation_engine"].generate_negotiation_points(
452
  risk_score, unfavorable_terms, missing_protections, clauses, 7
453
  )
454
+ negotiation_dict = [safe_serialize_response(point.to_dict()) for point in negotiation_points]
455
  log_info(f"Generated {len(negotiation_points)} negotiation points")
456
  except Exception as e:
457
  log_error(f"Negotiation points generation failed: {e}")
 
460
  if options.compare_to_market:
461
  try:
462
  market_comparisons = self.services["market_comparator"].compare_to_market(clauses)
463
+ market_dict = [safe_serialize_response(comp.to_dict()) for comp in market_comparisons]
464
  log_info(f"Compared {len(market_comparisons)} clauses to market")
465
  except Exception as e:
466
  log_error(f"Market comparison failed: {e}")
 
468
 
469
  # Generate executive summary
470
  executive_summary = self._generate_executive_summary(
471
+ classification_dict, risk_dict, unfavorable_dict, missing_dict, clauses,
472
  )
473
 
474
  # Build result
 
503
  raise
504
 
505
  def _generate_executive_summary(self, classification: Dict, risk_score: Dict,
506
+ unfavorable_terms: List, missing_protections: List,
507
+ clauses: List[Dict]) -> str:
508
+ """Generate executive summary using LLM"""
509
+ summary_generator = SummaryGenerator()
510
+
511
+ return summary_generator.generate_executive_summary(
512
+ classification=classification,
513
+ risk_analysis=risk_score,
514
+ unfavorable_terms=unfavorable_terms,
515
+ missing_protections=missing_protections,
516
+ clauses=clauses
517
+ )
 
 
 
 
 
 
 
518
 
519
  # ============================================================================
520
+ # FASTAPI APPLICATION
521
  # ============================================================================
522
 
523
+ # Global instances
524
+ analysis_service: Optional[PreloadedAnalysisService] = None
525
+ app_start_time = time.time()
526
+
527
+ # Initialize logger
528
+ ContractAnalyzerLogger.setup(log_dir="logs", app_name="contract_analyzer")
529
+ logger = ContractAnalyzerLogger.get_logger()
530
+
531
@asynccontextmanager
async def lifespan(app: FastAPI):
    """
    Application lifespan: build the analysis service before serving requests
    and log shutdown afterwards.

    Populates the module-level ``analysis_service``. Any exception raised
    while constructing it is logged and re-raised.
    """
    global analysis_service

    divider = "=" * 80

    # ---- Startup ----
    log_info(f"🚀 {settings.APP_NAME} v{settings.APP_VERSION} STARTING UP...")
    log_info(divider)

    try:
        analysis_service = PreloadedAnalysisService()
        log_info("✅ All services initialized successfully")
    except Exception as startup_error:
        log_error(f"Startup failed: {startup_error}")
        raise

    log_info(f"📍 Server: {settings.HOST}:{settings.PORT}")
    log_info(divider)
    log_info("✅ AI Contract Risk Analyzer Ready!")

    # ---- Serve, then shutdown ----
    try:
        yield
    finally:
        # The finally clause makes the shutdown messages appear even when the
        # serving phase ends with an exception.
        log_info("🛑 Shutting down server...")
        log_info("✅ Server shutdown complete")
559
+
560
+
561
  app = FastAPI(
562
  title=settings.APP_NAME,
563
  version=settings.APP_VERSION,
564
  description="AI-powered contract risk analysis with complete model pre-loading",
565
  docs_url="/api/docs",
566
+ redoc_url="/api/redoc",
567
+ default_response_class=NumpyJSONResponse,
568
+ lifespan=lifespan
569
  )
570
 
571
+ # Get absolute paths
572
+ BASE_DIR = Path(__file__).parent
573
+ STATIC_DIR = BASE_DIR / "static"
574
+
575
  # Serve static files
576
+ app.mount("/static", StaticFiles(directory=str(STATIC_DIR)), name="static")
577
 
578
# CORS middleware.
# Origins/methods/headers come from settings (as in the previous revision of
# this file) and only fall back to the permissive development wildcard when a
# setting is absent.
_cors_origins = getattr(settings, "CORS_ORIGINS", ["*"])

# Credentials are never combined with a wildcard origin: that pairing would let
# any site issue credentialed requests against the API.
_cors_allow_credentials = bool(getattr(settings, "CORS_ALLOW_CREDENTIALS", True)) and "*" not in _cors_origins

app.add_middleware(
    CORSMiddleware,
    allow_origins=_cors_origins,
    allow_credentials=_cors_allow_credentials,
    allow_methods=getattr(settings, "CORS_ALLOW_METHODS", ["*"]),
    allow_headers=getattr(settings, "CORS_ALLOW_HEADERS", ["*"]),
)
586
 
 
 
 
587
  # ============================================================================
588
  # HELPER FUNCTIONS
589
  # ============================================================================
 
640
  @app.get("/")
641
  async def serve_frontend():
642
  """Serve the frontend"""
643
+ return FileResponse(str(STATIC_DIR / "index.html"))
644
 
645
  @app.get("/api/v1/health", response_model=HealthResponse)
646
  async def health_check():
647
  """Health check endpoint with service status"""
648
+ if not analysis_service:
649
+ raise HTTPException(status_code=503, detail="Service not initialized")
650
+
651
  service_status = analysis_service.get_service_status()
652
 
653
  return HealthResponse(
 
662
  @app.get("/api/v1/status")
663
  async def get_detailed_status():
664
  """Get detailed service status"""
665
+ if not analysis_service:
666
+ raise HTTPException(status_code=503, detail="Service not initialized")
667
  return analysis_service.get_service_status()
668
 
669
  @app.post("/api/v1/analyze/file", response_model=AnalysisResult)
 
675
  compare_to_market: bool = Form(True)
676
  ):
677
  """Analyze uploaded contract file - DIRECT SYNC FLOW"""
678
+ if not analysis_service:
679
+ raise HTTPException(status_code=503, detail="Service not initialized")
680
+
681
  try:
682
  # Validate file
683
  is_valid, message = validate_file(file)
 
732
  compare_to_market: bool = Form(True)
733
  ):
734
  """Analyze pasted contract text - DIRECT SYNC FLOW"""
735
+ if not analysis_service:
736
+ raise HTTPException(status_code=503, detail="Service not initialized")
737
+
738
  try:
739
  # Validate contract text
740
  is_valid, message = validate_contract_text(contract_text)
 
792
  @app.get("/api/v1/categories")
793
  async def get_contract_categories():
794
  """Get list of supported contract categories"""
795
+ if not analysis_service:
796
+ raise HTTPException(status_code=503, detail="Service not initialized")
797
+
798
  try:
799
  categories = analysis_service.services["classifier"].get_all_categories()
800
  return {"categories": categories}
 
802
  log_error(f"Categories fetch failed: {e}")
803
  raise HTTPException(status_code=500, detail=f"Failed to get categories: {str(e)}")
804
 
805
@app.post("/api/v1/validate/file", response_model=FileValidationResponse)
async def validate_contract_file(file: UploadFile = File(...)):
    """
    Quick validation endpoint.

    Runs three checks in order and stops at the first failure: the upload
    itself (validate_file), the extracted text (validate_contract_text), and
    the contract structure (ContractValidator report).

    Returns:
        FileValidationResponse; ``confidence`` and ``report`` are only set
        when the structural check was reached.

    Raises:
        HTTPException: raised by a helper is propagated unchanged; any other
        failure is reported as a 400 with the original exception chained.
    """
    try:
        is_valid, message = validate_file(file)
        if not is_valid:
            return FileValidationResponse(valid=False, message=message)

        contract_text = read_contract_file(file)

        # Validate text length
        is_valid_text, text_message = validate_contract_text(contract_text)
        if not is_valid_text:
            return FileValidationResponse(valid=False, message=text_message)

        # Validate contract structure using ContractValidator
        validator = ContractValidator()
        report = validator.get_validation_report(contract_text)

        # Single lookup of the score; the text check already passed above, so
        # the structural score alone decides validity here.
        total_score = report["scores"]["total"]
        looks_valid = total_score > 50

        return FileValidationResponse(
            valid=looks_valid,
            message="Contract appears valid" if looks_valid else "May not be a valid contract",
            confidence=total_score,
            report=report
        )

    except HTTPException:
        # Keep the status code chosen by whoever raised it.
        raise
    except Exception as e:
        log_error(f"File validation failed: {e}")
        raise HTTPException(status_code=400, detail=f"Validation failed: {str(e)}") from e
834
 
835
+ @app.post("/api/v1/validate/text", response_model=FileValidationResponse)
836
  async def validate_contract_text_endpoint(contract_text: str = Form(...)):
837
  """Validate pasted contract text"""
838
  try:
839
  # Validate text length
840
  is_valid, message = validate_contract_text(contract_text)
841
  if not is_valid:
842
+ return FileValidationResponse(valid=False, message=message)
843
 
844
  # Validate contract structure using ContractValidator
845
  validator = ContractValidator()
846
  report = validator.get_validation_report(contract_text)
847
 
848
+ return FileValidationResponse(
849
+ valid=report["scores"]["total"] > 50 and is_valid,
850
+ message="Contract appears valid" if report["scores"]["total"] > 50 else "May not be a valid contract",
851
+ confidence=report["scores"]["total"],
852
+ report=report
853
+ )
854
 
855
  except Exception as e:
856
  log_error(f"Text validation failed: {e}")
 
863
  @app.exception_handler(HTTPException)
864
  async def http_exception_handler(request, exc):
865
  """Handle HTTP exceptions"""
866
+ return NumpyJSONResponse(
867
  status_code=exc.status_code,
868
  content=ErrorResponse(
869
  error=exc.detail,
 
876
  async def general_exception_handler(request, exc):
877
  """Handle general exceptions"""
878
  log_error(f"Unhandled exception: {exc}")
879
+ return NumpyJSONResponse(
880
  status_code=500,
881
  content=ErrorResponse(
882
  error="Internal server error",
 
886
  )
887
 
888
  # ============================================================================
889
+ # REQUEST LOGGING MIDDLEWARE
890
  # ============================================================================
891
 
892
@app.middleware("http")
async def log_requests(request: Request, call_next):
    """
    Log method, path, status and duration of every HTTP request.

    Requests whose downstream handling raises are logged as errors (with the
    elapsed time) before the exception is re-raised, so they no longer vanish
    from the request log.
    """
    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # system clock adjustments the way time.time() differences can.
    started = time.perf_counter()

    try:
        response = await call_next(request)
    except Exception:
        elapsed = time.perf_counter() - started
        log_error(f"API Request: {request.method} {request.url.path} - Status: unhandled exception - Duration: {elapsed:.3f}s")
        raise

    elapsed = time.perf_counter() - started

    log_info(f"API Request: {request.method} {request.url.path} - Status: {response.status_code} - Duration: {elapsed:.3f}s")

    return response
 
 
 
901
 
902
  # ============================================================================
903
  # MAIN
904
  # ============================================================================
 
905
  if __name__ == "__main__":
906
+ def signal_handler(sig, frame):
907
+ print("\n👋 Received Ctrl+C, shutting down gracefully...")
908
+ sys.exit(0)
909
+
910
+ signal.signal(signal.SIGINT, signal_handler)
911
+
912
+ try:
913
+ uvicorn.run(
914
+ "app:app",
915
+ host=settings.HOST,
916
+ port=settings.PORT,
917
+ reload=settings.RELOAD,
918
+ workers=1,
919
+ log_level=settings.LOG_LEVEL.lower()
920
+ )
921
+ except KeyboardInterrupt:
922
+ print("\n🎯 Server stopped by user")
923
+ except Exception as e:
924
+ log_error(f"Server error: {e}")
925
+ sys.exit(1)
config/model_config.py CHANGED
@@ -6,8 +6,13 @@ class ModelConfig:
6
  """
7
  Model-specific configurations - FOR AI MODEL SETTINGS ONLY
8
  """
 
 
 
 
9
  # Model Architecture Settings
10
  LEGAL_BERT = {"model_name" : "nlpaueb/legal-bert-base-uncased",
 
11
  "task" : "clause-extraction",
12
  "max_length" : 512,
13
  "batch_size" : 16,
@@ -18,6 +23,7 @@ class ModelConfig:
18
 
19
  # Embedding Model Settings
20
  EMBEDDING_MODEL = {"model_name" : "sentence-transformers/all-MiniLM-L6-v2",
 
21
  "dimension" : 384,
22
  "pooling" : "mean",
23
  "normalize" : True,
@@ -75,6 +81,20 @@ class ModelConfig:
75
  "entity_confidence" : 0.8,
76
  }
77
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
 
79
  @classmethod
80
  def get_model_config(cls, model_type: str) -> dict:
@@ -91,4 +111,4 @@ class ModelConfig:
91
  "text_processing" : cls.TEXT_PROCESSING,
92
  }
93
 
94
- return config_map.get(model_type, {})
 
6
  """
7
  Model-specific configurations - FOR AI MODEL SETTINGS ONLY
8
  """
9
+ # Directory Settings
10
+ MODEL_DIR = Path("models")
11
+ CACHE_DIR = Path("cache/models")
12
+
13
  # Model Architecture Settings
14
  LEGAL_BERT = {"model_name" : "nlpaueb/legal-bert-base-uncased",
15
+ "local_path" : MODEL_DIR / "legal-bert",
16
  "task" : "clause-extraction",
17
  "max_length" : 512,
18
  "batch_size" : 16,
 
23
 
24
  # Embedding Model Settings
25
  EMBEDDING_MODEL = {"model_name" : "sentence-transformers/all-MiniLM-L6-v2",
26
+ "local_path" : MODEL_DIR / "embeddings",
27
  "dimension" : 384,
28
  "pooling" : "mean",
29
  "normalize" : True,
 
81
  "entity_confidence" : 0.8,
82
  }
83
 
84
@classmethod
def ensure_directories(cls):
    """
    Create the model and cache directories, including the per-model
    sub-directories, when they do not already exist
    """
    model_root = cls.MODEL_DIR

    # parents = True builds missing ancestors; exist_ok = True makes repeated calls harmless
    for target in (model_root, cls.CACHE_DIR, model_root / "legal-bert", model_root / "embeddings"):
        target.mkdir(parents = True, exist_ok = True)
97
+
98
 
99
  @classmethod
100
  def get_model_config(cls, model_type: str) -> dict:
 
111
  "text_processing" : cls.TEXT_PROCESSING,
112
  }
113
 
114
+ return config_map.get(model_type, {})
launch.py CHANGED
@@ -57,10 +57,10 @@ def start_api():
57
  time.sleep(3)
58
 
59
  try:
60
- response = requests.get("http://localhost:8000/api/v1/health", timeout=5)
61
  if response.status_code == 200:
62
- print("✓ API Server running at: http://localhost:8000")
63
- print("✓ Documentation at: http://localhost:8000/api/docs")
64
  return True
65
  except:
66
  pass
 
57
  time.sleep(3)
58
 
59
  try:
60
+ response = requests.get("http://localhost:8005/api/v1/health", timeout=5)
61
  if response.status_code == 200:
62
+ print("✓ API Server running at: http://localhost:8005")
63
+ print("✓ Documentation at: http://localhost:8005/api/docs")
64
  return True
65
  except:
66
  pass
model_manager/llm_manager.py CHANGED
@@ -11,6 +11,7 @@ from pathlib import Path
11
  from typing import Literal
12
  from typing import Optional
13
  from dataclasses import dataclass
 
14
 
15
  # Add parent directory to path for imports
16
  sys.path.append(str(Path(__file__).parent.parent))
@@ -80,14 +81,14 @@ class LLMManager:
80
  Unified LLM manager for multiple providers : handles Ollama (local), OpenAI API, and Anthropic API
81
  """
82
  def __init__(self, default_provider: LLMProvider = LLMProvider.OLLAMA, ollama_base_url: Optional[str] = None,
83
- openai_api_key: Optional[str] = None, anthropic_api_key: Optional[str] = None):
84
  """
85
  Initialize LLM Manager
86
 
87
  Arguments:
88
  ----------
89
  default_provider : Default LLM provider to use
90
-
91
  ollama_base_url : Ollama server URL (default: http://localhost:11434)
92
 
93
  openai_api_key : OpenAI API key (or set OPENAI_API_KEY env var)
@@ -101,9 +102,20 @@ class LLMManager:
101
  self.config = ModelConfig()
102
 
103
  # Ollama configuration
104
- self.ollama_base_url = ollama_base_url or self.config.LLM_CONFIG["base_url"]
105
- self.ollama_model = self.config.LLM_CONFIG["model"]
106
- self.ollama_timeout = self.config.LLM_CONFIG["timeout"]
 
 
 
 
 
 
 
 
 
 
 
107
 
108
  # OpenAI configuration
109
  self.openai_api_key = openai_api_key
@@ -116,7 +128,7 @@ class LLMManager:
116
 
117
  if (ANTHROPIC_AVAILABLE and self.anthropic_api_key):
118
  self.anthropic_client = anthropic.Anthropic(api_key = self.anthropic_api_key)
119
-
120
  else:
121
  self.anthropic_client = None
122
 
@@ -133,7 +145,7 @@ class LLMManager:
133
  openai_available = OPENAI_AVAILABLE and bool(self.openai_api_key),
134
  anthropic_available = ANTHROPIC_AVAILABLE and bool(self.anthropic_api_key),
135
  )
136
-
137
 
138
  # PROVIDER AVAILABILITY CHECKS
139
  def _check_ollama_available(self) -> bool:
 
11
  from typing import Literal
12
  from typing import Optional
13
  from dataclasses import dataclass
14
+ from config.settings import settings
15
 
16
  # Add parent directory to path for imports
17
  sys.path.append(str(Path(__file__).parent.parent))
 
81
  Unified LLM manager for multiple providers : handles Ollama (local), OpenAI API, and Anthropic API
82
  """
83
  def __init__(self, default_provider: LLMProvider = LLMProvider.OLLAMA, ollama_base_url: Optional[str] = None,
84
+ openai_api_key: Optional[str] = None, anthropic_api_key: Optional[str] = None):
85
  """
86
  Initialize LLM Manager
87
 
88
  Arguments:
89
  ----------
90
  default_provider : Default LLM provider to use
91
+
92
  ollama_base_url : Ollama server URL (default: http://localhost:11434)
93
 
94
  openai_api_key : OpenAI API key (or set OPENAI_API_KEY env var)
 
102
  self.config = ModelConfig()
103
 
104
  # Ollama configuration
105
+ self.ollama_base_url = ollama_base_url or "http://localhost:11434" # Default Ollama URL
106
+ self.ollama_model = "mistral:7b" # Default model
107
+ self.ollama_timeout = 300 # Default timeout
108
+
109
+ # Get settings from environment or use defaults
110
+ try:
111
+
112
+ self.ollama_base_url = ollama_base_url or settings.OLLAMA_BASE_URL
113
+ self.ollama_model = settings.OLLAMA_MODEL
114
+ self.ollama_timeout = settings.OLLAMA_TIMEOUT
115
+
116
+ except ImportError:
117
+ # Fallback to defaults if settings not available
118
+ pass
119
 
120
  # OpenAI configuration
121
  self.openai_api_key = openai_api_key
 
128
 
129
  if (ANTHROPIC_AVAILABLE and self.anthropic_api_key):
130
  self.anthropic_client = anthropic.Anthropic(api_key = self.anthropic_api_key)
131
+
132
  else:
133
  self.anthropic_client = None
134
 
 
145
  openai_available = OPENAI_AVAILABLE and bool(self.openai_api_key),
146
  anthropic_available = ANTHROPIC_AVAILABLE and bool(self.anthropic_api_key),
147
  )
148
+
149
 
150
  # PROVIDER AVAILABILITY CHECKS
151
  def _check_ollama_available(self) -> bool:
reporter/pdf_generator.py CHANGED
@@ -67,7 +67,7 @@ class PDFReportGenerator:
67
 
68
  # Body text
69
  self.styles.add(ParagraphStyle(
70
- name='BodyText',
71
  parent=self.styles['Normal'],
72
  fontSize=10,
73
  leading=14,
@@ -445,52 +445,3 @@ def generate_pdf_report(analysis_result: Dict[str, Any],
445
  generator = PDFReportGenerator()
446
  return generator.generate_report(analysis_result, output_path)
447
 
448
-
449
- if __name__ == "__main__":
450
- # Test with sample data
451
- sample_result = {
452
- "analysis_id": "test-123",
453
- "timestamp": datetime.now().isoformat(),
454
- "risk_analysis": {
455
- "overall_score": 85,
456
- "risk_level": "CRITICAL",
457
- "risk_breakdown": [
458
- {
459
- "category": "Restrictive Covenants",
460
- "score": 95,
461
- "summary": "The agreement contains exceptionally broad and long-lasting non-compete (24 months) and non-solicitation (5 years) clauses."
462
- },
463
- {
464
- "category": "Penalties & Termination",
465
- "score": 90,
466
- "summary": "The contract includes severe penalties for breach, including forfeiture of earned salary."
467
- }
468
- ]
469
- },
470
- "executive_summary": "This employment agreement is heavily skewed in favor of the Employer, presenting a very high risk.",
471
- "unfavorable_terms": [
472
- {
473
- "term": "Undefined Post-Probation Salary",
474
- "clause_reference": "Clause 8.2",
475
- "severity": "critical",
476
- "explanation": "Post-probation salary is undefined ('as discussed').",
477
- "suggested_fix": "Insist that the exact salary be explicitly stated."
478
- }
479
- ],
480
- "missing_protections": [
481
- {
482
- "protection": "Defined Post-Probation Salary",
483
- "importance": "critical",
484
- "explanation": "The contract lacks a specific, written salary commitment."
485
- }
486
- ],
487
- "negotiation_points": [
488
- {
489
- "issue": "Post-probation salary",
490
- "rationale": "Must be explicitly defined in writing before signing."
491
- }
492
- ]
493
- }
494
-
495
- buffer = generate_pdf_report(sample_result, "test_report.pdf")
496
- print("Test PDF generated successfully!")
 
67
 
68
  # Body text
69
  self.styles.add(ParagraphStyle(
70
+ name='CustomBodyText',
71
  parent=self.styles['Normal'],
72
  fontSize=10,
73
  leading=14,
 
445
  generator = PDFReportGenerator()
446
  return generator.generate_report(analysis_result, output_path)
447
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
services/clause_extractor.py CHANGED
@@ -21,6 +21,7 @@ from utils.logger import log_info
21
  from utils.logger import log_error
22
  from utils.text_processor import TextProcessor
23
  from utils.logger import ContractAnalyzerLogger
 
24
 
25
 
26
  @dataclass
@@ -604,7 +605,7 @@ class ClauseExtractor:
604
  Extract risk indicator keywords from clause text
605
  """
606
  text_lower = text.lower()
607
- found_indicators = dict()
608
 
609
  for severity, indicators in self.RISK_INDICATORS.items():
610
  for indicator in indicators:
 
21
  from utils.logger import log_error
22
  from utils.text_processor import TextProcessor
23
  from utils.logger import ContractAnalyzerLogger
24
+ from model_manager.model_loader import ModelLoader
25
 
26
 
27
  @dataclass
 
605
  Extract risk indicator keywords from clause text
606
  """
607
  text_lower = text.lower()
608
+ found_indicators = list()
609
 
610
  for severity, indicators in self.RISK_INDICATORS.items():
611
  for indicator in indicators:
services/contract_classifier.py CHANGED
@@ -55,127 +55,55 @@ class ContractClassifier:
55
  4. Confidence scoring with explanations
56
  """
57
  # CATEGORY HIERARCHY WITH KEYWORDS
58
- CATEGORY_HIERARCHY = {
59
- 'employment': {
60
- 'subcategories': ['full_time', 'part_time', 'contract_worker', 'internship', 'executive'],
61
- 'keywords': [
62
- 'employee', 'employment', 'job', 'position', 'salary', 'benefits',
63
- 'annual leave', 'sick leave', 'probation', 'job description',
64
- 'work hours', 'overtime', 'performance review', 'bonus structure'
65
- ],
66
- 'weight': 1.0
67
- },
68
-
69
- 'consulting': {
70
- 'subcategories': ['independent_contractor', 'advisory', 'professional_services', 'freelance'],
71
- 'keywords': [
72
- 'consultant', 'consulting', 'independent contractor', 'statement of work',
73
- 'deliverables', 'professional services', 'hourly rate', 'project scope',
74
- 'milestone', 'acceptance criteria', 'work product'
75
- ],
76
- 'weight': 1.0
77
- },
78
-
79
- 'nda': {
80
- 'subcategories': ['mutual_nda', 'unilateral_nda', 'confidentiality_agreement'],
81
- 'keywords': [
82
- 'non-disclosure', 'confidentiality', 'proprietary information',
83
- 'nda', 'disclosure agreement', 'trade secret', 'confidential information',
84
- 'receiving party', 'disclosing party', 'confidentiality obligation'
85
- ],
86
- 'weight': 1.2 # Higher weight as NDAs are distinct
87
- },
88
-
89
- 'technology': {
90
- 'subcategories': ['software_license', 'saas', 'cloud_services', 'development', 'api_access'],
91
- 'keywords': [
92
- 'software', 'license', 'saas', 'subscription', 'source code',
93
- 'object code', 'api', 'cloud', 'hosting', 'maintenance',
94
- 'updates', 'support', 'uptime', 'service level'
95
- ],
96
- 'weight': 1.0
97
- },
98
-
99
- 'intellectual_property': {
100
- 'subcategories': ['ip_assignment', 'licensing', 'patent', 'trademark', 'copyright'],
101
- 'keywords': [
102
- 'intellectual property', 'ip', 'copyright', 'patent', 'trademark',
103
- 'work product', 'inventions', 'ip rights', 'ownership',
104
- 'assignment of rights', 'license grant', 'royalty'
105
- ],
106
- 'weight': 1.1
107
- },
108
-
109
- 'real_estate': {
110
- 'subcategories': ['residential_lease', 'commercial_lease', 'sublease', 'purchase_agreement'],
111
- 'keywords': [
112
- 'landlord', 'tenant', 'lease', 'premises', 'rent', 'property',
113
- 'security deposit', 'utilities', 'maintenance', 'repairs',
114
- 'eviction', 'lease term', 'renewal', 'square footage'
115
- ],
116
- 'weight': 1.0
117
- },
118
-
119
- 'financial': {
120
- 'subcategories': ['loan', 'mortgage', 'credit', 'investment', 'promissory_note'],
121
- 'keywords': [
122
- 'loan', 'borrower', 'lender', 'principal', 'interest rate',
123
- 'collateral', 'default', 'repayment', 'amortization',
124
- 'promissory note', 'security interest', 'mortgage'
125
- ],
126
- 'weight': 1.0
127
- },
128
-
129
- 'business': {
130
- 'subcategories': ['partnership', 'joint_venture', 'shareholders', 'llc_operating', 'merger'],
131
- 'keywords': [
132
- 'partnership', 'joint venture', 'equity', 'shares', 'profit sharing',
133
- 'loss allocation', 'management', 'governance', 'voting rights',
134
- 'dissolution', 'capital contribution', 'distribution'
135
- ],
136
- 'weight': 1.0
137
- },
138
-
139
- 'sales': {
140
- 'subcategories': ['purchase_order', 'sales_agreement', 'distribution', 'supply_agreement'],
141
- 'keywords': [
142
- 'purchase', 'sale', 'buyer', 'seller', 'goods', 'products',
143
- 'delivery', 'shipment', 'payment terms', 'invoice',
144
- 'purchase price', 'quantity', 'specifications'
145
- ],
146
- 'weight': 1.0
147
- },
148
-
149
- 'service_agreement': {
150
- 'subcategories': ['master_services', 'maintenance', 'support', 'subscription'],
151
- 'keywords': [
152
- 'service provider', 'services', 'sla', 'service level agreement',
153
- 'uptime', 'response time', 'support', 'maintenance',
154
- 'service credits', 'performance metrics', 'implementation'
155
- ],
156
- 'weight': 1.0
157
- },
158
-
159
- 'vendor': {
160
- 'subcategories': ['supplier_agreement', 'procurement', 'master_vendor'],
161
- 'keywords': [
162
- 'vendor', 'supplier', 'procurement', 'supply chain',
163
- 'purchase order', 'fulfillment', 'vendor management',
164
- 'pricing', 'terms of supply'
165
- ],
166
- 'weight': 1.0
167
- },
168
-
169
- 'agency': {
170
- 'subcategories': ['marketing_agency', 'recruiting', 'representation'],
171
- 'keywords': [
172
- 'agent', 'agency', 'principal', 'commission', 'representation',
173
- 'authority', 'scope of authority', 'compensation',
174
- 'exclusive rights', 'territory'
175
- ],
176
- 'weight': 1.0
177
- }
178
- }
179
 
180
  # SUBCATEGORY DETECTION PATTERNS
181
  SUBCATEGORY_PATTERNS = {'full_time' : ['full-time', 'full time', 'permanent', 'regular employee', '40 hours', 'exempt employee'],
@@ -325,7 +253,7 @@ class ContractClassifier:
325
  raise ValueError("Contract text too short for classification")
326
 
327
  # Preprocess text (use first 3000 chars for efficiency)
328
- text_excerpt = contract_text
329
 
330
  log_info("Starting contract classification",
331
  text_length = len(contract_text),
@@ -338,8 +266,8 @@ class ContractClassifier:
338
  # Step 2: Semantic similarity
339
  semantic_scores = self._semantic_similarity(text_excerpt)
340
 
341
- # Step 3: Legal-BERT enhanced (optional - can be expensive)
342
- legal_bert_scores = self._legal_bert_classification(text_excerpt)
343
 
344
  # Step 4: Combine scores (weighted average)
345
  combined_scores = self._combine_scores(keyword_scores = keyword_scores,
@@ -369,6 +297,7 @@ class ContractClassifier:
369
  subcategory = subcategory,
370
  keyword_scores = keyword_scores,
371
  semantic_scores = semantic_scores,
 
372
  combined_scores = combined_scores,
373
  )
374
 
@@ -452,9 +381,9 @@ class ContractClassifier:
452
  return similarities
453
 
454
 
455
- def _legal_bert_classification(self, text: str) -> Dict[str, float]:
456
  """
457
- Use Legal-BERT for classification (optional - computationally expensive)
458
 
459
  Arguments:
460
  ----------
@@ -462,7 +391,42 @@ class ContractClassifier:
462
 
463
  Returns:
464
  --------
465
- { dict } : Dictionary of {category: score}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
466
  """
467
  # Tokenize
468
  inputs = self.legal_bert_tokenizer(text,
@@ -475,9 +439,10 @@ class ContractClassifier:
475
  # Get embeddings
476
  with torch.no_grad():
477
  outputs = self.legal_bert_model(**inputs)
 
478
  cls_embedding = outputs.last_hidden_state[:, 0, :].cpu().numpy()[0]
479
 
480
- return {cat: 0.5 for cat in self.CATEGORY_HIERARCHY.keys()}
481
 
482
 
483
  def _combine_scores(self, keyword_scores: Dict[str, float], semantic_scores: Dict[str, float], legal_bert_scores: Dict[str, float] = None) -> Dict[str, float]:
@@ -490,7 +455,7 @@ class ContractClassifier:
490
 
491
  semantic_scores { dict } : Semantic similarity scores
492
 
493
- legal_bert_scores { dict } : Legal-BERT scores (optional)
494
 
495
  Returns:
496
  --------
@@ -499,22 +464,14 @@ class ContractClassifier:
499
  combined = dict()
500
 
501
  # Weights for each method
502
- keyword_weight = 0.40
503
- semantic_weight = 0.60
504
- legal_bert_weight = 0.00 # Set to 0 if not using Legal-BERT
505
-
506
- if legal_bert_scores:
507
- # Normalize weights
508
- total_weight = keyword_weight + semantic_weight + legal_bert_weight
509
- keyword_weight /= total_weight
510
- semantic_weight /= total_weight
511
- legal_bert_weight /= total_weight
512
 
513
  for category in self.CATEGORY_HIERARCHY.keys():
514
- score = (keyword_scores.get(category, 0) * keyword_weight + semantic_scores.get(category, 0) * semantic_weight)
515
-
516
- if legal_bert_scores:
517
- score += legal_bert_scores.get(category, 0) * legal_bert_weight
518
 
519
  combined[category] = score
520
 
@@ -562,8 +519,9 @@ class ContractClassifier:
562
  return None
563
 
564
 
565
- def _generate_reasoning(self, contract_text: str, primary_category: str, subcategory: Optional[str], keyword_scores: Dict[str, float], semantic_scores: Dict[str, float],
566
- combined_scores: Dict[str, float]) -> List[str]:
 
567
  """
568
  Generate human-readable reasoning for classification
569
 
@@ -576,6 +534,7 @@ class ContractClassifier:
576
  # Primary category reasoning
577
  keyword_match = keyword_scores.get(primary_category, 0)
578
  semantic_match = semantic_scores.get(primary_category, 0)
 
579
 
580
  if (keyword_match > 0.5):
581
  reasoning.append(f"Strong keyword indicators for {primary_category.replace('_', ' ')} category "
@@ -597,6 +556,11 @@ class ContractClassifier:
597
  f"(similarity: {semantic_match:.2f})"
598
  )
599
 
 
 
 
 
 
600
  # Subcategory reasoning
601
  if subcategory:
602
  reasoning.append(f"Specific subcategory identified: {subcategory.replace('_', ' ')}")
@@ -659,9 +623,10 @@ class ContractClassifier:
659
  log_info("Starting multi-label classification", threshold = threshold)
660
 
661
  # Get scores
662
- keyword_scores = self._score_keywords(text.lower())
663
- semantic_scores = self._semantic_similarity(text)
664
- combined_scores = self._combine_scores(keyword_scores, semantic_scores)
 
665
 
666
  # Get all categories above threshold
667
  matches = list()
@@ -669,7 +634,8 @@ class ContractClassifier:
669
  for category, score in combined_scores.items():
670
  if (score >= threshold):
671
  subcategory = self._detect_subcategory(text, category)
672
- reasoning = self._generate_reasoning(text, category, subcategory, keyword_scores, semantic_scores, combined_scores)
 
673
  keywords = self._extract_detected_keywords(text, category)
674
 
675
  matches.append(ContractCategory(category = category,
@@ -720,4 +686,4 @@ class ContractClassifier:
720
  """
721
  Get subcategories for a specific category
722
  """
723
- return self.CATEGORY_HIERARCHY.get(category, {}).get('subcategories', [])
 
55
  4. Confidence scoring with explanations
56
  """
57
  # CATEGORY HIERARCHY WITH KEYWORDS
58
+ CATEGORY_HIERARCHY = {'employment' : {'subcategories' : ['full_time', 'part_time', 'contract_worker', 'internship', 'executive'],
59
+ 'keywords' : ['employee', 'employment', 'job', 'position', 'salary', 'benefits', 'annual leave', 'sick leave', 'probation', 'job description', 'work hours', 'overtime', 'performance review', 'bonus structure'],
60
+ 'weight' : 1.0,
61
+ },
62
+ 'consulting' : {'subcategories' : ['independent_contractor', 'advisory', 'professional_services', 'freelance'],
63
+ 'keywords' : ['consultant', 'consulting', 'independent contractor', 'statement of work', 'deliverables', 'professional services', 'hourly rate', 'project scope', 'milestone', 'acceptance criteria', 'work product'],
64
+ 'weight' : 1.0,
65
+ },
66
+ 'nda' : {'subcategories' : ['mutual_nda', 'unilateral_nda', 'confidentiality_agreement'],
67
+ 'keywords' : ['non-disclosure', 'confidentiality', 'proprietary information', 'nda', 'disclosure agreement', 'trade secret', 'confidential information', 'receiving party', 'disclosing party', 'confidentiality obligation'],
68
+ 'weight' : 1.2,
69
+ },
70
+ 'technology' : {'subcategories' : ['software_license', 'saas', 'cloud_services', 'development', 'api_access'],
71
+ 'keywords' : ['software', 'license', 'saas', 'subscription', 'source code', 'object code', 'api', 'cloud', 'hosting', 'maintenance', 'updates', 'support', 'uptime', 'service level'],
72
+ 'weight' : 1.0,
73
+ },
74
+ 'intellectual_property' : {'subcategories' : ['ip_assignment', 'licensing', 'patent', 'trademark', 'copyright'],
75
+ 'keywords' : ['intellectual property', 'ip', 'copyright', 'patent', 'trademark', 'work product', 'inventions', 'ip rights', 'ownership', 'assignment of rights', 'license grant', 'royalty'],
76
+ 'weight' : 1.1,
77
+ },
78
+ 'real_estate' : {'subcategories' : ['residential_lease', 'commercial_lease', 'sublease', 'purchase_agreement'],
79
+ 'keywords' : ['landlord', 'tenant', 'lease', 'premises', 'rent', 'property', 'security deposit', 'utilities', 'maintenance', 'repairs', 'eviction', 'lease term', 'renewal', 'square footage'],
80
+ 'weight' : 1.0,
81
+ },
82
+ 'financial' : {'subcategories' : ['loan', 'mortgage', 'credit', 'investment', 'promissory_note'],
83
+ 'keywords' : ['loan', 'borrower', 'lender', 'principal', 'interest rate', 'collateral', 'default', 'repayment', 'amortization', 'promissory note', 'security interest', 'mortgage'],
84
+ 'weight' : 1.0,
85
+ },
86
+ 'business' : {'subcategories' : ['partnership', 'joint_venture', 'shareholders', 'llc_operating', 'merger'],
87
+ 'keywords' : ['partnership', 'joint venture', 'equity', 'shares', 'profit sharing', 'loss allocation', 'management', 'governance', 'voting rights', 'dissolution', 'capital contribution', 'distribution'],
88
+ 'weight' : 1.0,
89
+ },
90
+ 'sales' : {'subcategories' : ['purchase_order', 'sales_agreement', 'distribution', 'supply_agreement'],
91
+ 'keywords' : ['purchase', 'sale', 'buyer', 'seller', 'goods', 'products', 'delivery', 'shipment', 'payment terms', 'invoice', 'purchase price', 'quantity', 'specifications'],
92
+ 'weight' : 1.0,
93
+ },
94
+ 'service_agreement' : {'subcategories' : ['master_services', 'maintenance', 'support', 'subscription'],
95
+ 'keywords' : ['service provider', 'services', 'sla', 'service level agreement', 'uptime', 'response time', 'support', 'maintenance', 'service credits', 'performance metrics', 'implementation'],
96
+ 'weight' : 1.0,
97
+ },
98
+ 'vendor' : {'subcategories' : ['supplier_agreement', 'procurement', 'master_vendor'],
99
+ 'keywords' : ['vendor', 'supplier', 'procurement', 'supply chain', 'purchase order', 'fulfillment', 'vendor management', 'pricing', 'terms of supply'],
100
+ 'weight' : 1.0,
101
+ },
102
+ 'agency' : {'subcategories' : ['marketing_agency', 'recruiting', 'representation'],
103
+ 'keywords' : ['agent', 'agency', 'principal', 'commission', 'representation', 'authority', 'scope of authority', 'compensation', 'exclusive rights', 'territory'],
104
+ 'weight' : 1.0,
105
+ },
106
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
 
108
  # SUBCATEGORY DETECTION PATTERNS
109
  SUBCATEGORY_PATTERNS = {'full_time' : ['full-time', 'full time', 'permanent', 'regular employee', '40 hours', 'exempt employee'],
 
253
  raise ValueError("Contract text too short for classification")
254
 
255
  # Preprocess text (use first 3000 chars for efficiency)
256
+ text_excerpt = contract_text[:3000]
257
 
258
  log_info("Starting contract classification",
259
  text_length = len(contract_text),
 
266
  # Step 2: Semantic similarity
267
  semantic_scores = self._semantic_similarity(text_excerpt)
268
 
269
+ # Step 3: Legal-BERT semantic similarity (enhanced)
270
+ legal_bert_scores = self._legal_bert_similarity(text_excerpt)
271
 
272
  # Step 4: Combine scores (weighted average)
273
  combined_scores = self._combine_scores(keyword_scores = keyword_scores,
 
297
  subcategory = subcategory,
298
  keyword_scores = keyword_scores,
299
  semantic_scores = semantic_scores,
300
+ legal_bert_scores = legal_bert_scores,
301
  combined_scores = combined_scores,
302
  )
303
 
 
381
  return similarities
382
 
383
 
384
+ def _legal_bert_similarity(self, text: str) -> Dict[str, float]:
385
  """
386
+ Use Legal-BERT for semantic similarity calculation
387
 
388
  Arguments:
389
  ----------
 
391
 
392
  Returns:
393
  --------
394
+ { dict } : Dictionary of {category: similarity_score} using Legal-BERT embeddings
395
+ """
396
+ # Get Legal-BERT embedding for the text
397
+ text_embedding = self._get_legal_bert_embedding(text)
398
+
399
+ # Calculate similarity to each category's Legal-BERT embedding
400
+ similarities = dict()
401
+
402
+ for category in self.CATEGORY_HIERARCHY.keys():
403
+ # Get pre-computed category embedding
404
+ cat_embedding = self._get_legal_bert_embedding(
405
+ f"This is a {category.replace('_', ' ')} contract agreement"
406
+ )
407
+
408
+ # Calculate cosine similarity
409
+ similarity = torch.nn.functional.cosine_similarity(
410
+ torch.tensor(text_embedding).unsqueeze(0),
411
+ torch.tensor(cat_embedding).unsqueeze(0)
412
+ ).item()
413
+
414
+ similarities[category] = similarity
415
+
416
+ return similarities
417
+
418
+
419
+ def _get_legal_bert_embedding(self, text: str) -> np.ndarray:
420
+ """
421
+ Get Legal-BERT embedding for text using [CLS] token
422
+
423
+ Arguments:
424
+ ----------
425
+ text { str } : Input text
426
+
427
+ Returns:
428
+ --------
429
+ { np.ndarray } : Embedding vector
430
  """
431
  # Tokenize
432
  inputs = self.legal_bert_tokenizer(text,
 
439
  # Get embeddings
440
  with torch.no_grad():
441
  outputs = self.legal_bert_model(**inputs)
442
+ # Use [CLS] token embedding (first token)
443
  cls_embedding = outputs.last_hidden_state[:, 0, :].cpu().numpy()[0]
444
 
445
+ return cls_embedding
446
 
447
 
448
  def _combine_scores(self, keyword_scores: Dict[str, float], semantic_scores: Dict[str, float], legal_bert_scores: Dict[str, float] = None) -> Dict[str, float]:
 
455
 
456
  semantic_scores { dict } : Semantic similarity scores
457
 
458
+ legal_bert_scores { dict } : Legal-BERT similarity scores (optional)
459
 
460
  Returns:
461
  --------
 
464
  combined = dict()
465
 
466
  # Weights for each method
467
+ keyword_weight = 0.30
468
+ semantic_weight = 0.40
469
+ legal_bert_weight = 0.30
 
 
 
 
 
 
 
470
 
471
  for category in self.CATEGORY_HIERARCHY.keys():
472
+ score = (keyword_scores.get(category, 0) * keyword_weight +
473
+ semantic_scores.get(category, 0) * semantic_weight +
474
+ legal_bert_scores.get(category, 0) * legal_bert_weight)
 
475
 
476
  combined[category] = score
477
 
 
519
  return None
520
 
521
 
522
+ def _generate_reasoning(self, contract_text: str, primary_category: str, subcategory: Optional[str],
523
+ keyword_scores: Dict[str, float], semantic_scores: Dict[str, float],
524
+ legal_bert_scores: Dict[str, float], combined_scores: Dict[str, float]) -> List[str]:
525
  """
526
  Generate human-readable reasoning for classification
527
 
 
534
  # Primary category reasoning
535
  keyword_match = keyword_scores.get(primary_category, 0)
536
  semantic_match = semantic_scores.get(primary_category, 0)
537
+ legal_bert_match = legal_bert_scores.get(primary_category, 0)
538
 
539
  if (keyword_match > 0.5):
540
  reasoning.append(f"Strong keyword indicators for {primary_category.replace('_', ' ')} category "
 
556
  f"(similarity: {semantic_match:.2f})"
557
  )
558
 
559
+ if (legal_bert_match > 0.60):
560
+ reasoning.append(f"Legal-BERT semantic analysis confirms {primary_category.replace('_', ' ')} classification "
561
+ f"(similarity: {legal_bert_match:.2f})"
562
+ )
563
+
564
  # Subcategory reasoning
565
  if subcategory:
566
  reasoning.append(f"Specific subcategory identified: {subcategory.replace('_', ' ')}")
 
623
  log_info("Starting multi-label classification", threshold = threshold)
624
 
625
  # Get scores
626
+ keyword_scores = self._score_keywords(text.lower())
627
+ semantic_scores = self._semantic_similarity(text[:3000])
628
+ legal_bert_scores = self._legal_bert_similarity(text[:3000])
629
+ combined_scores = self._combine_scores(keyword_scores, semantic_scores, legal_bert_scores)
630
 
631
  # Get all categories above threshold
632
  matches = list()
 
634
  for category, score in combined_scores.items():
635
  if (score >= threshold):
636
  subcategory = self._detect_subcategory(text, category)
637
+ reasoning = self._generate_reasoning(text, category, subcategory, keyword_scores,
638
+ semantic_scores, legal_bert_scores, combined_scores)
639
  keywords = self._extract_detected_keywords(text, category)
640
 
641
  matches.append(ContractCategory(category = category,
 
686
  """
687
  Get subcategories for a specific category
688
  """
689
+ return self.CATEGORY_HIERARCHY.get(category, {}).get('subcategories', [])
services/risk_analyzer.py CHANGED
@@ -1,70 +1,82 @@
1
- """
2
- Multi-Factor Risk Analyzer with sophisticated rule-based scoring
3
- Analyzes contracts using keyword severity, structural patterns, clause-level analysis,
4
- industry benchmarks, and missing protections detection
5
- """
6
-
7
  import re
8
- from typing import Dict, List, Tuple, Optional, Any
9
- from dataclasses import dataclass, field
10
- from collections import defaultdict
11
  import sys
 
 
 
 
12
  from pathlib import Path
 
 
 
 
13
 
14
  # Add parent directory to path for imports
15
  sys.path.append(str(Path(__file__).parent.parent))
16
 
17
- from config.risk_rules import RiskRules, ContractType
18
- from services.clause_extractor import ExtractedClause
19
- from utils.logger import ContractAnalyzerLogger, log_info, log_error
 
20
  from utils.text_processor import TextProcessor
 
 
 
21
 
22
 
23
  @dataclass
24
  class RiskBreakdownItem:
25
- """Individual risk category breakdown"""
26
- category: str
27
- score: int # 0-100
28
- summary: str
29
- findings: List[str] = field(default_factory=list)
 
 
30
 
 
31
  def to_dict(self) -> Dict[str, Any]:
32
- """Convert to dictionary"""
33
- return {
34
- "category": self.category,
35
- "score": self.score,
36
- "summary": self.summary,
37
- "findings": self.findings
38
- }
 
39
 
40
 
41
  @dataclass
42
  class RiskScore:
43
- """Comprehensive risk score with detailed breakdown"""
44
- overall_score: int # 0-100
45
- risk_level: str # "CRITICAL", "HIGH", "MEDIUM", "LOW"
46
- category_scores: Dict[str, int]
47
- risk_factors: List[str]
48
- detailed_findings: Dict[str, List[str]]
49
- benchmark_comparison: Dict[str, str]
50
- risk_breakdown: List[RiskBreakdownItem]
 
 
51
 
 
52
  def to_dict(self) -> Dict[str, Any]:
53
- """Convert to dictionary for serialization"""
54
- return {
55
- "overall_score": self.overall_score,
56
- "risk_level": self.risk_level,
57
- "category_scores": self.category_scores,
58
- "risk_factors": self.risk_factors,
59
- "detailed_findings": self.detailed_findings,
60
- "benchmark_comparison": self.benchmark_comparison,
61
- "risk_breakdown": [item.to_dict() for item in self.risk_breakdown]
62
- }
 
63
 
64
 
65
  class MultiFactorRiskAnalyzer:
66
  """
67
- Sophisticated multi-factor risk analysis engine
68
 
69
  Analysis Factors:
70
  1. Keyword severity scoring (critical/high/medium keywords)
@@ -74,353 +86,386 @@ class MultiFactorRiskAnalyzer:
74
  5. Missing protections check
75
  6. Contract type-specific weight adjustments
76
  """
77
-
78
  def __init__(self, contract_type: ContractType = ContractType.GENERAL):
79
  """
80
  Initialize risk analyzer
81
 
82
- Args:
83
- contract_type: Type of contract for specialized analysis
 
84
  """
85
- self.contract_type = contract_type
86
- self.rules = RiskRules()
87
  self.adjusted_weights = self.rules.get_adjusted_weights(contract_type)
88
- self.text_processor = TextProcessor(use_spacy=False)
89
- self.logger = ContractAnalyzerLogger.get_logger()
90
 
91
  log_info("MultiFactorRiskAnalyzer initialized",
92
- contract_type=contract_type.value,
93
- adjusted_weights=self.adjusted_weights)
94
-
95
- # =========================================================================
96
- # MAIN ANALYSIS METHOD
97
- # =========================================================================
98
 
 
99
  @ContractAnalyzerLogger.log_execution_time("analyze_risk")
100
- def analyze_risk(self, contract_text: str,
101
- clauses: List[ExtractedClause]) -> RiskScore:
102
  """
103
  Comprehensive multi-factor risk analysis
104
 
105
- Args:
106
- contract_text: Full contract text
107
- clauses: Extracted clauses from ClauseExtractor
 
 
108
 
109
  Returns:
110
- RiskScore object with detailed analysis
 
111
  """
112
 
113
- log_info("Starting risk analysis",
114
- text_length=len(contract_text),
115
- num_clauses=len(clauses),
116
- contract_type=self.contract_type.value)
117
 
118
  # Initialize scoring containers
119
- category_scores = defaultdict(list)
120
- risk_factors = []
121
- detailed_findings = defaultdict(list)
122
-
123
- # Factor 1: Keyword Severity Scoring
124
- keyword_risks = self._score_keywords(contract_text)
125
- log_info("Keyword analysis complete",
126
- critical_score=keyword_risks.get('critical', 0),
127
- high_score=keyword_risks.get('high', 0))
128
-
129
- # Factor 2: Structural Pattern Analysis
130
- pattern_risks = self._analyze_patterns(contract_text)
131
  log_info(f"Pattern analysis found {len(pattern_risks)} risky patterns")
132
 
133
- # Factor 3: Clause-Level Analysis
134
- clause_risks = self._analyze_clauses(clauses)
135
  log_info(f"Clause analysis complete for {len(clause_risks)} categories")
136
 
137
- # Factor 4: Missing Protections
138
- missing_risks = self._check_missing_protections(contract_text, clauses)
139
  log_info(f"Missing protections analysis complete")
140
 
141
- # Factor 5: Industry Benchmark Comparison
142
  benchmark_comparison = self._compare_to_benchmarks(contract_text, clauses)
143
  log_info(f"Benchmark comparison complete")
144
 
145
  # Aggregate scores by category
146
  for category in self.adjusted_weights.keys():
147
- category_risk = self._calculate_category_risk(
148
- category=category,
149
- keyword_risks=keyword_risks,
150
- pattern_risks=pattern_risks,
151
- clause_risks=clause_risks,
152
- missing_risks=missing_risks,
153
- benchmark_comparison=benchmark_comparison
154
- )
155
- category_scores[category] = category_risk["score"]
156
  detailed_findings[category] = category_risk["findings"]
157
 
158
- if category_risk["score"] >= self.rules.RISK_THRESHOLDS["high"]:
159
  risk_factors.append(category)
160
 
161
  # Calculate weighted overall score
162
- overall_score = self._calculate_weighted_score(category_scores)
163
- risk_level = self._get_risk_level(overall_score)
164
 
165
  # Create risk breakdown items
166
- risk_breakdown = self._create_risk_breakdown(
167
- category_scores,
168
- detailed_findings
169
- )
170
-
171
- result = RiskScore(
172
- overall_score=overall_score,
173
- risk_level=risk_level,
174
- category_scores=dict(category_scores),
175
- risk_factors=risk_factors,
176
- detailed_findings=dict(detailed_findings),
177
- benchmark_comparison=benchmark_comparison,
178
- risk_breakdown=risk_breakdown
179
- )
180
-
181
- log_info("Risk analysis complete",
182
- overall_score=overall_score,
183
- risk_level=risk_level,
184
- high_risk_categories=len(risk_factors))
185
 
186
  return result
187
 
188
- # =========================================================================
189
- # FACTOR 1: KEYWORD SEVERITY SCORING
190
- # =========================================================================
191
 
192
  def _score_keywords(self, text: str) -> Dict[str, int]:
193
  """
194
  Score text based on keyword severity tiers
195
 
196
  Returns:
197
- Dictionary with 'critical', 'high', 'medium' scores
 
198
  """
199
  text_lower = text.lower()
200
- scores = defaultdict(int)
201
 
202
  # Critical keywords (Tier 1)
203
  for keyword, weight in self.rules.CRITICAL_KEYWORDS.items():
204
  if keyword in text_lower:
205
- count = text_lower.count(keyword)
206
- scores["critical"] += weight * min(count, 3) # Cap at 3 occurrences
 
 
207
 
208
  # High-risk keywords (Tier 2)
209
  for keyword, weight in self.rules.HIGH_RISK_KEYWORDS.items():
210
  if keyword in text_lower:
211
- count = text_lower.count(keyword)
212
  scores["high"] += weight * min(count, 2)
213
 
214
  # Medium-risk keywords (Tier 3)
215
  for keyword, weight in self.rules.MEDIUM_RISK_KEYWORDS.items():
216
  if keyword in text_lower:
217
- count = text_lower.count(keyword)
218
  scores["medium"] += weight * min(count, 2)
219
 
220
  return dict(scores)
221
 
222
- # =========================================================================
223
- # FACTOR 2: STRUCTURAL PATTERN ANALYSIS
224
- # =========================================================================
225
 
226
  def _analyze_patterns(self, text: str) -> List[Dict]:
227
  """
228
  Detect risky structural patterns in contract
229
 
230
  Returns:
231
- List of detected pattern dictionaries
 
232
  """
233
- findings = []
234
 
235
  for pattern, risk_points, description in self.rules.RISKY_PATTERNS:
236
  matches = re.finditer(pattern, text, re.IGNORECASE)
237
  for match in matches:
238
- findings.append({
239
- "pattern": description,
240
- "risk_points": risk_points,
241
- "match": match.group(0)[:100], # First 100 chars
242
- "position": match.start()
243
- })
244
 
245
  return findings
246
 
247
- # =========================================================================
248
- # FACTOR 3: CLAUSE-LEVEL DETAILED ANALYSIS
249
- # =========================================================================
250
 
251
  def _analyze_clauses(self, clauses: List[ExtractedClause]) -> Dict[str, List[Dict]]:
252
  """
253
  Deep dive into each clause with specific risk factors
254
 
255
  Returns:
256
- Dictionary mapping categories to clause analysis results
 
257
  """
258
  clause_analysis = defaultdict(list)
259
 
260
  for clause in clauses:
261
  # Get risk factors for this clause category
262
- if clause.category in self.rules.CLAUSE_RISK_FACTORS:
263
  analysis = self._analyze_single_clause(clause)
264
  clause_analysis[clause.category].append(analysis)
265
 
266
  return dict(clause_analysis)
267
 
 
268
  def _analyze_single_clause(self, clause: ExtractedClause) -> Dict:
269
  """
270
  Analyze a single clause with detailed risk factors
271
 
272
  Returns:
273
- Dictionary with risk_score and findings
 
274
  """
275
  risk_config = self.rules.CLAUSE_RISK_FACTORS.get(clause.category, {})
276
- base_risk = risk_config.get("base_risk", 50)
277
 
278
- risk_score = base_risk
279
- findings = []
280
 
281
- text_lower = clause.text.lower()
282
 
283
  # Check red flags
284
- if "red_flags" in risk_config:
285
  for flag, adjustment in risk_config["red_flags"].items():
286
  if flag in text_lower:
287
  risk_score += adjustment
288
- severity = "increases" if adjustment > 0 else "decreases"
 
289
  findings.append(f"Found '{flag}' ({severity} risk by {abs(adjustment)} points)")
290
 
291
  # Special handling for specific clause types
292
- if clause.category == "non_compete":
293
  duration_risk = self._analyze_noncompete_duration(clause.text)
294
- risk_score += duration_risk["adjustment"]
 
295
  findings.extend(duration_risk["findings"])
296
 
297
- scope_risk = self._analyze_noncompete_scope(clause.text)
298
- risk_score += scope_risk["adjustment"]
 
299
  findings.extend(scope_risk["findings"])
300
 
301
- elif clause.category == "termination":
302
  notice_risk = self._analyze_notice_period(clause.text)
303
  risk_score += notice_risk["adjustment"]
 
304
  findings.extend(notice_risk["findings"])
305
 
306
- elif clause.category == "indemnification":
307
  mutual_risk = self._analyze_indemnification_mutuality(clause.text)
308
  risk_score += mutual_risk["adjustment"]
 
309
  findings.extend(mutual_risk["findings"])
310
 
311
- elif clause.category == "compensation":
312
  clarity_risk = self._analyze_compensation_clarity(clause.text)
313
- risk_score += clarity_risk["adjustment"]
 
314
  findings.extend(clarity_risk["findings"])
315
 
316
- elif clause.category == "intellectual_property":
317
- scope_risk = self._analyze_ip_scope(clause.text)
318
  risk_score += scope_risk["adjustment"]
 
319
  findings.extend(scope_risk["findings"])
320
 
321
  # Cap score between 0 and 100
322
  risk_score = max(0, min(100, risk_score))
323
 
324
- return {
325
- "clause_reference": clause.reference,
326
- "risk_score": risk_score,
327
- "findings": findings,
328
- "confidence": clause.confidence
329
- }
330
 
 
331
  def _analyze_noncompete_duration(self, text: str) -> Dict:
332
- """Analyze non-compete duration reasonableness"""
 
 
333
  duration_pattern = r'(\d+)\s*(year|yr|month|mo)s?'
334
- matches = re.findall(duration_pattern, text, re.IGNORECASE)
335
 
336
  if not matches:
337
- return {"adjustment": 0, "findings": ["No specific duration found"]}
 
 
338
 
339
  # Convert to months
340
  duration_months = 0
 
341
  for num, unit in matches:
342
- months = int(num) * (12 if 'year' in unit.lower() or 'yr' in unit.lower() else 1)
343
  duration_months = max(duration_months, months)
344
 
345
  # Get benchmark
346
- industry = self._detect_industry()
347
  benchmark = self.rules.INDUSTRY_BENCHMARKS["non_compete_duration"][industry]
348
 
349
- if duration_months <= benchmark["reasonable"]:
350
  return {"adjustment": -10, "findings": [f"Duration of {duration_months} months is reasonable"]}
351
- elif duration_months <= benchmark["standard"]:
 
352
  return {"adjustment": 0, "findings": [f"Duration of {duration_months} months is standard"]}
353
- elif duration_months <= benchmark["excessive"]:
 
354
  return {"adjustment": +15, "findings": [f"Duration of {duration_months} months is lengthy"]}
 
355
  else:
356
  return {"adjustment": +30, "findings": [f"Duration of {duration_months} months is excessive"]}
357
 
 
358
  def _analyze_noncompete_scope(self, text: str) -> Dict:
359
- """Analyze non-compete scope reasonableness"""
360
- text_lower = text.lower()
361
- adjustment = 0
362
- findings = []
 
 
363
 
364
  scope_config = self.rules.CLAUSE_RISK_FACTORS["non-compete"]["scope_keywords"]
365
 
366
  for keyword, adj in scope_config.items():
367
  if keyword in text_lower:
368
  adjustment += adj
369
- severity = "reasonable" if adj < 0 else "concerning"
 
370
  findings.append(f"Scope includes '{keyword}' ({severity})")
371
 
372
  return {"adjustment": adjustment, "findings": findings}
373
 
 
374
  def _analyze_notice_period(self, text: str) -> Dict:
375
- """Analyze termination notice period balance"""
 
 
376
  notice_pattern = r'(\d+)\s*days?\s*(?:notice|prior\s+notice)'
377
- matches = re.findall(notice_pattern, text, re.IGNORECASE)
378
 
379
- if len(matches) < 2:
380
- return {"adjustment": 0, "findings": ["Notice period analysis inconclusive"]}
 
 
381
 
382
  periods = [int(m) for m in matches]
383
 
384
- if len(periods) >= 2:
385
  ratio = max(periods) / min(periods)
386
 
387
- if ratio >= 4:
388
- return {"adjustment": +25, "findings": [f"Notice periods highly imbalanced ({max(periods)} vs {min(periods)} days)"]}
389
- elif ratio >= 3:
390
- return {"adjustment": +18, "findings": [f"Notice periods significantly imbalanced ({max(periods)} vs {min(periods)} days)"]}
391
- elif ratio >= 2:
392
- return {"adjustment": +10, "findings": [f"Notice periods moderately imbalanced ({max(periods)} vs {min(periods)} days)"]}
 
 
 
 
 
 
 
393
  else:
394
- return {"adjustment": -5, "findings": [f"Notice periods balanced ({max(periods)} vs {min(periods)} days)"]}
 
 
395
 
396
- return {"adjustment": 0, "findings": ["Could not determine notice period balance"]}
 
 
397
 
 
398
  def _analyze_indemnification_mutuality(self, text: str) -> Dict:
399
- """Check if indemnification is mutual or one-sided"""
400
- text_lower = text.lower()
 
 
401
 
402
- mutual_indicators = ["mutual", "both parties", "each party", "reciprocal"]
403
- one_sided_indicators = ["employee shall indemnify", "consultant shall indemnify",
404
- "contractor shall indemnify", "you shall indemnify"]
405
 
406
- has_mutual = any(ind in text_lower for ind in mutual_indicators)
407
- has_one_sided = any(ind in text_lower for ind in one_sided_indicators)
408
 
409
- if has_mutual and not has_one_sided:
410
- return {"adjustment": -15, "findings": ["Mutual indemnification (balanced)"]}
 
 
 
411
  elif has_one_sided:
412
- return {"adjustment": +20, "findings": ["One-sided indemnification (unfavorable)"]}
 
 
 
413
  else:
414
- return {"adjustment": 0, "findings": ["Indemnification mutuality unclear"]}
 
 
 
415
 
416
  def _analyze_compensation_clarity(self, text: str) -> Dict:
417
- """Analyze clarity of compensation terms"""
418
- text_lower = text.lower()
419
- adjustment = 0
420
- findings = []
 
 
421
 
422
  # Check for vague terms
423
  vague_terms = ["to be determined", "tbd", "subject to review", "discretionary"]
 
424
  for term in vague_terms:
425
  if term in text_lower:
426
  adjustment += 10
@@ -431,16 +476,22 @@ class MultiFactorRiskAnalyzer:
431
  adjustment -= 10
432
  findings.append("Specific monetary amount provided (good)")
433
 
434
- return {"adjustment": adjustment, "findings": findings}
 
 
 
435
 
436
  def _analyze_ip_scope(self, text: str) -> Dict:
437
- """Analyze IP assignment scope"""
438
- text_lower = text.lower()
439
- adjustment = 0
440
- findings = []
 
 
441
 
442
  # Overly broad indicators
443
  broad_terms = ["all work product", "anything created", "whether or not related"]
 
444
  for term in broad_terms:
445
  if term in text_lower:
446
  adjustment += 15
@@ -449,25 +500,31 @@ class MultiFactorRiskAnalyzer:
449
  # Protective terms
450
  protective_terms = ["prior ip excluded", "personal projects excluded"]
451
  for term in protective_terms:
452
- if term in text_lower:
 
453
  adjustment -= 15
454
  findings.append(f"Protective IP term present: '{term}'")
455
 
456
- return {"adjustment": adjustment, "findings": findings}
 
 
457
 
458
- # =========================================================================
459
- # FACTOR 4: MISSING PROTECTIONS CHECK
460
- # =========================================================================
461
 
462
- def _check_missing_protections(self, text: str,
463
- clauses: List[ExtractedClause]) -> Dict[str, int]:
464
  """
465
  Check for missing critical protections
466
 
 
 
 
 
 
 
467
  Returns:
468
- Dictionary mapping categories to risk scores for missing items
 
469
  """
470
- text_lower = text.lower()
471
  missing_risks = defaultdict(int)
472
 
473
  for protection_id, config in self.rules.PROTECTION_CHECKLIST.items():
@@ -480,11 +537,11 @@ class MultiFactorRiskAnalyzer:
480
 
481
  return dict(missing_risks)
482
 
483
- def _check_protection_present(self, protection_id: str,
484
- text_lower: str,
485
- clauses: List[ExtractedClause]) -> bool:
486
- """Check if a specific protection is present"""
487
-
488
  protection_indicators = {
489
  "for_cause_definition": ["for cause", "cause defined", "grounds for termination"],
490
  "severance_provision": ["severance", "severance pay", "separation pay"],
 
1
+ # DEPENDENCIES
 
 
 
 
 
2
  import re
 
 
 
3
  import sys
4
+ from typing import Any
5
+ from typing import List
6
+ from typing import Dict
7
+ from typing import Tuple
8
  from pathlib import Path
9
+ from typing import Optional
10
+ from dataclasses import field
11
+ from dataclasses import dataclass
12
+ from collections import defaultdict
13
 
14
  # Add parent directory to path for imports
15
  sys.path.append(str(Path(__file__).parent.parent))
16
 
17
+ from utils.logger import log_info
18
+ from utils.logger import log_error
19
+ from config.risk_rules import RiskRules
20
+ from config.risk_rules import ContractType
21
  from utils.text_processor import TextProcessor
22
+ from utils.logger import ContractAnalyzerLogger
23
+ from services.clause_extractor import ExtractedClause
24
+
25
 
26
 
27
  @dataclass
28
  class RiskBreakdownItem:
29
+ """
30
+ Individual risk category breakdown
31
+ """
32
+ category : str
33
+ score : int # 0-100
34
+ summary : str
35
+ findings : List[str] = field(default_factory=list)
36
 
37
+
38
  def to_dict(self) -> Dict[str, Any]:
39
+ """
40
+ Convert to dictionary
41
+ """
42
+ return {"category" : self.category,
43
+ "score" : self.score,
44
+ "summary" : self.summary,
45
+ "findings" : self.findings,
46
+ }
47
 
48
 
49
  @dataclass
50
  class RiskScore:
51
+ """
52
+ Comprehensive risk score with detailed breakdown
53
+ """
54
+ overall_score : int # 0-100
55
+ risk_level : str # "CRITICAL", "HIGH", "MEDIUM", "LOW"
56
+ category_scores : Dict[str, int]
57
+ risk_factors : List[str]
58
+ detailed_findings : Dict[str, List[str]]
59
+ benchmark_comparison : Dict[str, str]
60
+ risk_breakdown : List[RiskBreakdownItem]
61
 
62
+
63
  def to_dict(self) -> Dict[str, Any]:
64
+ """
65
+ Convert to dictionary for serialization
66
+ """
67
+ return {"overall_score" : self.overall_score,
68
+ "risk_level" : self.risk_level,
69
+ "category_scores" : self.category_scores,
70
+ "risk_factors" : self.risk_factors,
71
+ "detailed_findings" : self.detailed_findings,
72
+ "benchmark_comparison" : self.benchmark_comparison,
73
+ "risk_breakdown" : [item.to_dict() for item in self.risk_breakdown],
74
+ }
75
 
76
 
77
  class MultiFactorRiskAnalyzer:
78
  """
79
+ Multi-factor risk analysis engine
80
 
81
  Analysis Factors:
82
  1. Keyword severity scoring (critical/high/medium keywords)
 
86
  5. Missing protections check
87
  6. Contract type-specific weight adjustments
88
  """
 
89
  def __init__(self, contract_type: ContractType = ContractType.GENERAL):
90
  """
91
  Initialize risk analyzer
92
 
93
+ Arguments:
94
+ ----------
95
+ contract_type { ContractType } : Type of contract for specialized analysis
96
  """
97
+ self.contract_type = contract_type
98
+ self.rules = RiskRules()
99
  self.adjusted_weights = self.rules.get_adjusted_weights(contract_type)
100
+ self.text_processor = TextProcessor(use_spacy = True)
101
+ self.logger = ContractAnalyzerLogger.get_logger()
102
 
103
  log_info("MultiFactorRiskAnalyzer initialized",
104
+ contract_type = contract_type.value,
105
+ adjusted_weights = self.adjusted_weights,
106
+ )
 
 
 
107
 
108
+
109
  @ContractAnalyzerLogger.log_execution_time("analyze_risk")
110
+ def analyze_risk(self, contract_text: str, clauses: List[ExtractedClause]) -> RiskScore:
 
111
  """
112
  Comprehensive multi-factor risk analysis
113
 
114
+ Arguments:
115
+ ----------
116
+ contract_text { str } : Full contract text
117
+
118
+ clauses { list } : Extracted clauses from ClauseExtractor
119
 
120
  Returns:
121
+ --------
122
+ { RiskScore } : RiskScore object with detailed analysis
123
  """
124
 
125
+ log_info("Starting risk analysis", text_length = len(contract_text), num_clauses = len(clauses), contract_type = self.contract_type.value)
 
 
 
126
 
127
  # Initialize scoring containers
128
+ category_scores = defaultdict(list)
129
+ detailed_findings = defaultdict(list)
130
+ risk_factors = list()
131
+
132
+ # Keyword Severity Scoring
133
+ keyword_risks = self._score_keywords(contract_text)
134
+ log_info("Keyword analysis complete", critical_score = keyword_risks.get('critical', 0), high_score = keyword_risks.get('high', 0))
135
+
136
+ # Structural Pattern Analysis
137
+ pattern_risks = self._analyze_patterns(contract_text)
 
 
138
  log_info(f"Pattern analysis found {len(pattern_risks)} risky patterns")
139
 
140
+ # Clause-Level Analysis
141
+ clause_risks = self._analyze_clauses(clauses)
142
  log_info(f"Clause analysis complete for {len(clause_risks)} categories")
143
 
144
+ # Missing Protections
145
+ missing_risks = self._check_missing_protections(contract_text, clauses)
146
  log_info(f"Missing protections analysis complete")
147
 
148
+ # Industry Benchmark Comparison
149
  benchmark_comparison = self._compare_to_benchmarks(contract_text, clauses)
150
  log_info(f"Benchmark comparison complete")
151
 
152
  # Aggregate scores by category
153
  for category in self.adjusted_weights.keys():
154
+ category_risk = self._calculate_category_risk(category = category,
155
+ keyword_risks = keyword_risks,
156
+ pattern_risks = pattern_risks,
157
+ clause_risks = clause_risks,
158
+ missing_risks = missing_risks,
159
+ benchmark_comparison = benchmark_comparison,
160
+ )
161
+ category_scores[category] = category_risk["score"]
 
162
  detailed_findings[category] = category_risk["findings"]
163
 
164
+ if (category_risk["score"] >= self.rules.RISK_THRESHOLDS["high"]):
165
  risk_factors.append(category)
166
 
167
  # Calculate weighted overall score
168
+ overall_score = self._calculate_weighted_score(category_scores)
169
+ risk_level = self._get_risk_level(overall_score)
170
 
171
  # Create risk breakdown items
172
+ risk_breakdown = self._create_risk_breakdown(category_scores, detailed_findings)
173
+
174
+ result = RiskScore(overall_score = overall_score,
175
+ risk_level = risk_level,
176
+ category_scores = dict(category_scores),
177
+ risk_factors = risk_factors,
178
+ detailed_findings = dict(detailed_findings),
179
+ benchmark_comparison = benchmark_comparison,
180
+ risk_breakdown = risk_breakdown,
181
+ )
182
+
183
+ log_info("Risk analysis complete", overall_score = overall_score, risk_level = risk_level, high_risk_categories = len(risk_factors))
 
 
 
 
 
 
 
184
 
185
  return result
186
 
 
 
 
187
 
188
  def _score_keywords(self, text: str) -> Dict[str, int]:
189
  """
190
  Score text based on keyword severity tiers
191
 
192
  Returns:
193
+ --------
194
+ { dict } : Dictionary with 'critical', 'high', 'medium' scores
195
  """
196
  text_lower = text.lower()
197
+ scores = defaultdict(int)
198
 
199
  # Critical keywords (Tier 1)
200
  for keyword, weight in self.rules.CRITICAL_KEYWORDS.items():
201
  if keyword in text_lower:
202
+ count = text_lower.count(keyword)
203
+
204
+ # Cap at 3 occurrences
205
+ scores["critical"] += weight * min(count, 3)
206
 
207
  # High-risk keywords (Tier 2)
208
  for keyword, weight in self.rules.HIGH_RISK_KEYWORDS.items():
209
  if keyword in text_lower:
210
+ count = text_lower.count(keyword)
211
  scores["high"] += weight * min(count, 2)
212
 
213
  # Medium-risk keywords (Tier 3)
214
  for keyword, weight in self.rules.MEDIUM_RISK_KEYWORDS.items():
215
  if keyword in text_lower:
216
+ count = text_lower.count(keyword)
217
  scores["medium"] += weight * min(count, 2)
218
 
219
  return dict(scores)
220
 
 
 
 
221
 
222
  def _analyze_patterns(self, text: str) -> List[Dict]:
223
  """
224
  Detect risky structural patterns in contract
225
 
226
  Returns:
227
+ --------
228
+ { list } : List of detected pattern dictionaries
229
  """
230
+ findings = list()
231
 
232
  for pattern, risk_points, description in self.rules.RISKY_PATTERNS:
233
  matches = re.finditer(pattern, text, re.IGNORECASE)
234
  for match in matches:
235
+ findings.append({"pattern" : description,
236
+ "risk_points" : risk_points,
237
+ "match" : match.group(0)[:100], # First 100 chars
238
+ "position" : match.start(),
239
+ })
 
240
 
241
  return findings
242
 
 
 
 
243
 
244
  def _analyze_clauses(self, clauses: List[ExtractedClause]) -> Dict[str, List[Dict]]:
245
  """
246
  Deep dive into each clause with specific risk factors
247
 
248
  Returns:
249
+ --------
250
+ { dict } : Dictionary mapping categories to clause analysis results
251
  """
252
  clause_analysis = defaultdict(list)
253
 
254
  for clause in clauses:
255
  # Get risk factors for this clause category
256
+ if (clause.category in self.rules.CLAUSE_RISK_FACTORS):
257
  analysis = self._analyze_single_clause(clause)
258
  clause_analysis[clause.category].append(analysis)
259
 
260
  return dict(clause_analysis)
261
 
262
+
263
  def _analyze_single_clause(self, clause: ExtractedClause) -> Dict:
264
  """
265
  Analyze a single clause with detailed risk factors
266
 
267
  Returns:
268
+ --------
269
+ { dict } : Dictionary with risk_score and findings
270
  """
271
  risk_config = self.rules.CLAUSE_RISK_FACTORS.get(clause.category, {})
272
+ base_risk = risk_config.get("base_risk", 50)
273
 
274
+ risk_score = base_risk
275
+ findings = list()
276
 
277
+ text_lower = clause.text.lower()
278
 
279
  # Check red flags
280
+ if ("red_flags" in risk_config):
281
  for flag, adjustment in risk_config["red_flags"].items():
282
  if flag in text_lower:
283
  risk_score += adjustment
284
+ severity = "increases" if (adjustment > 0) else "decreases"
285
+
286
  findings.append(f"Found '{flag}' ({severity} risk by {abs(adjustment)} points)")
287
 
288
  # Special handling for specific clause types
289
+ if (clause.category == "non_compete"):
290
  duration_risk = self._analyze_noncompete_duration(clause.text)
291
+ risk_score += duration_risk["adjustment"]
292
+
293
  findings.extend(duration_risk["findings"])
294
 
295
+ scope_risk = self._analyze_noncompete_scope(clause.text)
296
+ risk_score += scope_risk["adjustment"]
297
+
298
  findings.extend(scope_risk["findings"])
299
 
300
+ elif (clause.category == "termination"):
301
  notice_risk = self._analyze_notice_period(clause.text)
302
  risk_score += notice_risk["adjustment"]
303
+
304
  findings.extend(notice_risk["findings"])
305
 
306
+ elif (clause.category == "indemnification"):
307
  mutual_risk = self._analyze_indemnification_mutuality(clause.text)
308
  risk_score += mutual_risk["adjustment"]
309
+
310
  findings.extend(mutual_risk["findings"])
311
 
312
+ elif (clause.category == "compensation"):
313
  clarity_risk = self._analyze_compensation_clarity(clause.text)
314
+ risk_score += clarity_risk["adjustment"]
315
+
316
  findings.extend(clarity_risk["findings"])
317
 
318
+ elif (clause.category == "intellectual_property"):
319
+ scope_risk = self._analyze_ip_scope(clause.text)
320
  risk_score += scope_risk["adjustment"]
321
+
322
  findings.extend(scope_risk["findings"])
323
 
324
  # Cap score between 0 and 100
325
  risk_score = max(0, min(100, risk_score))
326
 
327
+ return {"clause_reference" : clause.reference,
328
+ "risk_score" : risk_score,
329
+ "findings" : findings,
330
+ "confidence" : clause.confidence,
331
+ }
 
332
 
333
+
334
  def _analyze_noncompete_duration(self, text: str) -> Dict:
335
+ """
336
+ Analyze non-compete duration reasonableness
337
+ """
338
  duration_pattern = r'(\d+)\s*(year|yr|month|mo)s?'
339
+ matches = re.findall(duration_pattern, text, re.IGNORECASE)
340
 
341
  if not matches:
342
+ return {"adjustment" : 0,
343
+ "findings" : ["No specific duration found"],
344
+ }
345
 
346
  # Convert to months
347
  duration_months = 0
348
+
349
  for num, unit in matches:
350
+ months = int(num) * (12 if 'year' in unit.lower() or 'yr' in unit.lower() else 1)
351
  duration_months = max(duration_months, months)
352
 
353
  # Get benchmark
354
+ industry = self._detect_industry()
355
  benchmark = self.rules.INDUSTRY_BENCHMARKS["non_compete_duration"][industry]
356
 
357
+ if (duration_months <= benchmark["reasonable"]):
358
  return {"adjustment": -10, "findings": [f"Duration of {duration_months} months is reasonable"]}
359
+
360
+ elif (duration_months <= benchmark["standard"]):
361
  return {"adjustment": 0, "findings": [f"Duration of {duration_months} months is standard"]}
362
+
363
+ elif (duration_months <= benchmark["excessive"]):
364
  return {"adjustment": +15, "findings": [f"Duration of {duration_months} months is lengthy"]}
365
+
366
  else:
367
  return {"adjustment": +30, "findings": [f"Duration of {duration_months} months is excessive"]}
368
 
369
+
370
  def _analyze_noncompete_scope(self, text: str) -> Dict:
371
+ """
372
+ Analyze non-compete scope reasonableness
373
+ """
374
+ text_lower = text.lower()
375
+ adjustment = 0
376
+ findings = list()
377
 
378
  scope_config = self.rules.CLAUSE_RISK_FACTORS["non-compete"]["scope_keywords"]
379
 
380
  for keyword, adj in scope_config.items():
381
  if keyword in text_lower:
382
  adjustment += adj
383
+ severity = "reasonable" if adj < 0 else "concerning"
384
+
385
  findings.append(f"Scope includes '{keyword}' ({severity})")
386
 
387
  return {"adjustment": adjustment, "findings": findings}
388
 
389
+
390
  def _analyze_notice_period(self, text: str) -> Dict:
391
+ """
392
+ Analyze termination notice period balance
393
+ """
394
  notice_pattern = r'(\d+)\s*days?\s*(?:notice|prior\s+notice)'
395
+ matches = re.findall(notice_pattern, text, re.IGNORECASE)
396
 
397
+ if (len(matches) < 2):
398
+ return {"adjustment" : 0,
399
+ "findings" : ["Notice period analysis inconclusive"],
400
+ }
401
 
402
  periods = [int(m) for m in matches]
403
 
404
+ if (len(periods) >= 2):
405
  ratio = max(periods) / min(periods)
406
 
407
+ if (ratio >= 4):
408
+ return {"adjustment" : +25, "findings": [f"Notice periods highly imbalanced ({max(periods)} vs {min(periods)} days)"]}
409
+
410
+ elif (ratio >= 3):
411
+ return {"adjustment" : +18,
412
+ "findings" : [f"Notice periods significantly imbalanced ({max(periods)} vs {min(periods)} days)"],
413
+ }
414
+
415
+ elif (ratio >= 2):
416
+ return {"adjustment" : +10,
417
+ "findings" : [f"Notice periods moderately imbalanced ({max(periods)} vs {min(periods)} days)"],
418
+ }
419
+
420
  else:
421
+ return {"adjustment" : -5,
422
+ "findings" : [f"Notice periods balanced ({max(periods)} vs {min(periods)} days)"],
423
+ }
424
 
425
+ return {"adjustment" : 0,
426
+ "findings" : ["Could not determine notice period balance"],
427
+ }
428
 
429
+
430
  def _analyze_indemnification_mutuality(self, text: str) -> Dict:
431
+ """
432
+ Check if indemnification is mutual or one-sided
433
+ """
434
+ text_lower = text.lower()
435
 
436
+ mutual_indicators = ["mutual", "both parties", "each party", "reciprocal"]
437
+ one_sided_indicators = ["employee shall indemnify", "consultant shall indemnify", "contractor shall indemnify", "you shall indemnify"]
 
438
 
439
+ has_mutual = any(ind in text_lower for ind in mutual_indicators)
440
+ has_one_sided = any(ind in text_lower for ind in one_sided_indicators)
441
 
442
+ if (has_mutual and not has_one_sided):
443
+ return {"adjustment" : -15,
444
+ "findings" : ["Mutual indemnification (balanced)"],
445
+ }
446
+
447
  elif has_one_sided:
448
+ return {"adjustment" : +20,
449
+ "findings" : ["One-sided indemnification (unfavorable)"],
450
+ }
451
+
452
  else:
453
+ return {"adjustment" : 0,
454
+ "findings" : ["Indemnification mutuality unclear"],
455
+ }
456
+
457
 
458
  def _analyze_compensation_clarity(self, text: str) -> Dict:
459
+ """
460
+ Analyze clarity of compensation terms
461
+ """
462
+ text_lower = text.lower()
463
+ adjustment = 0
464
+ findings = list()
465
 
466
  # Check for vague terms
467
  vague_terms = ["to be determined", "tbd", "subject to review", "discretionary"]
468
+
469
  for term in vague_terms:
470
  if term in text_lower:
471
  adjustment += 10
 
476
  adjustment -= 10
477
  findings.append("Specific monetary amount provided (good)")
478
 
479
+ return {"adjustment" : adjustment,
480
+ "findings" : findings,
481
+ }
482
+
483
 
484
  def _analyze_ip_scope(self, text: str) -> Dict:
485
+ """
486
+ Analyze IP assignment scope
487
+ """
488
+ text_lower = text.lower()
489
+ adjustment = 0
490
+ findings = list()
491
 
492
  # Overly broad indicators
493
  broad_terms = ["all work product", "anything created", "whether or not related"]
494
+
495
  for term in broad_terms:
496
  if term in text_lower:
497
  adjustment += 15
 
500
  # Protective terms
501
  protective_terms = ["prior ip excluded", "personal projects excluded"]
502
  for term in protective_terms:
503
+
504
+ if (term in text_lower):
505
  adjustment -= 15
506
  findings.append(f"Protective IP term present: '{term}'")
507
 
508
+ return {"adjustment" : adjustment,
509
+ "findings" : findings,
510
+ }
511
 
 
 
 
512
 
513
+ def _check_missing_protections(self, text: str, clauses: List[ExtractedClause]) -> Dict[str, int]:
 
514
  """
515
  Check for missing critical protections
516
 
517
+ Arguments:
518
+ ----------
519
+ text { str } :
520
+
521
+ clauses { list } :
522
+
523
  Returns:
524
+ --------
525
+ { dict } : Dictionary mapping categories to risk scores for missing items
526
  """
527
+ text_lower = text.lower()
528
  missing_risks = defaultdict(int)
529
 
530
  for protection_id, config in self.rules.PROTECTION_CHECKLIST.items():
 
537
 
538
  return dict(missing_risks)
539
 
540
+
541
+ def _check_protection_present(self, protection_id: str, text_lower: str, clauses: List[ExtractedClause]) -> bool:
542
+ """
543
+ Check if a specific protection is present
544
+ """
545
  protection_indicators = {
546
  "for_cause_definition": ["for cause", "cause defined", "grounds for termination"],
547
  "severance_provision": ["severance", "severance pay", "separation pay"],
services/summary_generator.py ADDED
@@ -0,0 +1,570 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # services/summary_generator.py
2
+
3
+ import logging
4
+ from typing import Dict, List, Optional
5
+ from dataclasses import dataclass
6
+
7
+ from utils.logger import ContractAnalyzerLogger
8
+ from model_manager.llm_manager import LLMManager, LLMProvider
9
+
10
+ logger = ContractAnalyzerLogger.get_logger()
11
+
12
@dataclass
class SummaryContext:
    """
    Context data for summary generation

    Bundles the analysis results that SummaryGenerator renders into the LLM
    prompt, or into the rule-based fallback summary when the LLM path fails.
    """
    # Contract category, read from the classification result's "category" entry
    contract_type: str
    # Overall risk score, read from risk_analysis["overall_score"]; rendered as "<score>/100"
    risk_score: int
    # Risk level label, read from risk_analysis["risk_level"] (e.g. "low", "medium", "high", "critical")
    risk_level: str
    # Per-category scores, read from risk_analysis["category_scores"]; each rendered as "<score>/100"
    category_scores: Dict[str, int]
    # Unfavorable terms; entries are read via 'severity', 'term' and 'explanation' (attribute or dict key)
    unfavorable_terms: List[Dict]
    # Missing protections; entries are read via 'importance', 'protection' and 'explanation'
    missing_protections: List[Dict]
    # Analyzed clauses; entries are read via 'confidence', 'risk_level', 'category' and 'text'
    clauses: List
    # Short finding sentences built by SummaryGenerator._extract_key_findings (empty on the fallback path)
    key_findings: List[str]
23
+
24
+
25
class SummaryGenerator:
    """
    LLM-powered executive summary generator for contract analysis
    Generates professional, detailed executive summaries like legal professionals

    The public entry point is generate_executive_summary(). It renders the
    analysis results into a prompt, asks the LLM for a summary and, if anything
    on that path fails, returns a deterministic rule-based summary instead.

    Analysis items (clauses, unfavorable terms, missing protections) may be
    plain dicts or objects exposing the same names as attributes; both shapes
    are read through _get_field().
    """

    # A clause only counts as "high risk" above this confidence value
    HIGH_RISK_CONFIDENCE = 0.7
    HIGH_RISK_LEVELS = ("high", "critical")

    # Markdown emphasis / heading characters removed from LLM output
    _MARKDOWN_CHARS = str.maketrans("", "", "*#")
    # Labels some models put in front of the summary text
    _LABEL_PREFIXES = ("executive summary", "summary:")
    # Lead-in lines ("Here is the summary:") that carry no content of their own
    _PREAMBLE_PREFIXES = ("here is", "based on")

    def __init__(self, llm_manager: Optional["LLMManager"] = None):
        """
        Initialize the summary generator

        Args:
            llm_manager: LLM manager instance (if None, creates one with default settings)
        """
        # Explicit None test: a caller-supplied manager is never silently replaced
        self.llm_manager = LLMManager() if llm_manager is None else llm_manager
        self.logger = ContractAnalyzerLogger.get_logger()

        logger.info("Summary generator initialized")

    # ------------------------------------------------------------------
    # Small shared helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _get_field(item, name: str, default=None):
        """Read `name` from a dict or an object; None/missing values yield `default`."""
        if isinstance(item, dict):
            value = item.get(name)
        else:
            value = getattr(item, name, None)
        return default if value is None else value

    @staticmethod
    def _normalize_level(value):
        """Lower-case string levels so 'HIGH' and 'high' compare equal; pass other values through."""
        return value.strip().lower() if isinstance(value, str) else value

    @staticmethod
    def _numeric_score(value):
        """Return `value` as a number, or 0 when it cannot be interpreted as one."""
        if isinstance(value, (int, float)):
            return value
        try:
            return float(value)
        except (TypeError, ValueError):
            return 0

    @staticmethod
    def _truncate(text, limit: int) -> str:
        """Return `text` cut to `limit` characters, with '...' appended when it was cut."""
        text = "" if text is None else str(text)
        return text[:limit] + '...' if len(text) > limit else text

    @staticmethod
    def _display_contract_type(contract_type) -> str:
        """Turn an identifier such as 'employment_agreement' into 'Employment Agreement'."""
        return str(contract_type or "contract").replace('_', ' ').title()

    def _select_by_level(self, items, field: str, level: str) -> List:
        """Return the entries of `items` whose `field` equals `level` (case-insensitive)."""
        return [
            item for item in (items or [])
            if self._normalize_level(self._get_field(item, field)) == level
        ]

    def _is_high_risk_clause(self, clause) -> bool:
        """True when the clause is confidently classified as high or critical risk."""
        confidence = self._get_field(clause, 'confidence', 0)
        level = self._normalize_level(self._get_field(clause, 'risk_level'))
        try:
            return confidence > self.HIGH_RISK_CONFIDENCE and level in self.HIGH_RISK_LEVELS
        except TypeError:
            # Non-numeric confidence: the clause cannot be ranked, so skip it
            return False

    def _describe_item(self, item, name_field: str) -> str:
        """Render one term/protection as a prompt bullet: ' * <name>: <explanation>'."""
        name = self._get_field(item, name_field, 'Unknown')
        explanation = self._get_field(item, 'explanation', '')
        return f" * {name}: {explanation}"

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def generate_executive_summary(self,
                                   classification: Dict,
                                   risk_analysis: Dict,
                                   unfavorable_terms: List[Dict],
                                   missing_protections: List[Dict],
                                   clauses: List) -> str:
        """
        Generate a comprehensive executive summary using LLM

        Args:
            classification: Contract classification data
            risk_analysis: Risk analysis results
            unfavorable_terms: List of unfavorable terms
            missing_protections: List of missing protections
            clauses: List of analyzed clauses (ExtractedClause objects)

        Returns:
            Generated executive summary string. When the LLM path fails for any
            reason, a rule-based fallback summary is returned instead.
        """
        try:
            # Prepare context for the LLM
            context = self._prepare_summary_context(
                classification, risk_analysis, unfavorable_terms,
                missing_protections, clauses
            )

            # Generate summary using LLM
            summary = self._generate_with_llm(context)

            logger.info(f"Executive summary generated successfully - Risk score: {context.risk_score}, Risk level: {context.risk_level}")

            return summary

        except Exception as e:
            # Deliberate catch-all boundary: a summary must always be returned
            logger.error(f"Failed to generate executive summary: {e}")

            # Context preparation itself may have been what failed, so the
            # fallback context is built with tolerant accessors that cannot
            # raise on None / non-dict inputs.
            fallback_context = SummaryContext(
                contract_type=self._get_field(classification, "category", "contract"),
                risk_score=self._get_field(risk_analysis, "overall_score", 0),
                risk_level=self._get_field(risk_analysis, "risk_level", "unknown"),
                category_scores=self._get_field(risk_analysis, "category_scores", {}),
                unfavorable_terms=unfavorable_terms or [],
                missing_protections=missing_protections or [],
                clauses=clauses or [],
                key_findings=[]
            )

            # Fallback to simple summary
            return self._generate_fallback_summary(fallback_context)

    # ------------------------------------------------------------------
    # Context preparation
    # ------------------------------------------------------------------

    def _prepare_summary_context(self,
                                 classification: Dict,
                                 risk_analysis: Dict,
                                 unfavorable_terms: List[Dict],
                                 missing_protections: List[Dict],
                                 clauses: List) -> "SummaryContext":
        """Prepare structured context for summary generation"""

        risk_score = risk_analysis.get("overall_score", 0)

        return SummaryContext(
            contract_type=classification.get("category", "contract"),
            risk_score=risk_score,
            risk_level=risk_analysis.get("risk_level", "unknown"),
            category_scores=risk_analysis.get("category_scores", {}),
            unfavorable_terms=unfavorable_terms,
            missing_protections=missing_protections,
            clauses=clauses,
            key_findings=self._extract_key_findings(
                unfavorable_terms, missing_protections, clauses, risk_score
            )
        )

    def _extract_key_findings(self,
                              unfavorable_terms: List[Dict],
                              missing_protections: List[Dict],
                              clauses: List,
                              risk_score: int) -> List[str]:
        """
        Extract the most important findings for the summary

        Returns, in order: up to 3 high-risk clauses, up to 2 critical
        unfavorable terms, up to 2 critical missing protections and, for
        scores of 60 and above, one sentence describing the overall risk.
        """
        findings = []

        # Top 3 high-risk clauses
        high_risk_clauses = [c for c in (clauses or []) if self._is_high_risk_clause(c)]
        for clause in high_risk_clauses[:3]:
            category = self._get_field(clause, 'category', 'clause')
            clause_text = self._truncate(self._get_field(clause, 'text', ''), 100)
            findings.append(f"High-risk {category}: {clause_text}")

        # Critical unfavorable terms
        for term in self._select_by_level(unfavorable_terms, 'severity', 'critical')[:2]:
            term_name = self._get_field(term, 'term', 'Unknown')
            explanation = self._get_field(term, 'explanation', '')
            findings.append(f"Critical term: {term_name} - {explanation}")

        # Important missing protections
        for prot in self._select_by_level(missing_protections, 'importance', 'critical')[:2]:
            findings.append(f"Missing protection: {self._get_field(prot, 'protection', 'Unknown')}")

        # Overall risk context
        score = self._numeric_score(risk_score)
        if score >= 80:
            findings.append("Contract presents critical level of risk requiring immediate attention")
        elif score >= 60:
            findings.append("Significant concerns identified requiring careful review")

        return findings

    # ------------------------------------------------------------------
    # LLM path
    # ------------------------------------------------------------------

    def _generate_with_llm(self, context: "SummaryContext") -> str:
        """
        Generate summary using LLM

        Raises:
            ValueError: if the LLM reports failure, returns no text, or returns
                text that is empty once formatting artifacts are removed.
            Exception: anything raised by the LLM manager is logged and re-raised.
        """
        prompt = self._build_summary_prompt(context)
        system_prompt = self._build_system_prompt()

        try:
            response = self.llm_manager.complete(
                prompt=prompt,
                system_prompt=system_prompt,
                temperature=0.3,  # Lower temperature for more consistent, professional output
                max_tokens=800,   # Limit summary length
                json_mode=False
            )

            if not (response.success and response.text and response.text.strip()):
                raise ValueError(f"LLM generation failed: {response.error_message}")

            summary = self._clean_summary_response(response.text)
            if not summary:
                # Only headings/markup came back; let the caller fall back
                raise ValueError("LLM generation failed: response contained no usable summary text")
            return summary

        except Exception as e:
            logger.error(f"LLM summary generation failed: {e}")
            raise

    def _build_system_prompt(self) -> str:
        """Build system prompt for professional summary generation"""

        return "\n".join((
            "You are a senior legal analyst specializing in contract risk assessment. Your task is to generate concise, professional executive summaries that:",
            "",
            "KEY REQUIREMENTS:",
            "1. Write in formal, professional business language",
            "2. Focus on the most critical risks and implications",
            "3. Be specific about contractual provisions and their impact",
            "4. Maintain objective, factual tone",
            "5. Keep summary length between 100-200 words",
            "6. Structure: Start with overall risk assessment, then key findings, then implications",
            "",
            "WRITING STYLE:",
            "- Use precise legal/business terminology",
            "- Avoid markdown formatting",
            "- Be direct and actionable",
            "- Highlight asymmetrical terms and missing protections",
            "- Focus on practical consequences for the signing party",
            "",
            "OUTPUT FORMAT:",
            "Return only the executive summary text, no headings, no bullet points, just clean paragraph text.",
        ))

    def _build_summary_prompt(self, context: "SummaryContext") -> str:
        """Build detailed prompt for summary generation"""

        return "\n".join((
            "",
            "CONTRACT ANALYSIS DATA:",
            "",
            self._build_risk_context(context),
            "",
            self._build_key_provisions_context(context),
            "",
            self._build_missing_protections_context(context),
            "",
            "GENERATION INSTRUCTIONS:",
            "Based on the analysis above, write a professional executive summary that:",
            f"1. Starts with the overall risk assessment for the {context.contract_type}",
            "2. Highlights the 2-3 most critical issues",
            "3. Explains the practical implications for the signing party",
            "4. Mentions any severely imbalanced or punitive clauses",
            "5. Notes significant missing protections",
            "",
            "Focus on clarity, specificity, and actionable insights.",
            "",
        ))

    def _build_risk_context(self, context: "SummaryContext") -> str:
        """Build risk assessment context"""

        risk_level_descriptions = {
            "critical": "CRITICAL level of risk requiring immediate attention",
            "high": "HIGH level of risk requiring significant review",
            "medium": "MODERATE level of risk with some concerns",
            "low": "LOW level of risk, generally favorable"
        }

        # Tolerate a missing risk level instead of failing on None.lower()
        level_key = str(context.risk_level or "").strip().lower()
        risk_desc = risk_level_descriptions.get(level_key, "UNKNOWN level of risk")

        lines = [
            "RISK ASSESSMENT:",
            f"- Overall Score: {context.risk_score}/100 ({risk_desc})",
            f"- Contract Type: {self._display_contract_type(context.contract_type)}",
        ]

        # Add category scores
        if context.category_scores:
            lines.append("- Risk by Category:")
            for category, score in context.category_scores.items():
                category_name = str(category).replace('_', ' ').title()
                lines.append(f" * {category_name}: {score}/100")

        return "\n".join(lines) + "\n"

    def _build_key_provisions_context(self, context: "SummaryContext") -> str:
        """Build context about key provisions and unfavorable terms"""

        lines = ["KEY PROVISIONS & UNFAVORABLE TERMS:"]

        # Critical terms first, then high-severity ones
        critical_terms = self._select_by_level(context.unfavorable_terms, 'severity', 'critical')
        high_terms = self._select_by_level(context.unfavorable_terms, 'severity', 'high')

        if critical_terms:
            lines.append(f"- Critical Issues Found: {len(critical_terms)}")
            lines.extend(self._describe_item(term, 'term') for term in critical_terms[:3])

        if high_terms:
            lines.append(f"- Significant Concerns: {len(high_terms)}")
            lines.extend(self._describe_item(term, 'term') for term in high_terms[:2])

        # High-risk clauses
        high_risk_clauses = [c for c in (context.clauses or []) if self._is_high_risk_clause(c)]
        if high_risk_clauses:
            lines.append(f"- High-Risk Clauses Identified: {len(high_risk_clauses)}")
            for clause in high_risk_clauses[:2]:
                category = self._get_field(clause, 'category', 'Unknown')
                display_text = self._truncate(self._get_field(clause, 'text', ''), 80)
                lines.append(f" * {category}: {display_text}")

        return "\n".join(lines) + "\n"

    def _build_missing_protections_context(self, context: "SummaryContext") -> str:
        """Build context about missing protections"""

        lines = ["MISSING PROTECTIONS:"]

        critical_protections = self._select_by_level(context.missing_protections, 'importance', 'critical')
        important_protections = self._select_by_level(context.missing_protections, 'importance', 'high')

        if critical_protections:
            lines.append(f"- Critical Protections Missing: {len(critical_protections)}")
            lines.extend(self._describe_item(prot, 'protection') for prot in critical_protections[:3])

        if important_protections:
            lines.append(f"- Important Protections Missing: {len(important_protections)}")
            lines.extend(self._describe_item(prot, 'protection') for prot in important_protections[:2])

        if not critical_protections and not important_protections:
            lines.append("- No critical protections missing")

        return "\n".join(lines) + "\n"

    # ------------------------------------------------------------------
    # Response clean-up
    # ------------------------------------------------------------------

    @classmethod
    def _strip_boilerplate(cls, line: str) -> str:
        """
        Remove a leading label or lead-in from one response line.

        Heading-only lines ("Executive Summary:") and lead-ins that end with a
        colon ("Here is the summary:") become ''. A label followed by content
        ("Summary: ...") keeps the content. Ordinary sentences - including
        ones that merely begin with "Based on ..." - are returned unchanged so
        no summary content is lost.
        """
        lowered = line.lower()

        for label in cls._LABEL_PREFIXES:
            if lowered.startswith(label):
                rest = line[len(label):].strip()
                if not rest:
                    return ""
                if label.endswith(":") or rest[0] in ":-":
                    return rest.lstrip(":- ").strip()
                return line

        if lowered.startswith(cls._PREAMBLE_PREFIXES) and line.endswith(":"):
            return ""

        return line

    def _clean_summary_response(self, text: str) -> str:
        """
        Clean and format the LLM response

        Removes markdown characters ('*', '#'), drops headings and lead-in
        lines, joins the remaining lines into one paragraph, capitalizes the
        first character and guarantees terminal punctuation. Returns '' when
        nothing usable remains.
        """
        # Remove any markdown formatting
        text = (text or "").translate(self._MARKDOWN_CHARS)

        # Remove common LLM artifacts, line by line
        cleaned_lines = []
        for raw_line in text.split('\n'):
            line = self._strip_boilerplate(raw_line.strip())
            if line:
                cleaned_lines.append(line)

        # Join into coherent paragraph
        summary = ' '.join(cleaned_lines)

        # Ensure proper sentence structure
        if summary and not summary[0].isupper():
            summary = summary[0].upper() + summary[1:]

        if summary and not summary.endswith(('.', '!', '?')):
            summary += '.'

        return summary

    # ------------------------------------------------------------------
    # Fallback path
    # ------------------------------------------------------------------

    def _generate_fallback_summary(self, context: "SummaryContext") -> str:
        """Generate a fallback summary when LLM is not available"""

        contract_type_display = self._display_contract_type(context.contract_type)

        # Count critical items
        critical_terms = len(self._select_by_level(context.unfavorable_terms, 'severity', 'critical'))
        critical_protections = len(self._select_by_level(context.missing_protections, 'importance', 'critical'))

        # Thresholds are checked from most to least severe; the last row is the default
        score = self._numeric_score(context.risk_score)
        bands = (
            (80, "CRITICAL", "requires immediate attention and significant revision"),
            (60, "HIGH", "requires careful review and substantial negotiation"),
            (40, "MODERATE", "requires review and selective negotiation"),
        )
        label, action = "LOW", "appears generally reasonable but should be reviewed"
        for threshold, band_label, band_action in bands:
            if score >= threshold:
                label, action = band_label, band_action
                break

        risk_assessment = f"This {contract_type_display} presents a {label} level of risk"

        summary = f"{risk_assessment} with a score of {score}/100. "
        summary += f"The agreement {action}. "

        if critical_terms > 0:
            summary += f"Found {critical_terms} critical unfavorable terms and "
        else:
            summary += f"Found {len(context.unfavorable_terms or [])} unfavorable terms and "

        if critical_protections > 0:
            summary += f"{critical_protections} critical missing protections. "
        else:
            summary += f"{len(context.missing_protections or [])} missing protections. "

        summary += "Review the detailed analysis below for specific clauses and recommendations."

        return summary
static/index.html CHANGED
@@ -15,14 +15,14 @@
15
  font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
16
  background: #ffffff;
17
  color: #333;
18
- line-height: 1.6;
19
  }
20
 
21
  /* Header */
22
  .header {
23
  background: white;
24
- border-bottom: 1px solid #e5e5e5;
25
- padding: 1rem 2rem;
26
  display: flex;
27
  justify-content: space-between;
28
  align-items: center;
@@ -49,32 +49,32 @@
49
  align-items: center;
50
  justify-content: center;
51
  color: white;
52
- font-size: 18px;
53
  }
54
 
55
  .subtitle {
56
  color: #666;
57
- font-size: 0.9rem;
58
- font-weight: 400;
59
  }
60
 
61
  .container {
62
  max-width: 1200px;
63
  margin: 0 auto;
64
- padding: 0 2rem;
65
  }
66
 
67
- /* Landing Page Styles - Updated to match screenshot */
68
  .landing-screen {
69
- padding-top: 80px;
70
  }
71
 
72
  .hero-section {
73
  text-align: center;
74
- padding: 6rem 0 4rem;
75
  background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
76
  color: white;
77
- margin-bottom: 4rem;
78
  }
79
 
80
  .hero-title {
@@ -88,7 +88,7 @@
88
  font-size: 1.3rem;
89
  margin-bottom: 2.5rem;
90
  opacity: 0.95;
91
- max-width: 600px;
92
  margin-left: auto;
93
  margin-right: auto;
94
  }
@@ -112,42 +112,42 @@
112
  }
113
 
114
  .section {
115
- padding: 4rem 0;
116
  text-align: center;
117
  }
118
 
119
  .section-title {
120
  font-size: 2.2rem;
121
  font-weight: 600;
122
- margin-bottom: 3rem;
123
  color: #333;
124
  }
125
 
126
  .section-subtitle {
127
  font-size: 1.2rem;
128
  color: #666;
129
- margin-bottom: 3rem;
130
- max-width: 800px;
131
  margin-left: auto;
132
  margin-right: auto;
133
- line-height: 1.8;
134
  }
135
 
136
  .features-grid {
137
  display: grid;
138
  grid-template-columns: repeat(3, 1fr);
139
  gap: 3rem;
140
- margin-bottom: 4rem;
141
  }
142
 
143
  .feature-card {
144
  text-align: center;
145
- padding: 2rem;
146
  }
147
 
148
  .feature-icon {
149
  font-size: 3rem;
150
- margin-bottom: 1.5rem;
151
  }
152
 
153
  .feature-title {
@@ -159,25 +159,26 @@
159
 
160
  .feature-description {
161
  color: #666;
162
- line-height: 1.7;
163
  font-size: 1rem;
164
  }
165
 
166
  .steps-section {
167
- background: #f8f9fa;
168
- padding: 5rem 0;
 
169
  }
170
 
171
  .steps-grid {
172
  display: grid;
173
  grid-template-columns: repeat(3, 1fr);
174
  gap: 3rem;
175
- margin-top: 3rem;
176
  }
177
 
178
  .step-card {
179
  text-align: center;
180
- padding: 2rem;
181
  }
182
 
183
  .step-number {
@@ -191,7 +192,7 @@
191
  justify-content: center;
192
  font-size: 1.5rem;
193
  font-weight: 700;
194
- margin: 0 auto 1.5rem;
195
  }
196
 
197
  .step-title {
@@ -203,16 +204,16 @@
203
 
204
  .step-description {
205
  color: #666;
206
- line-height: 1.7;
207
  }
208
 
209
  .footer {
210
  text-align: center;
211
- padding: 3rem 2rem;
212
  color: #999;
213
  font-size: 0.9rem;
214
- border-top: 1px solid #e5e5e5;
215
- background: #f8f9fa;
216
  }
217
 
218
  /* Analyzer Styles */
@@ -240,6 +241,14 @@
240
  margin-bottom: 2rem;
241
  }
242
 
 
 
 
 
 
 
 
 
243
  .upload-card {
244
  background: white;
245
  border-radius: 12px;
@@ -446,11 +455,11 @@
446
  font-size: 1rem;
447
  }
448
 
449
- .results-screen {
450
  display: none;
451
  }
452
 
453
- .results-screen.active {
454
  display: block;
455
  }
456
 
@@ -493,22 +502,26 @@
493
  border: 1px solid #fecaca;
494
  }
495
 
496
- /* Results screen styles */
497
  .results-header {
498
  display: flex;
499
  justify-content: space-between;
500
- align-items: center;
501
  margin-bottom: 2rem;
 
502
  }
503
 
504
  .results-title {
505
  font-size: 2rem;
506
  font-weight: 700;
 
507
  }
508
 
509
  .results-actions {
510
  display: flex;
511
  gap: 1rem;
 
 
512
  }
513
 
514
  .btn {
@@ -519,6 +532,10 @@
519
  cursor: pointer;
520
  border: none;
521
  transition: all 0.2s;
 
 
 
 
522
  }
523
 
524
  .btn-primary {
@@ -582,7 +599,6 @@
582
  transform: translate(-50%, -50%);
583
  font-size: 3rem;
584
  font-weight: 700;
585
- color: #dc2626;
586
  }
587
 
588
  .risk-level {
@@ -864,9 +880,10 @@
864
  <!-- Hero Section -->
865
  <section class="hero-section">
866
  <div class="container">
867
- <h1 class="hero-title">Unlock Legal Intelligence<br>Analyze Contracts with AI</h1>
868
  <p class="hero-subtitle">
869
  Instantly identify risks, uncover unfavorable terms, and gain actionable negotiation points.
 
870
  Our AI-powered platform gives you the clarity and confidence to sign better contracts.
871
  </p>
872
  <button class="cta-button" id="getStartedBtn">Try Now for Free</button>
@@ -954,48 +971,46 @@
954
  ← Back to Overview
955
  </button>
956
 
957
- <div class="hero-section-analyzer">
958
- <h1 class="hero-title-analyzer">Analyze Your Contract in Seconds</h1>
959
- <p class="hero-description">Paste your contract or upload a file to get an instant, AI-powered risk assessment.</p>
960
- </div>
961
-
962
- <!-- API Status Indicator -->
963
- <div id="apiStatus" class="api-status" style="display: none;">
964
- Checking backend connection...
965
- </div>
966
-
967
- <div class="upload-card">
968
- <div class="tabs">
969
- <button class="tab active" data-tab="paste">Paste Text</button>
970
- <button class="tab" data-tab="upload">Upload File</button>
971
  </div>
972
 
973
- <div id="pasteTab" class="tab-content active">
974
- <textarea class="textarea" id="contractText" placeholder="Paste your full contract text here..."></textarea>
975
- </div>
 
 
976
 
977
- <div id="uploadTab" class="tab-content">
978
- <div class="file-upload-area" id="fileUploadArea">
979
- <input type="file" id="fileInput" class="file-input" accept=".pdf,.docx,.txt">
980
- <div class="upload-icon">📄</div>
981
- <div class="upload-text">Click to upload or drag and drop</div>
982
- <div class="upload-hint">PDF, DOCX, or TXT files (Max 10MB)</div>
983
  </div>
984
- <div id="selectedFile" class="selected-file" style="display: none;">
985
- <div class="file-icon">📄</div>
986
- <div class="file-info">
987
- <div class="file-name" id="fileName"></div>
988
- <div class="file-size" id="fileSize"></div>
 
 
 
 
 
 
 
 
 
 
989
  </div>
990
- <button class="remove-file" id="removeFile">×</button>
991
  </div>
992
- </div>
993
 
994
- <div class="analyze-btn-container">
995
- <button class="analyze-btn" id="analyzeBtn">
996
- <span>🔍</span>
997
- <span>Analyze Contract</span>
998
- </button>
 
999
  </div>
1000
  </div>
1001
 
@@ -1006,8 +1021,8 @@
1006
  <p class="loading-text">This may take a moment for large documents.</p>
1007
  </div>
1008
 
1009
- <!-- Results Screen -->
1010
- <div id="resultsScreen" class="results-screen">
1011
  <div class="results-header">
1012
  <h1 class="results-title">Analysis Report</h1>
1013
  <div class="results-actions">
@@ -1098,20 +1113,19 @@
1098
  </div>
1099
 
1100
  <script>
1101
- const API_BASE_URL = window.location.hostname === 'localhost'
1102
- ? 'http://localhost:8000/api/v1'
1103
- : '/api/v1';
1104
 
1105
  let selectedFile = null;
1106
- let currentJobId = null;
1107
- let pollInterval = null;
1108
 
1109
  // Screen management
1110
  function showScreen(screenName) {
1111
  document.getElementById('landingScreen').style.display = 'none';
1112
  document.getElementById('analyzerScreen').style.display = 'none';
1113
  document.getElementById('loadingScreen').classList.remove('active');
1114
- document.getElementById('resultsScreen').classList.remove('active');
 
1115
 
1116
  if (screenName === 'landing') {
1117
  document.getElementById('landingScreen').style.display = 'block';
@@ -1121,9 +1135,11 @@
1121
  } else if (screenName === 'loading') {
1122
  document.getElementById('analyzerScreen').style.display = 'block';
1123
  document.getElementById('loadingScreen').classList.add('active');
 
1124
  } else if (screenName === 'results') {
1125
  document.getElementById('analyzerScreen').style.display = 'block';
1126
- document.getElementById('resultsScreen').classList.add('active');
 
1127
  }
1128
  }
1129
 
@@ -1143,14 +1159,15 @@
1143
  });
1144
 
1145
  if (response.ok) {
1146
- statusElement.textContent = '✓ Backend connected successfully';
 
1147
  statusElement.className = 'api-status connected';
1148
  } else {
1149
  throw new Error('Backend not responding properly');
1150
  }
1151
  } catch (error) {
1152
  console.error('Backend connection failed:', error);
1153
- statusElement.textContent = '✗ Cannot connect to backend. Make sure the server is running on port 8000.';
1154
  statusElement.className = 'api-status disconnected';
1155
 
1156
  setTimeout(() => {
@@ -1168,6 +1185,18 @@
1168
  showScreen('landing');
1169
  });
1170
 
 
 
 
 
 
 
 
 
 
 
 
 
1171
  // Tab switching
1172
  document.querySelectorAll('.tab').forEach(tab => {
1173
  tab.addEventListener('click', (e) => {
@@ -1271,15 +1300,13 @@
1271
  alert('Please paste contract text');
1272
  return;
1273
  }
1274
- const blob = new Blob([text], { type: 'text/plain' });
1275
- const file = new File([blob], 'contract.txt', { type: 'text/plain' });
1276
- await analyzeContract(file);
1277
  } else {
1278
  if (!selectedFile) {
1279
  alert('Please select a file');
1280
  return;
1281
  }
1282
- await analyzeContract(selectedFile);
1283
  }
1284
  } catch (error) {
1285
  console.error('Analysis error:', error);
@@ -1290,7 +1317,8 @@
1290
  }
1291
  });
1292
 
1293
- async function analyzeContract(file) {
 
1294
  try {
1295
  showScreen('loading');
1296
 
@@ -1300,9 +1328,8 @@
1300
  formData.append('interpret_clauses', 'true');
1301
  formData.append('generate_negotiation_points', 'true');
1302
  formData.append('compare_to_market', 'true');
1303
- formData.append('llm_provider', 'ollama');
1304
 
1305
- const response = await fetch(`${API_BASE_URL}/analyze`, {
1306
  method: 'POST',
1307
  body: formData
1308
  });
@@ -1318,10 +1345,10 @@
1318
  throw new Error(errorDetail);
1319
  }
1320
 
1321
- const job = await response.json();
1322
- currentJobId = job.job_id;
1323
-
1324
- pollInterval = setInterval(() => pollJobStatus(currentJobId), 2000);
1325
 
1326
  } catch (error) {
1327
  console.error('Error:', error);
@@ -1330,44 +1357,110 @@
1330
  }
1331
  }
1332
 
1333
- async function pollJobStatus(jobId) {
 
1334
  try {
1335
- const response = await fetch(`${API_BASE_URL}/jobs/${jobId}`);
1336
- if (!response.ok) throw new Error('Failed to fetch job status');
1337
-
1338
- const job = await response.json();
1339
-
1340
- if (job.status === 'completed') {
1341
- clearInterval(pollInterval);
1342
- displayResults(job.result);
1343
- showScreen('results');
1344
- } else if (job.status === 'failed') {
1345
- clearInterval(pollInterval);
1346
- alert('Analysis failed: ' + job.error);
1347
- showScreen('analyzer');
 
 
 
 
 
 
 
 
 
 
1348
  }
 
 
 
 
 
 
1349
  } catch (error) {
1350
- console.error('Polling error:', error);
 
 
1351
  }
1352
  }
1353
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1354
  function displayResults(result) {
1355
  const score = result.risk_analysis.overall_score;
1356
  const riskLevel = result.risk_analysis.risk_level;
1357
 
 
1358
  document.getElementById('riskScoreValue').textContent = score;
1359
  document.getElementById('riskLevel').textContent = riskLevel.toUpperCase();
1360
  document.getElementById('riskLevel').className = 'risk-level risk-' + getRiskClass(score);
1361
 
 
1362
  const circumference = 534;
1363
  const offset = circumference - (score / 100) * circumference;
1364
  const circle = document.getElementById('riskCircle');
1365
  circle.style.strokeDashoffset = offset;
1366
- circle.style.stroke = getRiskColor(score);
1367
-
 
 
 
 
 
 
 
 
1368
  document.getElementById('executiveSummary').textContent = result.executive_summary;
1369
 
1370
- // Update other result sections...
1371
  const unfavorableList = document.getElementById('unfavorableTermsList');
1372
  unfavorableList.innerHTML = '';
1373
  if (result.unfavorable_terms && result.unfavorable_terms.length > 0) {
@@ -1380,7 +1473,91 @@
1380
  unfavorableList.innerHTML = '<li>No unfavorable terms detected</li>';
1381
  }
1382
 
1383
- // Similar updates for other sections...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1384
  }
1385
 
1386
  function getRiskClass(score) {
@@ -1397,6 +1574,24 @@
1397
  return '#16a34a';
1398
  }
1399
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1400
  // Initialize
1401
  showScreen('landing');
1402
  </script>
 
15
  font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
16
  background: #ffffff;
17
  color: #333;
18
+ line-height: 1.5;
19
  }
20
 
21
  /* Header */
22
  .header {
23
  background: white;
24
+ border-bottom: 0.2px solid #e5e5e5;
25
+ padding: 0.3rem 2.0rem;
26
  display: flex;
27
  justify-content: space-between;
28
  align-items: center;
 
49
  align-items: center;
50
  justify-content: center;
51
  color: white;
52
+ font-size: 20px;
53
  }
54
 
55
  .subtitle {
56
  color: #666;
57
+ font-size: 1.0rem;
58
+ font-weight: 500;
59
  }
60
 
61
  .container {
62
  max-width: 1200px;
63
  margin: 0 auto;
64
+ padding: 0 0.2rem;
65
  }
66
 
67
+ /* Landing Page Styles */
68
  .landing-screen {
69
+ padding-top: 50px;
70
  }
71
 
72
  .hero-section {
73
  text-align: center;
74
+ padding: 1rem 0 1rem;
75
  background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
76
  color: white;
77
+ margin-bottom: 2rem;
78
  }
79
 
80
  .hero-title {
 
88
  font-size: 1.3rem;
89
  margin-bottom: 2.5rem;
90
  opacity: 0.95;
91
+ max-width: 1000px;
92
  margin-left: auto;
93
  margin-right: auto;
94
  }
 
112
  }
113
 
114
  .section {
115
+ padding: 0.5rem 0;
116
  text-align: center;
117
  }
118
 
119
  .section-title {
120
  font-size: 2.2rem;
121
  font-weight: 600;
122
+ margin-bottom: 1rem;
123
  color: #333;
124
  }
125
 
126
  .section-subtitle {
127
  font-size: 1.2rem;
128
  color: #666;
129
+ margin-bottom: 1rem;
130
+ max-width: 1000px;
131
  margin-left: auto;
132
  margin-right: auto;
133
+ line-height: 0.5;
134
  }
135
 
136
  .features-grid {
137
  display: grid;
138
  grid-template-columns: repeat(3, 1fr);
139
  gap: 3rem;
140
+ margin-bottom: 0.1rem;
141
  }
142
 
143
  .feature-card {
144
  text-align: center;
145
+ padding: 1rem;
146
  }
147
 
148
  .feature-icon {
149
  font-size: 3rem;
150
+ margin-bottom: 1.0rem;
151
  }
152
 
153
  .feature-title {
 
159
 
160
  .feature-description {
161
  color: #666;
162
+ line-height: 1.5;
163
  font-size: 1rem;
164
  }
165
 
166
  .steps-section {
167
+ background: white;
168
+ padding: 1rem 0;
169
+ text-align: center;
170
  }
171
 
172
  .steps-grid {
173
  display: grid;
174
  grid-template-columns: repeat(3, 1fr);
175
  gap: 3rem;
176
+ margin-top: 0.5rem;
177
  }
178
 
179
  .step-card {
180
  text-align: center;
181
+ padding: 0.5rem;
182
  }
183
 
184
  .step-number {
 
192
  justify-content: center;
193
  font-size: 1.5rem;
194
  font-weight: 700;
195
+ margin: 0 auto 1.2rem;
196
  }
197
 
198
  .step-title {
 
204
 
205
  .step-description {
206
  color: #666;
207
+ line-height: 1.5;
208
  }
209
 
210
  .footer {
211
  text-align: center;
212
+ padding: 1rem 0.5rem;
213
  color: #999;
214
  font-size: 0.9rem;
215
+ border-top: 0.5px solid #e5e5e5;
216
+ background: white;
217
  }
218
 
219
  /* Analyzer Styles */
 
241
  margin-bottom: 2rem;
242
  }
243
 
244
+ .upload-section {
245
+ transition: all 0.3s ease;
246
+ }
247
+
248
+ .upload-section.hidden {
249
+ display: none !important;
250
+ }
251
+
252
  .upload-card {
253
  background: white;
254
  border-radius: 12px;
 
455
  font-size: 1rem;
456
  }
457
 
458
+ .results-content {
459
  display: none;
460
  }
461
 
462
+ .results-content.active {
463
  display: block;
464
  }
465
 
 
502
  border: 1px solid #fecaca;
503
  }
504
 
505
+ /* Results screen styles - UPDATED BUTTON POSITIONING */
506
  .results-header {
507
  display: flex;
508
  justify-content: space-between;
509
+ align-items: flex-start;
510
  margin-bottom: 2rem;
511
+ gap: 2rem;
512
  }
513
 
514
  .results-title {
515
  font-size: 2rem;
516
  font-weight: 700;
517
+ flex: 1;
518
  }
519
 
520
  .results-actions {
521
  display: flex;
522
  gap: 1rem;
523
+ align-items: center;
524
+ justify-content: flex-end;
525
  }
526
 
527
  .btn {
 
532
  cursor: pointer;
533
  border: none;
534
  transition: all 0.2s;
535
+ display: flex;
536
+ align-items: center;
537
+ gap: 0.5rem;
538
+ white-space: nowrap;
539
  }
540
 
541
  .btn-primary {
 
599
  transform: translate(-50%, -50%);
600
  font-size: 3rem;
601
  font-weight: 700;
 
602
  }
603
 
604
  .risk-level {
 
880
  <!-- Hero Section -->
881
  <section class="hero-section">
882
  <div class="container">
883
+ <h1 class="hero-title">Unlock Legal Intelligence : Analyze Contracts with AI</h1>
884
  <p class="hero-subtitle">
885
  Instantly identify risks, uncover unfavorable terms, and gain actionable negotiation points.
886
+ <br>
887
  Our AI-powered platform gives you the clarity and confidence to sign better contracts.
888
  </p>
889
  <button class="cta-button" id="getStartedBtn">Try Now for Free</button>
 
971
  ← Back to Overview
972
  </button>
973
 
974
+ <!-- Upload Section - Shown by default -->
975
+ <div id="uploadSection" class="upload-section">
976
+ <div class="hero-section-analyzer">
977
+ <h1 class="hero-title-analyzer">Analyze Your Contract in Seconds</h1>
978
+ <p class="hero-description">Paste your contract or upload a file to get an instant, AI-powered risk assessment.</p>
 
 
 
 
 
 
 
 
 
979
  </div>
980
 
981
+ <div class="upload-card">
982
+ <div class="tabs">
983
+ <button class="tab active" data-tab="paste">Paste Text</button>
984
+ <button class="tab" data-tab="upload">Upload File</button>
985
+ </div>
986
 
987
+ <div id="pasteTab" class="tab-content active">
988
+ <textarea class="textarea" id="contractText" placeholder="Paste your full contract text here..."></textarea>
 
 
 
 
989
  </div>
990
+
991
+ <div id="uploadTab" class="tab-content">
992
+ <div class="file-upload-area" id="fileUploadArea">
993
+ <input type="file" id="fileInput" class="file-input" accept=".pdf,.docx,.txt">
994
+ <div class="upload-icon">📄</div>
995
+ <div class="upload-text">Click to upload or drag and drop</div>
996
+ <div class="upload-hint">PDF, DOCX, or TXT files (Max 10MB)</div>
997
+ </div>
998
+ <div id="selectedFile" class="selected-file" style="display: none;">
999
+ <div class="file-icon">📄</div>
1000
+ <div class="file-info">
1001
+ <div class="file-name" id="fileName"></div>
1002
+ <div class="file-size" id="fileSize"></div>
1003
+ </div>
1004
+ <button class="remove-file" id="removeFile">×</button>
1005
  </div>
 
1006
  </div>
 
1007
 
1008
+ <div class="analyze-btn-container">
1009
+ <button class="analyze-btn" id="analyzeBtn">
1010
+ <span>🔍</span>
1011
+ <span>Analyze Contract</span>
1012
+ </button>
1013
+ </div>
1014
  </div>
1015
  </div>
1016
 
 
1021
  <p class="loading-text">This may take a moment for large documents.</p>
1022
  </div>
1023
 
1024
+ <!-- Results Content - Hidden by default -->
1025
+ <div id="resultsContent" class="results-content">
1026
  <div class="results-header">
1027
  <h1 class="results-title">Analysis Report</h1>
1028
  <div class="results-actions">
 
1113
  </div>
1114
 
1115
  <script>
1116
+ // DYNAMIC API BASE URL - Automatically detects current port
1117
+ const API_BASE_URL = `${window.location.protocol}//${window.location.host}/api/v1`;
 
1118
 
1119
  let selectedFile = null;
1120
+ let currentAnalysisResult = null;
 
1121
 
1122
  // Screen management
1123
  function showScreen(screenName) {
1124
  document.getElementById('landingScreen').style.display = 'none';
1125
  document.getElementById('analyzerScreen').style.display = 'none';
1126
  document.getElementById('loadingScreen').classList.remove('active');
1127
+ document.getElementById('resultsContent').classList.remove('active');
1128
+ document.getElementById('uploadSection').classList.remove('hidden');
1129
 
1130
  if (screenName === 'landing') {
1131
  document.getElementById('landingScreen').style.display = 'block';
 
1135
  } else if (screenName === 'loading') {
1136
  document.getElementById('analyzerScreen').style.display = 'block';
1137
  document.getElementById('loadingScreen').classList.add('active');
1138
+ document.getElementById('uploadSection').classList.add('hidden');
1139
  } else if (screenName === 'results') {
1140
  document.getElementById('analyzerScreen').style.display = 'block';
1141
+ document.getElementById('resultsContent').classList.add('active');
1142
+ document.getElementById('uploadSection').classList.add('hidden');
1143
  }
1144
  }
1145
 
 
1159
  });
1160
 
1161
  if (response.ok) {
1162
+ const data = await response.json();
1163
+ statusElement.textContent = `✓ Backend connected (${data.models_loaded} models, ${data.services_loaded} services)`;
1164
  statusElement.className = 'api-status connected';
1165
  } else {
1166
  throw new Error('Backend not responding properly');
1167
  }
1168
  } catch (error) {
1169
  console.error('Backend connection failed:', error);
1170
+ statusElement.textContent = '✗ Cannot connect to backend. Make sure the server is running.';
1171
  statusElement.className = 'api-status disconnected';
1172
 
1173
  setTimeout(() => {
 
1185
  showScreen('landing');
1186
  });
1187
 
1188
// "Analyze another" button: wipe the previous input and go back to the upload view.
document.getElementById('analyzeAnotherBtn').addEventListener('click', () => {
    // Forget the previously chosen file and restore the empty drop area.
    selectedFile = null;
    fileInput.value = '';
    fileUploadArea.style.display = 'block';
    selectedFileDiv.style.display = 'none';

    // Clear any pasted contract text.
    const pastedText = document.getElementById('contractText');
    pastedText.value = '';

    // showScreen() removes the 'hidden' class from the upload section,
    // so the form becomes visible again.
    showScreen('analyzer');
});
1199
+
1200
  // Tab switching
1201
  document.querySelectorAll('.tab').forEach(tab => {
1202
  tab.addEventListener('click', (e) => {
 
1300
  alert('Please paste contract text');
1301
  return;
1302
  }
1303
+ await analyzeContractText(text);
 
 
1304
  } else {
1305
  if (!selectedFile) {
1306
  alert('Please select a file');
1307
  return;
1308
  }
1309
+ await analyzeContractFile(selectedFile);
1310
  }
1311
  } catch (error) {
1312
  console.error('Analysis error:', error);
 
1317
  }
1318
  });
1319
 
1320
+ // Direct file analysis (synchronous)
1321
+ async function analyzeContractFile(file) {
1322
  try {
1323
  showScreen('loading');
1324
 
 
1328
  formData.append('interpret_clauses', 'true');
1329
  formData.append('generate_negotiation_points', 'true');
1330
  formData.append('compare_to_market', 'true');
 
1331
 
1332
+ const response = await fetch(`${API_BASE_URL}/analyze/file`, {
1333
  method: 'POST',
1334
  body: formData
1335
  });
 
1345
  throw new Error(errorDetail);
1346
  }
1347
 
1348
+ const result = await response.json();
1349
+ currentAnalysisResult = result;
1350
+ displayResults(result);
1351
+ showScreen('results');
1352
 
1353
  } catch (error) {
1354
  console.error('Error:', error);
 
1357
  }
1358
  }
1359
 
1360
/**
 * Analyze pasted contract text in a single request/response round trip.
 *
 * Shows the loading screen, POSTs the text plus fixed analysis options to
 * `${API_BASE_URL}/analyze/text` as multipart form data, then stores the
 * parsed response in `currentAnalysisResult`, renders it and switches to the
 * results screen. On any failure the user is alerted and returned to the
 * analyzer screen; the function itself never rejects.
 *
 * @param {string} text - Full contract text to analyze.
 * @returns {Promise<void>}
 */
async function analyzeContractText(text) {
    try {
        showScreen('loading');

        const formData = new FormData();
        formData.append('contract_text', text);
        formData.append('max_clauses', '15');
        formData.append('interpret_clauses', 'true');
        formData.append('generate_negotiation_points', 'true');
        formData.append('compare_to_market', 'true');

        const response = await fetch(`${API_BASE_URL}/analyze/text`, {
            method: 'POST',
            body: formData
        });

        if (!response.ok) {
            let errorDetail = 'Analysis failed';
            try {
                const errorData = await response.json();
                const detail = errorData.detail || errorData.error;
                if (typeof detail === 'string') {
                    errorDetail = detail;
                } else if (detail) {
                    // Validation errors may carry a list/object here; passing
                    // that straight to Error() would display "[object Object]".
                    errorDetail = JSON.stringify(detail);
                }
            } catch (e) {
                // Body was not JSON (or not an object): fall back to the HTTP status line.
                errorDetail = `Server error: ${response.status} ${response.statusText}`;
            }
            throw new Error(errorDetail);
        }

        const result = await response.json();
        // Kept at script scope so the PDF download handler can re-send it.
        currentAnalysisResult = result;
        displayResults(result);
        showScreen('results');

    } catch (error) {
        console.error('Error:', error);
        alert('Error analyzing contract: ' + error.message);
        showScreen('analyzer');
    }
}
1399
 
1400
// Download PDF: send the latest analysis result back to the server and hand
// the response body to the browser as a file download.
document.getElementById('downloadBtn').addEventListener('click', async () => {
    // Nothing to export until an analysis has completed.
    if (!currentAnalysisResult) {
        alert('No analysis results available to download');
        return;
    }

    try {
        const endpoint = `${API_BASE_URL}/generate-pdf`;
        const requestOptions = {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
            },
            body: JSON.stringify(currentAnalysisResult)
        };

        const pdfResponse = await fetch(endpoint, requestOptions);
        if (!pdfResponse.ok) {
            throw new Error('Failed to generate PDF');
        }

        const pdfBlob = await pdfResponse.blob();
        const objectUrl = window.URL.createObjectURL(pdfBlob);

        // A temporary hidden anchor is the usual way to trigger a named download.
        const link = document.createElement('a');
        link.style.display = 'none';
        link.href = objectUrl;
        link.download = `contract_analysis_${currentAnalysisResult.analysis_id}.pdf`;

        document.body.appendChild(link);
        link.click();

        // Release the blob URL and remove the helper element again.
        window.URL.revokeObjectURL(objectUrl);
        document.body.removeChild(link);

    } catch (error) {
        console.error('PDF download error:', error);
        alert('Error downloading PDF: ' + error.message);
    }
});
1436
+
1437
  function displayResults(result) {
1438
  const score = result.risk_analysis.overall_score;
1439
  const riskLevel = result.risk_analysis.risk_level;
1440
 
1441
+ // Update risk score
1442
  document.getElementById('riskScoreValue').textContent = score;
1443
  document.getElementById('riskLevel').textContent = riskLevel.toUpperCase();
1444
  document.getElementById('riskLevel').className = 'risk-level risk-' + getRiskClass(score);
1445
 
1446
+ // Update risk circle
1447
  const circumference = 534;
1448
  const offset = circumference - (score / 100) * circumference;
1449
  const circle = document.getElementById('riskCircle');
1450
  circle.style.strokeDashoffset = offset;
1451
+
1452
+ // Get risk color and apply to both circle and text
1453
+ const riskColor = getRiskColor(score);
1454
+ circle.style.stroke = riskColor;
1455
+
1456
+ // Update text color in the middle of the circle
1457
+ const riskScoreValue = document.getElementById('riskScoreValue');
1458
+ riskScoreValue.style.color = riskColor;
1459
+
1460
+ // Update executive summary
1461
  document.getElementById('executiveSummary').textContent = result.executive_summary;
1462
 
1463
+ // Update unfavorable terms
1464
  const unfavorableList = document.getElementById('unfavorableTermsList');
1465
  unfavorableList.innerHTML = '';
1466
  if (result.unfavorable_terms && result.unfavorable_terms.length > 0) {
 
1473
  unfavorableList.innerHTML = '<li>No unfavorable terms detected</li>';
1474
  }
1475
 
1476
+ // Update missing protections
1477
+ const missingList = document.getElementById('missingProtectionsList');
1478
+ missingList.innerHTML = '';
1479
+ if (result.missing_protections && result.missing_protections.length > 0) {
1480
+ result.missing_protections.slice(0, 8).forEach(protection => {
1481
+ const li = document.createElement('li');
1482
+ li.innerHTML = `<span class="item-icon">›</span><span class="item-text"><strong>${protection.protection}:</strong> ${protection.explanation}</span>`;
1483
+ missingList.appendChild(li);
1484
+ });
1485
+ } else {
1486
+ missingList.innerHTML = '<li>No missing protections detected</li>';
1487
+ }
1488
+
1489
+ // Update negotiation points
1490
+ const negotiationList = document.getElementById('negotiationPointsList');
1491
+ negotiationList.innerHTML = '';
1492
+ if (result.negotiation_points && result.negotiation_points.length > 0) {
1493
+ result.negotiation_points.slice(0, 8).forEach(point => {
1494
+ const li = document.createElement('li');
1495
+ li.innerHTML = `<span class="item-icon">›</span><span class="item-text"><strong>${point.point}:</strong> ${point.explanation}</span>`;
1496
+ negotiationList.appendChild(li);
1497
+ });
1498
+ } else {
1499
+ negotiationList.innerHTML = '<li>No negotiation points generated</li>';
1500
+ }
1501
+
1502
+ // Update category breakdown
1503
+ const categoryBreakdown = document.getElementById('categoryBreakdown');
1504
+ categoryBreakdown.innerHTML = '';
1505
+
1506
+ if (result.risk_analysis.category_scores) {
1507
+ Object.entries(result.risk_analysis.category_scores).forEach(([category, score]) => {
1508
+ const categoryItem = document.createElement('div');
1509
+ categoryItem.className = 'category-item';
1510
+
1511
+ const riskClass = getRiskClass(score);
1512
+ const riskColor = getRiskColor(score);
1513
+
1514
+ categoryItem.innerHTML = `
1515
+ <div class="category-header">
1516
+ <span class="category-name">${formatCategoryName(category)}</span>
1517
+ <span class="category-score score-${riskClass}">${score}/100</span>
1518
+ </div>
1519
+ <div class="progress-bar">
1520
+ <div class="progress-fill progress-${riskClass}" style="width: ${score}%"></div>
1521
+ </div>
1522
+ <div class="category-description">
1523
+ ${getCategoryDescription(category, score)}
1524
+ </div>
1525
+ `;
1526
+
1527
+ categoryBreakdown.appendChild(categoryItem);
1528
+ });
1529
+ }
1530
+
1531
+ // Update clause analysis
1532
+ const clauseAnalysis = document.getElementById('clauseAnalysis');
1533
+ clauseAnalysis.innerHTML = '';
1534
+
1535
+ if (result.clauses && result.clauses.length > 0) {
1536
+ result.clauses.slice(0, 10).forEach(clause => {
1537
+ const clauseItem = document.createElement('div');
1538
+ clauseItem.className = `clause-item ${getRiskClass(clause.confidence * 100)}`;
1539
+
1540
+ clauseItem.innerHTML = `
1541
+ <div class="clause-header">
1542
+ <div>
1543
+ <div class="clause-label">${clause.reference} • ${clause.category}</div>
1544
+ <div class="clause-text">${clause.text.substring(0, 200)}${clause.text.length > 200 ? '...' : ''}</div>
1545
+ </div>
1546
+ <div class="severity-badge badge-${getRiskClass(clause.confidence * 100)}">
1547
+ ${Math.round(clause.confidence * 100)}% confidence
1548
+ </div>
1549
+ </div>
1550
+ ${clause.risk_indicators && clause.risk_indicators.length > 0 ? `
1551
+ <div class="clause-section">
1552
+ <div class="clause-section-title">Risk Indicators</div>
1553
+ <div class="clause-section-text">${clause.risk_indicators.join(', ')}</div>
1554
+ </div>
1555
+ ` : ''}
1556
+ `;
1557
+
1558
+ clauseAnalysis.appendChild(clauseItem);
1559
+ });
1560
+ }
1561
  }
1562
 
1563
  function getRiskClass(score) {
 
1574
  return '#16a34a';
1575
  }
1576
 
1577
/**
 * Turn an underscore-separated category key into a display label by
 * upper-casing the first character of every segment and joining the
 * segments with spaces (e.g. "intellectual_property" -> "Intellectual Property").
 *
 * @param {string} category - Category key as delivered in category_scores.
 * @returns {string} Human-readable label.
 */
function formatCategoryName(category) {
    const labelParts = [];
    for (const segment of category.split('_')) {
        // charAt(0) yields '' for an empty segment, so "a__b" stays safe.
        const capitalized = segment.charAt(0).toUpperCase() + segment.slice(1);
        labelParts.push(capitalized);
    }
    return labelParts.join(' ');
}
1582
+
1583
/**
 * Return a one-line description for a risk category.
 *
 * Known categories get one of two texts depending on whether the score is
 * above 60; every other category gets a generic fallback.
 *
 * @param {string} category - Category key (e.g. "termination", "liability").
 * @param {number} score - Category risk score; values above 60 select the
 *     "needs attention" wording.
 * @returns {string} Description text for the category card.
 */
function getCategoryDescription(category, score) {
    // Each entry: [text when score > 60, text otherwise].
    const descriptions = {
        'termination': ['High termination risk detected', 'Termination terms appear reasonable'],
        'compensation': ['Compensation structure needs review', 'Compensation terms are clear'],
        'confidentiality': ['Confidentiality terms may be overly broad', 'Confidentiality terms are balanced'],
        'liability': ['Liability allocation needs attention', 'Liability terms are reasonable'],
        'intellectual_property': ['IP rights allocation requires review', 'IP terms are well-defined']
    };

    // Own-property check: a bare descriptions[category] lookup would also
    // resolve inherited keys such as "constructor" or "toString" and return
    // a function instead of the fallback text.
    if (!Object.prototype.hasOwnProperty.call(descriptions, category)) {
        return 'Review recommended based on risk score';
    }

    const [elevatedText, acceptableText] = descriptions[category];
    return score > 60 ? elevatedText : acceptableText;
}
1594
+
1595
  // Initialize
1596
  showScreen('landing');
1597
  </script>
utils/logger.py CHANGED
@@ -114,7 +114,7 @@ class ContractAnalyzerLogger:
114
 
115
 
116
  @classmethod
117
- def log_structured(cls, level: int, message: str, request_id: Optional[str] = None, **kwargs):
118
  """
119
  Log structured data as JSON
120
 
@@ -124,15 +124,12 @@ class ContractAnalyzerLogger:
124
 
125
  message { str } : Log message
126
 
127
- request_id { str } : Optional request ID for tracking
128
-
129
  **kwargs : Additional structured data
130
  """
131
  logger = cls.get_logger()
132
 
133
  log_data = {"timestamp" : datetime.now().isoformat(),
134
  "message" : message,
135
- "request_id" : request_id,
136
  **kwargs
137
  }
138
 
@@ -140,7 +137,7 @@ class ContractAnalyzerLogger:
140
 
141
 
142
  @classmethod
143
- def log_error(cls, error: Exception, context: Dict[str, Any] = None, request_id: Optional[str] = None):
144
  """
145
  Log error with full traceback and context
146
 
@@ -149,8 +146,6 @@ class ContractAnalyzerLogger:
149
  error { Exception } : Exception object
150
 
151
  context { dict } : Additional context dictionary
152
-
153
- request_id { str } : Request ID for tracking
154
  """
155
  error_logger = cls._loggers.get("contract_analyzer.error")
156
 
@@ -158,7 +153,6 @@ class ContractAnalyzerLogger:
158
  error_logger = cls.get_logger()
159
 
160
  error_data = {"timestamp" : datetime.now().isoformat(),
161
- "request_id" : request_id,
162
  "error_type" : type(error).__name__,
163
  "error_message" : str(error),
164
  "traceback" : traceback.format_exc(),
@@ -169,7 +163,7 @@ class ContractAnalyzerLogger:
169
 
170
 
171
  @classmethod
172
- def log_performance(cls, operation: str, duration: float, request_id: Optional[str] = None, **metrics):
173
  """
174
  Log performance metrics
175
 
@@ -179,8 +173,6 @@ class ContractAnalyzerLogger:
179
 
180
  duration { float } : Duration in seconds
181
 
182
- request_id { str } : Request ID
183
-
184
  **metrics : Additional metrics
185
  """
186
  perf_logger = cls._loggers.get("contract_analyzer.performance")
@@ -188,7 +180,6 @@ class ContractAnalyzerLogger:
188
  perf_logger = cls.get_logger()
189
 
190
  perf_data = {"timestamp" : datetime.now().isoformat(),
191
- "request_id" : request_id,
192
  "operation" : operation,
193
  "duration_seconds" : round(duration, 3),
194
  **metrics
 
114
 
115
 
116
  @classmethod
117
+ def log_structured(cls, level: int, message: str, **kwargs):
118
  """
119
  Log structured data as JSON
120
 
 
124
 
125
  message { str } : Log message
126
 
 
 
127
  **kwargs : Additional structured data
128
  """
129
  logger = cls.get_logger()
130
 
131
  log_data = {"timestamp" : datetime.now().isoformat(),
132
  "message" : message,
 
133
  **kwargs
134
  }
135
 
 
137
 
138
 
139
  @classmethod
140
+ def log_error(cls, error: Exception, context: Dict[str, Any] = None):
141
  """
142
  Log error with full traceback and context
143
 
 
146
  error { Exception } : Exception object
147
 
148
  context { dict } : Additional context dictionary
 
 
149
  """
150
  error_logger = cls._loggers.get("contract_analyzer.error")
151
 
 
153
  error_logger = cls.get_logger()
154
 
155
  error_data = {"timestamp" : datetime.now().isoformat(),
 
156
  "error_type" : type(error).__name__,
157
  "error_message" : str(error),
158
  "traceback" : traceback.format_exc(),
 
163
 
164
 
165
  @classmethod
166
+ def log_performance(cls, operation: str, duration: float, **metrics):
167
  """
168
  Log performance metrics
169
 
 
173
 
174
  duration { float } : Duration in seconds
175
 
 
 
176
  **metrics : Additional metrics
177
  """
178
  perf_logger = cls._loggers.get("contract_analyzer.performance")
 
180
  perf_logger = cls.get_logger()
181
 
182
  perf_data = {"timestamp" : datetime.now().isoformat(),
 
183
  "operation" : operation,
184
  "duration_seconds" : round(duration, 3),
185
  **metrics