Spaces:

satyaki-mitra
/

ContractIntel_AI

Sleeping

App Files Files Community

satyaki-mitra committed on Nov 12, 2025

Commit

de8f1bc

1 Parent(s): d15efc9

code refactor

Browse files

Files changed (11) hide show

app.py +290 -98
config/model_config.py +21 -1
launch.py +3 -3
model_manager/llm_manager.py +19 -7
reporter/pdf_generator.py +1 -50
services/clause_extractor.py +2 -1
services/contract_classifier.py +116 -150
services/risk_analyzer.py +279 -222
services/summary_generator.py +570 -0
static/index.html +307 -112
utils/logger.py +3 -12

app.py CHANGED Viewed

@@ -3,19 +3,24 @@ FastAPI Application for AI Contract Risk Analyzer
 Complete pre-loading approach: All models loaded at startup
 Direct synchronous flow: Upload → Analyze → Return Results + PDF
 """
 from fastapi.responses import JSONResponse, FileResponse, Response
-from fastapi import FastAPI, File, UploadFile, HTTPException, Form
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.staticfiles import StaticFiles
 from pydantic import BaseModel, Field
-from typing import List, Optional, Dict, Any
-import uuid
-import os
-from datetime import datetime
-from pathlib import Path
 import sys
-import tempfile
-import io
 # Add parent directory to path
 sys.path.append(str(Path(__file__).parent))
@@ -37,19 +42,140 @@ from services.protection_checker import ProtectionChecker
 from services.llm_interpreter import LLMClauseInterpreter
 from services.negotiation_engine import NegotiationEngine
 from services.market_comparator import MarketComparator
 # Import PDF generator
 from reporter.pdf_generator import generate_pdf_report
-# Initialize logger
-ContractAnalyzerLogger.setup(log_dir="logs", app_name="contract_analyzer")
-logger = ContractAnalyzerLogger.get_logger()
 # ============================================================================
 # PYDANTIC SCHEMAS
 # ============================================================================
-class HealthResponse(BaseModel):
     """Health check response"""
     status: str
     version: str
@@ -58,14 +184,16 @@ class HealthResponse(BaseModel):
     services_loaded: int
     memory_usage_mb: float
-class AnalysisOptions(BaseModel):
     """Analysis options"""
     max_clauses: int = Field(default=15, ge=5, le=30)
     interpret_clauses: bool = Field(default=True)
     generate_negotiation_points: bool = Field(default=True)
     compare_to_market: bool = Field(default=True)
-class AnalysisResult(BaseModel):
     """Complete analysis result"""
     analysis_id: str
     timestamp: str
@@ -81,12 +209,22 @@ class AnalysisResult(BaseModel):
     metadata: Dict[str, Any]
     pdf_available: bool = True
-class ErrorResponse(BaseModel):
     """Error response"""
     error: str
     detail: str
     timestamp: str
 # ============================================================================
 # SERVICE INITIALIZATION WITH FULL PRE-LOADING
 # ============================================================================
@@ -234,7 +372,7 @@ class PreloadedAnalysisService:
             # Step 1: Classify contract
             classification = self.services["classifier"].classify_contract(contract_text)
-            classification_dict = classification.to_dict()
             actual_category = classification.category
             log_info(f"Contract classified as: {actual_category}")
@@ -255,7 +393,7 @@ class PreloadedAnalysisService:
             # Extract clauses
             clauses = extractor.extract_clauses(contract_text, options.max_clauses)
-            clauses_dict = [clause.to_dict() for clause in clauses]
             log_info(f"Extracted {len(clauses)} clauses")
             # Step 3: Map to ContractType and get appropriate risk analyzer
@@ -279,17 +417,17 @@ class PreloadedAnalysisService:
             # Analyze risk
             risk_score = risk_analyzer.analyze_risk(contract_text, clauses)
-            risk_dict = risk_score.to_dict()
             log_info(f"Risk analysis completed: {risk_dict['overall_score']}/100")
             # Step 4: Find unfavorable terms
             unfavorable_terms = self.services["term_analyzer"].analyze_unfavorable_terms(contract_text, clauses)
-            unfavorable_dict = [term.to_dict() for term in unfavorable_terms]
             log_info(f"Found {len(unfavorable_terms)} unfavorable terms")
             # Step 5: Check missing protections
             missing_protections = self.services["protection_checker"].check_missing_protections(contract_text, clauses)
-            missing_dict = [prot.to_dict() for prot in missing_protections]
             log_info(f"Found {len(missing_protections)} missing protections")
             # Optional steps
@@ -302,7 +440,7 @@ class PreloadedAnalysisService:
                     interpretations = self.services["interpreter"].interpret_clauses(
                         clauses, min(10, options.max_clauses)
                     )
-                    interpretations_dict = [interp.to_dict() for interp in interpretations]
                     log_info(f"Interpreted {len(interpretations)} clauses")
                 except Exception as e:
                     log_error(f"Clause interpretation failed: {e}")
@@ -313,7 +451,7 @@ class PreloadedAnalysisService:
                     negotiation_points = self.services["negotiation_engine"].generate_negotiation_points(
                         risk_score, unfavorable_terms, missing_protections, clauses, 7
                     )
-                    negotiation_dict = [point.to_dict() for point in negotiation_points]
                     log_info(f"Generated {len(negotiation_points)} negotiation points")
                 except Exception as e:
                     log_error(f"Negotiation points generation failed: {e}")
@@ -322,7 +460,7 @@ class PreloadedAnalysisService:
             if options.compare_to_market:
                 try:
                     market_comparisons = self.services["market_comparator"].compare_to_market(clauses)
-                    market_dict = [comp.to_dict() for comp in market_comparisons]
                     log_info(f"Compared {len(market_comparisons)} clauses to market")
                 except Exception as e:
                     log_error(f"Market comparison failed: {e}")
@@ -330,7 +468,7 @@ class PreloadedAnalysisService:
             # Generate executive summary
             executive_summary = self._generate_executive_summary(
-                classification_dict, risk_dict, unfavorable_dict, missing_dict
             )
             # Build result
@@ -365,53 +503,87 @@ class PreloadedAnalysisService:
             raise
     def _generate_executive_summary(self, classification: Dict, risk_score: Dict,
-                                   unfavorable_terms: List, missing_protections: List) -> str:
-        """Generate executive summary"""
-        category = classification.get("category", "Unknown")
-        score = risk_score.get("overall_score", 0)
-        risk_level = risk_score.get("risk_level", "UNKNOWN")
-        critical_terms = sum(1 for t in unfavorable_terms if t.get('severity') == 'critical')
-        critical_protections = sum(1 for p in missing_protections if p.get('importance') == 'critical')
-        if score >= 80:
-            risk_msg = "CRITICAL ATTENTION REQUIRED"
-        elif score >= 60:
-            risk_msg = "SIGNIFICANT CONCERNS"
-        elif score >= 40:
-            risk_msg = "MODERATE RISK"
-        else:
-            risk_msg = "LOW RISK"
-        return f"This {category} contract scored {score}/100 ({risk_level.upper()} risk). {risk_msg}. Found {len(unfavorable_terms)} unfavorable terms ({critical_terms} critical) and {len(missing_protections)} missing protections ({critical_protections} critical). Review detailed analysis below."
 # ============================================================================
-# FASTAPI APP
 # ============================================================================
 app = FastAPI(
     title=settings.APP_NAME,
     version=settings.APP_VERSION,
     description="AI-powered contract risk analysis with complete model pre-loading",
     docs_url="/api/docs",
-    redoc_url="/api/redoc"
 )
 # Serve static files
-app.mount("/static", StaticFiles(directory="static"), name="static")
-# CORS middleware
 app.add_middleware(
     CORSMiddleware,
-    allow_origins=settings.CORS_ORIGINS,
-    allow_credentials=settings.CORS_ALLOW_CREDENTIALS,
-    allow_methods=settings.CORS_ALLOW_METHODS,
-    allow_headers=settings.CORS_ALLOW_HEADERS
 )
-# Initialize pre-loaded analysis service
-analysis_service = PreloadedAnalysisService()
 # ============================================================================
 # HELPER FUNCTIONS
 # ============================================================================
@@ -468,11 +640,14 @@ def validate_contract_text(text: str) -> tuple[bool, str]:
 @app.get("/")
 async def serve_frontend():
     """Serve the frontend"""
-    return FileResponse("static/index.html")
 @app.get("/api/v1/health", response_model=HealthResponse)
 async def health_check():
     """Health check endpoint with service status"""
     service_status = analysis_service.get_service_status()
     return HealthResponse(
@@ -487,6 +662,8 @@ async def health_check():
 @app.get("/api/v1/status")
 async def get_detailed_status():
     """Get detailed service status"""
     return analysis_service.get_service_status()
 @app.post("/api/v1/analyze/file", response_model=AnalysisResult)
@@ -498,6 +675,9 @@ async def analyze_contract_file(
     compare_to_market: bool = Form(True)
 ):
     """Analyze uploaded contract file - DIRECT SYNC FLOW"""
     try:
         # Validate file
         is_valid, message = validate_file(file)
@@ -552,6 +732,9 @@ async def analyze_contract_text(
     compare_to_market: bool = Form(True)
 ):
     """Analyze pasted contract text - DIRECT SYNC FLOW"""
     try:
         # Validate contract text
         is_valid, message = validate_contract_text(contract_text)
@@ -609,6 +792,9 @@ async def generate_pdf_from_analysis(analysis_result: Dict[str, Any]):
 @app.get("/api/v1/categories")
 async def get_contract_categories():
     """Get list of supported contract categories"""
     try:
         categories = analysis_service.services["classifier"].get_all_categories()
         return {"categories": categories}
@@ -616,55 +802,55 @@ async def get_contract_categories():
         log_error(f"Categories fetch failed: {e}")
         raise HTTPException(status_code=500, detail=f"Failed to get categories: {str(e)}")
-@app.post("/api/v1/validate/file")
 async def validate_contract_file(file: UploadFile = File(...)):
     """Quick validation endpoint"""
     try:
         is_valid, message = validate_file(file)
         if not is_valid:
-            return {"valid": False, "message": message}
         contract_text = read_contract_file(file)
         # Validate text length
         is_valid_text, text_message = validate_contract_text(contract_text)
         if not is_valid_text:
-            return {"valid": False, "message": text_message}
         # Validate contract structure using ContractValidator
         validator = ContractValidator()
         report = validator.get_validation_report(contract_text)
-        return {
-            "valid": report["scores"]["total"] > 50 and is_valid_text,
-            "message": "Contract appears valid" if report["scores"]["total"] > 50 else "May not be a valid contract",
-            "confidence": report["scores"]["total"],
-            "report": report
-        }
     except Exception as e:
         log_error(f"File validation failed: {e}")
         raise HTTPException(status_code=400, detail=f"Validation failed: {str(e)}")
-@app.post("/api/v1/validate/text")
 async def validate_contract_text_endpoint(contract_text: str = Form(...)):
     """Validate pasted contract text"""
     try:
         # Validate text length
         is_valid, message = validate_contract_text(contract_text)
         if not is_valid:
-            return {"valid": False, "message": message}
         # Validate contract structure using ContractValidator
         validator = ContractValidator()
         report = validator.get_validation_report(contract_text)
-        return {
-            "valid": report["scores"]["total"] > 50 and is_valid,
-            "message": "Contract appears valid" if report["scores"]["total"] > 50 else "May not be a valid contract",
-            "confidence": report["scores"]["total"],
-            "report": report
-        }
     except Exception as e:
         log_error(f"Text validation failed: {e}")
@@ -677,7 +863,7 @@ async def validate_contract_text_endpoint(contract_text: str = Form(...)):
 @app.exception_handler(HTTPException)
 async def http_exception_handler(request, exc):
     """Handle HTTP exceptions"""
-    return JSONResponse(
         status_code=exc.status_code,
         content=ErrorResponse(
             error=exc.detail,
@@ -690,7 +876,7 @@ async def http_exception_handler(request, exc):
 async def general_exception_handler(request, exc):
     """Handle general exceptions"""
     log_error(f"Unhandled exception: {exc}")
-    return JSONResponse(
         status_code=500,
         content=ErrorResponse(
             error="Internal server error",
@@ -700,34 +886,40 @@ async def general_exception_handler(request, exc):
     )
 # ============================================================================
-# STARTUP & SHUTDOWN
 # ============================================================================
-@app.on_event("startup")
-async def startup_event():
-    """Startup event - Services are already pre-loaded"""
-    log_info(f"🚀 {settings.APP_NAME} v{settings.APP_VERSION} STARTED")
-    log_info(f"📍 Server: {settings.HOST}:{settings.PORT}")
-    log_info(f"🔧 All models and services pre-loaded")
-@app.on_event("shutdown")
-async def shutdown_event():
-    """Shutdown event"""
-    log_info("🛑 Shutting down server...")
-    log_info("✅ Server shutdown complete")
 # ============================================================================
 # MAIN
 # ============================================================================
 if __name__ == "__main__":
-    import uvicorn
-    uvicorn.run(
-        "app:app",
-        host=settings.HOST,
-        port=settings.PORT,
-        reload=settings.RELOAD,
-        workers=1,  # Single worker for synchronous flow
-        log_level=settings.LOG_LEVEL.lower()
-    )

 Complete pre-loading approach: All models loaded at startup
 Direct synchronous flow: Upload → Analyze → Return Results + PDF
 """
+import signal
+import os
+import time
+import json
+import uuid
+from typing import Any, List, Dict, Optional
+from pathlib import Path
+from datetime import datetime
+from contextlib import asynccontextmanager
+import uvicorn
+import numpy as np
+from fastapi import FastAPI, File, UploadFile, HTTPException, Form, Request
 from fastapi.responses import JSONResponse, FileResponse, Response
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.staticfiles import StaticFiles
 from pydantic import BaseModel, Field
 import sys
 # Add parent directory to path
 sys.path.append(str(Path(__file__).parent))
 from services.llm_interpreter import LLMClauseInterpreter
 from services.negotiation_engine import NegotiationEngine
 from services.market_comparator import MarketComparator
+from services.summary_generator import SummaryGenerator
 # Import PDF generator
 from reporter.pdf_generator import generate_pdf_report
+# ============================================================================
+# CUSTOM SERIALIZATION
+# ============================================================================
class NumpyJSONEncoder(json.JSONEncoder):
    """
    Custom JSON encoder that handles NumPy types and custom objects.

    ``json`` only calls ``default`` for values it cannot encode natively, so
    plain dicts, lists, strings and Python numbers never reach this class.
    """
    def default(self, obj: Any) -> Any:
        """
        Convert a non-serializable object to a JSON-serializable value.

        Resolution order:
            1. ``np.ndarray``            -> nested list (``tolist()``)
            2. any NumPy scalar          -> native Python scalar (``item()``)
            3. object with ``to_dict()`` -> its dict
            4. object with ``dict()``    -> its dict (e.g. Pydantic models)
            5. ``set``/``frozenset``/``tuple`` -> list
            6. other object with a callable ``item()`` -> its scalar value

        Raises:
            TypeError: from the base class when none of the rules apply.
        """
        # Arrays first: they also expose ``item`` but it only works for size-1 arrays
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        # np.generic is the common base of every NumPy scalar type, so this
        # covers all widths (int16, uint64, float16, bool_, ...) in one check
        if isinstance(obj, np.generic):
            return obj.item()
        # Custom objects with to_dict method. Checked before the generic
        # ``item`` fallback so that a data attribute named ``item`` cannot
        # hijack serialization of an otherwise convertible object
        to_dict = getattr(obj, 'to_dict', None)
        if callable(to_dict):
            return to_dict()
        # Pydantic models
        as_dict = getattr(obj, 'dict', None)
        if callable(as_dict):
            return as_dict()
        # Handle other iterable containers json does not know about
        if isinstance(obj, (set, frozenset, tuple)):
            return list(obj)
        # Last resort: scalar-like objects from other libraries exposing item()
        item = getattr(obj, 'item', None)
        if callable(item):
            return item()
        return super().default(obj)
class NumpyJSONResponse(JSONResponse):
    """
    JSONResponse variant whose body is encoded with NumpyJSONEncoder
    """
    def render(self, content: Any) -> bytes:
        """
        Serialize ``content`` to compact UTF-8 encoded JSON bytes.

        Non-ASCII characters are written as-is (``ensure_ascii=False``) and
        NaN/Infinity floats are rejected (``allow_nan=False``).
        """
        dump_options = {
            "ensure_ascii": False,
            "allow_nan": False,
            "indent": None,
            "separators": (",", ":"),
            "cls": NumpyJSONEncoder,
        }
        encoded = json.dumps(content, **dump_options)
        return encoded.encode("utf-8")
def convert_numpy_types(obj: Any) -> Any:
    """
    Recursively convert NumPy values to Python native types.

    Containers are rebuilt: dicts stay dicts (NumPy scalar keys become native
    keys), while lists, tuples, sets and frozensets all come back as lists.
    Objects exposing ``to_dict()`` or ``dict()`` are replaced by the converted
    result of that call. Anything else is returned unchanged.

    Args:
        obj: Arbitrary value, possibly nested.

    Returns:
        The same data expressed with native Python types only (as far as the
        rules above can reach).
    """
    if obj is None:
        return None
    # Handle dictionaries. Keys are converted too: json.dumps rejects NumPy
    # scalar keys such as np.int64 even when a custom encoder is supplied
    if isinstance(obj, dict):
        return {
            (key.item() if isinstance(key, np.generic) else key): convert_numpy_types(value)
            for key, value in obj.items()
        }
    # Handle lists, tuples, sets
    if isinstance(obj, (list, tuple, set, frozenset)):
        return [convert_numpy_types(item) for item in obj]
    # Handle NumPy arrays before scalars: arrays also expose ``item`` but it
    # only works for size-1 arrays
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    # np.generic is the common base of every NumPy scalar type
    if isinstance(obj, np.generic):
        return obj.item()
    # Handle custom objects with to_dict method. Checked before the generic
    # ``item`` fallback so a data attribute named ``item`` cannot break conversion
    to_dict = getattr(obj, 'to_dict', None)
    if callable(to_dict):
        return convert_numpy_types(to_dict())
    # Handle Pydantic models
    as_dict = getattr(obj, 'dict', None)
    if callable(as_dict):
        return convert_numpy_types(as_dict())
    # Scalar-like objects from other libraries exposing item()
    item = getattr(obj, 'item', None)
    if callable(item):
        return item()
    # Return as-is for other types
    return obj
def safe_serialize_response(data: Any) -> Any:
    """
    Return ``data`` in a JSON-compatible form.

    Thin wrapper around convert_numpy_types, which does the actual conversion.
    """
    json_ready = convert_numpy_types(data)
    return json_ready
 # ============================================================================
 # PYDANTIC SCHEMAS
 # ============================================================================
class SerializableBaseModel(BaseModel):
    """
    Base model with enhanced serialization for NumPy types.

    NOTE(review): these overrides target the ``dict()`` / ``json()`` export API;
    confirm which Pydantic major version the project pins, since newer versions
    export through different method names.
    """
    def dict(self, *args, **kwargs) -> Dict[str, Any]:
        """
        Export the model as a dict containing only native Python types.

        All arguments are forwarded unchanged to ``BaseModel.dict``.
        """
        data = super().dict(*args, **kwargs)
        return convert_numpy_types(data)
    def json(self, *args, **kwargs) -> str:
        """
        Export the model as a JSON string, handling NumPy types.

        Export options understood by ``dict()`` (``include``, ``exclude``,
        ``by_alias``, ...) are routed to ``dict()``; every other keyword is
        passed to ``json.dumps``. ``NumpyJSONEncoder`` is used unless the
        caller supplies its own ``cls``.
        """
        # The two callees accept disjoint keyword sets; forwarding everything
        # to both makes any non-empty call fail in one of them
        dict_option_names = (
            "include",
            "exclude",
            "by_alias",
            "skip_defaults",
            "exclude_unset",
            "exclude_defaults",
            "exclude_none",
        )
        dict_kwargs = {name: kwargs.pop(name) for name in dict_option_names if name in kwargs}
        data = self.dict(*args, **dict_kwargs)
        # setdefault instead of a hard-coded keyword so an explicit cls= does not collide
        kwargs.setdefault("cls", NumpyJSONEncoder)
        return json.dumps(data, **kwargs)
+class HealthResponse(SerializableBaseModel):
     """Health check response"""
     status: str
     version: str
     services_loaded: int
     memory_usage_mb: float
+class AnalysisOptions(SerializableBaseModel):
     """Analysis options"""
     max_clauses: int = Field(default=15, ge=5, le=30)
     interpret_clauses: bool = Field(default=True)
     generate_negotiation_points: bool = Field(default=True)
     compare_to_market: bool = Field(default=True)
+class AnalysisResult(SerializableBaseModel):
     """Complete analysis result"""
     analysis_id: str
     timestamp: str
     metadata: Dict[str, Any]
     pdf_available: bool = True
+class ErrorResponse(SerializableBaseModel):
     """Error response"""
     error: str
     detail: str
     timestamp: str
class FileValidationResponse(SerializableBaseModel):
    """
    Response body returned by the file and text validation endpoints
    """
    # Verdict and its human-readable explanation
    valid: bool
    message: str
    # Populated only when the structural validation report was produced
    confidence: Optional[float] = Field(default=None)
    report: Optional[Dict[str, Any]] = Field(default=None)
 # ============================================================================
 # SERVICE INITIALIZATION WITH FULL PRE-LOADING
 # ============================================================================
             # Step 1: Classify contract
             classification = self.services["classifier"].classify_contract(contract_text)
+            classification_dict = safe_serialize_response(classification.to_dict())
             actual_category = classification.category
             log_info(f"Contract classified as: {actual_category}")
             # Extract clauses
             clauses = extractor.extract_clauses(contract_text, options.max_clauses)
+            clauses_dict = [safe_serialize_response(clause.to_dict()) for clause in clauses]
             log_info(f"Extracted {len(clauses)} clauses")
             # Step 3: Map to ContractType and get appropriate risk analyzer
             # Analyze risk
             risk_score = risk_analyzer.analyze_risk(contract_text, clauses)
+            risk_dict = safe_serialize_response(risk_score.to_dict())
             log_info(f"Risk analysis completed: {risk_dict['overall_score']}/100")
             # Step 4: Find unfavorable terms
             unfavorable_terms = self.services["term_analyzer"].analyze_unfavorable_terms(contract_text, clauses)
+            unfavorable_dict = [safe_serialize_response(term.to_dict()) for term in unfavorable_terms]
             log_info(f"Found {len(unfavorable_terms)} unfavorable terms")
             # Step 5: Check missing protections
             missing_protections = self.services["protection_checker"].check_missing_protections(contract_text, clauses)
+            missing_dict = [safe_serialize_response(prot.to_dict()) for prot in missing_protections]
             log_info(f"Found {len(missing_protections)} missing protections")
             # Optional steps
                     interpretations = self.services["interpreter"].interpret_clauses(
                         clauses, min(10, options.max_clauses)
                     )
+                    interpretations_dict = [safe_serialize_response(interp.to_dict()) for interp in interpretations]
                     log_info(f"Interpreted {len(interpretations)} clauses")
                 except Exception as e:
                     log_error(f"Clause interpretation failed: {e}")
                     negotiation_points = self.services["negotiation_engine"].generate_negotiation_points(
                         risk_score, unfavorable_terms, missing_protections, clauses, 7
                     )
+                    negotiation_dict = [safe_serialize_response(point.to_dict()) for point in negotiation_points]
                     log_info(f"Generated {len(negotiation_points)} negotiation points")
                 except Exception as e:
                     log_error(f"Negotiation points generation failed: {e}")
             if options.compare_to_market:
                 try:
                     market_comparisons = self.services["market_comparator"].compare_to_market(clauses)
+                    market_dict = [safe_serialize_response(comp.to_dict()) for comp in market_comparisons]
                     log_info(f"Compared {len(market_comparisons)} clauses to market")
                 except Exception as e:
                     log_error(f"Market comparison failed: {e}")
             # Generate executive summary
             executive_summary = self._generate_executive_summary(
+                classification_dict, risk_dict, unfavorable_dict, missing_dict, clauses,
             )
             # Build result
             raise
     def _generate_executive_summary(self, classification: Dict, risk_score: Dict,
+                               unfavorable_terms: List, missing_protections: List,
+                               clauses: List[Dict]) -> str:
+        """Generate executive summary using LLM"""
+        summary_generator = SummaryGenerator()
+        return summary_generator.generate_executive_summary(
+            classification=classification,
+            risk_analysis=risk_score,
+            unfavorable_terms=unfavorable_terms,
+            missing_protections=missing_protections,
+            clauses=clauses
+        )
 # ============================================================================
+# FASTAPI APPLICATION
 # ============================================================================
+# Global instances
+analysis_service: Optional[PreloadedAnalysisService] = None
+app_start_time = time.time()
+# Initialize logger
+ContractAnalyzerLogger.setup(log_dir="logs", app_name="contract_analyzer")
+logger = ContractAnalyzerLogger.get_logger()
@asynccontextmanager
async def lifespan(app: FastAPI):
    """
    Application lifespan handler.

    Creates the global PreloadedAnalysisService before the app starts serving
    and logs the shutdown messages once the app stops.
    """
    global analysis_service

    separator = "=" * 80

    # ---- Startup ----
    log_info(f"🚀 {settings.APP_NAME} v{settings.APP_VERSION} STARTING UP...")
    log_info(separator)
    try:
        analysis_service = PreloadedAnalysisService()
        log_info("✅ All services initialized successfully")
    except Exception as startup_error:
        # Record the failure, then let it propagate to the caller
        log_error(f"Startup failed: {startup_error}")
        raise

    ready_lines = (
        f"📍 Server: {settings.HOST}:{settings.PORT}",
        separator,
        "✅ AI Contract Risk Analyzer Ready!",
    )
    for line in ready_lines:
        log_info(line)

    try:
        yield
    finally:
        # ---- Shutdown ---- (in ``finally`` so it is logged even if the
        # serving phase ends with an exception)
        for line in ("🛑 Shutting down server...", "✅ Server shutdown complete"):
            log_info(line)
 app = FastAPI(
     title=settings.APP_NAME,
     version=settings.APP_VERSION,
     description="AI-powered contract risk analysis with complete model pre-loading",
     docs_url="/api/docs",
+    redoc_url="/api/redoc",
+    default_response_class=NumpyJSONResponse,
+    lifespan=lifespan
 )
+# Get absolute paths
+BASE_DIR = Path(__file__).parent
+STATIC_DIR = BASE_DIR / "static"
 # Serve static files
+app.mount("/static", StaticFiles(directory=str(STATIC_DIR)), name="static")
# CORS middleware
# Wildcard origins/methods/headers must not be combined with credentialed
# requests: browsers reject "*" for credentialed responses, and echoing the
# caller's origin instead would let any site send the user's cookies.
# Credentials are therefore disabled while the wildcard policy is in place;
# to enable them, list explicit origins, methods and headers instead of "*".
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # For development - restrict in production
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
 # ============================================================================
 # HELPER FUNCTIONS
 # ============================================================================
 @app.get("/")
 async def serve_frontend():
     """Serve the frontend"""
+    return FileResponse(str(STATIC_DIR / "index.html"))
 @app.get("/api/v1/health", response_model=HealthResponse)
 async def health_check():
     """Health check endpoint with service status"""
+    if not analysis_service:
+        raise HTTPException(status_code=503, detail="Service not initialized")
     service_status = analysis_service.get_service_status()
     return HealthResponse(
 @app.get("/api/v1/status")
 async def get_detailed_status():
     """Get detailed service status"""
+    if not analysis_service:
+        raise HTTPException(status_code=503, detail="Service not initialized")
     return analysis_service.get_service_status()
 @app.post("/api/v1/analyze/file", response_model=AnalysisResult)
     compare_to_market: bool = Form(True)
 ):
     """Analyze uploaded contract file - DIRECT SYNC FLOW"""
+    if not analysis_service:
+        raise HTTPException(status_code=503, detail="Service not initialized")
     try:
         # Validate file
         is_valid, message = validate_file(file)
     compare_to_market: bool = Form(True)
 ):
     """Analyze pasted contract text - DIRECT SYNC FLOW"""
+    if not analysis_service:
+        raise HTTPException(status_code=503, detail="Service not initialized")
     try:
         # Validate contract text
         is_valid, message = validate_contract_text(contract_text)
 @app.get("/api/v1/categories")
 async def get_contract_categories():
     """Get list of supported contract categories"""
+    if not analysis_service:
+        raise HTTPException(status_code=503, detail="Service not initialized")
     try:
         categories = analysis_service.services["classifier"].get_all_categories()
         return {"categories": categories}
         log_error(f"Categories fetch failed: {e}")
         raise HTTPException(status_code=500, detail=f"Failed to get categories: {str(e)}")
@app.post("/api/v1/validate/file", response_model=FileValidationResponse)
async def validate_contract_file(file: UploadFile = File(...)):
    """
    Quick validation endpoint for an uploaded contract file.

    Runs three checks in order (file validation, text validation, structural
    report from ContractValidator) and stops at the first failing one.
    Any exception is logged and turned into an HTTP 400.
    """
    try:
        # 1) The upload itself
        file_ok, file_message = validate_file(file)
        if not file_ok:
            return FileValidationResponse(valid=False, message=file_message)

        # 2) The extracted text
        contract_text = read_contract_file(file)
        is_valid_text, text_message = validate_contract_text(contract_text)
        if not is_valid_text:
            return FileValidationResponse(valid=False, message=text_message)

        # 3) Contract structure, scored by ContractValidator
        report = ContractValidator().get_validation_report(contract_text)
        total_score = report["scores"]["total"]
        looks_like_contract = total_score > 50
        if looks_like_contract:
            verdict = "Contract appears valid"
        else:
            verdict = "May not be a valid contract"
        return FileValidationResponse(
            valid=looks_like_contract and is_valid_text,
            message=verdict,
            confidence=total_score,
            report=report,
        )
    except Exception as e:
        log_error(f"File validation failed: {e}")
        raise HTTPException(status_code=400, detail=f"Validation failed: {str(e)}")
+@app.post("/api/v1/validate/text", response_model=FileValidationResponse)
 async def validate_contract_text_endpoint(contract_text: str = Form(...)):
     """Validate pasted contract text"""
     try:
         # Validate text length
         is_valid, message = validate_contract_text(contract_text)
         if not is_valid:
+            return FileValidationResponse(valid=False, message=message)
         # Validate contract structure using ContractValidator
         validator = ContractValidator()
         report = validator.get_validation_report(contract_text)
+        return FileValidationResponse(
+            valid=report["scores"]["total"] > 50 and is_valid,
+            message="Contract appears valid" if report["scores"]["total"] > 50 else "May not be a valid contract",
+            confidence=report["scores"]["total"],
+            report=report
+        )
     except Exception as e:
         log_error(f"Text validation failed: {e}")
 @app.exception_handler(HTTPException)
 async def http_exception_handler(request, exc):
     """Handle HTTP exceptions"""
+    return NumpyJSONResponse(
         status_code=exc.status_code,
         content=ErrorResponse(
             error=exc.detail,
 async def general_exception_handler(request, exc):
     """Handle general exceptions"""
     log_error(f"Unhandled exception: {exc}")
+    return NumpyJSONResponse(
         status_code=500,
         content=ErrorResponse(
             error="Internal server error",
     )
 # ============================================================================
+# REQUEST LOGGING MIDDLEWARE
 # ============================================================================
@app.middleware("http")
async def log_requests(request: Request, call_next):
    """
    Log method, path, status code and handling time of each HTTP request.

    Args:
        request: Incoming request.
        call_next: Callable that passes the request on and returns the response.

    Returns:
        The response produced by ``call_next``, unmodified.
    """
    # perf_counter is monotonic and high-resolution; the wall clock used by
    # time.time() can jump (NTP sync, manual changes) and skew durations
    started = time.perf_counter()
    response = await call_next(request)
    process_time = time.perf_counter() - started
    log_info(f"API Request: {request.method} {request.url.path} - Status: {response.status_code} - Duration: {process_time:.3f}s")
    return response
 # ============================================================================
 # MAIN
 # ============================================================================
 if __name__ == "__main__":
+    def signal_handler(sig, frame):
+        print("\n👋 Received Ctrl+C, shutting down gracefully...")
+        sys.exit(0)
+    signal.signal(signal.SIGINT, signal_handler)
+    try:
+        uvicorn.run(
+            "app:app",
+            host=settings.HOST,
+            port=settings.PORT,
+            reload=settings.RELOAD,
+            workers=1,
+            log_level=settings.LOG_LEVEL.lower()
+        )
+    except KeyboardInterrupt:
+        print("\n🎯 Server stopped by user")
+    except Exception as e:
+        log_error(f"Server error: {e}")
+        sys.exit(1)

config/model_config.py CHANGED Viewed

@@ -6,8 +6,13 @@ class ModelConfig:
     """
     Model-specific configurations - FOR AI MODEL SETTINGS ONLY
     """
     # Model Architecture Settings
     LEGAL_BERT        = {"model_name"      : "nlpaueb/legal-bert-base-uncased",
                          "task"            : "clause-extraction",
                          "max_length"      : 512,
                          "batch_size"      : 16,
@@ -18,6 +23,7 @@ class ModelConfig:
     # Embedding Model Settings
     EMBEDDING_MODEL   = {"model_name"           : "sentence-transformers/all-MiniLM-L6-v2",
                          "dimension"            : 384,
                          "pooling"              : "mean",
                          "normalize"            : True,
@@ -75,6 +81,20 @@ class ModelConfig:
                          "entity_confidence"   : 0.8,
                         }
     @classmethod
     def get_model_config(cls, model_type: str) -> dict:
@@ -91,4 +111,4 @@ class ModelConfig:
                       "text_processing"   : cls.TEXT_PROCESSING,
                      }
-        return config_map.get(model_type, {})

     """
     Model-specific configurations - FOR AI MODEL SETTINGS ONLY
     """
+    # Directory Settings
+    MODEL_DIR = Path("models")
+    CACHE_DIR = Path("cache/models")
     # Model Architecture Settings
     LEGAL_BERT        = {"model_name"      : "nlpaueb/legal-bert-base-uncased",
+                         "local_path"      : MODEL_DIR / "legal-bert",
                          "task"            : "clause-extraction",
                          "max_length"      : 512,
                          "batch_size"      : 16,
     # Embedding Model Settings
     EMBEDDING_MODEL   = {"model_name"           : "sentence-transformers/all-MiniLM-L6-v2",
+                         "local_path"           : MODEL_DIR / "embeddings",
                          "dimension"            : 384,
                          "pooling"              : "mean",
                          "normalize"            : True,
                          "entity_confidence"   : 0.8,
                         }
+    @classmethod
+    def ensure_directories(cls):
+        """
+        Ensure all required directories exist
+        """
+        directories = [cls.MODEL_DIR,
+                       cls.CACHE_DIR,
+                       cls.MODEL_DIR / "legal-bert",
+                       cls.MODEL_DIR / "embeddings",
+                      ]
+        for directory in directories:
+            directory.mkdir(parents = True, exist_ok = True)
     @classmethod
     def get_model_config(cls, model_type: str) -> dict:
                       "text_processing"   : cls.TEXT_PROCESSING,
                      }
+        return config_map.get(model_type, {})

launch.py CHANGED Viewed

@@ -57,10 +57,10 @@ def start_api():
     time.sleep(3)
     try:
-        response = requests.get("http://localhost:8000/api/v1/health", timeout=5)
         if response.status_code == 200:
-            print("✓ API Server running at: http://localhost:8000")
-            print("✓ Documentation at: http://localhost:8000/api/docs")
             return True
     except:
         pass

     time.sleep(3)
     try:
+        response = requests.get("http://localhost:8005/api/v1/health", timeout=5)
         if response.status_code == 200:
+            print("✓ API Server running at: http://localhost:8005")
+            print("✓ Documentation at: http://localhost:8005/api/docs")
             return True
     except:
         pass

model_manager/llm_manager.py CHANGED Viewed

@@ -11,6 +11,7 @@ from pathlib import Path
 from typing import Literal
 from typing import Optional
 from dataclasses import dataclass
 # Add parent directory to path for imports
 sys.path.append(str(Path(__file__).parent.parent))
@@ -80,14 +81,14 @@ class LLMManager:
     Unified LLM manager for multiple providers : handles Ollama (local), OpenAI API, and Anthropic API
     """
     def __init__(self, default_provider: LLMProvider = LLMProvider.OLLAMA, ollama_base_url: Optional[str] = None,
-                 openai_api_key: Optional[str] = None, anthropic_api_key: Optional[str] = None):
         """
         Initialize LLM Manager
         Arguments:
         ----------
             default_provider  : Default LLM provider to use
             ollama_base_url   : Ollama server URL (default: http://localhost:11434)
             openai_api_key    : OpenAI API key (or set OPENAI_API_KEY env var)
@@ -101,9 +102,20 @@ class LLMManager:
         self.config            = ModelConfig()
         # Ollama configuration
-        self.ollama_base_url   = ollama_base_url or self.config.LLM_CONFIG["base_url"]
-        self.ollama_model      = self.config.LLM_CONFIG["model"]
-        self.ollama_timeout    = self.config.LLM_CONFIG["timeout"]
         # OpenAI configuration
         self.openai_api_key    = openai_api_key
@@ -116,7 +128,7 @@ class LLMManager:
         if (ANTHROPIC_AVAILABLE and self.anthropic_api_key):
             self.anthropic_client = anthropic.Anthropic(api_key = self.anthropic_api_key)
         else:
             self.anthropic_client = None
@@ -133,7 +145,7 @@ class LLMManager:
                  openai_available    = OPENAI_AVAILABLE and bool(self.openai_api_key),
                  anthropic_available = ANTHROPIC_AVAILABLE and bool(self.anthropic_api_key),
                 )
     # PROVIDER AVAILABILITY CHECKS
     def _check_ollama_available(self) -> bool:

 from typing import Literal
 from typing import Optional
 from dataclasses import dataclass
+from config.settings import settings
 # Add parent directory to path for imports
 sys.path.append(str(Path(__file__).parent.parent))
     Unified LLM manager for multiple providers : handles Ollama (local), OpenAI API, and Anthropic API
     """
     def __init__(self, default_provider: LLMProvider = LLMProvider.OLLAMA, ollama_base_url: Optional[str] = None,
+             openai_api_key: Optional[str] = None, anthropic_api_key: Optional[str] = None):
         """
         Initialize LLM Manager
         Arguments:
         ----------
             default_provider  : Default LLM provider to use
             ollama_base_url   : Ollama server URL (default: http://localhost:11434)
             openai_api_key    : OpenAI API key (or set OPENAI_API_KEY env var)
         self.config            = ModelConfig()
         # Ollama configuration
+        self.ollama_base_url   = ollama_base_url or "http://localhost:11434"  # Default Ollama URL
+        self.ollama_model      = "mistral:7b"  # Default model
+        self.ollama_timeout    = 300           # Default timeout
+        # Get settings from environment or use defaults
+        try:
+            self.ollama_base_url = ollama_base_url or settings.OLLAMA_BASE_URL
+            self.ollama_model    = settings.OLLAMA_MODEL
+            self.ollama_timeout = settings.OLLAMA_TIMEOUT
+        except ImportError:
+            # Fallback to defaults if settings not available
+            pass
         # OpenAI configuration
         self.openai_api_key    = openai_api_key
         if (ANTHROPIC_AVAILABLE and self.anthropic_api_key):
             self.anthropic_client = anthropic.Anthropic(api_key = self.anthropic_api_key)
         else:
             self.anthropic_client = None
                  openai_available    = OPENAI_AVAILABLE and bool(self.openai_api_key),
                  anthropic_available = ANTHROPIC_AVAILABLE and bool(self.anthropic_api_key),
                 )
     # PROVIDER AVAILABILITY CHECKS
     def _check_ollama_available(self) -> bool:

reporter/pdf_generator.py CHANGED Viewed

@@ -67,7 +67,7 @@ class PDFReportGenerator:
         # Body text
         self.styles.add(ParagraphStyle(
-            name='BodyText',
             parent=self.styles['Normal'],
             fontSize=10,
             leading=14,
@@ -445,52 +445,3 @@ def generate_pdf_report(analysis_result: Dict[str, Any],
     generator = PDFReportGenerator()
     return generator.generate_report(analysis_result, output_path)
-if __name__ == "__main__":
-    # Test with sample data
-    sample_result = {
-        "analysis_id": "test-123",
-        "timestamp": datetime.now().isoformat(),
-        "risk_analysis": {
-            "overall_score": 85,
-            "risk_level": "CRITICAL",
-            "risk_breakdown": [
-                {
-                    "category": "Restrictive Covenants",
-                    "score": 95,
-                    "summary": "The agreement contains exceptionally broad and long-lasting non-compete (24 months) and non-solicitation (5 years) clauses."
-                },
-                {
-                    "category": "Penalties & Termination",
-                    "score": 90,
-                    "summary": "The contract includes severe penalties for breach, including forfeiture of earned salary."
-                }
-            ]
-        },
-        "executive_summary": "This employment agreement is heavily skewed in favor of the Employer, presenting a very high risk.",
-        "unfavorable_terms": [
-            {
-                "term": "Undefined Post-Probation Salary",
-                "clause_reference": "Clause 8.2",
-                "severity": "critical",
-                "explanation": "Post-probation salary is undefined ('as discussed').",
-                "suggested_fix": "Insist that the exact salary be explicitly stated."
-            }
-        ],
-        "missing_protections": [
-            {
-                "protection": "Defined Post-Probation Salary",
-                "importance": "critical",
-                "explanation": "The contract lacks a specific, written salary commitment."
-            }
-        ],
-        "negotiation_points": [
-            {
-                "issue": "Post-probation salary",
-                "rationale": "Must be explicitly defined in writing before signing."
-            }
-        ]
-    }
-    buffer = generate_pdf_report(sample_result, "test_report.pdf")
-    print("Test PDF generated successfully!")

         # Body text
         self.styles.add(ParagraphStyle(
+            name='CustomBodyText',
             parent=self.styles['Normal'],
             fontSize=10,
             leading=14,
     generator = PDFReportGenerator()
     return generator.generate_report(analysis_result, output_path)

services/clause_extractor.py CHANGED Viewed

@@ -21,6 +21,7 @@ from utils.logger import log_info
 from utils.logger import log_error
 from utils.text_processor import TextProcessor
 from utils.logger import ContractAnalyzerLogger
 @dataclass
@@ -604,7 +605,7 @@ class ClauseExtractor:
         Extract risk indicator keywords from clause text
         """
         text_lower       = text.lower()
-        found_indicators = dict()
         for severity, indicators in self.RISK_INDICATORS.items():
             for indicator in indicators:

 from utils.logger import log_error
 from utils.text_processor import TextProcessor
 from utils.logger import ContractAnalyzerLogger
+from model_manager.model_loader import ModelLoader
 @dataclass
         Extract risk indicator keywords from clause text
         """
         text_lower       = text.lower()
+        found_indicators = list()
         for severity, indicators in self.RISK_INDICATORS.items():
             for indicator in indicators:

services/contract_classifier.py CHANGED Viewed

@@ -55,127 +55,55 @@ class ContractClassifier:
     4. Confidence scoring with explanations
     """
     # CATEGORY HIERARCHY WITH KEYWORDS
-    CATEGORY_HIERARCHY = {
-        'employment': {
-            'subcategories': ['full_time', 'part_time', 'contract_worker', 'internship', 'executive'],
-            'keywords': [
-                'employee', 'employment', 'job', 'position', 'salary', 'benefits',
-                'annual leave', 'sick leave', 'probation', 'job description',
-                'work hours', 'overtime', 'performance review', 'bonus structure'
-            ],
-            'weight': 1.0
-        },
-        'consulting': {
-            'subcategories': ['independent_contractor', 'advisory', 'professional_services', 'freelance'],
-            'keywords': [
-                'consultant', 'consulting', 'independent contractor', 'statement of work',
-                'deliverables', 'professional services', 'hourly rate', 'project scope',
-                'milestone', 'acceptance criteria', 'work product'
-            ],
-            'weight': 1.0
-        },
-        'nda': {
-            'subcategories': ['mutual_nda', 'unilateral_nda', 'confidentiality_agreement'],
-            'keywords': [
-                'non-disclosure', 'confidentiality', 'proprietary information',
-                'nda', 'disclosure agreement', 'trade secret', 'confidential information',
-                'receiving party', 'disclosing party', 'confidentiality obligation'
-            ],
-            'weight': 1.2  # Higher weight as NDAs are distinct
-        },
-        'technology': {
-            'subcategories': ['software_license', 'saas', 'cloud_services', 'development', 'api_access'],
-            'keywords': [
-                'software', 'license', 'saas', 'subscription', 'source code',
-                'object code', 'api', 'cloud', 'hosting', 'maintenance',
-                'updates', 'support', 'uptime', 'service level'
-            ],
-            'weight': 1.0
-        },
-        'intellectual_property': {
-            'subcategories': ['ip_assignment', 'licensing', 'patent', 'trademark', 'copyright'],
-            'keywords': [
-                'intellectual property', 'ip', 'copyright', 'patent', 'trademark',
-                'work product', 'inventions', 'ip rights', 'ownership',
-                'assignment of rights', 'license grant', 'royalty'
-            ],
-            'weight': 1.1
-        },
-        'real_estate': {
-            'subcategories': ['residential_lease', 'commercial_lease', 'sublease', 'purchase_agreement'],
-            'keywords': [
-                'landlord', 'tenant', 'lease', 'premises', 'rent', 'property',
-                'security deposit', 'utilities', 'maintenance', 'repairs',
-                'eviction', 'lease term', 'renewal', 'square footage'
-            ],
-            'weight': 1.0
-        },
-        'financial': {
-            'subcategories': ['loan', 'mortgage', 'credit', 'investment', 'promissory_note'],
-            'keywords': [
-                'loan', 'borrower', 'lender', 'principal', 'interest rate',
-                'collateral', 'default', 'repayment', 'amortization',
-                'promissory note', 'security interest', 'mortgage'
-            ],
-            'weight': 1.0
-        },
-        'business': {
-            'subcategories': ['partnership', 'joint_venture', 'shareholders', 'llc_operating', 'merger'],
-            'keywords': [
-                'partnership', 'joint venture', 'equity', 'shares', 'profit sharing',
-                'loss allocation', 'management', 'governance', 'voting rights',
-                'dissolution', 'capital contribution', 'distribution'
-            ],
-            'weight': 1.0
-        },
-        'sales': {
-            'subcategories': ['purchase_order', 'sales_agreement', 'distribution', 'supply_agreement'],
-            'keywords': [
-                'purchase', 'sale', 'buyer', 'seller', 'goods', 'products',
-                'delivery', 'shipment', 'payment terms', 'invoice',
-                'purchase price', 'quantity', 'specifications'
-            ],
-            'weight': 1.0
-        },
-        'service_agreement': {
-            'subcategories': ['master_services', 'maintenance', 'support', 'subscription'],
-            'keywords': [
-                'service provider', 'services', 'sla', 'service level agreement',
-                'uptime', 'response time', 'support', 'maintenance',
-                'service credits', 'performance metrics', 'implementation'
-            ],
-            'weight': 1.0
-        },
-        'vendor': {
-            'subcategories': ['supplier_agreement', 'procurement', 'master_vendor'],
-            'keywords': [
-                'vendor', 'supplier', 'procurement', 'supply chain',
-                'purchase order', 'fulfillment', 'vendor management',
-                'pricing', 'terms of supply'
-            ],
-            'weight': 1.0
-        },
-        'agency': {
-            'subcategories': ['marketing_agency', 'recruiting', 'representation'],
-            'keywords': [
-                'agent', 'agency', 'principal', 'commission', 'representation',
-                'authority', 'scope of authority', 'compensation',
-                'exclusive rights', 'territory'
-            ],
-            'weight': 1.0
-        }
-    }
     # SUBCATEGORY DETECTION PATTERNS
     SUBCATEGORY_PATTERNS = {'full_time'                 : ['full-time', 'full time', 'permanent', 'regular employee', '40 hours', 'exempt employee'],
@@ -325,7 +253,7 @@ class ContractClassifier:
             raise ValueError("Contract text too short for classification")
         # Preprocess text (use first 3000 chars for efficiency)
-        text_excerpt = contract_text
         log_info("Starting contract classification",
                  text_length    = len(contract_text),
@@ -338,8 +266,8 @@ class ContractClassifier:
         # Step 2: Semantic similarity
         semantic_scores   = self._semantic_similarity(text_excerpt)
-        # Step 3: Legal-BERT enhanced (optional - can be expensive)
-        legal_bert_scores = self._legal_bert_classification(text_excerpt)
         # Step 4: Combine scores (weighted average)
         combined_scores   = self._combine_scores(keyword_scores    = keyword_scores,
@@ -369,6 +297,7 @@ class ContractClassifier:
                                                     subcategory      = subcategory,
                                                     keyword_scores   = keyword_scores,
                                                     semantic_scores  = semantic_scores,
                                                     combined_scores  = combined_scores,
                                                    )
@@ -452,9 +381,9 @@ class ContractClassifier:
         return similarities
-    def _legal_bert_classification(self, text: str) -> Dict[str, float]:
         """
-        Use Legal-BERT for classification (optional - computationally expensive)
         Arguments:
         ----------
@@ -462,7 +391,42 @@ class ContractClassifier:
         Returns:
         --------
-            { dict }     : Dictionary of {category: score}
         """
         # Tokenize
         inputs = self.legal_bert_tokenizer(text,
@@ -475,9 +439,10 @@ class ContractClassifier:
         # Get embeddings
         with torch.no_grad():
             outputs       = self.legal_bert_model(**inputs)
             cls_embedding = outputs.last_hidden_state[:, 0, :].cpu().numpy()[0]
-        return {cat: 0.5 for cat in self.CATEGORY_HIERARCHY.keys()}
     def _combine_scores(self, keyword_scores: Dict[str, float], semantic_scores: Dict[str, float], legal_bert_scores: Dict[str, float] = None) -> Dict[str, float]:
@@ -490,7 +455,7 @@ class ContractClassifier:
             semantic_scores   { dict } : Semantic similarity scores
-            legal_bert_scores { dict } : Legal-BERT scores (optional)
         Returns:
         --------
@@ -499,22 +464,14 @@ class ContractClassifier:
         combined          = dict()
         # Weights for each method
-        keyword_weight    = 0.40
-        semantic_weight   = 0.60
-        legal_bert_weight = 0.00  # Set to 0 if not using Legal-BERT
-        if legal_bert_scores:
-            # Normalize weights
-            total_weight       = keyword_weight + semantic_weight + legal_bert_weight
-            keyword_weight    /= total_weight
-            semantic_weight   /= total_weight
-            legal_bert_weight /= total_weight
         for category in self.CATEGORY_HIERARCHY.keys():
-            score = (keyword_scores.get(category, 0) * keyword_weight + semantic_scores.get(category, 0) * semantic_weight)
-            if legal_bert_scores:
-                score += legal_bert_scores.get(category, 0) * legal_bert_weight
             combined[category] = score
@@ -562,8 +519,9 @@ class ContractClassifier:
         return None
-    def _generate_reasoning(self, contract_text: str, primary_category: str, subcategory: Optional[str], keyword_scores: Dict[str, float], semantic_scores: Dict[str, float],
-                            combined_scores: Dict[str, float]) -> List[str]:
         """
         Generate human-readable reasoning for classification
@@ -576,6 +534,7 @@ class ContractClassifier:
         # Primary category reasoning
         keyword_match  = keyword_scores.get(primary_category, 0)
         semantic_match = semantic_scores.get(primary_category, 0)
         if (keyword_match > 0.5):
             reasoning.append(f"Strong keyword indicators for {primary_category.replace('_', ' ')} category "
@@ -597,6 +556,11 @@ class ContractClassifier:
                              f"(similarity: {semantic_match:.2f})"
                             )
         # Subcategory reasoning
         if subcategory:
             reasoning.append(f"Specific subcategory identified: {subcategory.replace('_', ' ')}")
@@ -659,9 +623,10 @@ class ContractClassifier:
         log_info("Starting multi-label classification", threshold = threshold)
         # Get scores
-        keyword_scores  = self._score_keywords(text.lower())
-        semantic_scores = self._semantic_similarity(text)
-        combined_scores = self._combine_scores(keyword_scores, semantic_scores)
         # Get all categories above threshold
         matches         = list()
@@ -669,7 +634,8 @@ class ContractClassifier:
         for category, score in combined_scores.items():
             if (score >= threshold):
                 subcategory = self._detect_subcategory(text, category)
-                reasoning   = self._generate_reasoning(text, category, subcategory, keyword_scores, semantic_scores, combined_scores)
                 keywords    = self._extract_detected_keywords(text, category)
                 matches.append(ContractCategory(category          = category,
@@ -720,4 +686,4 @@ class ContractClassifier:
         """
         Get subcategories for a specific category
         """
-        return self.CATEGORY_HIERARCHY.get(category, {}).get('subcategories', [])

     4. Confidence scoring with explanations
     """
     # CATEGORY HIERARCHY WITH KEYWORDS
+    CATEGORY_HIERARCHY   = {'employment'            : {'subcategories' : ['full_time', 'part_time', 'contract_worker', 'internship', 'executive'],
+                                                       'keywords'      : ['employee', 'employment', 'job', 'position', 'salary', 'benefits', 'annual leave', 'sick leave', 'probation', 'job description', 'work hours', 'overtime', 'performance review', 'bonus structure'],
+                                                       'weight'        : 1.0,
+                                                      },
+                            'consulting'            : {'subcategories' : ['independent_contractor', 'advisory', 'professional_services', 'freelance'],
+                                                       'keywords'      : ['consultant', 'consulting', 'independent contractor', 'statement of work', 'deliverables', 'professional services', 'hourly rate', 'project scope', 'milestone', 'acceptance criteria', 'work product'],
+                                                       'weight'        : 1.0,
+                                                      },
+                            'nda'                   : {'subcategories' : ['mutual_nda', 'unilateral_nda', 'confidentiality_agreement'],
+                                                       'keywords'      : ['non-disclosure', 'confidentiality', 'proprietary information', 'nda', 'disclosure agreement', 'trade secret', 'confidential information', 'receiving party', 'disclosing party', 'confidentiality obligation'],
+                                                       'weight'        : 1.2,
+                                                      },
+                            'technology'            : {'subcategories' : ['software_license', 'saas', 'cloud_services', 'development', 'api_access'],
+                                                       'keywords'      : ['software', 'license', 'saas', 'subscription', 'source code', 'object code', 'api', 'cloud', 'hosting', 'maintenance', 'updates', 'support', 'uptime', 'service level'],
+                                                       'weight'        : 1.0,
+                                                      },
+                            'intellectual_property' : {'subcategories' : ['ip_assignment', 'licensing', 'patent', 'trademark', 'copyright'],
+                                                       'keywords'      : ['intellectual property', 'ip', 'copyright', 'patent', 'trademark', 'work product', 'inventions', 'ip rights', 'ownership', 'assignment of rights', 'license grant', 'royalty'],
+                                                       'weight'        : 1.1,
+                                                      },
+                            'real_estate'           : {'subcategories' : ['residential_lease', 'commercial_lease', 'sublease', 'purchase_agreement'],
+                                                       'keywords'      : ['landlord', 'tenant', 'lease', 'premises', 'rent', 'property', 'security deposit', 'utilities', 'maintenance', 'repairs', 'eviction', 'lease term', 'renewal', 'square footage'],
+                                                       'weight'        : 1.0,
+                                                      },
+                            'financial'             : {'subcategories' : ['loan', 'mortgage', 'credit', 'investment', 'promissory_note'],
+                                                       'keywords'      : ['loan', 'borrower', 'lender', 'principal', 'interest rate', 'collateral', 'default', 'repayment', 'amortization', 'promissory note', 'security interest', 'mortgage'],
+                                                       'weight'        : 1.0,
+                                                      },
+                            'business'              : {'subcategories' : ['partnership', 'joint_venture', 'shareholders', 'llc_operating', 'merger'],
+                                                       'keywords'      : ['partnership', 'joint venture', 'equity', 'shares', 'profit sharing', 'loss allocation', 'management', 'governance', 'voting rights', 'dissolution', 'capital contribution', 'distribution'],
+                                                       'weight'        : 1.0,
+                                                      },
+                            'sales'                 : {'subcategories' : ['purchase_order', 'sales_agreement', 'distribution', 'supply_agreement'],
+                                                       'keywords'      : ['purchase', 'sale', 'buyer', 'seller', 'goods', 'products', 'delivery', 'shipment', 'payment terms', 'invoice', 'purchase price', 'quantity', 'specifications'],
+                                                       'weight'        : 1.0,
+                                                      },
+                            'service_agreement'     : {'subcategories' : ['master_services', 'maintenance', 'support', 'subscription'],
+                                                       'keywords'      : ['service provider', 'services', 'sla', 'service level agreement', 'uptime', 'response time', 'support', 'maintenance', 'service credits', 'performance metrics', 'implementation'],
+                                                       'weight'        : 1.0,
+                                                      },
+                            'vendor'                : {'subcategories' : ['supplier_agreement', 'procurement', 'master_vendor'],
+                                                       'keywords'      : ['vendor', 'supplier', 'procurement', 'supply chain', 'purchase order', 'fulfillment', 'vendor management', 'pricing', 'terms of supply'],
+                                                       'weight'        : 1.0,
+                                                      },
+                            'agency'                : {'subcategories' : ['marketing_agency', 'recruiting', 'representation'],
+                                                       'keywords'      : ['agent', 'agency', 'principal', 'commission', 'representation', 'authority', 'scope of authority', 'compensation', 'exclusive rights', 'territory'],
+                                                       'weight'        : 1.0,
+                                                      },
+                           }
     # SUBCATEGORY DETECTION PATTERNS
     SUBCATEGORY_PATTERNS = {'full_time'                 : ['full-time', 'full time', 'permanent', 'regular employee', '40 hours', 'exempt employee'],
             raise ValueError("Contract text too short for classification")
         # Preprocess text (use first 3000 chars for efficiency)
+        text_excerpt = contract_text[:3000]
         log_info("Starting contract classification",
                  text_length    = len(contract_text),
         # Step 2: Semantic similarity
         semantic_scores   = self._semantic_similarity(text_excerpt)
+        # Step 3: Legal-BERT semantic similarity (enhanced)
+        legal_bert_scores = self._legal_bert_similarity(text_excerpt)
         # Step 4: Combine scores (weighted average)
         combined_scores   = self._combine_scores(keyword_scores    = keyword_scores,
                                                     subcategory      = subcategory,
                                                     keyword_scores   = keyword_scores,
                                                     semantic_scores  = semantic_scores,
+                                                    legal_bert_scores = legal_bert_scores,
                                                     combined_scores  = combined_scores,
                                                    )
         return similarities
+    def _legal_bert_similarity(self, text: str) -> Dict[str, float]:
         """
+        Use Legal-BERT for semantic similarity calculation
         Arguments:
         ----------
         Returns:
         --------
+            { dict }     : Dictionary of {category: similarity_score} using Legal-BERT embeddings
+        """
+        # Get Legal-BERT embedding for the text
+        text_embedding = self._get_legal_bert_embedding(text)
+        # Calculate similarity to each category's Legal-BERT embedding
+        similarities = dict()
+        for category in self.CATEGORY_HIERARCHY.keys():
+            # Embed a short descriptive prompt for this category (computed on every call, not cached)
+            cat_embedding = self._get_legal_bert_embedding(
+                f"This is a {category.replace('_', ' ')} contract agreement"
+            )
+            # Calculate cosine similarity
+            similarity = torch.nn.functional.cosine_similarity(
+                torch.tensor(text_embedding).unsqueeze(0),
+                torch.tensor(cat_embedding).unsqueeze(0)
+            ).item()
+            similarities[category] = similarity
+        return similarities
+    def _get_legal_bert_embedding(self, text: str) -> np.ndarray:
+        """
+        Get Legal-BERT embedding for text using [CLS] token
+        Arguments:
+        ----------
+            text { str } : Input text
+        Returns:
+        --------
+            { np.ndarray } : Embedding vector
         """
         # Tokenize
         inputs = self.legal_bert_tokenizer(text,
         # Get embeddings
         with torch.no_grad():
             outputs       = self.legal_bert_model(**inputs)
+            # Use [CLS] token embedding (first token)
             cls_embedding = outputs.last_hidden_state[:, 0, :].cpu().numpy()[0]
+        return cls_embedding
     def _combine_scores(self, keyword_scores: Dict[str, float], semantic_scores: Dict[str, float], legal_bert_scores: Dict[str, float] = None) -> Dict[str, float]:
             semantic_scores   { dict } : Semantic similarity scores
+            legal_bert_scores { dict } : Legal-BERT similarity scores (optional)
         Returns:
         --------
         combined          = dict()
         # Weights for each method
+        keyword_weight    = 0.30
+        semantic_weight   = 0.40
+        legal_bert_weight = 0.30
         for category in self.CATEGORY_HIERARCHY.keys():
+            score = (keyword_scores.get(category, 0) * keyword_weight +
+                    semantic_scores.get(category, 0) * semantic_weight +
+                    legal_bert_scores.get(category, 0) * legal_bert_weight)
             combined[category] = score
         return None
+    def _generate_reasoning(self, contract_text: str, primary_category: str, subcategory: Optional[str],
+                           keyword_scores: Dict[str, float], semantic_scores: Dict[str, float],
+                           legal_bert_scores: Dict[str, float], combined_scores: Dict[str, float]) -> List[str]:
         """
         Generate human-readable reasoning for classification
         # Primary category reasoning
         keyword_match  = keyword_scores.get(primary_category, 0)
         semantic_match = semantic_scores.get(primary_category, 0)
+        legal_bert_match = legal_bert_scores.get(primary_category, 0)
         if (keyword_match > 0.5):
             reasoning.append(f"Strong keyword indicators for {primary_category.replace('_', ' ')} category "
                              f"(similarity: {semantic_match:.2f})"
                             )
+        if (legal_bert_match > 0.60):
+            reasoning.append(f"Legal-BERT semantic analysis confirms {primary_category.replace('_', ' ')} classification "
+                             f"(similarity: {legal_bert_match:.2f})"
+                            )
         # Subcategory reasoning
         if subcategory:
             reasoning.append(f"Specific subcategory identified: {subcategory.replace('_', ' ')}")
         log_info("Starting multi-label classification", threshold = threshold)
         # Get scores
+        keyword_scores    = self._score_keywords(text.lower())
+        semantic_scores   = self._semantic_similarity(text[:3000])
+        legal_bert_scores = self._legal_bert_similarity(text[:3000])
+        combined_scores   = self._combine_scores(keyword_scores, semantic_scores, legal_bert_scores)
         # Get all categories above threshold
         matches         = list()
         for category, score in combined_scores.items():
             if (score >= threshold):
                 subcategory = self._detect_subcategory(text, category)
+                reasoning   = self._generate_reasoning(text, category, subcategory, keyword_scores,
+                                                     semantic_scores, legal_bert_scores, combined_scores)
                 keywords    = self._extract_detected_keywords(text, category)
                 matches.append(ContractCategory(category          = category,
         """
         Get subcategories for a specific category
         """
+        return self.CATEGORY_HIERARCHY.get(category, {}).get('subcategories', [])

services/risk_analyzer.py CHANGED Viewed

@@ -1,70 +1,82 @@
-"""
-Multi-Factor Risk Analyzer with sophisticated rule-based scoring
-Analyzes contracts using keyword severity, structural patterns, clause-level analysis,
-industry benchmarks, and missing protections detection
-"""
 import re
-from typing import Dict, List, Tuple, Optional, Any
-from dataclasses import dataclass, field
-from collections import defaultdict
 import sys
 from pathlib import Path
 # Add parent directory to path for imports
 sys.path.append(str(Path(__file__).parent.parent))
-from config.risk_rules import RiskRules, ContractType
-from services.clause_extractor import ExtractedClause
-from utils.logger import ContractAnalyzerLogger, log_info, log_error
 from utils.text_processor import TextProcessor
 @dataclass
 class RiskBreakdownItem:
-    """Individual risk category breakdown"""
-    category: str
-    score: int  # 0-100
-    summary: str
-    findings: List[str] = field(default_factory=list)
     def to_dict(self) -> Dict[str, Any]:
-        """Convert to dictionary"""
-        return {
-            "category": self.category,
-            "score": self.score,
-            "summary": self.summary,
-            "findings": self.findings
-        }
 @dataclass
 class RiskScore:
-    """Comprehensive risk score with detailed breakdown"""
-    overall_score: int  # 0-100
-    risk_level: str  # "CRITICAL", "HIGH", "MEDIUM", "LOW"
-    category_scores: Dict[str, int]
-    risk_factors: List[str]
-    detailed_findings: Dict[str, List[str]]
-    benchmark_comparison: Dict[str, str]
-    risk_breakdown: List[RiskBreakdownItem]
     def to_dict(self) -> Dict[str, Any]:
-        """Convert to dictionary for serialization"""
-        return {
-            "overall_score": self.overall_score,
-            "risk_level": self.risk_level,
-            "category_scores": self.category_scores,
-            "risk_factors": self.risk_factors,
-            "detailed_findings": self.detailed_findings,
-            "benchmark_comparison": self.benchmark_comparison,
-            "risk_breakdown": [item.to_dict() for item in self.risk_breakdown]
-        }
 class MultiFactorRiskAnalyzer:
     """
-    Sophisticated multi-factor risk analysis engine
     Analysis Factors:
     1. Keyword severity scoring (critical/high/medium keywords)
@@ -74,353 +86,386 @@ class MultiFactorRiskAnalyzer:
     5. Missing protections check
     6. Contract type-specific weight adjustments
     """
     def __init__(self, contract_type: ContractType = ContractType.GENERAL):
         """
         Initialize risk analyzer
-        Args:
-            contract_type: Type of contract for specialized analysis
         """
-        self.contract_type = contract_type
-        self.rules = RiskRules()
         self.adjusted_weights = self.rules.get_adjusted_weights(contract_type)
-        self.text_processor = TextProcessor(use_spacy=False)
-        self.logger = ContractAnalyzerLogger.get_logger()
         log_info("MultiFactorRiskAnalyzer initialized",
-                contract_type=contract_type.value,
-                adjusted_weights=self.adjusted_weights)
-    # =========================================================================
-    # MAIN ANALYSIS METHOD
-    # =========================================================================
     @ContractAnalyzerLogger.log_execution_time("analyze_risk")
-    def analyze_risk(self, contract_text: str,
-                    clauses: List[ExtractedClause]) -> RiskScore:
         """
         Comprehensive multi-factor risk analysis
-        Args:
-            contract_text: Full contract text
-            clauses: Extracted clauses from ClauseExtractor
         Returns:
-            RiskScore object with detailed analysis
         """
-        log_info("Starting risk analysis",
-                text_length=len(contract_text),
-                num_clauses=len(clauses),
-                contract_type=self.contract_type.value)
         # Initialize scoring containers
-        category_scores = defaultdict(list)
-        risk_factors = []
-        detailed_findings = defaultdict(list)
-        # Factor 1: Keyword Severity Scoring
-        keyword_risks = self._score_keywords(contract_text)
-        log_info("Keyword analysis complete",
-                critical_score=keyword_risks.get('critical', 0),
-                high_score=keyword_risks.get('high', 0))
-        # Factor 2: Structural Pattern Analysis
-        pattern_risks = self._analyze_patterns(contract_text)
         log_info(f"Pattern analysis found {len(pattern_risks)} risky patterns")
-        # Factor 3: Clause-Level Analysis
-        clause_risks = self._analyze_clauses(clauses)
         log_info(f"Clause analysis complete for {len(clause_risks)} categories")
-        # Factor 4: Missing Protections
-        missing_risks = self._check_missing_protections(contract_text, clauses)
         log_info(f"Missing protections analysis complete")
-        # Factor 5: Industry Benchmark Comparison
         benchmark_comparison = self._compare_to_benchmarks(contract_text, clauses)
         log_info(f"Benchmark comparison complete")
         # Aggregate scores by category
         for category in self.adjusted_weights.keys():
-            category_risk = self._calculate_category_risk(
-                category=category,
-                keyword_risks=keyword_risks,
-                pattern_risks=pattern_risks,
-                clause_risks=clause_risks,
-                missing_risks=missing_risks,
-                benchmark_comparison=benchmark_comparison
-            )
-            category_scores[category] = category_risk["score"]
             detailed_findings[category] = category_risk["findings"]
-            if category_risk["score"] >= self.rules.RISK_THRESHOLDS["high"]:
                 risk_factors.append(category)
         # Calculate weighted overall score
-        overall_score = self._calculate_weighted_score(category_scores)
-        risk_level = self._get_risk_level(overall_score)
         # Create risk breakdown items
-        risk_breakdown = self._create_risk_breakdown(
-            category_scores,
-            detailed_findings
-        )
-        result = RiskScore(
-            overall_score=overall_score,
-            risk_level=risk_level,
-            category_scores=dict(category_scores),
-            risk_factors=risk_factors,
-            detailed_findings=dict(detailed_findings),
-            benchmark_comparison=benchmark_comparison,
-            risk_breakdown=risk_breakdown
-        )
-        log_info("Risk analysis complete",
-                overall_score=overall_score,
-                risk_level=risk_level,
-                high_risk_categories=len(risk_factors))
         return result
-    # =========================================================================
-    # FACTOR 1: KEYWORD SEVERITY SCORING
-    # =========================================================================
     def _score_keywords(self, text: str) -> Dict[str, int]:
         """
         Score text based on keyword severity tiers
         Returns:
-            Dictionary with 'critical', 'high', 'medium' scores
         """
         text_lower = text.lower()
-        scores = defaultdict(int)
         # Critical keywords (Tier 1)
         for keyword, weight in self.rules.CRITICAL_KEYWORDS.items():
             if keyword in text_lower:
-                count = text_lower.count(keyword)
-                scores["critical"] += weight * min(count, 3)  # Cap at 3 occurrences
         # High-risk keywords (Tier 2)
         for keyword, weight in self.rules.HIGH_RISK_KEYWORDS.items():
             if keyword in text_lower:
-                count = text_lower.count(keyword)
                 scores["high"] += weight * min(count, 2)
         # Medium-risk keywords (Tier 3)
         for keyword, weight in self.rules.MEDIUM_RISK_KEYWORDS.items():
             if keyword in text_lower:
-                count = text_lower.count(keyword)
                 scores["medium"] += weight * min(count, 2)
         return dict(scores)
-    # =========================================================================
-    # FACTOR 2: STRUCTURAL PATTERN ANALYSIS
-    # =========================================================================
     def _analyze_patterns(self, text: str) -> List[Dict]:
         """
         Detect risky structural patterns in contract
         Returns:
-            List of detected pattern dictionaries
         """
-        findings = []
         for pattern, risk_points, description in self.rules.RISKY_PATTERNS:
             matches = re.finditer(pattern, text, re.IGNORECASE)
             for match in matches:
-                findings.append({
-                    "pattern": description,
-                    "risk_points": risk_points,
-                    "match": match.group(0)[:100],  # First 100 chars
-                    "position": match.start()
-                })
         return findings
-    # =========================================================================
-    # FACTOR 3: CLAUSE-LEVEL DETAILED ANALYSIS
-    # =========================================================================
     def _analyze_clauses(self, clauses: List[ExtractedClause]) -> Dict[str, List[Dict]]:
         """
         Deep dive into each clause with specific risk factors
         Returns:
-            Dictionary mapping categories to clause analysis results
         """
         clause_analysis = defaultdict(list)
         for clause in clauses:
             # Get risk factors for this clause category
-            if clause.category in self.rules.CLAUSE_RISK_FACTORS:
                 analysis = self._analyze_single_clause(clause)
                 clause_analysis[clause.category].append(analysis)
         return dict(clause_analysis)
     def _analyze_single_clause(self, clause: ExtractedClause) -> Dict:
         """
         Analyze a single clause with detailed risk factors
         Returns:
-            Dictionary with risk_score and findings
         """
         risk_config = self.rules.CLAUSE_RISK_FACTORS.get(clause.category, {})
-        base_risk = risk_config.get("base_risk", 50)
-        risk_score = base_risk
-        findings = []
-        text_lower = clause.text.lower()
         # Check red flags
-        if "red_flags" in risk_config:
             for flag, adjustment in risk_config["red_flags"].items():
                 if flag in text_lower:
                     risk_score += adjustment
-                    severity = "increases" if adjustment > 0 else "decreases"
                     findings.append(f"Found '{flag}' ({severity} risk by {abs(adjustment)} points)")
         # Special handling for specific clause types
-        if clause.category == "non_compete":
             duration_risk = self._analyze_noncompete_duration(clause.text)
-            risk_score += duration_risk["adjustment"]
             findings.extend(duration_risk["findings"])
-            scope_risk = self._analyze_noncompete_scope(clause.text)
-            risk_score += scope_risk["adjustment"]
             findings.extend(scope_risk["findings"])
-        elif clause.category == "termination":
             notice_risk = self._analyze_notice_period(clause.text)
             risk_score += notice_risk["adjustment"]
             findings.extend(notice_risk["findings"])
-        elif clause.category == "indemnification":
             mutual_risk = self._analyze_indemnification_mutuality(clause.text)
             risk_score += mutual_risk["adjustment"]
             findings.extend(mutual_risk["findings"])
-        elif clause.category == "compensation":
             clarity_risk = self._analyze_compensation_clarity(clause.text)
-            risk_score += clarity_risk["adjustment"]
             findings.extend(clarity_risk["findings"])
-        elif clause.category == "intellectual_property":
-            scope_risk = self._analyze_ip_scope(clause.text)
             risk_score += scope_risk["adjustment"]
             findings.extend(scope_risk["findings"])
         # Cap score between 0 and 100
         risk_score = max(0, min(100, risk_score))
-        return {
-            "clause_reference": clause.reference,
-            "risk_score": risk_score,
-            "findings": findings,
-            "confidence": clause.confidence
-        }
     def _analyze_noncompete_duration(self, text: str) -> Dict:
-        """Analyze non-compete duration reasonableness"""
         duration_pattern = r'(\d+)\s*(year|yr|month|mo)s?'
-        matches = re.findall(duration_pattern, text, re.IGNORECASE)
         if not matches:
-            return {"adjustment": 0, "findings": ["No specific duration found"]}
         # Convert to months
         duration_months = 0
         for num, unit in matches:
-            months = int(num) * (12 if 'year' in unit.lower() or 'yr' in unit.lower() else 1)
             duration_months = max(duration_months, months)
         # Get benchmark
-        industry = self._detect_industry()
         benchmark = self.rules.INDUSTRY_BENCHMARKS["non_compete_duration"][industry]
-        if duration_months <= benchmark["reasonable"]:
             return {"adjustment": -10, "findings": [f"Duration of {duration_months} months is reasonable"]}
-        elif duration_months <= benchmark["standard"]:
             return {"adjustment": 0, "findings": [f"Duration of {duration_months} months is standard"]}
-        elif duration_months <= benchmark["excessive"]:
             return {"adjustment": +15, "findings": [f"Duration of {duration_months} months is lengthy"]}
         else:
             return {"adjustment": +30, "findings": [f"Duration of {duration_months} months is excessive"]}
     def _analyze_noncompete_scope(self, text: str) -> Dict:
-        """Analyze non-compete scope reasonableness"""
-        text_lower = text.lower()
-        adjustment = 0
-        findings = []
         scope_config = self.rules.CLAUSE_RISK_FACTORS["non-compete"]["scope_keywords"]
         for keyword, adj in scope_config.items():
             if keyword in text_lower:
                 adjustment += adj
-                severity = "reasonable" if adj < 0 else "concerning"
                 findings.append(f"Scope includes '{keyword}' ({severity})")
         return {"adjustment": adjustment, "findings": findings}
     def _analyze_notice_period(self, text: str) -> Dict:
-        """Analyze termination notice period balance"""
         notice_pattern = r'(\d+)\s*days?\s*(?:notice|prior\s+notice)'
-        matches = re.findall(notice_pattern, text, re.IGNORECASE)
-        if len(matches) < 2:
-            return {"adjustment": 0, "findings": ["Notice period analysis inconclusive"]}
         periods = [int(m) for m in matches]
-        if len(periods) >= 2:
             ratio = max(periods) / min(periods)
-            if ratio >= 4:
-                return {"adjustment": +25, "findings": [f"Notice periods highly imbalanced ({max(periods)} vs {min(periods)} days)"]}
-            elif ratio >= 3:
-                return {"adjustment": +18, "findings": [f"Notice periods significantly imbalanced ({max(periods)} vs {min(periods)} days)"]}
-            elif ratio >= 2:
-                return {"adjustment": +10, "findings": [f"Notice periods moderately imbalanced ({max(periods)} vs {min(periods)} days)"]}
             else:
-                return {"adjustment": -5, "findings": [f"Notice periods balanced ({max(periods)} vs {min(periods)} days)"]}
-        return {"adjustment": 0, "findings": ["Could not determine notice period balance"]}
     def _analyze_indemnification_mutuality(self, text: str) -> Dict:
-        """Check if indemnification is mutual or one-sided"""
-        text_lower = text.lower()
-        mutual_indicators = ["mutual", "both parties", "each party", "reciprocal"]
-        one_sided_indicators = ["employee shall indemnify", "consultant shall indemnify",
-                               "contractor shall indemnify", "you shall indemnify"]
-        has_mutual = any(ind in text_lower for ind in mutual_indicators)
-        has_one_sided = any(ind in text_lower for ind in one_sided_indicators)
-        if has_mutual and not has_one_sided:
-            return {"adjustment": -15, "findings": ["Mutual indemnification (balanced)"]}
         elif has_one_sided:
-            return {"adjustment": +20, "findings": ["One-sided indemnification (unfavorable)"]}
         else:
-            return {"adjustment": 0, "findings": ["Indemnification mutuality unclear"]}
     def _analyze_compensation_clarity(self, text: str) -> Dict:
-        """Analyze clarity of compensation terms"""
-        text_lower = text.lower()
-        adjustment = 0
-        findings = []
         # Check for vague terms
         vague_terms = ["to be determined", "tbd", "subject to review", "discretionary"]
         for term in vague_terms:
             if term in text_lower:
                 adjustment += 10
@@ -431,16 +476,22 @@ class MultiFactorRiskAnalyzer:
             adjustment -= 10
             findings.append("Specific monetary amount provided (good)")
-        return {"adjustment": adjustment, "findings": findings}
     def _analyze_ip_scope(self, text: str) -> Dict:
-        """Analyze IP assignment scope"""
-        text_lower = text.lower()
-        adjustment = 0
-        findings = []
         # Overly broad indicators
         broad_terms = ["all work product", "anything created", "whether or not related"]
         for term in broad_terms:
             if term in text_lower:
                 adjustment += 15
@@ -449,25 +500,31 @@ class MultiFactorRiskAnalyzer:
         # Protective terms
         protective_terms = ["prior ip excluded", "personal projects excluded"]
         for term in protective_terms:
-            if term in text_lower:
                 adjustment -= 15
                 findings.append(f"Protective IP term present: '{term}'")
-        return {"adjustment": adjustment, "findings": findings}
-    # =========================================================================
-    # FACTOR 4: MISSING PROTECTIONS CHECK
-    # =========================================================================
-    def _check_missing_protections(self, text: str,
-                                   clauses: List[ExtractedClause]) -> Dict[str, int]:
         """
         Check for missing critical protections
         Returns:
-            Dictionary mapping categories to risk scores for missing items
         """
-        text_lower = text.lower()
         missing_risks = defaultdict(int)
         for protection_id, config in self.rules.PROTECTION_CHECKLIST.items():
@@ -480,11 +537,11 @@ class MultiFactorRiskAnalyzer:
         return dict(missing_risks)
-    def _check_protection_present(self, protection_id: str,
-                                 text_lower: str,
-                                 clauses: List[ExtractedClause]) -> bool:
-        """Check if a specific protection is present"""
         protection_indicators = {
             "for_cause_definition": ["for cause", "cause defined", "grounds for termination"],
             "severance_provision": ["severance", "severance pay", "separation pay"],

+# DEPENDENCIES
 import re
 import sys
+from typing import Any
+from typing import List
+from typing import Dict
+from typing import Tuple
 from pathlib import Path
+from typing import Optional
+from dataclasses import field
+from dataclasses import dataclass
+from collections import defaultdict
 # Add parent directory to path for imports
 sys.path.append(str(Path(__file__).parent.parent))
+from utils.logger import log_info
+from utils.logger import log_error
+from config.risk_rules import RiskRules
+from config.risk_rules import ContractType
 from utils.text_processor import TextProcessor
+from utils.logger import ContractAnalyzerLogger
+from services.clause_extractor import ExtractedClause
 @dataclass
 class RiskBreakdownItem:
     """
     One risk category's slice of the overall assessment: its name, score, summary text and supporting findings
     """
     category : str
     score    : int  # 0-100
     summary  : str
     findings : List[str] = field(default_factory = list)


     def to_dict(self) -> Dict[str, Any]:
         """
         Serialize the item into a plain dictionary

         Returns:
         --------
             { dict } : Mapping with the 'category', 'score', 'summary' and 'findings' keys, in that order
         """
         exported_fields = ("category", "score", "summary", "findings")

         return {name: getattr(self, name) for name in exported_fields}
 @dataclass
 class RiskScore:
     """
     Full result of a risk analysis: the overall score and level plus the per-category detail behind them
     """
     overall_score        : int            # 0-100
     risk_level           : str            # "CRITICAL", "HIGH", "MEDIUM", "LOW"
     category_scores      : Dict[str, int]
     risk_factors         : List[str]
     detailed_findings    : Dict[str, List[str]]
     benchmark_comparison : Dict[str, str]
     risk_breakdown       : List[RiskBreakdownItem]


     def to_dict(self) -> Dict[str, Any]:
         """
         Serialize the score into a plain dictionary

         Returns:
         --------
             { dict } : One entry per field; 'risk_breakdown' holds the to_dict() output of every breakdown item
         """
         serialized_breakdown = list()

         for breakdown_item in self.risk_breakdown:
             serialized_breakdown.append(breakdown_item.to_dict())

         payload = dict(overall_score        = self.overall_score,
                        risk_level           = self.risk_level,
                        category_scores      = self.category_scores,
                        risk_factors         = self.risk_factors,
                        detailed_findings    = self.detailed_findings,
                        benchmark_comparison = self.benchmark_comparison,
                        risk_breakdown       = serialized_breakdown,
                       )

         return payload
 class MultiFactorRiskAnalyzer:
     """
+    Multi-factor risk analysis engine
     Analysis Factors:
     1. Keyword severity scoring (critical/high/medium keywords)
     5. Missing protections check
     6. Contract type-specific weight adjustments
     """
     def __init__(self, contract_type: ContractType = ContractType.GENERAL):
         """
         Set up the analyzer: rule set, contract-type-adjusted weights, text processor and logger

         Arguments:
         ----------
             contract_type { ContractType } : Type of contract for specialized analysis
         """
         rules                 = RiskRules()

         self.contract_type    = contract_type
         self.rules            = rules
         self.adjusted_weights = rules.get_adjusted_weights(contract_type)
         self.text_processor   = TextProcessor(use_spacy = True)
         self.logger           = ContractAnalyzerLogger.get_logger()

         log_info("MultiFactorRiskAnalyzer initialized",
                  contract_type    = contract_type.value,
                  adjusted_weights = self.adjusted_weights,
                 )
     @ContractAnalyzerLogger.log_execution_time("analyze_risk")
     def analyze_risk(self, contract_text: str, clauses: List[ExtractedClause]) -> RiskScore:
         """
         Score a contract across every weighted risk category and bundle the outcome into a RiskScore

         Arguments:
         ----------
             contract_text { str } : Full contract text

             clauses      { list } : Extracted clauses from ClauseExtractor

         Returns:
         --------
                { RiskScore }      : Overall score and level, per-category scores and findings, the categories at or
                                     above the 'high' threshold, the benchmark comparison and the breakdown items
         """
         log_info("Starting risk analysis",
                  text_length   = len(contract_text),
                  num_clauses   = len(clauses),
                  contract_type = self.contract_type.value,
                 )

         # Gather the raw signal from each analysis factor, logging progress after every step
         keyword_risks        = self._score_keywords(contract_text)
         log_info("Keyword analysis complete",
                  critical_score = keyword_risks.get('critical', 0),
                  high_score     = keyword_risks.get('high', 0),
                 )

         pattern_risks        = self._analyze_patterns(contract_text)
         log_info(f"Pattern analysis found {len(pattern_risks)} risky patterns")

         clause_risks         = self._analyze_clauses(clauses)
         log_info(f"Clause analysis complete for {len(clause_risks)} categories")

         missing_risks        = self._check_missing_protections(contract_text, clauses)
         log_info("Missing protections analysis complete")

         benchmark_comparison = self._compare_to_benchmarks(contract_text, clauses)
         log_info("Benchmark comparison complete")

         # Per-category accumulators
         category_scores      = defaultdict(list)
         detailed_findings    = defaultdict(list)
         risk_factors         = list()

         # Fold the factor signals into one score per weighted category
         for category in self.adjusted_weights:
             outcome                     = self._calculate_category_risk(category             = category,
                                                                         keyword_risks        = keyword_risks,
                                                                         pattern_risks        = pattern_risks,
                                                                         clause_risks         = clause_risks,
                                                                         missing_risks        = missing_risks,
                                                                         benchmark_comparison = benchmark_comparison,
                                                                        )
             score_for_category          = outcome["score"]
             category_scores[category]   = score_for_category
             detailed_findings[category] = outcome["findings"]

             # Categories at or above the 'high' threshold are surfaced as risk factors
             if (score_for_category >= self.rules.RISK_THRESHOLDS["high"]):
                 risk_factors.append(category)

         overall_score  = self._calculate_weighted_score(category_scores)
         risk_level     = self._get_risk_level(overall_score)
         risk_breakdown = self._create_risk_breakdown(category_scores, detailed_findings)

         result         = RiskScore(overall_score        = overall_score,
                                    risk_level           = risk_level,
                                    category_scores      = dict(category_scores),
                                    risk_factors         = risk_factors,
                                    detailed_findings    = dict(detailed_findings),
                                    benchmark_comparison = benchmark_comparison,
                                    risk_breakdown       = risk_breakdown,
                                   )

         log_info("Risk analysis complete",
                  overall_score        = overall_score,
                  risk_level           = risk_level,
                  high_risk_categories = len(risk_factors),
                 )

         return result
     def _score_keywords(self, text: str) -> Dict[str, int]:
         """
         Score text based on keyword severity tiers
         Returns:
+        --------
+            { dict } : Dictionary with 'critical', 'high', 'medium' scores
         """
         text_lower = text.lower()
+        scores     = defaultdict(int)
         # Critical keywords (Tier 1)
         for keyword, weight in self.rules.CRITICAL_KEYWORDS.items():
             if keyword in text_lower:
+                count               = text_lower.count(keyword)
+                # Cap at 3 occurrences
+                scores["critical"] += weight * min(count, 3)
         # High-risk keywords (Tier 2)
         for keyword, weight in self.rules.HIGH_RISK_KEYWORDS.items():
             if keyword in text_lower:
+                count           = text_lower.count(keyword)
                 scores["high"] += weight * min(count, 2)
         # Medium-risk keywords (Tier 3)
         for keyword, weight in self.rules.MEDIUM_RISK_KEYWORDS.items():
             if keyword in text_lower:
+                count             = text_lower.count(keyword)
                 scores["medium"] += weight * min(count, 2)
         return dict(scores)
     def _analyze_patterns(self, text: str) -> List[Dict]:
         """
         Detect risky structural patterns in contract
         Returns:
+        --------
+            { list }    : List of detected pattern dictionaries
         """
+        findings = list()
         for pattern, risk_points, description in self.rules.RISKY_PATTERNS:
             matches = re.finditer(pattern, text, re.IGNORECASE)
             for match in matches:
+                findings.append({"pattern"     : description,
+                                 "risk_points" : risk_points,
+                                 "match"       : match.group(0)[:100],  # First 100 chars
+                                 "position"    : match.start(),
+                               })
         return findings
     def _analyze_clauses(self, clauses: List[ExtractedClause]) -> Dict[str, List[Dict]]:
         """
         Deep dive into each clause with specific risk factors
         Returns:
+        --------
+            { dict }    : Dictionary mapping categories to clause analysis results
         """
         clause_analysis = defaultdict(list)
         for clause in clauses:
             # Get risk factors for this clause category
+            if (clause.category in self.rules.CLAUSE_RISK_FACTORS):
                 analysis = self._analyze_single_clause(clause)
                 clause_analysis[clause.category].append(analysis)
         return dict(clause_analysis)
     def _analyze_single_clause(self, clause: ExtractedClause) -> Dict:
         """
         Analyze a single clause with detailed risk factors
         Returns:
+        --------
+            { dict }    : Dictionary with risk_score and findings
         """
         risk_config = self.rules.CLAUSE_RISK_FACTORS.get(clause.category, {})
+        base_risk   = risk_config.get("base_risk", 50)
+        risk_score  = base_risk
+        findings    = list()
+        text_lower  = clause.text.lower()
         # Check red flags
+        if ("red_flags" in risk_config):
             for flag, adjustment in risk_config["red_flags"].items():
                 if flag in text_lower:
                     risk_score += adjustment
+                    severity    = "increases" if (adjustment > 0) else "decreases"
                     findings.append(f"Found '{flag}' ({severity} risk by {abs(adjustment)} points)")
         # Special handling for specific clause types
+        if (clause.category == "non_compete"):
             duration_risk = self._analyze_noncompete_duration(clause.text)
+            risk_score   += duration_risk["adjustment"]
             findings.extend(duration_risk["findings"])
+            scope_risk    = self._analyze_noncompete_scope(clause.text)
+            risk_score   += scope_risk["adjustment"]
             findings.extend(scope_risk["findings"])
+        elif (clause.category == "termination"):
             notice_risk = self._analyze_notice_period(clause.text)
             risk_score += notice_risk["adjustment"]
             findings.extend(notice_risk["findings"])
+        elif (clause.category == "indemnification"):
             mutual_risk = self._analyze_indemnification_mutuality(clause.text)
             risk_score += mutual_risk["adjustment"]
             findings.extend(mutual_risk["findings"])
+        elif (clause.category == "compensation"):
             clarity_risk = self._analyze_compensation_clarity(clause.text)
+            risk_score  += clarity_risk["adjustment"]
             findings.extend(clarity_risk["findings"])
+        elif (clause.category == "intellectual_property"):
+            scope_risk  = self._analyze_ip_scope(clause.text)
             risk_score += scope_risk["adjustment"]
             findings.extend(scope_risk["findings"])
         # Cap score between 0 and 100
         risk_score = max(0, min(100, risk_score))
+        return {"clause_reference" : clause.reference,
+                "risk_score"       : risk_score,
+                "findings"         : findings,
+                "confidence"       : clause.confidence,
+               }
     def _analyze_noncompete_duration(self, text: str) -> Dict:
+        """
+        Analyze non-compete duration reasonableness
+        Arguments:
+        ----------
+            text { str } : Clause text scanned for durations written as a number followed by a year/month unit
+        Returns:
+        --------
+            { dict }     : "adjustment" (risk-score delta) and "findings" (list of messages)
+        """
+        # NOTE(review): the unit alternation has no trailing word boundary, so text such as
+        # "5 more" would match as 5 "mo" - confirm whether that is acceptable
         duration_pattern = r'(\d+)\s*(year|yr|month|mo)s?'
+        matches          = re.findall(duration_pattern, text, re.IGNORECASE)
         if not matches:
+            return {"adjustment" : 0,
+                    "findings"   : ["No specific duration found"],
+                   }
         # Convert to months
         duration_months = 0
         for num, unit in matches:
+            # Year-style units are scaled by 12; every other matched unit is taken as months
+            months          = int(num) * (12 if 'year' in unit.lower() or 'yr' in unit.lower() else 1)
+            # Only the longest duration mentioned in the clause is scored
             duration_months = max(duration_months, months)
         # Get benchmark
+        industry  = self._detect_industry()
         benchmark = self.rules.INDUSTRY_BENCHMARKS["non_compete_duration"][industry]
+        # Thresholds are checked in ascending order: reasonable, standard, excessive
+        if (duration_months <= benchmark["reasonable"]):
             return {"adjustment": -10, "findings": [f"Duration of {duration_months} months is reasonable"]}
+        elif (duration_months <= benchmark["standard"]):
             return {"adjustment": 0, "findings": [f"Duration of {duration_months} months is standard"]}
+        elif (duration_months <= benchmark["excessive"]):
             return {"adjustment": +15, "findings": [f"Duration of {duration_months} months is lengthy"]}
         else:
             return {"adjustment": +30, "findings": [f"Duration of {duration_months} months is excessive"]}
     def _analyze_noncompete_scope(self, text: str) -> Dict:
+        """
+        Analyze non-compete scope reasonableness
+        Arguments:
+        ----------
+            text { str } : Clause text, matched case-insensitively against the configured scope keywords
+        Returns:
+        --------
+            { dict }     : "adjustment" (sum of the matched keyword adjustments) and "findings" (one message per match)
+        """
+        text_lower   = text.lower()
+        adjustment   = 0
+        findings     = list()
+        # NOTE(review): this key is spelled "non-compete" while the clause category compared by the
+        # caller is "non_compete" - confirm the rules table really uses the hyphenated key
         scope_config = self.rules.CLAUSE_RISK_FACTORS["non-compete"]["scope_keywords"]
         for keyword, adj in scope_config.items():
             if keyword in text_lower:
                 adjustment += adj
+                # Negative adjustments lower the score and are reported as reasonable
+                severity    = "reasonable" if adj < 0 else "concerning"
                 findings.append(f"Scope includes '{keyword}' ({severity})")
         return {"adjustment": adjustment, "findings": findings}
     def _analyze_notice_period(self, text: str) -> Dict:
+        """
+        Analyze termination notice period balance
+        """
         notice_pattern = r'(\d+)\s*days?\s*(?:notice|prior\s+notice)'
+        matches        = re.findall(notice_pattern, text, re.IGNORECASE)
+        if (len(matches) < 2):
+            return {"adjustment" : 0,
+                    "findings"   : ["Notice period analysis inconclusive"],
+                   }
         periods = [int(m) for m in matches]
+        if (len(periods) >= 2):
             ratio = max(periods) / min(periods)
+            if (ratio >= 4):
+                return {"adjustment" : +25, "findings": [f"Notice periods highly imbalanced ({max(periods)} vs {min(periods)} days)"]}
+            elif (ratio >= 3):
+                return {"adjustment" : +18,
+                        "findings"   : [f"Notice periods significantly imbalanced ({max(periods)} vs {min(periods)} days)"],
+                       }
+            elif (ratio >= 2):
+                return {"adjustment" : +10,
+                        "findings"   : [f"Notice periods moderately imbalanced ({max(periods)} vs {min(periods)} days)"],
+                       }
             else:
+                return {"adjustment" : -5,
+                        "findings"   : [f"Notice periods balanced ({max(periods)} vs {min(periods)} days)"],
+                       }
+        return {"adjustment" : 0,
+                "findings"   : ["Could not determine notice period balance"],
+               }
     def _analyze_indemnification_mutuality(self, text: str) -> Dict:
+        """
+        Check if indemnification is mutual or one-sided
+        Arguments:
+        ----------
+            text { str } : Clause text, matched case-insensitively against fixed indicator phrases
+        Returns:
+        --------
+            { dict }     : "adjustment" (-15 mutual, +20 one-sided, 0 unclear) and "findings" (single message)
+        """
+        text_lower           = text.lower()
+        mutual_indicators    = ["mutual", "both parties", "each party", "reciprocal"]
+        one_sided_indicators = ["employee shall indemnify", "consultant shall indemnify", "contractor shall indemnify", "you shall indemnify"]
+        has_mutual           = any(ind in text_lower for ind in mutual_indicators)
+        has_one_sided        = any(ind in text_lower for ind in one_sided_indicators)
+        # A one-sided phrase takes precedence: text with both kinds of indicator is scored as one-sided
+        if (has_mutual and not has_one_sided):
+            return {"adjustment" : -15,
+                    "findings"   : ["Mutual indemnification (balanced)"],
+                   }
         elif has_one_sided:
+            return {"adjustment" : +20,
+                    "findings"   : ["One-sided indemnification (unfavorable)"],
+                   }
         else:
+            return {"adjustment" : 0,
+                    "findings"   : ["Indemnification mutuality unclear"],
+                   }
     def _analyze_compensation_clarity(self, text: str) -> Dict:
+        """
+        Analyze clarity of compensation terms
+        """
+        text_lower  = text.lower()
+        adjustment  = 0
+        findings    = list()
         # Check for vague terms
         vague_terms = ["to be determined", "tbd", "subject to review", "discretionary"]
         for term in vague_terms:
             if term in text_lower:
                 adjustment += 10
             adjustment -= 10
             findings.append("Specific monetary amount provided (good)")
+        return {"adjustment" : adjustment,
+                "findings"   : findings,
+               }
     def _analyze_ip_scope(self, text: str) -> Dict:
+        """
+        Analyze IP assignment scope
+        Arguments:
+        ----------
+            text { str } : Clause text, matched case-insensitively against fixed broad/protective phrases
+        Returns:
+        --------
+            { dict }     : "adjustment" (+15 per broad phrase, -15 per protective phrase) and "findings"
+        """
+        text_lower  = text.lower()
+        adjustment  = 0
+        findings    = list()
         # Overly broad indicators
         broad_terms = ["all work product", "anything created", "whether or not related"]
         for term in broad_terms:
             if term in text_lower:
+                # NOTE(review): as shown here a broad phrase only changes the score and records no
+                # finding - confirm nothing was dropped from this hunk
                 adjustment += 15
         # Protective terms
         protective_terms = ["prior ip excluded", "personal projects excluded"]
         for term in protective_terms:
+            if (term in text_lower):
                 adjustment -= 15
                 findings.append(f"Protective IP term present: '{term}'")
+        return {"adjustment" : adjustment,
+                "findings"   : findings,
+               }
+    def _check_missing_protections(self, text: str, clauses: List[ExtractedClause]) -> Dict[str, int]:
         """
         Check for missing critical protections
+        Arguments:
+        ----------
+            text     { str } :
+            clauses { list } :
         Returns:
+        --------
+            { dict }    : Dictionary mapping categories to risk scores for missing items
         """
+        text_lower    = text.lower()
         missing_risks = defaultdict(int)
         for protection_id, config in self.rules.PROTECTION_CHECKLIST.items():
         return dict(missing_risks)
+    def _check_protection_present(self, protection_id: str, text_lower: str, clauses: List[ExtractedClause]) -> bool:
+        """
+        Check if a specific protection is present
+        """
         protection_indicators = {
             "for_cause_definition": ["for cause", "cause defined", "grounds for termination"],
             "severance_provision": ["severance", "severance pay", "separation pay"],

services/summary_generator.py ADDED Viewed

	@@ -0,0 +1,570 @@

+# services/summary_generator.py
+import logging
+from typing import Dict, List, Optional
+from dataclasses import dataclass
+from utils.logger import ContractAnalyzerLogger
+from model_manager.llm_manager import LLMManager, LLMProvider
+logger = ContractAnalyzerLogger.get_logger()
+@dataclass
+class SummaryContext:
+    """Context data for summary generation"""
+    # Contract category label; filled from the classification's "category" entry
+    contract_type: str
+    # Overall risk score; rendered as "<score>/100" in prompts and the fallback summary
+    risk_score: int
+    # Risk level label; lower-cased and looked up among critical/high/medium/low
+    risk_level: str
+    # Per-category scores, each rendered as "<score>/100"
+    category_scores: Dict[str, int]
+    # Records (dicts or objects) read for severity / term / explanation
+    unfavorable_terms: List[Dict]
+    # Records (dicts or objects) read for importance / protection / explanation
+    missing_protections: List[Dict]
+    # Clause records (dicts or objects) read for confidence / risk_level / category / text
+    clauses: List
+    # Short finding strings produced by SummaryGenerator._extract_key_findings
+    key_findings: List[str]
+class SummaryGenerator:
+    """
+    LLM-powered executive summary generator for contract analysis
+    Generates professional, detailed executive summaries like legal professionals
+    """
+    def __init__(self, llm_manager: Optional[LLMManager] = None):
+        """
+        Initialize the summary generator
+        Args:
+            llm_manager: LLM manager instance; any falsy value (including None) is replaced
+                by a new LLMManager() built with its default settings
+        """
+        self.llm_manager = llm_manager or LLMManager()
+        # Stored on the instance, although the methods of this class log through the module-level logger
+        self.logger = ContractAnalyzerLogger.get_logger()
+        # Use proper logging syntax without keyword arguments
+        logger.info("Summary generator initialized")
+    def generate_executive_summary(self,
+                                 classification: Dict,
+                                 risk_analysis: Dict,
+                                 unfavorable_terms: List[Dict],
+                                 missing_protections: List[Dict],
+                                 clauses: List) -> str:
+        """
+        Generate a comprehensive executive summary using LLM
+        Args:
+            classification: Contract classification data
+            risk_analysis: Risk analysis results
+            unfavorable_terms: List of unfavorable terms
+            missing_protections: List of missing protections
+            clauses: List of analyzed clauses (ExtractedClause objects)
+        Returns:
+            Generated executive summary string
+        """
+        try:
+            # Prepare context for the LLM
+            context = self._prepare_summary_context(
+                classification, risk_analysis, unfavorable_terms,
+                missing_protections, clauses
+            )
+            # Generate summary using LLM
+            summary = self._generate_with_llm(context)
+            # Use proper logging syntax
+            logger.info(f"Executive summary generated successfully - Risk score: {context.risk_score}, Risk level: {context.risk_level}")
+            return summary
+        except Exception as e:
+            logger.error(f"Failed to generate executive summary: {e}")
+            # Create fallback context if preparation failed
+            fallback_context = SummaryContext(
+                contract_type=classification.get("category", "contract"),
+                risk_score=risk_analysis.get("overall_score", 0),
+                risk_level=risk_analysis.get("risk_level", "unknown"),
+                category_scores=risk_analysis.get("category_scores", {}),
+                unfavorable_terms=unfavorable_terms,
+                missing_protections=missing_protections,
+                clauses=clauses,
+                key_findings=[]
+            )
+            # Fallback to simple summary
+            return self._generate_fallback_summary(fallback_context)
+    def _prepare_summary_context(self,
+                               classification: Dict,
+                               risk_analysis: Dict,
+                               unfavorable_terms: List[Dict],
+                               missing_protections: List[Dict],
+                               clauses: List) -> SummaryContext:
+        """Prepare structured context for summary generation"""
+        contract_type = classification.get("category", "contract")
+        risk_score = risk_analysis.get("overall_score", 0)
+        risk_level = risk_analysis.get("risk_level", "unknown")
+        category_scores = risk_analysis.get("category_scores", {})
+        # Extract key findings
+        key_findings = self._extract_key_findings(
+            unfavorable_terms, missing_protections, clauses, risk_score
+        )
+        return SummaryContext(
+            contract_type=contract_type,
+            risk_score=risk_score,
+            risk_level=risk_level,
+            category_scores=category_scores,
+            unfavorable_terms=unfavorable_terms,
+            missing_protections=missing_protections,
+            clauses=clauses,
+            key_findings=key_findings
+        )
+    def _extract_key_findings(self,
+                            unfavorable_terms: List[Dict],
+                            missing_protections: List[Dict],
+                            clauses: List,
+                            risk_score: int) -> List[str]:
+        """Extract the most important findings for the summary"""
+        findings = []
+        # High-risk clauses - handle both dict and object clauses
+        high_risk_clauses = []
+        for clause in clauses:
+            try:
+                # Try to access as object first, then as dict
+                if hasattr(clause, 'confidence'):
+                    confidence = clause.confidence
+                    risk_level = getattr(clause, 'risk_level', None)
+                    category = getattr(clause, 'category', 'clause')
+                    text = getattr(clause, 'text', '')
+                else:
+                    # Fallback to dict access
+                    confidence = clause.get('confidence', 0)
+                    risk_level = clause.get('risk_level')
+                    category = clause.get('category', 'clause')
+                    text = clause.get('text', '')
+                if confidence > 0.7 and risk_level in ['high', 'critical']:
+                    high_risk_clauses.append({
+                        'category': category,
+                        'text': text,
+                        'confidence': confidence,
+                        'risk_level': risk_level
+                    })
+            except (AttributeError, KeyError, TypeError):
+                # Skip clauses that can't be processed
+                continue
+        for clause in high_risk_clauses[:3]:  # Top 3 high-risk clauses
+            clause_text = clause['text'][:100] + '...' if len(clause['text']) > 100 else clause['text']
+            findings.append(f"High-risk {clause['category']}: {clause_text}")
+        # Critical unfavorable terms
+        critical_terms = []
+        for term in unfavorable_terms:
+            try:
+                if hasattr(term, 'severity'):
+                    severity = term.severity
+                    term_name = getattr(term, 'term', 'Unknown')
+                    explanation = getattr(term, 'explanation', '')
+                else:
+                    severity = term.get('severity')
+                    term_name = term.get('term', 'Unknown')
+                    explanation = term.get('explanation', '')
+                if severity == 'critical':
+                    critical_terms.append({
+                        'term': term_name,
+                        'explanation': explanation
+                    })
+            except (AttributeError, KeyError, TypeError):
+                continue
+        for term in critical_terms[:2]:
+            findings.append(f"Critical term: {term['term']} - {term['explanation']}")
+        # Important missing protections
+        critical_protections = []
+        for prot in missing_protections:
+            try:
+                if hasattr(prot, 'importance'):
+                    importance = prot.importance
+                    protection_name = getattr(prot, 'protection', 'Unknown')
+                    explanation = getattr(prot, 'explanation', '')
+                else:
+                    importance = prot.get('importance')
+                    protection_name = prot.get('protection', 'Unknown')
+                    explanation = prot.get('explanation', '')
+                if importance == 'critical':
+                    critical_protections.append({
+                        'protection': protection_name,
+                        'explanation': explanation
+                    })
+            except (AttributeError, KeyError, TypeError):
+                continue
+        for prot in critical_protections[:2]:
+            findings.append(f"Missing protection: {prot['protection']}")
+        # Overall risk context
+        if risk_score >= 80:
+            findings.append("Contract presents critical level of risk requiring immediate attention")
+        elif risk_score >= 60:
+            findings.append("Significant concerns identified requiring careful review")
+        return findings
+    def _generate_with_llm(self, context: SummaryContext) -> str:
+        """Generate summary using LLM"""
+        prompt = self._build_summary_prompt(context)
+        system_prompt = self._build_system_prompt()
+        try:
+            response = self.llm_manager.complete(
+                prompt=prompt,
+                system_prompt=system_prompt,
+                temperature=0.3,  # Lower temperature for more consistent, professional output
+                max_tokens=800,   # Limit summary length
+                json_mode=False
+            )
+            if response.success and response.text.strip():
+                return self._clean_summary_response(response.text)
+            else:
+                raise ValueError(f"LLM generation failed: {response.error_message}")
+        except Exception as e:
+            logger.error(f"LLM summary generation failed: {e}")
+            raise
+    def _build_system_prompt(self) -> str:
+        """
+        Build system prompt for professional summary generation
+        Returns:
+            A fixed instruction string (no interpolation) setting the analyst persona,
+            the length/structure requirements and the plain-paragraph output format
+        """
+        return """You are a senior legal analyst specializing in contract risk assessment. Your task is to generate concise, professional executive summaries that:
+KEY REQUIREMENTS:
+1. Write in formal, professional business language
+2. Focus on the most critical risks and implications
+3. Be specific about contractual provisions and their impact
+4. Maintain objective, factual tone
+5. Keep summary length between 100-200 words
+6. Structure: Start with overall risk assessment, then key findings, then implications
+WRITING STYLE:
+- Use precise legal/business terminology
+- Avoid markdown formatting
+- Be direct and actionable
+- Highlight asymmetrical terms and missing protections
+- Focus on practical consequences for the signing party
+OUTPUT FORMAT:
+Return only the executive summary text, no headings, no bullet points, just clean paragraph text."""
+    def _build_summary_prompt(self, context: SummaryContext) -> str:
+        """
+        Build detailed prompt for summary generation
+        Args:
+            context: Summary context whose data is rendered into the three prompt sections
+        Returns:
+            The user prompt: risk, key-provision and missing-protection sections followed by
+            the generation instructions, which mention context.contract_type
+        """
+        # Build risk context
+        risk_context = self._build_risk_context(context)
+        # Build key provisions section
+        key_provisions = self._build_key_provisions_context(context)
+        # Build missing protections section
+        missing_protections_text = self._build_missing_protections_context(context)
+        prompt = f"""
+CONTRACT ANALYSIS DATA:
+{risk_context}
+{key_provisions}
+{missing_protections_text}
+GENERATION INSTRUCTIONS:
+Based on the analysis above, write a professional executive summary that:
+1. Starts with the overall risk assessment for the {context.contract_type}
+2. Highlights the 2-3 most critical issues
+3. Explains the practical implications for the signing party
+4. Mentions any severely imbalanced or punitive clauses
+5. Notes significant missing protections
+Focus on clarity, specificity, and actionable insights.
+"""
+        return prompt
+    def _build_risk_context(self, context: SummaryContext) -> str:
+        """Build risk assessment context"""
+        risk_level_descriptions = {
+            "critical": "CRITICAL level of risk requiring immediate attention",
+            "high": "HIGH level of risk requiring significant review",
+            "medium": "MODERATE level of risk with some concerns",
+            "low": "LOW level of risk, generally favorable"
+        }
+        risk_desc = risk_level_descriptions.get(context.risk_level.lower(), "UNKNOWN level of risk")
+        text = f"RISK ASSESSMENT:\n"
+        text += f"- Overall Score: {context.risk_score}/100 ({risk_desc})\n"
+        text += f"- Contract Type: {context.contract_type.replace('_', ' ').title()}\n"
+        # Add category scores
+        if context.category_scores:
+            text += "- Risk by Category:\n"
+            for category, score in context.category_scores.items():
+                category_name = category.replace('_', ' ').title()
+                text += f"  * {category_name}: {score}/100\n"
+        return text
+    def _build_key_provisions_context(self, context: SummaryContext) -> str:
+        """Build context about key provisions and unfavorable terms"""
+        text = "KEY PROVISIONS & UNFAVORABLE TERMS:\n"
+        # Critical terms first
+        critical_terms = []
+        for term in context.unfavorable_terms:
+            try:
+                if hasattr(term, 'severity'):
+                    severity = term.severity
+                else:
+                    severity = term.get('severity')
+                if severity == 'critical':
+                    critical_terms.append(term)
+            except (AttributeError, KeyError):
+                continue
+        high_terms = []
+        for term in context.unfavorable_terms:
+            try:
+                if hasattr(term, 'severity'):
+                    severity = term.severity
+                else:
+                    severity = term.get('severity')
+                if severity == 'high':
+                    high_terms.append(term)
+            except (AttributeError, KeyError):
+                continue
+        if critical_terms:
+            text += f"- Critical Issues Found: {len(critical_terms)}\n"
+            for term in critical_terms[:3]:
+                try:
+                    if hasattr(term, 'term'):
+                        term_name = term.term
+                        explanation = getattr(term, 'explanation', '')
+                    else:
+                        term_name = term.get('term', 'Unknown')
+                        explanation = term.get('explanation', '')
+                    text += f"  * {term_name}: {explanation}\n"
+                except (AttributeError, KeyError):
+                    continue
+        if high_terms:
+            text += f"- Significant Concerns: {len(high_terms)}\n"
+            for term in high_terms[:2]:
+                try:
+                    if hasattr(term, 'term'):
+                        term_name = term.term
+                        explanation = getattr(term, 'explanation', '')
+                    else:
+                        term_name = term.get('term', 'Unknown')
+                        explanation = term.get('explanation', '')
+                    text += f"  * {term_name}: {explanation}\n"
+                except (AttributeError, KeyError):
+                    continue
+        # High-risk clauses
+        high_risk_clauses = []
+        for clause in context.clauses:
+            try:
+                if hasattr(clause, 'confidence'):
+                    confidence = clause.confidence
+                    risk_level = getattr(clause, 'risk_level', None)
+                else:
+                    confidence = clause.get('confidence', 0)
+                    risk_level = clause.get('risk_level')
+                if confidence > 0.7 and risk_level in ['high', 'critical']:
+                    high_risk_clauses.append(clause)
+            except (AttributeError, KeyError, TypeError):
+                continue
+        if high_risk_clauses:
+            text += f"- High-Risk Clauses Identified: {len(high_risk_clauses)}\n"
+            for clause in high_risk_clauses[:2]:
+                try:
+                    if hasattr(clause, 'category'):
+                        category = clause.category
+                        clause_text = getattr(clause, 'text', '')
+                    else:
+                        category = clause.get('category', 'Unknown')
+                        clause_text = clause.get('text', '')
+                    display_text = clause_text[:80] + '...' if len(clause_text) > 80 else clause_text
+                    text += f"  * {category}: {display_text}\n"
+                except (AttributeError, KeyError):
+                    continue
+        return text
+    def _build_missing_protections_context(self, context: SummaryContext) -> str:
+        """Build context about missing protections"""
+        text = "MISSING PROTECTIONS:\n"
+        critical_protections = []
+        for prot in context.missing_protections:
+            try:
+                if hasattr(prot, 'importance'):
+                    importance = prot.importance
+                else:
+                    importance = prot.get('importance')
+                if importance == 'critical':
+                    critical_protections.append(prot)
+            except (AttributeError, KeyError):
+                continue
+        important_protections = []
+        for prot in context.missing_protections:
+            try:
+                if hasattr(prot, 'importance'):
+                    importance = prot.importance
+                else:
+                    importance = prot.get('importance')
+                if importance == 'high':
+                    important_protections.append(prot)
+            except (AttributeError, KeyError):
+                continue
+        if critical_protections:
+            text += f"- Critical Protections Missing: {len(critical_protections)}\n"
+            for prot in critical_protections[:3]:
+                try:
+                    if hasattr(prot, 'protection'):
+                        protection_name = prot.protection
+                        explanation = getattr(prot, 'explanation', '')
+                    else:
+                        protection_name = prot.get('protection', 'Unknown')
+                        explanation = prot.get('explanation', '')
+                    text += f"  * {protection_name}: {explanation}\n"
+                except (AttributeError, KeyError):
+                    continue
+        if important_protections:
+            text += f"- Important Protections Missing: {len(important_protections)}\n"
+            for prot in important_protections[:2]:
+                try:
+                    if hasattr(prot, 'protection'):
+                        protection_name = prot.protection
+                        explanation = getattr(prot, 'explanation', '')
+                    else:
+                        protection_name = prot.get('protection', 'Unknown')
+                        explanation = prot.get('explanation', '')
+                    text += f"  * {protection_name}: {explanation}\n"
+                except (AttributeError, KeyError):
+                    continue
+        if not critical_protections and not important_protections:
+            text += "- No critical protections missing\n"
+        return text
+    def _clean_summary_response(self, text: str) -> str:
+        """Clean and format the LLM response"""
+        # Remove any markdown formatting
+        text = text.replace('**', '').replace('*', '').replace('#', '')
+        # Remove common LLM artifacts
+        lines = text.split('\n')
+        cleaned_lines = []
+        for line in lines:
+            line = line.strip()
+            if line and not line.lower().startswith(('executive summary', 'summary:', 'here is', 'based on')):
+                cleaned_lines.append(line)
+        # Join into coherent paragraph
+        summary = ' '.join(cleaned_lines)
+        # Ensure proper sentence structure
+        if summary and not summary[0].isupper():
+            summary = summary[0].upper() + summary[1:]
+        if summary and not summary.endswith(('.', '!', '?')):
+            summary += '.'
+        return summary
+    def _generate_fallback_summary(self, context: SummaryContext) -> str:
+        """Generate a fallback summary when LLM is not available"""
+        contract_type_display = context.contract_type.replace('_', ' ').title()
+        # Count critical items
+        critical_terms = 0
+        for term in context.unfavorable_terms:
+            try:
+                if hasattr(term, 'severity'):
+                    if term.severity == 'critical':
+                        critical_terms += 1
+                else:
+                    if term.get('severity') == 'critical':
+                        critical_terms += 1
+            except (AttributeError, KeyError):
+                continue
+        critical_protections = 0
+        for prot in context.missing_protections:
+            try:
+                if hasattr(prot, 'importance'):
+                    if prot.importance == 'critical':
+                        critical_protections += 1
+                else:
+                    if prot.get('importance') == 'critical':
+                        critical_protections += 1
+            except (AttributeError, KeyError):
+                continue
+        if context.risk_score >= 80:
+            risk_assessment = f"This {contract_type_display} presents a CRITICAL level of risk"
+            action = "requires immediate attention and significant revision"
+        elif context.risk_score >= 60:
+            risk_assessment = f"This {contract_type_display} presents a HIGH level of risk"
+            action = "requires careful review and substantial negotiation"
+        elif context.risk_score >= 40:
+            risk_assessment = f"This {contract_type_display} presents a MODERATE level of risk"
+            action = "requires review and selective negotiation"
+        else:
+            risk_assessment = f"This {contract_type_display} presents a LOW level of risk"
+            action = "appears generally reasonable but should be reviewed"
+        summary = f"{risk_assessment} with a score of {context.risk_score}/100. "
+        summary += f"The agreement {action}. "
+        if critical_terms > 0:
+            summary += f"Found {critical_terms} critical unfavorable terms and "
+        else:
+            summary += f"Found {len(context.unfavorable_terms)} unfavorable terms and "
+        if critical_protections > 0:
+            summary += f"{critical_protections} critical missing protections. "
+        else:
+            summary += f"{len(context.missing_protections)} missing protections. "
+        summary += "Review the detailed analysis below for specific clauses and recommendations."
+        return summary

static/index.html CHANGED Viewed

@@ -15,14 +15,14 @@
             font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
             background: #ffffff;
             color: #333;
-            line-height: 1.6;
         }
         /* Header */
         .header {
             background: white;
-            border-bottom: 1px solid #e5e5e5;
-            padding: 1rem 2rem;
             display: flex;
             justify-content: space-between;
             align-items: center;
@@ -49,32 +49,32 @@
             align-items: center;
             justify-content: center;
             color: white;
-            font-size: 18px;
         }
         .subtitle {
             color: #666;
-            font-size: 0.9rem;
-            font-weight: 400;
         }
         .container {
             max-width: 1200px;
             margin: 0 auto;
-            padding: 0 2rem;
         }
-        /* Landing Page Styles - Updated to match screenshot */
         .landing-screen {
-            padding-top: 80px;
         }
         .hero-section {
             text-align: center;
-            padding: 6rem 0 4rem;
             background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
             color: white;
-            margin-bottom: 4rem;
         }
         .hero-title {
@@ -88,7 +88,7 @@
             font-size: 1.3rem;
             margin-bottom: 2.5rem;
             opacity: 0.95;
-            max-width: 600px;
             margin-left: auto;
             margin-right: auto;
         }
@@ -112,42 +112,42 @@
         }
         .section {
-            padding: 4rem 0;
             text-align: center;
         }
         .section-title {
             font-size: 2.2rem;
             font-weight: 600;
-            margin-bottom: 3rem;
             color: #333;
         }
         .section-subtitle {
             font-size: 1.2rem;
             color: #666;
-            margin-bottom: 3rem;
-            max-width: 800px;
             margin-left: auto;
             margin-right: auto;
-            line-height: 1.8;
         }
         .features-grid {
             display: grid;
             grid-template-columns: repeat(3, 1fr);
             gap: 3rem;
-            margin-bottom: 4rem;
         }
         .feature-card {
             text-align: center;
-            padding: 2rem;
         }
         .feature-icon {
             font-size: 3rem;
-            margin-bottom: 1.5rem;
         }
         .feature-title {
@@ -159,25 +159,26 @@
         .feature-description {
             color: #666;
-            line-height: 1.7;
             font-size: 1rem;
         }
         .steps-section {
-            background: #f8f9fa;
-            padding: 5rem 0;
         }
         .steps-grid {
             display: grid;
             grid-template-columns: repeat(3, 1fr);
             gap: 3rem;
-            margin-top: 3rem;
         }
         .step-card {
             text-align: center;
-            padding: 2rem;
         }
         .step-number {
@@ -191,7 +192,7 @@
             justify-content: center;
             font-size: 1.5rem;
             font-weight: 700;
-            margin: 0 auto 1.5rem;
         }
         .step-title {
@@ -203,16 +204,16 @@
         .step-description {
             color: #666;
-            line-height: 1.7;
         }
         .footer {
             text-align: center;
-            padding: 3rem 2rem;
             color: #999;
             font-size: 0.9rem;
-            border-top: 1px solid #e5e5e5;
-            background: #f8f9fa;
         }
         /* Analyzer Styles */
@@ -240,6 +241,14 @@
             margin-bottom: 2rem;
         }
         .upload-card {
             background: white;
             border-radius: 12px;
@@ -446,11 +455,11 @@
             font-size: 1rem;
         }
-        .results-screen {
             display: none;
         }
-        .results-screen.active {
             display: block;
         }
@@ -493,22 +502,26 @@
             border: 1px solid #fecaca;
         }
-        /* Results screen styles */
         .results-header {
             display: flex;
             justify-content: space-between;
-            align-items: center;
             margin-bottom: 2rem;
         }
         .results-title {
             font-size: 2rem;
             font-weight: 700;
         }
         .results-actions {
             display: flex;
             gap: 1rem;
         }
         .btn {
@@ -519,6 +532,10 @@
             cursor: pointer;
             border: none;
             transition: all 0.2s;
         }
         .btn-primary {
@@ -582,7 +599,6 @@
             transform: translate(-50%, -50%);
             font-size: 3rem;
             font-weight: 700;
-            color: #dc2626;
         }
         .risk-level {
@@ -864,9 +880,10 @@
         <!-- Hero Section -->
         <section class="hero-section">
             <div class="container">
-                <h1 class="hero-title">Unlock Legal Intelligence<br>Analyze Contracts with AI</h1>
                 <p class="hero-subtitle">
                     Instantly identify risks, uncover unfavorable terms, and gain actionable negotiation points.
                     Our AI-powered platform gives you the clarity and confidence to sign better contracts.
                 </p>
                 <button class="cta-button" id="getStartedBtn">Try Now for Free</button>
@@ -954,48 +971,46 @@
                 ← Back to Overview
             </button>
-            <div class="hero-section-analyzer">
-                <h1 class="hero-title-analyzer">Analyze Your Contract in Seconds</h1>
-                <p class="hero-description">Paste your contract or upload a file to get an instant, AI-powered risk assessment.</p>
-            </div>
-            <!-- API Status Indicator -->
-            <div id="apiStatus" class="api-status" style="display: none;">
-                Checking backend connection...
-            </div>
-            <div class="upload-card">
-                <div class="tabs">
-                    <button class="tab active" data-tab="paste">Paste Text</button>
-                    <button class="tab" data-tab="upload">Upload File</button>
                 </div>
-                <div id="pasteTab" class="tab-content active">
-                    <textarea class="textarea" id="contractText" placeholder="Paste your full contract text here..."></textarea>
-                </div>
-                <div id="uploadTab" class="tab-content">
-                    <div class="file-upload-area" id="fileUploadArea">
-                        <input type="file" id="fileInput" class="file-input" accept=".pdf,.docx,.txt">
-                        <div class="upload-icon">📄</div>
-                        <div class="upload-text">Click to upload or drag and drop</div>
-                        <div class="upload-hint">PDF, DOCX, or TXT files (Max 10MB)</div>
                     </div>
-                    <div id="selectedFile" class="selected-file" style="display: none;">
-                        <div class="file-icon">📄</div>
-                        <div class="file-info">
-                            <div class="file-name" id="fileName"></div>
-                            <div class="file-size" id="fileSize"></div>
                         </div>
-                        <button class="remove-file" id="removeFile">×</button>
                     </div>
-                </div>
-                <div class="analyze-btn-container">
-                    <button class="analyze-btn" id="analyzeBtn">
-                        <span>🔍</span>
-                        <span>Analyze Contract</span>
-                    </button>
                 </div>
             </div>
@@ -1006,8 +1021,8 @@
                 <p class="loading-text">This may take a moment for large documents.</p>
             </div>
-            <!-- Results Screen -->
-            <div id="resultsScreen" class="results-screen">
                 <div class="results-header">
                     <h1 class="results-title">Analysis Report</h1>
                     <div class="results-actions">
@@ -1098,20 +1113,19 @@
     </div>
     <script>
-        const API_BASE_URL = window.location.hostname === 'localhost'
-            ? 'http://localhost:8000/api/v1'
-            : '/api/v1';
         let selectedFile = null;
-        let currentJobId = null;
-        let pollInterval = null;
         // Screen management
         function showScreen(screenName) {
             document.getElementById('landingScreen').style.display = 'none';
             document.getElementById('analyzerScreen').style.display = 'none';
             document.getElementById('loadingScreen').classList.remove('active');
-            document.getElementById('resultsScreen').classList.remove('active');
             if (screenName === 'landing') {
                 document.getElementById('landingScreen').style.display = 'block';
@@ -1121,9 +1135,11 @@
             } else if (screenName === 'loading') {
                 document.getElementById('analyzerScreen').style.display = 'block';
                 document.getElementById('loadingScreen').classList.add('active');
             } else if (screenName === 'results') {
                 document.getElementById('analyzerScreen').style.display = 'block';
-                document.getElementById('resultsScreen').classList.add('active');
             }
         }
@@ -1143,14 +1159,15 @@
                 });
                 if (response.ok) {
-                    statusElement.textContent = '✓ Backend connected successfully';
                     statusElement.className = 'api-status connected';
                 } else {
                     throw new Error('Backend not responding properly');
                 }
             } catch (error) {
                 console.error('Backend connection failed:', error);
-                statusElement.textContent = '✗ Cannot connect to backend. Make sure the server is running on port 8000.';
                 statusElement.className = 'api-status disconnected';
                 setTimeout(() => {
@@ -1168,6 +1185,18 @@
             showScreen('landing');
         });
         // Tab switching
         document.querySelectorAll('.tab').forEach(tab => {
             tab.addEventListener('click', (e) => {
@@ -1271,15 +1300,13 @@
                         alert('Please paste contract text');
                         return;
                     }
-                    const blob = new Blob([text], { type: 'text/plain' });
-                    const file = new File([blob], 'contract.txt', { type: 'text/plain' });
-                    await analyzeContract(file);
                 } else {
                     if (!selectedFile) {
                         alert('Please select a file');
                         return;
                     }
-                    await analyzeContract(selectedFile);
                 }
             } catch (error) {
                 console.error('Analysis error:', error);
@@ -1290,7 +1317,8 @@
             }
         });
-        async function analyzeContract(file) {
             try {
                 showScreen('loading');
@@ -1300,9 +1328,8 @@
                 formData.append('interpret_clauses', 'true');
                 formData.append('generate_negotiation_points', 'true');
                 formData.append('compare_to_market', 'true');
-                formData.append('llm_provider', 'ollama');
-                const response = await fetch(`${API_BASE_URL}/analyze`, {
                     method: 'POST',
                     body: formData
                 });
@@ -1318,10 +1345,10 @@
                     throw new Error(errorDetail);
                 }
-                const job = await response.json();
-                currentJobId = job.job_id;
-                pollInterval = setInterval(() => pollJobStatus(currentJobId), 2000);
             } catch (error) {
                 console.error('Error:', error);
@@ -1330,44 +1357,110 @@
             }
         }
-        async function pollJobStatus(jobId) {
             try {
-                const response = await fetch(`${API_BASE_URL}/jobs/${jobId}`);
-                if (!response.ok) throw new Error('Failed to fetch job status');
-                const job = await response.json();
-                if (job.status === 'completed') {
-                    clearInterval(pollInterval);
-                    displayResults(job.result);
-                    showScreen('results');
-                } else if (job.status === 'failed') {
-                    clearInterval(pollInterval);
-                    alert('Analysis failed: ' + job.error);
-                    showScreen('analyzer');
                 }
             } catch (error) {
-                console.error('Polling error:', error);
             }
         }
         function displayResults(result) {
             const score = result.risk_analysis.overall_score;
             const riskLevel = result.risk_analysis.risk_level;
             document.getElementById('riskScoreValue').textContent = score;
             document.getElementById('riskLevel').textContent = riskLevel.toUpperCase();
             document.getElementById('riskLevel').className = 'risk-level risk-' + getRiskClass(score);
             const circumference = 534;
             const offset = circumference - (score / 100) * circumference;
             const circle = document.getElementById('riskCircle');
             circle.style.strokeDashoffset = offset;
-            circle.style.stroke = getRiskColor(score);
             document.getElementById('executiveSummary').textContent = result.executive_summary;
-            // Update other result sections...
             const unfavorableList = document.getElementById('unfavorableTermsList');
             unfavorableList.innerHTML = '';
             if (result.unfavorable_terms && result.unfavorable_terms.length > 0) {
@@ -1380,7 +1473,91 @@
                 unfavorableList.innerHTML = '<li>No unfavorable terms detected</li>';
             }
-            // Similar updates for other sections...
         }
         function getRiskClass(score) {
@@ -1397,6 +1574,24 @@
             return '#16a34a';
         }
         // Initialize
         showScreen('landing');
     </script>

             font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
             background: #ffffff;
             color: #333;
+            line-height: 1.5;
         }
         /* Header */
         .header {
             background: white;
+            border-bottom: 0.2px solid #e5e5e5;
+            padding: 0.3rem 2.0rem;
             display: flex;
             justify-content: space-between;
             align-items: center;
             align-items: center;
             justify-content: center;
             color: white;
+            font-size: 20px;
         }
         .subtitle {
             color: #666;
+            font-size: 1.0rem;
+            font-weight: 500;
         }
         .container {
             max-width: 1200px;
             margin: 0 auto;
+            padding: 0 0.2rem;
         }
+        /* Landing Page Styles */
         .landing-screen {
+            padding-top: 50px;
         }
         .hero-section {
             text-align: center;
+            padding: 1rem 0 1rem;
             background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
             color: white;
+            margin-bottom: 2rem;
         }
         .hero-title {
             font-size: 1.3rem;
             margin-bottom: 2.5rem;
             opacity: 0.95;
+            max-width: 1000px;
             margin-left: auto;
             margin-right: auto;
         }
         }
         .section {
+            padding: 0.5rem 0;
             text-align: center;
         }
         .section-title {
             font-size: 2.2rem;
             font-weight: 600;
+            margin-bottom: 1rem;
             color: #333;
         }
         /* Centered introductory paragraph shown under a section title. */
         .section-subtitle {
             font-size: 1.2rem;
             color: #666;
+            margin-bottom: 1rem;
+            max-width: 1000px;
             margin-left: auto;
             margin-right: auto;
+            /* Must stay above 1: a line box shorter than the glyphs makes wrapped lines overlap. */
+            line-height: 1.5;
         }
         .features-grid {
             display: grid;
             grid-template-columns: repeat(3, 1fr);
             gap: 3rem;
+            margin-bottom: 0.1rem;
         }
         .feature-card {
             text-align: center;
+            padding: 1rem;
         }
         .feature-icon {
             font-size: 3rem;
+            margin-bottom: 1.0rem;
         }
         .feature-title {
         .feature-description {
             color: #666;
+            line-height: 1.5;
             font-size: 1rem;
         }
         .steps-section {
+            background: white;
+            padding: 1rem 0;
+            text-align: center;
         }
         .steps-grid {
             display: grid;
             grid-template-columns: repeat(3, 1fr);
             gap: 3rem;
+            margin-top: 0.5rem;
         }
         .step-card {
             text-align: center;
+            padding: 0.5rem;
         }
         .step-number {
             justify-content: center;
             font-size: 1.5rem;
             font-weight: 700;
+            margin: 0 auto 1.2rem;
         }
         .step-title {
         .step-description {
             color: #666;
+            line-height: 1.5;
         }
         .footer {
             text-align: center;
+            padding: 1rem 0.5rem;
             color: #999;
             font-size: 0.9rem;
+            border-top: 0.5px solid #e5e5e5;
+            background: white;
         }
         /* Analyzer Styles */
             margin-bottom: 2rem;
         }
+        .upload-section {
+            transition: all 0.3s ease;
+        }
+        .upload-section.hidden {
+            display: none !important;
+        }
         .upload-card {
             background: white;
             border-radius: 12px;
             font-size: 1rem;
         }
+        .results-content {
             display: none;
         }
+        .results-content.active {
             display: block;
         }
             border: 1px solid #fecaca;
         }
+        /* Results screen styles - UPDATED BUTTON POSITIONING */
         .results-header {
             display: flex;
             justify-content: space-between;
+            align-items: flex-start;
             margin-bottom: 2rem;
+            gap: 2rem;
         }
         .results-title {
             font-size: 2rem;
             font-weight: 700;
+            flex: 1;
         }
         .results-actions {
             display: flex;
             gap: 1rem;
+            align-items: center;
+            justify-content: flex-end;
         }
         .btn {
             cursor: pointer;
             border: none;
             transition: all 0.2s;
+            display: flex;
+            align-items: center;
+            gap: 0.5rem;
+            white-space: nowrap;
         }
         .btn-primary {
             transform: translate(-50%, -50%);
             font-size: 3rem;
             font-weight: 700;
         }
         .risk-level {
         <!-- Hero Section -->
         <section class="hero-section">
             <div class="container">
+                <h1 class="hero-title">Unlock Legal Intelligence : Analyze Contracts with AI</h1>
                 <p class="hero-subtitle">
                     Instantly identify risks, uncover unfavorable terms, and gain actionable negotiation points.
+                    <br>
                     Our AI-powered platform gives you the clarity and confidence to sign better contracts.
                 </p>
                 <button class="cta-button" id="getStartedBtn">Try Now for Free</button>
                 ← Back to Overview
             </button>
+            <!-- Upload Section - Shown by default -->
+            <div id="uploadSection" class="upload-section">
+                <div class="hero-section-analyzer">
+                    <h1 class="hero-title-analyzer">Analyze Your Contract in Seconds</h1>
+                    <p class="hero-description">Paste your contract or upload a file to get an instant, AI-powered risk assessment.</p>
                 </div>
+                <div class="upload-card">
+                    <div class="tabs">
+                        <button class="tab active" data-tab="paste">Paste Text</button>
+                        <button class="tab" data-tab="upload">Upload File</button>
+                    </div>
+                    <div id="pasteTab" class="tab-content active">
+                        <textarea class="textarea" id="contractText" placeholder="Paste your full contract text here..."></textarea>
                     </div>
+                    <div id="uploadTab" class="tab-content">
+                        <div class="file-upload-area" id="fileUploadArea">
+                            <input type="file" id="fileInput" class="file-input" accept=".pdf,.docx,.txt">
+                            <div class="upload-icon">📄</div>
+                            <div class="upload-text">Click to upload or drag and drop</div>
+                            <div class="upload-hint">PDF, DOCX, or TXT files (Max 10MB)</div>
+                        </div>
+                        <div id="selectedFile" class="selected-file" style="display: none;">
+                            <div class="file-icon">📄</div>
+                            <div class="file-info">
+                                <div class="file-name" id="fileName"></div>
+                                <div class="file-size" id="fileSize"></div>
+                            </div>
+                            <button class="remove-file" id="removeFile">×</button>
                         </div>
                     </div>
+                    <div class="analyze-btn-container">
+                        <button class="analyze-btn" id="analyzeBtn">
+                            <span>🔍</span>
+                            <span>Analyze Contract</span>
+                        </button>
+                    </div>
                 </div>
             </div>
                 <p class="loading-text">This may take a moment for large documents.</p>
             </div>
+            <!-- Results Content - Hidden by default -->
+            <div id="resultsContent" class="results-content">
                 <div class="results-header">
                     <h1 class="results-title">Analysis Report</h1>
                     <div class="results-actions">
     </div>
     <script>
+        // DYNAMIC API BASE URL - Automatically detects current port
+        const API_BASE_URL = `${window.location.protocol}//${window.location.host}/api/v1`;
         let selectedFile = null;
+        let currentAnalysisResult = null;
         // Screen management
         function showScreen(screenName) {
             document.getElementById('landingScreen').style.display = 'none';
             document.getElementById('analyzerScreen').style.display = 'none';
             document.getElementById('loadingScreen').classList.remove('active');
+            document.getElementById('resultsContent').classList.remove('active');
+            document.getElementById('uploadSection').classList.remove('hidden');
             if (screenName === 'landing') {
                 document.getElementById('landingScreen').style.display = 'block';
             } else if (screenName === 'loading') {
                 document.getElementById('analyzerScreen').style.display = 'block';
                 document.getElementById('loadingScreen').classList.add('active');
+                document.getElementById('uploadSection').classList.add('hidden');
             } else if (screenName === 'results') {
                 document.getElementById('analyzerScreen').style.display = 'block';
+                document.getElementById('resultsContent').classList.add('active');
+                document.getElementById('uploadSection').classList.add('hidden');
             }
         }
                 });
                 if (response.ok) {
+                    const data = await response.json();
+                    statusElement.textContent = `✓ Backend connected (${data.models_loaded} models, ${data.services_loaded} services)`;
                     statusElement.className = 'api-status connected';
                 } else {
                     throw new Error('Backend not responding properly');
                 }
             } catch (error) {
                 console.error('Backend connection failed:', error);
+                statusElement.textContent = '✗ Cannot connect to backend. Make sure the server is running.';
                 statusElement.className = 'api-status disconnected';
                 setTimeout(() => {
             showScreen('landing');
         });
+        // "Analyze another" button: clears the pasted text and any chosen file,
+        // then switches back to the analyzer screen.
+        // NOTE(review): fileInput, selectedFileDiv and fileUploadArea are not declared in
+        // this hunk - presumably element references defined earlier in the script; confirm.
+        document.getElementById('analyzeAnotherBtn').addEventListener('click', () => {
+            // Reset form
+            document.getElementById('contractText').value = '';
+            selectedFile = null;
+            fileInput.value = '';
+            selectedFileDiv.style.display = 'none';
+            fileUploadArea.style.display = 'block';
+            // Show upload section again
+            showScreen('analyzer');
+        });
         // Tab switching
         document.querySelectorAll('.tab').forEach(tab => {
             tab.addEventListener('click', (e) => {
                         alert('Please paste contract text');
                         return;
                     }
+                    await analyzeContractText(text);
                 } else {
                     if (!selectedFile) {
                         alert('Please select a file');
                         return;
                     }
+                    await analyzeContractFile(selectedFile);
                 }
             } catch (error) {
                 console.error('Analysis error:', error);
             }
         });
+        // Direct file analysis (synchronous)
+        async function analyzeContractFile(file) {
             try {
                 showScreen('loading');
                 formData.append('interpret_clauses', 'true');
                 formData.append('generate_negotiation_points', 'true');
                 formData.append('compare_to_market', 'true');
+                const response = await fetch(`${API_BASE_URL}/analyze/file`, {
                     method: 'POST',
                     body: formData
                 });
                     throw new Error(errorDetail);
                 }
+                const result = await response.json();
+                currentAnalysisResult = result;
+                displayResults(result);
+                showScreen('results');
             } catch (error) {
                 console.error('Error:', error);
             }
         }
+        // Direct text analysis (synchronous)
+        async function analyzeContractText(text) {
             try {
+                showScreen('loading');
+                const formData = new FormData();
+                formData.append('contract_text', text);
+                formData.append('max_clauses', '15');
+                formData.append('interpret_clauses', 'true');
+                formData.append('generate_negotiation_points', 'true');
+                formData.append('compare_to_market', 'true');
+                const response = await fetch(`${API_BASE_URL}/analyze/text`, {
+                    method: 'POST',
+                    body: formData
+                });
+                if (!response.ok) {
+                    let errorDetail = 'Analysis failed';
+                    try {
+                        const errorData = await response.json();
+                        errorDetail = errorData.detail || errorData.error || errorDetail;
+                    } catch (e) {
+                        errorDetail = `Server error: ${response.status} ${response.statusText}`;
+                    }
+                    throw new Error(errorDetail);
                 }
+                const result = await response.json();
+                currentAnalysisResult = result;
+                displayResults(result);
+                showScreen('results');
             } catch (error) {
+                console.error('Error:', error);
+                alert('Error analyzing contract: ' + error.message);
+                showScreen('analyzer');
             }
         }
+        // Download PDF
+        document.getElementById('downloadBtn').addEventListener('click', async () => {
+            if (!currentAnalysisResult) {
+                alert('No analysis results available to download');
+                return;
+            }
+            try {
+                const response = await fetch(`${API_BASE_URL}/generate-pdf`, {
+                    method: 'POST',
+                    headers: {
+                        'Content-Type': 'application/json',
+                    },
+                    body: JSON.stringify(currentAnalysisResult)
+                });
+                if (!response.ok) {
+                    throw new Error('Failed to generate PDF');
+                }
+                const blob = await response.blob();
+                const url = window.URL.createObjectURL(blob);
+                const a = document.createElement('a');
+                a.style.display = 'none';
+                a.href = url;
+                a.download = `contract_analysis_${currentAnalysisResult.analysis_id}.pdf`;
+                document.body.appendChild(a);
+                a.click();
+                window.URL.revokeObjectURL(url);
+                document.body.removeChild(a);
+            } catch (error) {
+                console.error('PDF download error:', error);
+                alert('Error downloading PDF: ' + error.message);
+            }
+        });
         function displayResults(result) {
             const score = result.risk_analysis.overall_score;
             const riskLevel = result.risk_analysis.risk_level;
+            // Update risk score
             document.getElementById('riskScoreValue').textContent = score;
             document.getElementById('riskLevel').textContent = riskLevel.toUpperCase();
             document.getElementById('riskLevel').className = 'risk-level risk-' + getRiskClass(score);
+            // Update risk circle
             const circumference = 534;
             const offset = circumference - (score / 100) * circumference;
             const circle = document.getElementById('riskCircle');
             circle.style.strokeDashoffset = offset;
+            // Get risk color and apply to both circle and text
+            const riskColor = getRiskColor(score);
+            circle.style.stroke = riskColor;
+            // Update text color in the middle of the circle
+            const riskScoreValue = document.getElementById('riskScoreValue');
+            riskScoreValue.style.color = riskColor;
+            // Update executive summary
             document.getElementById('executiveSummary').textContent = result.executive_summary;
+            // Update unfavorable terms
             const unfavorableList = document.getElementById('unfavorableTermsList');
             unfavorableList.innerHTML = '';
             if (result.unfavorable_terms && result.unfavorable_terms.length > 0) {
                 unfavorableList.innerHTML = '<li>No unfavorable terms detected</li>';
             }
+            // Update missing protections
+            const missingList = document.getElementById('missingProtectionsList');
+            missingList.innerHTML = '';
+            if (result.missing_protections && result.missing_protections.length > 0) {
+                result.missing_protections.slice(0, 8).forEach(protection => {
+                    const li = document.createElement('li');
+                    li.innerHTML = `<span class="item-icon">›</span><span class="item-text"><strong>${protection.protection}:</strong> ${protection.explanation}</span>`;
+                    missingList.appendChild(li);
+                });
+            } else {
+                missingList.innerHTML = '<li>No missing protections detected</li>';
+            }
+            // Update negotiation points
+            const negotiationList = document.getElementById('negotiationPointsList');
+            negotiationList.innerHTML = '';
+            if (result.negotiation_points && result.negotiation_points.length > 0) {
+                result.negotiation_points.slice(0, 8).forEach(point => {
+                    const li = document.createElement('li');
+                    li.innerHTML = `<span class="item-icon">›</span><span class="item-text"><strong>${point.point}:</strong> ${point.explanation}</span>`;
+                    negotiationList.appendChild(li);
+                });
+            } else {
+                negotiationList.innerHTML = '<li>No negotiation points generated</li>';
+            }
+            // Update category breakdown
+            const categoryBreakdown = document.getElementById('categoryBreakdown');
+            categoryBreakdown.innerHTML = '';
+            if (result.risk_analysis.category_scores) {
+                Object.entries(result.risk_analysis.category_scores).forEach(([category, score]) => {
+                    const categoryItem = document.createElement('div');
+                    categoryItem.className = 'category-item';
+                    const riskClass = getRiskClass(score);
+                    const riskColor = getRiskColor(score);
+                    categoryItem.innerHTML = `
+                        <div class="category-header">
+                            <span class="category-name">${formatCategoryName(category)}</span>
+                            <span class="category-score score-${riskClass}">${score}/100</span>
+                        </div>
+                        <div class="progress-bar">
+                            <div class="progress-fill progress-${riskClass}" style="width: ${score}%"></div>
+                        </div>
+                        <div class="category-description">
+                            ${getCategoryDescription(category, score)}
+                        </div>
+                    `;
+                    categoryBreakdown.appendChild(categoryItem);
+                });
+            }
+            // Update clause analysis
+            const clauseAnalysis = document.getElementById('clauseAnalysis');
+            clauseAnalysis.innerHTML = '';
+            if (result.clauses && result.clauses.length > 0) {
+                result.clauses.slice(0, 10).forEach(clause => {
+                    const clauseItem = document.createElement('div');
+                    clauseItem.className = `clause-item ${getRiskClass(clause.confidence * 100)}`;
+                    clauseItem.innerHTML = `
+                        <div class="clause-header">
+                            <div>
+                                <div class="clause-label">${clause.reference} • ${clause.category}</div>
+                                <div class="clause-text">${clause.text.substring(0, 200)}${clause.text.length > 200 ? '...' : ''}</div>
+                            </div>
+                            <div class="severity-badge badge-${getRiskClass(clause.confidence * 100)}">
+                                ${Math.round(clause.confidence * 100)}% confidence
+                            </div>
+                        </div>
+                        ${clause.risk_indicators && clause.risk_indicators.length > 0 ? `
+                            <div class="clause-section">
+                                <div class="clause-section-title">Risk Indicators</div>
+                                <div class="clause-section-text">${clause.risk_indicators.join(', ')}</div>
+                            </div>
+                        ` : ''}
+                    `;
+                    clauseAnalysis.appendChild(clauseItem);
+                });
+            }
         }
         function getRiskClass(score) {
             return '#16a34a';
         }
+        function formatCategoryName(category) {
+            return category.split('_').map(word =>
+                word.charAt(0).toUpperCase() + word.slice(1)
+            ).join(' ');
+        }
+        function getCategoryDescription(category, score) {
+            const descriptions = {
+                'termination': score > 60 ? 'High termination risk detected' : 'Termination terms appear reasonable',
+                'compensation': score > 60 ? 'Compensation structure needs review' : 'Compensation terms are clear',
+                'confidentiality': score > 60 ? 'Confidentiality terms may be overly broad' : 'Confidentiality terms are balanced',
+                'liability': score > 60 ? 'Liability allocation needs attention' : 'Liability terms are reasonable',
+                'intellectual_property': score > 60 ? 'IP rights allocation requires review' : 'IP terms are well-defined'
+            };
+            return descriptions[category] || 'Review recommended based on risk score';
+        }
         // Initialize
         showScreen('landing');
     </script>

utils/logger.py CHANGED Viewed

@@ -114,7 +114,7 @@ class ContractAnalyzerLogger:
     @classmethod
-    def log_structured(cls, level: int, message: str, request_id: Optional[str] = None, **kwargs):
         """
         Log structured data as JSON
@@ -124,15 +124,12 @@ class ContractAnalyzerLogger:
             message    { str } : Log message
-            request_id { str } : Optional request ID for tracking
             **kwargs           : Additional structured data
         """
         logger   = cls.get_logger()
         log_data = {"timestamp"  : datetime.now().isoformat(),
                     "message"    : message,
-                    "request_id" : request_id,
                     **kwargs
                    }
@@ -140,7 +137,7 @@ class ContractAnalyzerLogger:
     @classmethod
-    def log_error(cls, error: Exception, context: Dict[str, Any] = None, request_id: Optional[str] = None):
         """
         Log error with full traceback and context
@@ -149,8 +146,6 @@ class ContractAnalyzerLogger:
             error      { Exception } : Exception object
             context      { dict }    : Additional context dictionary
-            request_id    { str }    : Request ID for tracking
         """
         error_logger = cls._loggers.get("contract_analyzer.error")
@@ -158,7 +153,6 @@ class ContractAnalyzerLogger:
             error_logger = cls.get_logger()
         error_data = {"timestamp"     : datetime.now().isoformat(),
-                      "request_id"    : request_id,
                       "error_type"    : type(error).__name__,
                       "error_message" : str(error),
                       "traceback"     : traceback.format_exc(),
@@ -169,7 +163,7 @@ class ContractAnalyzerLogger:
     @classmethod
-    def log_performance(cls, operation: str, duration: float, request_id: Optional[str] = None, **metrics):
         """
         Log performance metrics
@@ -179,8 +173,6 @@ class ContractAnalyzerLogger:
             duration  { float } : Duration in seconds
-            request_id { str }  : Request ID
             **metrics           : Additional metrics
         """
         perf_logger = cls._loggers.get("contract_analyzer.performance")
@@ -188,7 +180,6 @@ class ContractAnalyzerLogger:
             perf_logger = cls.get_logger()
         perf_data = {"timestamp"        : datetime.now().isoformat(),
-                     "request_id"       : request_id,
                      "operation"        : operation,
                      "duration_seconds" : round(duration, 3),
                      **metrics

     @classmethod
+    def log_structured(cls, level: int, message: str, **kwargs):
         """
         Log structured data as JSON
             message    { str } : Log message
             **kwargs           : Additional structured data
         """
         logger   = cls.get_logger()
         log_data = {"timestamp"  : datetime.now().isoformat(),
                     "message"    : message,
                     **kwargs
                    }
     @classmethod
+    def log_error(cls, error: Exception, context: Dict[str, Any] = None):
         """
         Log error with full traceback and context
             error      { Exception } : Exception object
             context      { dict }    : Additional context dictionary
         """
         error_logger = cls._loggers.get("contract_analyzer.error")
             error_logger = cls.get_logger()
         error_data = {"timestamp"     : datetime.now().isoformat(),
                       "error_type"    : type(error).__name__,
                       "error_message" : str(error),
                       "traceback"     : traceback.format_exc(),
     @classmethod
+    def log_performance(cls, operation: str, duration: float, **metrics):
         """
         Log performance metrics
             duration  { float } : Duration in seconds
             **metrics           : Additional metrics
         """
         perf_logger = cls._loggers.get("contract_analyzer.performance")
             perf_logger = cls.get_logger()
         perf_data = {"timestamp"        : datetime.now().isoformat(),
                      "operation"        : operation,
                      "duration_seconds" : round(duration, 3),
                      **metrics