File size: 5,461 Bytes
4b022af |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 |
from fastapi import FastAPI, File, UploadFile, HTTPException
from fastapi.responses import JSONResponse
import tempfile
import os
from typing import List
# Import our services
from app.services.text_extractor import extract_text_from_pdf
from app.services.preprocessor import segment_into_clauses
from app.services.risk_analyzer import analyze_clause_with_gemini
from app.services.risk_scorer import calculate_scores, get_risk_definition
from app.schemas import AnalysisReport, AnalyzedClause, RiskFinding
# Application object that the route decorators below register against.
# The title, description and version are passed to FastAPI as app metadata.
app = FastAPI(
    version="1.0.0",
    title="Multilingual Legal Contract Analyzer",
    description="AI-powered contract analysis for English and Indic languages",
)
@app.post("/analyze/", response_model=AnalysisReport)
async def analyze_contract(file: UploadFile = File(...)):
    """
    Analyze an uploaded contract PDF and return a clause-by-clause risk report.

    The upload is written to a temporary ``.pdf`` file, its text is extracted
    with ``extract_text_from_pdf``, split with ``segment_into_clauses``, each
    clause is passed to ``analyze_clause_with_gemini``, and the results are
    aggregated with ``calculate_scores``. The temporary file is always removed
    before the function returns or raises.

    Args:
        file: Uploaded file; its name must end in ``.pdf`` (case-insensitive).

    Returns:
        AnalysisReport with the analyzed clauses and the final risk score.

    Raises:
        HTTPException: 400 when the upload has no ``.pdf`` filename, when the
            extracted text is empty or shorter than 50 characters after
            stripping, or when no clauses are found; 500 for any other error
            raised during analysis.
    """
    # Validate file type. The upload may carry no filename at all, so check
    # for that first instead of failing with an AttributeError on None.
    if not file.filename or not file.filename.lower().endswith('.pdf'):
        raise HTTPException(
            status_code=400, detail="Only PDF files are supported")

    temp_path = None
    try:
        content = await file.read()

        # Persist the upload so the extractor can read it by path. The handle
        # is closed (which also flushes it) before the path is used: on
        # Windows a NamedTemporaryFile that is still open cannot be opened a
        # second time or deleted.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_file:
            temp_path = temp_file.name
            temp_file.write(content)

        # NOTE(review): the service calls below are invoked synchronously
        # inside an async handler; if they block, they stall the event loop.
        # Consider offloading them to a worker thread.

        # Step 1: Extract text from PDF
        print(f"Extracting text from {file.filename}...")
        full_text = extract_text_from_pdf(temp_path)
        if not full_text or len(full_text.strip()) < 50:
            raise HTTPException(
                status_code=400,
                detail="Unable to extract meaningful text from PDF. Please ensure the PDF is readable."
            )

        # Step 2: Segment text into clauses
        print("Segmenting text into clauses...")
        clauses = segment_into_clauses(full_text)
        if not clauses:
            raise HTTPException(
                status_code=400,
                detail="Unable to identify contract clauses. Please ensure the document is a valid contract."
            )

        # Step 3: Analyze each clause with Gemini AI
        total_clauses = len(clauses)
        print(f"Analyzing {total_clauses} clauses with AI...")
        analyzed_clauses = []
        for i, clause_text in enumerate(clauses, 1):
            print(f"Analyzing clause {i}/{total_clauses}...")

            # Get AI analysis
            ai_result = analyze_clause_with_gemini(clause_text)

            # Convert AI results to RiskFinding objects; entries without a
            # risk_id are skipped.
            risks = []
            for risk_data in ai_result.get("risks", []):
                risk_id = risk_data.get("risk_id")
                if not risk_id:
                    continue
                risk_def = get_risk_definition(risk_id)
                risks.append(
                    RiskFinding(
                        risk_id=risk_id,
                        # Fall back to the catalogue description when the
                        # explanation is missing, None or empty (a plain
                        # .get() default only covers the missing-key case).
                        description=(
                            risk_data.get("explanation")
                            or risk_def["description"]
                        ),
                        score=risk_def["score"],
                    )
                )

            # Truncate long clause text for the response.
            clause_preview = (
                clause_text[:500] + "..."
                if len(clause_text) > 500
                else clause_text
            )
            analyzed_clauses.append(
                AnalyzedClause(
                    clause_number=i,
                    text=clause_preview,
                    risks=risks,
                    suggestion=ai_result.get("suggestion"),
                )
            )

        # Step 4: Calculate final risk score (the second returned value is
        # not used by this endpoint).
        print("Calculating final risk score...")
        final_score, _ = calculate_scores(analyzed_clauses)

        # Step 5: Determine contract type and language (basic detection)
        contract_type = "General Contract"  # Could be enhanced with AI detection
        language = "English"  # Could be enhanced with language detection

        # Create final analysis report
        analysis_report = AnalysisReport(
            file_name=file.filename,
            language=language,
            contract_type=contract_type,
            final_risk_score=final_score,
            clauses=analyzed_clauses
        )
        print(f"Analysis complete. Final risk score: {final_score}")
        return analysis_report
    except HTTPException:
        # Deliberate 4xx responses raised above pass through unchanged.
        raise
    except Exception as e:
        print(f"Error during analysis: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Analysis failed: {e}"
        ) from e
    finally:
        # Clean up temporary file. Removal is best-effort: a failure to
        # delete must not mask the real outcome of the request.
        if temp_path is not None:
            try:
                os.unlink(temp_path)
            except OSError:
                pass
@app.get("/")
async def root():
    """Return a fixed message indicating that the API is running."""
    running_message = "Multilingual Legal Contract Analyzer API is running"
    return {"message": running_message}
@app.get("/health")
async def health_check():
    """Return a fixed status payload naming this service."""
    payload = {}
    payload["status"] = "healthy"
    payload["service"] = "contract-analyzer"
    return payload
if __name__ == "__main__":
    # Executed as a script: serve the app with uvicorn on port 8000,
    # bound to 0.0.0.0.
    import uvicorn

    uvicorn.run(app, port=8000, host="0.0.0.0")
|