File size: 5,461 Bytes
4b022af
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
from fastapi import FastAPI, File, UploadFile, HTTPException
from fastapi.responses import JSONResponse
import tempfile
import os
from typing import List

# Import our services
from app.services.text_extractor import extract_text_from_pdf
from app.services.preprocessor import segment_into_clauses
from app.services.risk_analyzer import analyze_clause_with_gemini
from app.services.risk_scorer import calculate_scores, get_risk_definition

from app.schemas import AnalysisReport, AnalyzedClause, RiskFinding

# Module-level FastAPI application; the route decorators below register on it.
app = FastAPI(
    version="1.0.0",
    title="Multilingual Legal Contract Analyzer",
    description="AI-powered contract analysis for English and Indic languages",
)


def _truncate_clause_text(clause_text, limit=500):
    """Return *clause_text* cut to *limit* characters plus "..." when longer."""
    if len(clause_text) > limit:
        return clause_text[:limit] + "..."
    return clause_text


def _build_risk_findings(ai_result):
    """Convert the "risks" entries of an AI result dict into RiskFinding objects."""
    findings = []
    for risk_data in ai_result.get("risks", []):
        risk_id = risk_data.get("risk_id")
        if not risk_id:
            # Entries without an identifier cannot be mapped to a definition.
            continue
        # NOTE(review): assumes get_risk_definition always returns a mapping
        # with "description" and "score" keys -- confirm for unknown ids.
        risk_def = get_risk_definition(risk_id)
        findings.append(
            RiskFinding(
                risk_id=risk_id,
                # Prefer the AI's explanation; fall back to the stock text.
                description=risk_data.get(
                    "explanation", risk_def["description"]),
                score=risk_def["score"],
            )
        )
    return findings


@app.post("/analyze/", response_model=AnalysisReport)
async def analyze_contract(file: UploadFile = File(...)):
    """
    Analyze a legal contract PDF and return detailed risk analysis.

    The upload is written to a temporary file, its text is extracted and
    split into clauses, each clause is passed to the AI analyzer, and the
    per-clause findings are combined into a final risk score.

    Args:
        file: PDF file to analyze

    Returns:
        AnalysisReport with risk analysis and suggestions

    Raises:
        HTTPException: 400 when the upload has no ".pdf" filename, when too
            little text can be extracted, or when no clauses are found;
            500 when any other error occurs during analysis.
    """

    # Validate file type. The filename may be missing on some uploads, so
    # guard against None before calling .lower().
    if not file.filename or not file.filename.lower().endswith('.pdf'):
        raise HTTPException(
            status_code=400, detail="Only PDF files are supported")

    temp_path = None
    try:
        content = await file.read()

        # Write the upload to disk and close the handle before anything else
        # opens or deletes the file: removing a still-open file fails on
        # some platforms (e.g. Windows), which would leak the temp file.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_file:
            temp_path = temp_file.name
            temp_file.write(content)

        # NOTE(review): the service calls below are invoked synchronously
        # inside this coroutine; if they block (PDF parsing, remote AI
        # calls) they will stall the event loop -- consider a thread pool.

        # Step 1: Extract text from PDF
        print(f"Extracting text from {file.filename}...")
        full_text = extract_text_from_pdf(temp_path)

        if not full_text or len(full_text.strip()) < 50:
            raise HTTPException(
                status_code=400,
                detail="Unable to extract meaningful text from PDF. Please ensure the PDF is readable."
            )

        # Step 2: Segment text into clauses
        print("Segmenting text into clauses...")
        clauses = segment_into_clauses(full_text)

        if not clauses:
            raise HTTPException(
                status_code=400,
                detail="Unable to identify contract clauses. Please ensure the document is a valid contract."
            )

        # Step 3: Analyze each clause with Gemini AI
        print(f"Analyzing {len(clauses)} clauses with AI...")
        analyzed_clauses = []

        for i, clause_text in enumerate(clauses, 1):
            print(f"Analyzing clause {i}/{len(clauses)}...")

            ai_result = analyze_clause_with_gemini(clause_text)

            analyzed_clauses.append(
                AnalyzedClause(
                    clause_number=i,
                    # Truncate for response
                    text=_truncate_clause_text(clause_text),
                    risks=_build_risk_findings(ai_result),
                    suggestion=ai_result.get("suggestion"),
                )
            )

        # Step 4: Calculate final risk score (the flat findings list that
        # calculate_scores also returns is not used in the report).
        print("Calculating final risk score...")
        final_score, _all_findings = calculate_scores(analyzed_clauses)

        # Step 5: Determine contract type and language (basic detection)
        contract_type = "General Contract"  # Could be enhanced with AI detection
        language = "English"  # Could be enhanced with language detection

        # Create final analysis report
        analysis_report = AnalysisReport(
            file_name=file.filename,
            language=language,
            contract_type=contract_type,
            final_risk_score=final_score,
            clauses=analyzed_clauses
        )

        print(f"Analysis complete. Final risk score: {final_score}")
        return analysis_report

    except HTTPException:
        # Deliberate 4xx responses raised above pass through unchanged.
        raise
    except Exception as e:
        print(f"Error during analysis: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Analysis failed: {str(e)}"
        ) from e
    finally:
        # Clean up temporary file (best effort: a failed delete must not
        # mask the real response, but it should not be silent either).
        if temp_path is not None:
            try:
                os.unlink(temp_path)
            except OSError as cleanup_error:
                print(
                    f"Could not remove temporary file {temp_path}: {cleanup_error}")


@app.get("/")
async def root():
    """Return a static message confirming the API is running."""
    banner = "Multilingual Legal Contract Analyzer API is running"
    return {"message": banner}


@app.get("/health")
async def health_check():
    """Return a fixed status payload naming this service and marking it healthy."""
    payload = dict(status="healthy", service="contract-analyzer")
    return payload


if __name__ == "__main__":
    # Direct execution: serve the app with uvicorn on all interfaces, port 8000.
    import uvicorn

    server_options = {"host": "0.0.0.0", "port": 8000}
    uvicorn.run(app, **server_options)