File size: 11,191 Bytes
044b0bb
 
 
 
 
 
 
 
 
 
 
 
 
7a28e6a
044b0bb
 
 
 
 
 
 
 
 
 
 
 
 
23fbe07
044b0bb
7a28e6a
 
044b0bb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7a28e6a
08f4f04
044b0bb
 
08f4f04
044b0bb
 
7a28e6a
044b0bb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6af1a5f
044b0bb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7a28e6a
044b0bb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7a28e6a
044b0bb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
import asyncio
import base64
import functools
import json
import logging
import os
import re

import requests
from fastapi import FastAPI, File, UploadFile, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel

# Application object; the title and description are shown in the generated
# OpenAPI documentation.
app = FastAPI(
    title="Aadhaar Card OCR API",
    description="Two-step RAG pipeline: nemotron-ocr-v1 → nvidia-nemotron-nano-9b-v2 for Aadhaar card extraction",
)

# ── CORS ──────────────────────────────────────────────────────────────────────
# Any origin may call the API. Credentials are disabled because a wildcard
# origin must not be combined with credentialed requests, and none of the
# endpoints visible in this file read cookies or HTTP auth headers.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# ── Configuration ─────────────────────────────────────────────────────────────
# NVIDIA_API_KEY must come from the environment (e.g. a Secret in your
# HuggingFace Space settings). No fallback value is embedded here: a key that
# has ever been committed to source control must be treated as leaked and
# rotated. With the variable unset the upstream calls are sent with an empty
# bearer token.
NVIDIA_API_KEY = os.environ.get("NVIDIA_API_KEY", "")

# The OCR endpoint uses the nemotron-ocr-v1 model path (not nemoretriever-ocr-v1).
OCR_URL = "https://ai.api.nvidia.com/v1/cv/nvidia/nemotron-ocr-v1"
LLM_URL = "https://integrate.api.nvidia.com/v1/chat/completions"
LLM_MODEL = "nvidia/nvidia-nemotron-nano-9b-v2"

# Headers sent with every OCR request.
OCR_HEADERS = {
    "Authorization": f"Bearer {NVIDIA_API_KEY}",
    "Accept": "application/json",
}

# Headers sent with every chat-completions request.
LLM_HEADERS = {
    "Authorization": f"Bearer {NVIDIA_API_KEY}",
    "Content-Type": "application/json",
}

# ── System prompts ────────────────────────────────────────────────────────────

# System prompt for the front side of the card: asks the LLM for a JSON object
# with exactly the four keys that extract_front reads.
FRONT_SYSTEM_PROMPT = """You are an Aadhaar card front-side data extraction assistant.
You will receive raw OCR text extracted from the FRONT side of an Indian Aadhaar card.
Parse it carefully and return ONLY a valid JSON object.
No markdown fences, no explanation, no preamble — just the raw JSON object.
JSON schema (return exactly this structure):
{
  "name": "full name of the card holder (string)",
  "dob": "date of birth in DD/MM/YYYY format (string)",
  "gender": "gender Male, Female, or Other (string)",
  "aadhaar_no": "12-digit Aadhaar number as a string, digits only, no spaces"
}
Rules:
- name: the primary card holder's full name (usually in bold, after "Name:" or just prominently placed)
- dob: look for "DOB", "Date of Birth", "जन्म तिथि" — output in DD/MM/YYYY format; if already in that format keep it
- gender: look for "Male", "Female", "Other", or Hindi equivalents "पुरुष", "महिला", "अन्य"
- aadhaar_no: the 12-digit number, usually printed in groups like "XXXX XXXX XXXX" — remove all spaces and return only digits
- If a field is not found, use "" for strings
- Do NOT include address details or any other fields not in the schema"""

# System prompt for the back side of the card: asks the LLM for a JSON object
# with exactly the four address keys that extract_back reads.
BACK_SYSTEM_PROMPT = """You are an Aadhaar card back-side data extraction assistant.
You will receive raw OCR text extracted from the BACK side of an Indian Aadhaar card.
Parse it carefully and return ONLY a valid JSON object.
No markdown fences, no explanation, no preamble — just the raw JSON object.
JSON schema (return exactly this structure):
{
  "address": "door/flat number and street/locality/road name (string)",
  "village_city": "village name or city/town name (string)",
  "state": "state name (string)",
  "pincode": "6-digit PIN code as a string"
}
Rules:
- address: the first line(s) of the address — house/flat number, building name, street or locality; exclude city, district, state, and PIN
- village_city: look for village name, town, or city; may also appear under "District" — prefer the more specific locality name over the district
- state: the full state name (e.g. "Maharashtra", "Tamil Nadu"); look near the end of the address block
- pincode: the 6-digit postal code; look for "PIN", "PIN Code", or a standalone 6-digit number at the end of the address
- If a field is not found, use "" for strings
- Do NOT include the card holder's name or Aadhaar number — focus only on address fields"""


# ── Helper ────────────────────────────────────────────────────────────────────

async def run_ocr(file: UploadFile) -> str:
    """Send an uploaded image to the NVIDIA OCR endpoint and return its text.

    The upload is base64-encoded, posted to ``OCR_URL`` as a data URL, and the
    recognised text fragments are joined with newlines in the order the API
    returned them. The OCR text itself is never logged, only a line count,
    because it contains personal data.

    Args:
        file: The uploaded image.

    Returns:
        The detected text, one detection per line; ``""`` when the response
        holds no usable detections.

    Raises:
        HTTPException: 400 if the upload is empty; 413 if the base64 payload
            is 1,000,000 characters or longer; 502 if the OCR request fails
            or its response is not valid JSON.
    """
    content = await file.read()
    if not content:
        raise HTTPException(status_code=400, detail="Uploaded file is empty.")

    image_b64 = base64.b64encode(content).decode()

    # Inline-payload cap; the number must stay in sync with the error message.
    # NOTE(review): an earlier comment cited 180,000 characters as the
    # reference limit for inline images — confirm the endpoint accepts 1,000,000.
    if len(image_b64) >= 1_000_000:
        raise HTTPException(
            status_code=413,
            detail="Image too large (base64 must be < 1,000,000 chars). Resize the image and try again.",
        )

    # NOTE(review): the data URL always declares image/png, whatever the
    # upload's real content type is — confirm the endpoint tolerates this.
    payload = {
        "input": [
            {
                "type": "image_url",
                "url": f"data:image/png;base64,{image_b64}",
            }
        ]
    }

    post_ocr = functools.partial(
        requests.post, OCR_URL, headers=OCR_HEADERS, json=payload, timeout=30
    )
    try:
        # requests is blocking; run it in the default executor so the event
        # loop keeps serving other requests while we wait.
        response = await asyncio.get_running_loop().run_in_executor(None, post_ocr)
        response.raise_for_status()
        ocr_json = response.json()
    except requests.exceptions.RequestException as e:
        raise HTTPException(status_code=502, detail=f"NVIDIA OCR API error: {str(e)}") from e
    except ValueError as e:
        # Older requests versions raise a plain ValueError for a non-JSON body.
        raise HTTPException(
            status_code=502,
            detail="NVIDIA OCR API error: response was not valid JSON",
        ) from e

    lines = _extract_ocr_lines(ocr_json)
    logging.getLogger(__name__).debug("OCR returned %d text line(s)", len(lines))
    return "\n".join(lines)


def _extract_ocr_lines(ocr_json) -> list:
    """Collect the non-empty text fragments from a decoded OCR response."""
    if not isinstance(ocr_json, dict):
        return []

    # Detections are either at the top level or nested under data[0].
    detections = ocr_json.get("text_detections") or []
    if not detections:
        data = ocr_json.get("data") or []
        if isinstance(data, list) and data and isinstance(data[0], dict):
            detections = data[0].get("text_detections") or []

    lines = []
    for det in detections:
        if not isinstance(det, dict):
            continue
        # The text sits under "text_prediction" when present, else on the
        # detection itself.
        holder = det["text_prediction"] if "text_prediction" in det else det
        text = holder.get("text", "") if isinstance(holder, dict) else ""
        text = text.strip() if isinstance(text, str) else ""
        if text:
            lines.append(text)
    return lines


def call_llm(ocr_text: str, system_prompt: str) -> dict:
    """Ask the LLM to turn OCR text into a JSON object and return it as a dict.

    The request is a blocking HTTP call to ``LLM_URL``. The model's reply and
    the parsed data are never logged, only the returned key names, because
    they contain personal data.

    Args:
        ocr_text: Raw text produced by the OCR step.
        system_prompt: System prompt describing the JSON schema to return.

    Returns:
        The JSON object parsed from the model's reply.

    Raises:
        HTTPException: 502 if the request fails, the reply is empty or
            malformed, or the reply does not contain a JSON object.
    """
    llm_payload = {
        "model": LLM_MODEL,
        "max_tokens": 1024,
        "temperature": 0.2,
        "top_p": 0.9,
        "messages": [
            {"role": "system", "content": system_prompt},
            {
                "role": "user",
                "content": (
                    "Here is the OCR text extracted from the Aadhaar card:\n\n"
                    f"{ocr_text}\n\n"
                    "Extract the required data and return ONLY the JSON object."
                ),
            },
        ],
    }

    try:
        llm_response = requests.post(LLM_URL, headers=LLM_HEADERS, json=llm_payload, timeout=200)
        llm_response.raise_for_status()
        llm_json = llm_response.json()
    except requests.exceptions.RequestException as e:
        raise HTTPException(status_code=502, detail=f"NVIDIA LLM API error: {str(e)}") from e
    except ValueError as e:
        # Older requests versions raise a plain ValueError for a non-JSON body.
        raise HTTPException(
            status_code=502,
            detail="NVIDIA LLM API error: response was not valid JSON",
        ) from e

    raw_text = _first_choice_content(llm_json)
    if not raw_text:
        raise HTTPException(status_code=502, detail="LLM returned an empty response")

    parsed = _parse_json_object(raw_text)
    logging.getLogger(__name__).debug("LLM returned JSON keys: %s", sorted(parsed))
    return parsed


def _first_choice_content(llm_json) -> str:
    """Return choices[0].message.content, or "" if any level is missing."""
    if not isinstance(llm_json, dict):
        return ""
    choices = llm_json.get("choices")
    if not isinstance(choices, list) or not choices:
        return ""
    first = choices[0]
    message = first.get("message") if isinstance(first, dict) else None
    content = message.get("content") if isinstance(message, dict) else None
    return content if isinstance(content, str) else ""


def _parse_json_object(raw_text: str) -> dict:
    """Parse a JSON object out of model output, tolerating fences and chatter."""
    # Strip markdown code fences the model may add despite the instructions.
    cleaned = re.sub(r"```json\s*", "", raw_text, flags=re.IGNORECASE)
    cleaned = re.sub(r"```\s*", "", cleaned).strip()

    try:
        parsed = json.loads(cleaned)
    except json.JSONDecodeError:
        # Fall back to the span from the first "{" to the last "}".
        match = re.search(r"\{[\s\S]*\}", cleaned)
        if not match:
            raise HTTPException(
                status_code=502,
                detail=f"LLM did not return valid JSON. Preview: {raw_text[:400]}",
            ) from None
        try:
            parsed = json.loads(match.group(0))
        except json.JSONDecodeError as e:
            raise HTTPException(status_code=502, detail=f"JSON parse error: {str(e)}") from e

    if not isinstance(parsed, dict):
        raise HTTPException(
            status_code=502,
            detail=f"LLM response is not a JSON object. Got: {type(parsed).__name__}",
        )
    return parsed


# ── Request / Response models ─────────────────────────────────────────────────

class AadhaarFrontData(BaseModel):
    # Response body of POST /extract-front. All fields are plain strings.
    name: str  # card holder's name as extracted by the LLM
    dob: str  # date of birth; the prompt requests DD/MM/YYYY
    gender: str  # the prompt requests Male, Female, or Other
    aadhaar_no: str  # digits only; the endpoint keeps at most 12


class AadhaarBackData(BaseModel):
    # Response body of POST /extract-back. All fields are plain strings.
    address: str  # house/flat number and street or locality
    village_city: str  # village, town, or city name
    state: str  # state name
    pincode: str  # digits only; the endpoint keeps at most 6


# ── Endpoints ─────────────────────────────────────────────────────────────────

@app.post("/extract-front", response_model=AadhaarFrontData)
async def extract_front(file: UploadFile = File(...)):
    """
    Upload the FRONT side of an Aadhaar card image.
    Pipeline:
      1. nemotron-ocr-v1  →  raw OCR text
      2. nvidia-nemotron-nano-9b-v2  →  structured JSON
    Returns: name, dob, gender, aadhaar_no

    Missing or null fields come back as "". Raises HTTPException 422 when the
    OCR step yields no text; errors from the OCR and LLM steps propagate.
    """
    ocr_text = await run_ocr(file)

    if not ocr_text.strip():
        raise HTTPException(status_code=422, detail="OCR produced no text. Check the image quality.")

    # call_llm performs a blocking HTTP request; run it in the default
    # executor so the event loop stays free for other requests.
    loop = asyncio.get_running_loop()
    parsed = await loop.run_in_executor(None, call_llm, ocr_text, FRONT_SYSTEM_PROMPT)

    def text_field(key: str, limit: int) -> str:
        # A JSON null must become "", not the literal string "None".
        value = parsed.get(key)
        return "" if value is None else str(value).strip()[:limit]

    raw_aadhaar = parsed.get("aadhaar_no")
    # Keep digits only, then cap at the 12 digits of an Aadhaar number.
    aadhaar_digits = re.sub(r"\D", "", "" if raw_aadhaar is None else str(raw_aadhaar))

    return AadhaarFrontData(
        name=text_field("name", 100),
        dob=text_field("dob", 12),
        gender=text_field("gender", 20),
        aadhaar_no=aadhaar_digits[:12],
    )


@app.post("/extract-back", response_model=AadhaarBackData)
async def extract_back(file: UploadFile = File(...)):
    """
    Upload the BACK side of an Aadhaar card image.
    Pipeline:
      1. nemotron-ocr-v1  →  raw OCR text
      2. nvidia-nemotron-nano-9b-v2  →  structured JSON
    Returns: address, village_city, state, pincode

    Missing or null fields come back as "". Raises HTTPException 422 when the
    OCR step yields no text; errors from the OCR and LLM steps propagate.
    """
    ocr_text = await run_ocr(file)

    if not ocr_text.strip():
        raise HTTPException(status_code=422, detail="OCR produced no text. Check the image quality.")

    # call_llm performs a blocking HTTP request; run it in the default
    # executor so the event loop stays free for other requests.
    loop = asyncio.get_running_loop()
    parsed = await loop.run_in_executor(None, call_llm, ocr_text, BACK_SYSTEM_PROMPT)

    def text_field(key: str, limit: int) -> str:
        # A JSON null must become "", not the literal string "None".
        value = parsed.get(key)
        return "" if value is None else str(value).strip()[:limit]

    raw_pin = parsed.get("pincode")
    # Keep digits only, then cap at the 6 digits of a PIN code.
    pin_digits = re.sub(r"\D", "", "" if raw_pin is None else str(raw_pin))[:6]

    return AadhaarBackData(
        address=text_field("address", 200),
        village_city=text_field("village_city", 100),
        state=text_field("state", 60),
        pincode=pin_digits,
    )


@app.get("/health")
async def health_check():
    """Report a fixed "healthy" status together with the configured LLM model name."""
    report = {"status": "healthy"}
    report["model"] = LLM_MODEL
    return report


@app.get("/")
async def root():
    """Serve the ``index.html`` page that sits next to this module.

    Raises:
        HTTPException: 404 if ``index.html`` is not present.
    """
    # Anchor the path to this file so the page is found whatever directory
    # the server process was started from.
    index_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "index.html")
    if not os.path.isfile(index_path):
        raise HTTPException(status_code=404, detail="index.html not found")
    return FileResponse(index_path)


if __name__ == "__main__":
    # Direct-execution entry point: start uvicorn on all interfaces, port 7860,
    # with auto-reload enabled. "app:app" is the import string uvicorn loads.
    import uvicorn

    server_options = {"host": "0.0.0.0", "port": 7860, "reload": True}
    uvicorn.run("app:app", **server_options)