from fastapi import FastAPI, File, UploadFile, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse
from pydantic import BaseModel
import requests
import base64
import json
import re
import os
# Application object; title and description are shown in the generated
# OpenAPI docs. The description's arrow was mis-encoded and is restored here.
app = FastAPI(
    title="Aadhaar Card OCR API",
    description="Two-step RAG pipeline: nemotron-ocr-v1 → nvidia-nemotron-nano-9b-v2 for Aadhaar card extraction",
)
# ── CORS ─────────────────────────────────────────────────────────────────────
# Allow browser clients from any origin to call the API.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    # Credentials stay disabled while origins are wildcarded: combining
    # "*" with credentialed requests lets any website act with a visitor's
    # cookies/auth headers. The endpoints here use neither. To enable
    # credentials later, replace "*" with an explicit origin list first.
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
# ── Configuration ────────────────────────────────────────────────────────────
# Set NVIDIA_API_KEY as a Secret in your HuggingFace Space settings.
# There is deliberately no literal fallback key: a key embedded in source is
# a leaked credential. Any key that was previously committed here should be
# revoked and rotated. With the variable unset the key is empty and the
# Authorization headers below carry no usable credential.
NVIDIA_API_KEY = os.environ.get("NVIDIA_API_KEY", "")

# OCR endpoint (model name is nemotron-ocr-v1, not nemoretriever-ocr-v1).
OCR_URL = "https://ai.api.nvidia.com/v1/cv/nvidia/nemotron-ocr-v1"
# Chat-completions endpoint and model used to structure the OCR text.
LLM_URL = "https://integrate.api.nvidia.com/v1/chat/completions"
LLM_MODEL = "nvidia/nvidia-nemotron-nano-9b-v2"

# Headers sent with every OCR request.
OCR_HEADERS = {
    "Authorization": f"Bearer {NVIDIA_API_KEY}",
    "Accept": "application/json",
}
# Headers sent with every LLM request.
LLM_HEADERS = {
    "Authorization": f"Bearer {NVIDIA_API_KEY}",
    "Content-Type": "application/json",
}
# ── System prompts ───────────────────────────────────────────────────────────
# System prompt for the FRONT side. Instructs the LLM to return a JSON object
# with exactly the keys name, dob, gender and aadhaar_no.
# Fixes relative to the previous text: the Aadhaar number is requested with
# "no spaces" (it said "yes spaces", contradicting the rule below it); gender
# is no longer listed among the fields to exclude (the schema requires it);
# the Hindi keywords and dashes are restored from mis-encoded text.
FRONT_SYSTEM_PROMPT = """You are an Aadhaar card front-side data extraction assistant.
You will receive raw OCR text extracted from the FRONT side of an Indian Aadhaar card.
Parse it carefully and return ONLY a valid JSON object.
No markdown fences, no explanation, no preamble — just the raw JSON object.
JSON schema (return exactly this structure):
{
"name": "full name of the card holder (string)",
"dob": "date of birth in DD/MM/YYYY format (string)",
"gender": "gender Male, Female, or Other (string)",
"aadhaar_no": "12-digit Aadhaar number as a string, digits only, no spaces"
}
Rules:
- name: the primary card holder's full name (usually in bold, after "Name:" or just prominently placed)
- dob: look for "DOB", "Date of Birth", "जन्म तिथि" — output in DD/MM/YYYY format; if already in that format keep it
- gender: look for "Male", "Female", "Other", or Hindi equivalents "पुरुष", "महिला", "अन्य"
- aadhaar_no: the 12-digit number, usually printed in groups like "XXXX XXXX XXXX" — remove all spaces and return only digits
- If a field is not found, use "" for strings
- Do NOT include address details or any other fields not in the schema"""
# System prompt for the BACK side. Instructs the LLM to return a JSON object
# with exactly the keys address, village_city, state and pincode.
# The dashes in this text were mis-encoded and are restored as em dashes.
BACK_SYSTEM_PROMPT = """You are an Aadhaar card back-side data extraction assistant.
You will receive raw OCR text extracted from the BACK side of an Indian Aadhaar card.
Parse it carefully and return ONLY a valid JSON object.
No markdown fences, no explanation, no preamble — just the raw JSON object.
JSON schema (return exactly this structure):
{
"address": "door/flat number and street/locality/road name (string)",
"village_city": "village name or city/town name (string)",
"state": "state name (string)",
"pincode": "6-digit PIN code as a string"
}
Rules:
- address: the first line(s) of the address — house/flat number, building name, street or locality; exclude city, district, state, and PIN
- village_city: look for village name, town, or city; may also appear under "District" — prefer the more specific locality name over the district
- state: the full state name (e.g. "Maharashtra", "Tamil Nadu"); look near the end of the address block
- pincode: the 6-digit postal code; look for "PIN", "PIN Code", or a standalone 6-digit number at the end of the address
- If a field is not found, use "" for strings
- Do NOT include the card holder's name or Aadhaar number — focus only on address fields"""
# ── Helper ───────────────────────────────────────────────────────────────────
async def run_ocr(file: UploadFile) -> str:
    """Send an uploaded image to the NVIDIA OCR endpoint and return its text.

    The upload is read in full, base64-encoded and posted to ``OCR_URL`` as a
    ``data:`` URL. The text fragments found in the response are stripped and
    joined with newlines in the order the service returned them.

    Args:
        file: The uploaded image.

    Returns:
        The newline-joined text fragments, or ``""`` when the response holds
        no usable detections.

    Raises:
        HTTPException: 400 when the upload is empty; 413 when the base64
            payload is 1,000,000 characters or longer; 502 when the request
            fails, returns an error status, or the body is not valid JSON.
    """
    # Local import: keeps this block self-contained without touching the
    # file-level import list.
    from fastapi.concurrency import run_in_threadpool

    content = await file.read()
    if not content:
        raise HTTPException(status_code=400, detail="Uploaded file is empty.")
    image_b64 = base64.b64encode(content).decode()
    # The size guard applies to the base64 text, not the raw bytes.
    if len(image_b64) >= 1000_000:
        raise HTTPException(
            status_code=413,
            detail="Image too large (base64 must be < 1,000,000 chars). Resize the image and try again.",
        )
    # NOTE(review): the data URL always declares image/png, even when the
    # upload is a JPEG — confirm the OCR service ignores the declared type.
    payload = {
        "input": [
            {
                "type": "image_url",
                "url": f"data:image/png;base64,{image_b64}",
            }
        ]
    }
    try:
        # requests is blocking; run it in a worker thread so the event loop
        # can keep serving other requests while the OCR call is in flight.
        response = await run_in_threadpool(
            requests.post, OCR_URL, headers=OCR_HEADERS, json=payload, timeout=30
        )
        response.raise_for_status()
        # A non-JSON body raises ValueError (or a requests subclass of it).
        ocr_json = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        raise HTTPException(status_code=502, detail=f"NVIDIA OCR API error: {str(e)}") from e
    lines = _collect_ocr_lines(ocr_json)
    # Log only a count: the OCR payload contains personal data from the card.
    print(f"OCR returned {len(lines)} text line(s)")
    return "\n".join(lines)


def _collect_ocr_lines(ocr_json) -> list:
    """Return the non-empty, stripped text fragments of an OCR response body.

    Detections are taken from the top-level ``text_detections`` list, falling
    back to ``data[0]["text_detections"]``. Each detection's text is read from
    ``text_prediction.text`` when that key is present, otherwise from ``text``.
    Entries with an unexpected shape are skipped instead of raising.
    """
    if not isinstance(ocr_json, dict):
        return []
    detections = ocr_json.get("text_detections") or []
    if not detections:
        data = ocr_json.get("data") or []
        if isinstance(data, list) and data and isinstance(data[0], dict):
            detections = data[0].get("text_detections") or []
    if not isinstance(detections, list):
        return []
    lines = []
    for det in detections:
        if not isinstance(det, dict):
            continue
        holder = det["text_prediction"] if "text_prediction" in det else det
        text = holder.get("text") if isinstance(holder, dict) else None
        if isinstance(text, str):
            text = text.strip()
            if text:
                lines.append(text)
    return lines
def call_llm(ocr_text: str, system_prompt: str) -> dict:
    """Ask the LLM to structure OCR text and return the parsed JSON object.

    Args:
        ocr_text: Raw OCR text to embed in the user message.
        system_prompt: System prompt describing the JSON schema to produce.

    Returns:
        The JSON object found in the model's reply, as a dict.

    Raises:
        HTTPException: 502 when the request fails, the reply is empty or
            malformed, or no JSON object can be parsed from it.
    """
    llm_payload = {
        "model": LLM_MODEL,
        "max_tokens": 1024,
        "temperature": 0.2,
        "top_p": 0.9,
        "messages": [
            {"role": "system", "content": system_prompt},
            {
                "role": "user",
                "content": (
                    f"Here is the OCR text extracted from the Aadhaar card:\n\n"
                    f"{ocr_text}\n\n"
                    f"Extract the required data and return ONLY the JSON object."
                ),
            },
        ],
    }
    try:
        llm_response = requests.post(LLM_URL, headers=LLM_HEADERS, json=llm_payload, timeout=200)
        llm_response.raise_for_status()
        # A non-JSON body raises ValueError (or a requests subclass of it).
        llm_json = llm_response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        raise HTTPException(status_code=502, detail=f"NVIDIA LLM API error: {str(e)}") from e
    # The reply is not printed: it contains personal data from the card.
    raw_text = _first_choice_content(llm_json)
    if not raw_text:
        raise HTTPException(status_code=502, detail="LLM returned an empty response")
    return _parse_json_object(raw_text)


def _first_choice_content(llm_json) -> str:
    """Return ``choices[0].message.content`` as a string, or ``""`` if absent.

    Tolerates a missing or empty ``choices`` list, non-dict entries and
    non-string content, all of which count as an empty reply.
    """
    if not isinstance(llm_json, dict):
        return ""
    choices = llm_json.get("choices")
    if not isinstance(choices, list) or not choices:
        return ""
    first = choices[0]
    message = first.get("message") if isinstance(first, dict) else None
    content = message.get("content") if isinstance(message, dict) else None
    return content if isinstance(content, str) else ""


def _parse_json_object(raw_text: str) -> dict:
    """Parse the model's reply into a dict, raising a 502 HTTPException if impossible.

    Markdown code fences are removed first. If the remainder is not valid JSON
    on its own, the span from the first ``{`` to the last ``}`` is tried.
    """
    cleaned = re.sub(r"```json\s*", "", raw_text, flags=re.IGNORECASE)
    cleaned = re.sub(r"```\s*", "", cleaned).strip()
    try:
        parsed = json.loads(cleaned)
    except json.JSONDecodeError:
        match = re.search(r"\{[\s\S]*\}", cleaned)
        if not match:
            raise HTTPException(
                status_code=502,
                detail=f"LLM did not return valid JSON. Preview: {raw_text[:400]}",
            )
        try:
            parsed = json.loads(match.group(0))
        except json.JSONDecodeError as e:
            raise HTTPException(status_code=502, detail=f"JSON parse error: {str(e)}") from e
    if not isinstance(parsed, dict):
        raise HTTPException(
            status_code=502,
            detail=f"LLM response is not a JSON object. Got: {type(parsed).__name__}",
        )
    return parsed
# ── Request / Response models ────────────────────────────────────────────────
# Response body of POST /extract-front. All fields are plain strings; the
# endpoint fills them from the LLM's JSON after trimming and length-capping.
class AadhaarFrontData(BaseModel):
    name: str  # card holder's name
    dob: str  # date of birth; the prompt asks for DD/MM/YYYY
    gender: str  # the prompt asks for Male, Female, or Other
    aadhaar_no: str  # digits only; the endpoint keeps at most 12
# Response body of POST /extract-back. All fields are plain strings; the
# endpoint fills them from the LLM's JSON after trimming and length-capping.
class AadhaarBackData(BaseModel):
    address: str  # house/flat number and street/locality part of the address
    village_city: str  # village, town or city name
    state: str  # state name
    pincode: str  # digits only; the endpoint keeps at most 6
# ── Endpoints ────────────────────────────────────────────────────────────────
@app.post("/extract-front", response_model=AadhaarFrontData)
async def extract_front(file: UploadFile = File(...)):
    """
    Upload the FRONT side of an Aadhaar card image.

    Pipeline:
    1. nemotron-ocr-v1 → raw OCR text
    2. nvidia-nemotron-nano-9b-v2 → structured JSON

    Returns: name, dob, gender, aadhaar_no
    """
    # Local import: keeps this block self-contained without touching the
    # file-level import list.
    from fastapi.concurrency import run_in_threadpool

    ocr_text = await run_ocr(file)
    if not ocr_text.strip():
        raise HTTPException(status_code=422, detail="OCR produced no text. Check the image quality.")
    # call_llm is synchronous and may wait up to its 200 s timeout; running it
    # in a worker thread keeps the event loop free for other requests.
    parsed = await run_in_threadpool(call_llm, ocr_text, FRONT_SYSTEM_PROMPT)

    def text_field(key: str, limit: int) -> str:
        # A JSON null (None) or missing key becomes "", never the text "None".
        value = parsed.get(key)
        return "" if value is None else str(value).strip()[:limit]

    # Keep digits only, whatever separators the model left in the number.
    raw_aadhaar = parsed.get("aadhaar_no")
    aadhaar_digits = re.sub(r"\D", "", "" if raw_aadhaar is None else str(raw_aadhaar))
    return AadhaarFrontData(
        name=text_field("name", 100),
        dob=text_field("dob", 12),
        gender=text_field("gender", 20),
        aadhaar_no=aadhaar_digits[:12],
    )
@app.post("/extract-back", response_model=AadhaarBackData)
async def extract_back(file: UploadFile = File(...)):
    """
    Upload the BACK side of an Aadhaar card image.

    Pipeline:
    1. nemotron-ocr-v1 → raw OCR text
    2. nvidia-nemotron-nano-9b-v2 → structured JSON

    Returns: address, village_city, state, pincode
    """
    # Local import: keeps this block self-contained without touching the
    # file-level import list.
    from fastapi.concurrency import run_in_threadpool

    ocr_text = await run_ocr(file)
    if not ocr_text.strip():
        raise HTTPException(status_code=422, detail="OCR produced no text. Check the image quality.")
    # call_llm is synchronous and may wait up to its 200 s timeout; running it
    # in a worker thread keeps the event loop free for other requests.
    parsed = await run_in_threadpool(call_llm, ocr_text, BACK_SYSTEM_PROMPT)

    def text_field(key: str, limit: int) -> str:
        # A JSON null (None) or missing key becomes "", never the text "None".
        value = parsed.get(key)
        return "" if value is None else str(value).strip()[:limit]

    # Keep digits only, then cap at the 6 digits of a PIN code.
    raw_pin = parsed.get("pincode")
    pin_digits = re.sub(r"\D", "", "" if raw_pin is None else str(raw_pin))[:6]
    return AadhaarBackData(
        address=text_field("address", 200),
        village_city=text_field("village_city", 100),
        state=text_field("state", 60),
        pincode=pin_digits,
    )
@app.get("/health")
async def health_check():
    # Liveness probe: a fixed status plus the configured LLM model name.
    # No upstream service is contacted.
    report = dict(status="healthy", model=LLM_MODEL)
    return report
@app.get("/")
async def root():
    # Serve the front-end page. The path is anchored to this module's
    # directory so it no longer depends on the process working directory.
    page_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "index.html")
    return FileResponse(page_path)
# Script entry point: start the development server when run directly.
# A stray trailing "|" after the run() call (a syntax error) is removed.
if __name__ == "__main__":
    import uvicorn

    # NOTE(review): reload=True enables uvicorn's auto-reload, which is meant
    # for development — confirm it is wanted where this file is deployed.
    uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=True)