# Aadhaar Card OCR API — FastAPI app (deployed as a HuggingFace Space).
| from fastapi import FastAPI, File, UploadFile, HTTPException | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from fastapi.staticfiles import StaticFiles | |
| from fastapi.responses import FileResponse | |
| from pydantic import BaseModel | |
| import requests | |
| import base64 | |
| import json | |
| import re | |
| import os | |
# FastAPI application instance; interactive docs are served at /docs.
# (Fixed mojibake in the user-facing description: the pipeline arrow had been
# garbled to a stray Thai character by a bad encoding round-trip.)
app = FastAPI(
    title="Aadhaar Card OCR API",
    description="Two-step RAG pipeline: nemoretriever-ocr-v1 → nvidia-nemotron-nano-9b-v2 for Aadhaar card extraction",
)

# ── CORS ──────────────────────────────────────────────────────────────────────
# Wide-open CORS so any frontend origin can call the API.
# NOTE(review): browsers reject allow_origins=["*"] combined with
# allow_credentials=True for credentialed requests — confirm whether
# credentials are actually needed; if not, drop allow_credentials.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# ── Configuration ─────────────────────────────────────────────────────────────
# Set NVIDIA_API_KEY as a Secret in your HuggingFace Space settings.
# SECURITY FIX: the previous code shipped a real API key as the fallback
# default, committing the secret to source control. Default to "" so a
# missing secret fails loudly at the API instead of silently using a
# leaked credential. The exposed key must be rotated.
NVIDIA_API_KEY = os.environ.get("NVIDIA_API_KEY", "")

OCR_URL = "https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-ocr-v1"
LLM_URL = "https://integrate.api.nvidia.com/v1/chat/completions"
LLM_MODEL = "nvidia/nvidia-nemotron-nano-9b-v2"

# Headers for the OCR (image) endpoint.
OCR_HEADERS = {
    "Authorization": f"Bearer {NVIDIA_API_KEY}",
    "Accept": "application/json",
}

# Headers for the chat-completions (LLM) endpoint.
LLM_HEADERS = {
    "Authorization": f"Bearer {NVIDIA_API_KEY}",
    "Content-Type": "application/json",
}
# ── System prompts ────────────────────────────────────────────────────────────
# Fixes vs. the received source:
#  * "digits only, yes spaces" contradicted the rule "remove all spaces" —
#    corrected to "no spaces".
#  * The final rule told the model NOT to include "gender", which IS a
#    required schema field — removed from the exclusion list.
#  * Mojibake repaired: em dashes and the Hindi Aadhaar terms (जन्म तिथि,
#    पुरुष, महिला, अन्य) had been garbled by a bad encoding round-trip.
FRONT_SYSTEM_PROMPT = """You are an Aadhaar card front-side data extraction assistant.
You will receive raw OCR text extracted from the FRONT side of an Indian Aadhaar card.
Parse it carefully and return ONLY a valid JSON object.
No markdown fences, no explanation, no preamble — just the raw JSON object.
JSON schema (return exactly this structure):
{
  "name": "full name of the card holder (string)",
  "dob": "date of birth in DD/MM/YYYY format (string)",
  "gender": "gender Male, Female, or Other (string)",
  "aadhaar_no": "12-digit Aadhaar number as a string, digits only, no spaces"
}
Rules:
- name: the primary card holder's full name (usually in bold, after "Name:" or just prominently placed)
- dob: look for "DOB", "Date of Birth", "जन्म तिथि" — output in DD/MM/YYYY format; if already in that format keep it
- gender: look for "Male", "Female", "Other", or Hindi equivalents "पुरुष", "महिला", "अन्य"
- aadhaar_no: the 12-digit number, usually printed in groups like "XXXX XXXX XXXX" — remove all spaces and return only digits
- If a field is not found, use "" for strings
- Do NOT include address details or any other fields not in the schema"""
# Prompt for the BACK side (address block). Mojibake em dashes repaired;
# schema and rules otherwise unchanged.
BACK_SYSTEM_PROMPT = """You are an Aadhaar card back-side data extraction assistant.
You will receive raw OCR text extracted from the BACK side of an Indian Aadhaar card.
Parse it carefully and return ONLY a valid JSON object.
No markdown fences, no explanation, no preamble — just the raw JSON object.
JSON schema (return exactly this structure):
{
  "address": "door/flat number and street/locality/road name (string)",
  "village_city": "village name or city/town name (string)",
  "state": "state name (string)",
  "pincode": "6-digit PIN code as a string"
}
Rules:
- address: the first line(s) of the address — house/flat number, building name, street or locality; exclude city, district, state, and PIN
- village_city: look for village name, town, or city; may also appear under "District" — prefer the more specific locality name over the district
- state: the full state name (e.g. "Maharashtra", "Tamil Nadu"); look near the end of the address block
- pincode: the 6-digit postal code; look for "PIN", "PIN Code", or a standalone 6-digit number at the end of the address
- If a field is not found, use "" for strings
- Do NOT include the card holder's name or Aadhaar number — focus only on address fields"""
# ── Helper ────────────────────────────────────────────────────────────────────
async def run_ocr(file: UploadFile) -> str:
    """Send an uploaded image to the NVIDIA OCR service and return its text.

    The image is base64-encoded inline into a data URL and posted to
    nemoretriever-ocr-v1. Each text detection in the response is collected
    and the lines are joined with newlines, in API order.

    Args:
        file: the uploaded image (front or back of an Aadhaar card).

    Returns:
        The concatenated OCR text; "" if the API returned no detections.

    Raises:
        HTTPException(413): if the base64 payload is 500,000 chars or more.
        HTTPException(502): if the OCR HTTP call fails.
    """
    content = await file.read()
    image_b64 = base64.b64encode(content).decode()

    # Fail fast on oversized inline images. FIX: the old message claimed a
    # "180 KB" limit while the check is on 500,000 base64 characters
    # (~366 KB of raw image data) — the message now matches the check.
    if len(image_b64) >= 500_000:
        raise HTTPException(
            status_code=413,
            detail="Image too large (base64 >= 500,000 characters, ~366 KB of image data). Resize the image and try again.",
        )

    # FIX: the MIME type was hardcoded to image/png for every upload.
    # Prefer the client-declared content type; keep PNG as the fallback
    # for backward compatibility.
    mime_type = file.content_type or "image/png"
    payload = {
        "input": [
            {
                "type": "image_url",
                "url": f"data:{mime_type};base64,{image_b64}",
            }
        ]
    }

    try:
        response = requests.post(OCR_URL, headers=OCR_HEADERS, json=payload, timeout=30)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        raise HTTPException(status_code=502, detail=f"NVIDIA OCR API error: {str(e)}")

    ocr_json = response.json()
    print("OCR Response:", ocr_json)  # debug; NOTE(review): logs may contain PII

    # The API has returned detections either at the top level or nested
    # under data[0] — support both shapes.
    detections = ocr_json.get("text_detections", [])
    if not detections:
        data = ocr_json.get("data", [])
        if isinstance(data, list) and len(data) > 0:
            detections = data[0].get("text_detections", [])

    lines = []
    for det in detections:
        text = ""
        if isinstance(det, dict):
            # Newer responses wrap the string in "text_prediction";
            # older ones expose "text" directly.
            if "text_prediction" in det:
                text = det["text_prediction"].get("text", "").strip()
            else:
                text = det.get("text", "").strip()
        if text:
            lines.append(text)
    return "\n".join(lines)
def call_llm(ocr_text: str, system_prompt: str) -> dict:
    """Run the extraction LLM over OCR text and return the parsed JSON object.

    Posts *ocr_text* to the chat-completions endpoint with *system_prompt* as
    the system message, strips any markdown code fences from the reply, and
    parses it as JSON — falling back to the widest ``{...}`` span if the full
    reply is not valid JSON.

    Raises:
        HTTPException(502): on transport errors, an empty reply, or a reply
            that cannot be parsed into a JSON object.
    """
    user_message = (
        f"Here is the OCR text extracted from the Aadhaar card:\n\n"
        f"{ocr_text}\n\n"
        f"Extract the required data and return ONLY the JSON object."
    )
    payload = {
        "model": LLM_MODEL,
        "max_tokens": 1024,
        "temperature": 0.2,
        "top_p": 0.9,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_message},
        ],
    }

    try:
        resp = requests.post(LLM_URL, headers=LLM_HEADERS, json=payload, timeout=200)
        resp.raise_for_status()
        body = resp.json()
        print("LLM Response JSON:", body)  # debug; NOTE(review): may contain PII
    except requests.exceptions.RequestException as e:
        raise HTTPException(status_code=502, detail=f"NVIDIA LLM API error: {str(e)}")

    choices = body.get("choices", [{}])
    raw_text: str = choices[0].get("message", {}).get("content", "")
    print("LLM Raw Text:", raw_text)
    if not raw_text:
        raise HTTPException(status_code=502, detail="LLM returned an empty response")

    # Remove optional ```json ... ``` fences the model sometimes emits despite
    # the prompt forbidding them.
    cleaned = re.sub(r"```json\s*", "", raw_text, flags=re.IGNORECASE)
    cleaned = re.sub(r"```\s*", "", cleaned).strip()

    try:
        parsed = json.loads(cleaned)
    except json.JSONDecodeError:
        # Second chance: extract the outermost brace-delimited span and parse that.
        match = re.search(r"\{[\s\S]*\}", cleaned)
        if not match:
            raise HTTPException(
                status_code=502,
                detail=f"LLM did not return valid JSON. Preview: {raw_text[:400]}",
            )
        try:
            parsed = json.loads(match.group(0))
        except json.JSONDecodeError as e:
            raise HTTPException(status_code=502, detail=f"JSON parse error: {str(e)}")

    if not isinstance(parsed, dict):
        raise HTTPException(
            status_code=502,
            detail=f"LLM response is not a JSON object. Got: {type(parsed).__name__}",
        )
    print("LLM Parsed Data:", parsed)
    return parsed
| # โโ Request / Response models โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ | |
class AadhaarFrontData(BaseModel):
    """Structured fields extracted from the FRONT side of an Aadhaar card."""
    name: str        # card holder's full name
    dob: str         # date of birth, DD/MM/YYYY
    gender: str      # "Male", "Female", or "Other"
    aadhaar_no: str  # 12-digit Aadhaar number, digits only
class AadhaarBackData(BaseModel):
    """Structured address fields extracted from the BACK side of an Aadhaar card."""
    address: str       # house/flat number, building, street/locality
    village_city: str  # village, town, or city name
    state: str         # full state name
    pincode: str       # 6-digit postal code, digits only
| # โโ Endpoints โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ | |
# FIX: this handler was never registered on the app in the source as received
# (no route decorator — likely lost in extraction), so the endpoint did not
# exist. Path name chosen to match the docstring; confirm against the frontend.
@app.post("/extract-front", response_model=AadhaarFrontData)
async def extract_front(file: UploadFile = File(...)):
    """
    Upload the FRONT side of an Aadhaar card image.

    Pipeline:
      1. nemoretriever-ocr-v1 → raw OCR text
      2. nvidia-nemotron-nano-9b-v2 → structured JSON

    Returns: name, dob, gender, aadhaar_no
    """
    ocr_text = await run_ocr(file)
    if not ocr_text.strip():
        raise HTTPException(status_code=422, detail="OCR produced no text. Check the image quality.")

    parsed = call_llm(ocr_text, FRONT_SYSTEM_PROMPT)

    # Normalize the Aadhaar number: strip every non-digit, cap at 12 digits.
    raw_aadhaar = str(parsed.get("aadhaar_no", ""))
    aadhaar_digits = re.sub(r"\D", "", raw_aadhaar)

    # Length caps are a final guard against runaway LLM output.
    return AadhaarFrontData(
        name=str(parsed.get("name", "")).strip()[:100],
        dob=str(parsed.get("dob", "")).strip()[:12],
        gender=str(parsed.get("gender", "")).strip()[:20],
        aadhaar_no=aadhaar_digits[:12],
    )
# FIX: this handler was never registered on the app in the source as received
# (no route decorator — likely lost in extraction), so the endpoint did not
# exist. Path name chosen to match the docstring; confirm against the frontend.
@app.post("/extract-back", response_model=AadhaarBackData)
async def extract_back(file: UploadFile = File(...)):
    """
    Upload the BACK side of an Aadhaar card image.

    Pipeline:
      1. nemoretriever-ocr-v1 → raw OCR text
      2. nvidia-nemotron-nano-9b-v2 → structured JSON

    Returns: address, village_city, state, pincode
    """
    ocr_text = await run_ocr(file)
    if not ocr_text.strip():
        raise HTTPException(status_code=422, detail="OCR produced no text. Check the image quality.")

    parsed = call_llm(ocr_text, BACK_SYSTEM_PROMPT)

    # Normalize the PIN: strip every non-digit, cap at 6 digits.
    raw_pin = str(parsed.get("pincode", ""))
    pin_digits = re.sub(r"\D", "", raw_pin)[:6]

    # Length caps are a final guard against runaway LLM output.
    return AadhaarBackData(
        address=str(parsed.get("address", "")).strip()[:200],
        village_city=str(parsed.get("village_city", "")).strip()[:100],
        state=str(parsed.get("state", "")).strip()[:60],
        pincode=pin_digits,
    )
# FIX: registered on the app (the source as received had no route decorator).
@app.get("/health")
async def health_check():
    """Liveness probe: reports service status and the configured LLM model."""
    return {"status": "healthy", "model": LLM_MODEL}
| async def root(): | |
| return FileResponse("index.html") | |
if __name__ == "__main__":
    # Local/dev entry point. HuggingFace Spaces expects the server on port
    # 7860; reload=True is for development convenience.
    import uvicorn
    uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=True)