Spaces:

BlackSpire
/

AadharAPI

Running

File size: 11,191 Bytes

import asyncio
import base64
import json
import logging
import os
import re

import requests
from fastapi import FastAPI, File, UploadFile, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel

# Application object; the title and description are passed to FastAPI as
# metadata for the generated API documentation.
app = FastAPI(
    title="Aadhaar Card OCR API",
    description="Two-step RAG pipeline: nemotron-ocr-v1 → nvidia-nemotron-nano-9b-v2 for Aadhaar card extraction",
)

# ── CORS ──────────────────────────────────────────────────────────────────────
# Any origin may call the API.  Credentials are deliberately disabled: the
# FastAPI CORS documentation states that wildcard origins/methods/headers must
# not be combined with allow_credentials=True, and none of the endpoints in
# this file read cookies or authorization headers from the caller.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# ── Configuration ─────────────────────────────────────────────────────────────
# The NVIDIA API key must come from the environment (e.g. a Secret in the
# HuggingFace Space settings).  No fallback key is embedded in the source: a
# credential committed to a repository is public, so the key that used to be
# hard-coded on this line should be revoked and replaced.
# Surrounding whitespace (a common copy/paste artefact in secrets) is removed.
NVIDIA_API_KEY = os.environ.get("NVIDIA_API_KEY", "").strip()

# The OCR endpoint uses the model id "nemotron-ocr-v1" (not "nemoretriever-ocr-v1").
OCR_URL = "https://ai.api.nvidia.com/v1/cv/nvidia/nemotron-ocr-v1"
LLM_URL = "https://integrate.api.nvidia.com/v1/chat/completions"
LLM_MODEL = "nvidia/nvidia-nemotron-nano-9b-v2"

# Headers sent with every OCR request.
OCR_HEADERS = {
    "Authorization": f"Bearer {NVIDIA_API_KEY}",
    "Accept": "application/json",
}

# Headers sent with every chat-completion (LLM) request.
LLM_HEADERS = {
    "Authorization": f"Bearer {NVIDIA_API_KEY}",
    "Content-Type": "application/json",
}

# ── System prompts ─────────────────────────────────────────────────────────────

# System prompt for the front-side extraction call.  It instructs the model to
# return a bare JSON object with exactly the keys name, dob, gender and
# aadhaar_no.  The schema and the rules must agree with each other: the
# Aadhaar number is requested WITHOUT spaces, and gender is a required field
# (so it must not appear in the "do not include" rule).
FRONT_SYSTEM_PROMPT = """You are an Aadhaar card front-side data extraction assistant.
You will receive raw OCR text extracted from the FRONT side of an Indian Aadhaar card.
Parse it carefully and return ONLY a valid JSON object.
No markdown fences, no explanation, no preamble — just the raw JSON object.
JSON schema (return exactly this structure):
{
  "name": "full name of the card holder (string)",
  "dob": "date of birth in DD/MM/YYYY format (string)",
  "gender": "gender Male, Female, or Other (string)",
  "aadhaar_no": "12-digit Aadhaar number as a string, digits only, no spaces"
}
Rules:
- name: the primary card holder's full name (usually in bold, after "Name:" or just prominently placed)
- dob: look for "DOB", "Date of Birth", "जन्म तिथि" — output in DD/MM/YYYY format; if already in that format keep it
- gender: look for "Male", "Female", "Other", or Hindi equivalents "पुरुष", "महिला", "अन्य"
- aadhaar_no: the 12-digit number, usually printed in groups like "XXXX XXXX XXXX" — remove all spaces and return only digits
- If a field is not found, use "" for strings
- Do NOT include address details or any other fields not in the schema"""

# System prompt for the back-side (address) extraction call.  It instructs the
# model to return a bare JSON object with exactly the keys address,
# village_city, state and pincode.  The text is assembled one line per tuple
# entry and joined with newlines (no trailing newline).
BACK_SYSTEM_PROMPT = "\n".join(
    (
        "You are an Aadhaar card back-side data extraction assistant.",
        "You will receive raw OCR text extracted from the BACK side of an Indian Aadhaar card.",
        "Parse it carefully and return ONLY a valid JSON object.",
        "No markdown fences, no explanation, no preamble — just the raw JSON object.",
        "JSON schema (return exactly this structure):",
        "{",
        '  "address": "door/flat number and street/locality/road name (string)",',
        '  "village_city": "village name or city/town name (string)",',
        '  "state": "state name (string)",',
        '  "pincode": "6-digit PIN code as a string"',
        "}",
        "Rules:",
        "- address: the first line(s) of the address — house/flat number, building name, street or locality; exclude city, district, state, and PIN",
        '- village_city: look for village name, town, or city; may also appear under "District" — prefer the more specific locality name over the district',
        '- state: the full state name (e.g. "Maharashtra", "Tamil Nadu"); look near the end of the address block',
        '- pincode: the 6-digit postal code; look for "PIN", "PIN Code", or a standalone 6-digit number at the end of the address',
        '- If a field is not found, use "" for strings',
        "- Do NOT include the card holder's name or Aadhaar number — focus only on address fields",
    )
)


# ── Helper ─────────────────────────────────────────────────────────────────────

# Upper bound (exclusive) on the length of the base64-encoded image.
_MAX_IMAGE_B64_CHARS = 1_000_000


def _collect_ocr_lines(ocr_json) -> list:
    """Return every non-empty recognised text string found in an OCR response.

    Two response layouts are handled: ``text_detections`` at the top level, or
    ``text_detections`` nested in the first element of ``data``.  Each
    detection may carry its text as ``text_prediction.text`` or directly as
    ``text``.  Entries that do not have the expected type are skipped instead
    of raising, so a malformed upstream payload yields fewer lines rather than
    an internal server error.
    """
    if not isinstance(ocr_json, dict):
        return []

    detections = ocr_json.get("text_detections") or []
    if not detections:
        data = ocr_json.get("data")
        if isinstance(data, list) and data and isinstance(data[0], dict):
            detections = data[0].get("text_detections") or []
    if not isinstance(detections, list):
        return []

    lines = []
    for det in detections:
        if not isinstance(det, dict):
            continue
        if "text_prediction" in det:
            prediction = det["text_prediction"]
            text = prediction.get("text", "") if isinstance(prediction, dict) else ""
        else:
            text = det.get("text", "")
        # A null or non-string text value is treated as "no text".
        if isinstance(text, str) and text.strip():
            lines.append(text.strip())
    return lines


async def run_ocr(file: UploadFile) -> str:
    """Upload image to NVIDIA OCR and return concatenated plain text.

    Args:
        file: The uploaded image; its full content is read into memory.

    Returns:
        The recognised text fragments joined with newlines ("" when the
        response contains no usable detections).

    Raises:
        HTTPException: 400 if the upload is empty, 413 if the base64-encoded
            image reaches the size limit, 502 if the OCR request fails or the
            service answers with something that is not JSON.
    """
    content = await file.read()
    if not content:
        raise HTTPException(status_code=400, detail="Uploaded file is empty.")

    image_b64 = base64.b64encode(content).decode()

    # Reject oversized images before spending an upstream request on them.
    if len(image_b64) >= _MAX_IMAGE_B64_CHARS:
        raise HTTPException(
            status_code=413,
            detail="Image too large (base64 must be < 1,000,000 chars). Resize the image and try again.",
        )

    # The data URL is always labelled image/png, whatever the uploaded file
    # actually is.  NOTE(review): confirm the OCR service detects the real
    # image format from the bytes rather than trusting this label.
    payload = {
        "input": [
            {
                "type": "image_url",
                "url": f"data:image/png;base64,{image_b64}",
            }
        ]
    }

    def _post():
        return requests.post(OCR_URL, headers=OCR_HEADERS, json=payload, timeout=30)

    try:
        # requests is blocking; run it in the default executor so the event
        # loop can keep serving other requests during the (up to 30 s) call.
        response = await asyncio.get_running_loop().run_in_executor(None, _post)
        response.raise_for_status()
        ocr_json = response.json()
    except requests.exceptions.RequestException as e:
        raise HTTPException(status_code=502, detail=f"NVIDIA OCR API error: {str(e)}") from e
    except ValueError as e:
        # Older requests versions raise a plain ValueError for a non-JSON body.
        raise HTTPException(status_code=502, detail="NVIDIA OCR API returned a non-JSON response") from e

    lines = _collect_ocr_lines(ocr_json)

    # The OCR payload contains personal identity data, so only its size is
    # logged (at debug level) — never the text itself.
    logging.getLogger(__name__).debug("OCR returned %d text line(s)", len(lines))

    return "\n".join(lines)


def _first_choice_content(llm_json) -> str:
    """Return ``choices[0].message.content`` from a chat-completion response.

    Returns "" when any level is missing, empty, or not of the expected type
    (for example ``"choices": []`` or ``"message": null``), so the caller can
    report an empty response instead of crashing on an index/attribute error.
    """
    if not isinstance(llm_json, dict):
        return ""
    choices = llm_json.get("choices")
    if not isinstance(choices, list) or not choices or not isinstance(choices[0], dict):
        return ""
    message = choices[0].get("message")
    if not isinstance(message, dict):
        return ""
    content = message.get("content")
    return content if isinstance(content, str) else ""


def _parse_json_object(raw_text: str) -> dict:
    """Decode the JSON object contained in raw LLM output.

    Markdown code fences are removed first.  If the remaining text is not
    valid JSON as a whole, the span from the first ``{`` to the last ``}`` is
    tried instead.

    Raises:
        HTTPException: 502 if no JSON can be decoded or the decoded value is
            not a JSON object.
    """
    cleaned = re.sub(r"```json\s*", "", raw_text, flags=re.IGNORECASE)
    cleaned = re.sub(r"```\s*", "", cleaned).strip()

    try:
        parsed = json.loads(cleaned)
    except json.JSONDecodeError:
        match = re.search(r"\{[\s\S]*\}", cleaned)
        if not match:
            raise HTTPException(
                status_code=502,
                detail=f"LLM did not return valid JSON. Preview: {raw_text[:400]}",
            ) from None
        try:
            parsed = json.loads(match.group(0))
        except json.JSONDecodeError as e:
            raise HTTPException(status_code=502, detail=f"JSON parse error: {str(e)}") from e

    if not isinstance(parsed, dict):
        raise HTTPException(
            status_code=502,
            detail=f"LLM response is not a JSON object. Got: {type(parsed).__name__}",
        )
    return parsed


def call_llm(ocr_text: str, system_prompt: str) -> dict:
    """Send OCR text to the LLM with the given system prompt and return parsed JSON dict.

    This function performs a blocking HTTP request (timeout 200 s).

    Args:
        ocr_text: Plain text produced by the OCR step.
        system_prompt: Instructions describing the JSON object to return.

    Returns:
        The JSON object decoded from the model's reply.

    Raises:
        HTTPException: 502 if the request fails, the service answers with a
            non-JSON body, the reply is empty, or the reply does not contain
            a JSON object.
    """
    llm_payload = {
        "model": LLM_MODEL,
        "max_tokens": 1024,
        "temperature": 0.2,
        "top_p": 0.9,
        "messages": [
            {"role": "system", "content": system_prompt},
            {
                "role": "user",
                "content": (
                    f"Here is the OCR text extracted from the Aadhaar card:\n\n"
                    f"{ocr_text}\n\n"
                    f"Extract the required data and return ONLY the JSON object."
                ),
            },
        ],
    }

    try:
        llm_response = requests.post(LLM_URL, headers=LLM_HEADERS, json=llm_payload, timeout=200)
        llm_response.raise_for_status()
        llm_json = llm_response.json()
    except requests.exceptions.RequestException as e:
        raise HTTPException(status_code=502, detail=f"NVIDIA LLM API error: {str(e)}") from e
    except ValueError as e:
        # Older requests versions raise a plain ValueError for a non-JSON body.
        raise HTTPException(status_code=502, detail="NVIDIA LLM API returned a non-JSON response") from e

    raw_text = _first_choice_content(llm_json)
    if not raw_text:
        raise HTTPException(status_code=502, detail="LLM returned an empty response")

    parsed = _parse_json_object(raw_text)

    # The reply contains personal identity data, so only the field names are
    # logged (at debug level) — never the values.
    logging.getLogger(__name__).debug("LLM returned fields: %s", sorted(parsed))
    return parsed


# ── Request / Response models ─────────────────────────────────────────────────

# Response schema of POST /extract-front (it is that route's response_model).
# All four fields are required strings; extract_front supplies "" for a key
# the LLM reply does not contain.
class AadhaarFrontData(BaseModel):
    name: str  # card holder's name; extract_front truncates it to 100 characters
    dob: str  # date of birth (the prompt asks for DD/MM/YYYY); truncated to 12 characters
    gender: str  # truncated to 20 characters
    aadhaar_no: str  # digits only (non-digits are stripped), at most 12 of them


# Response schema of POST /extract-back (it is that route's response_model).
# All four fields are required strings; extract_back supplies "" for a key
# the LLM reply does not contain.
class AadhaarBackData(BaseModel):
    address: str  # street-level address part; extract_back truncates it to 200 characters
    village_city: str  # truncated to 100 characters
    state: str  # truncated to 60 characters
    pincode: str  # digits only (non-digits are stripped), at most 6 of them


# ── Endpoints ─────────────────────────────────────────────────────────────────

@app.post("/extract-front", response_model=AadhaarFrontData)
async def extract_front(file: UploadFile = File(...)):
    """
    Upload the FRONT side of an Aadhaar card image.
    Pipeline:
      1. nemotron-ocr-v1  →  raw OCR text
      2. nvidia-nemotron-nano-9b-v2  →  structured JSON
    Returns: name, dob, gender, aadhaar_no
    Errors: 422 when OCR finds no text; errors raised by the OCR/LLM steps are passed through.
    """
    ocr_text = await run_ocr(file)

    if not ocr_text.strip():
        raise HTTPException(status_code=422, detail="OCR produced no text. Check the image quality.")

    # call_llm makes a blocking HTTP request with a long timeout; run it in the
    # default executor so the event loop stays free for other requests.
    loop = asyncio.get_running_loop()
    parsed = await loop.run_in_executor(None, call_llm, ocr_text, FRONT_SYSTEM_PROMPT)

    def field(key: str) -> str:
        # A missing key or a JSON null becomes "" — str(None) would otherwise
        # leak the literal text "None" into the response.
        value = parsed.get(key)
        return "" if value is None else str(value).strip()

    # Keep only the digits of the Aadhaar number (drops spaces, dashes, etc.).
    aadhaar_digits = re.sub(r"\D", "", field("aadhaar_no"))

    return AadhaarFrontData(
        name=field("name")[:100],
        dob=field("dob")[:12],
        gender=field("gender")[:20],
        aadhaar_no=aadhaar_digits[:12],
    )


@app.post("/extract-back", response_model=AadhaarBackData)
async def extract_back(file: UploadFile = File(...)):
    """
    Upload the BACK side of an Aadhaar card image.
    Pipeline:
      1. nemotron-ocr-v1  →  raw OCR text
      2. nvidia-nemotron-nano-9b-v2  →  structured JSON
    Returns: address, village_city, state, pincode
    Errors: 422 when OCR finds no text; errors raised by the OCR/LLM steps are passed through.
    """
    ocr_text = await run_ocr(file)

    if not ocr_text.strip():
        raise HTTPException(status_code=422, detail="OCR produced no text. Check the image quality.")

    # call_llm makes a blocking HTTP request with a long timeout; run it in the
    # default executor so the event loop stays free for other requests.
    loop = asyncio.get_running_loop()
    parsed = await loop.run_in_executor(None, call_llm, ocr_text, BACK_SYSTEM_PROMPT)

    def field(key: str) -> str:
        # A missing key or a JSON null becomes "" — str(None) would otherwise
        # leak the literal text "None" into the response.
        value = parsed.get(key)
        return "" if value is None else str(value).strip()

    # Keep only the digits of the PIN code, at most six of them.
    pin_digits = re.sub(r"\D", "", field("pincode"))[:6]

    return AadhaarBackData(
        address=field("address")[:200],
        village_city=field("village_city")[:100],
        state=field("state")[:60],
        pincode=pin_digits,
    )


@app.get("/health")
async def health_check():
    """Report a fixed "healthy" status together with the configured LLM model id."""
    report = {"status": "healthy"}
    report["model"] = LLM_MODEL
    return report


@app.get("/")
async def root():
    """Serve ``index.html``; the path is relative, so it is resolved against the working directory."""
    landing_page = "index.html"
    return FileResponse(landing_page)


if __name__ == "__main__":
    # Direct-execution entry point: start uvicorn on all interfaces, port 7860,
    # with auto-reload enabled.  The "app:app" import string refers to the
    # `app` object in a module named `app`.
    import uvicorn

    server_options = {"host": "0.0.0.0", "port": 7860, "reload": True}
    uvicorn.run("app:app", **server_options)