Spaces:

BlackSpire
/

AadharAPI

Running

App Files Files Community

BlackSpire committed about 14 hours ago

Commit

7a28e6a

verified ·

1 Parent(s): 23fbe07

Update app.py

Browse files

Files changed (1) hide show

app.py +9 -14

app.py CHANGED Viewed

@@ -11,7 +11,7 @@ import os
 app = FastAPI(
     title="Aadhaar Card OCR API",
-    description="Two-step RAG pipeline: nemoretriever-ocr-v1 → nvidia-nemotron-nano-9b-v2 for Aadhaar card extraction",
 )
 # ── CORS ──────────────────────────────────────────────────────────────────────
@@ -27,7 +27,8 @@ app.add_middleware(
 # Set NVIDIA_API_KEY as a Secret in your HuggingFace Space settings
 NVIDIA_API_KEY = os.environ.get("NVIDIA_API_KEY", "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")
-OCR_URL = "https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-ocr-v1"
 LLM_URL = "https://integrate.api.nvidia.com/v1/chat/completions"
 LLM_MODEL = "nvidia/nvidia-nemotron-nano-9b-v2"
@@ -47,7 +48,6 @@ FRONT_SYSTEM_PROMPT = """You are an Aadhaar card front-side data extraction assi
 You will receive raw OCR text extracted from the FRONT side of an Indian Aadhaar card.
 Parse it carefully and return ONLY a valid JSON object.
 No markdown fences, no explanation, no preamble — just the raw JSON object.
 JSON schema (return exactly this structure):
 {
   "name": "full name of the card holder (string)",
@@ -55,7 +55,6 @@ JSON schema (return exactly this structure):
   "gender": "gender Male, Female, or Other (string)",
   "aadhaar_no": "12-digit Aadhaar number as a string, digits only, yes spaces"
 }
 Rules:
 - name: the primary card holder's full name (usually in bold, after "Name:" or just prominently placed)
 - dob: look for "DOB", "Date of Birth", "जन्म तिथि" — output in DD/MM/YYYY format; if already in that format keep it
@@ -68,7 +67,6 @@ BACK_SYSTEM_PROMPT = """You are an Aadhaar card back-side data extraction assist
 You will receive raw OCR text extracted from the BACK side of an Indian Aadhaar card.
 Parse it carefully and return ONLY a valid JSON object.
 No markdown fences, no explanation, no preamble — just the raw JSON object.
 JSON schema (return exactly this structure):
 {
   "address": "door/flat number and street/locality/road name (string)",
@@ -76,7 +74,6 @@ JSON schema (return exactly this structure):
   "state": "state name (string)",
   "pincode": "6-digit PIN code as a string"
 }
 Rules:
 - address: the first line(s) of the address — house/flat number, building name, street or locality; exclude city, district, state, and PIN
 - village_city: look for village name, town, or city; may also appear under "District" — prefer the more specific locality name over the district
@@ -93,12 +90,14 @@ async def run_ocr(file: UploadFile) -> str:
     content = await file.read()
     image_b64 = base64.b64encode(content).decode()
-    if len(image_b64) >= 500_000:
         raise HTTPException(
             status_code=413,
-            detail="Image too large (base64 > 180 KB). Resize the image and try again.",
         )
     payload = {
         "input": [
             {
@@ -221,11 +220,9 @@ class AadhaarBackData(BaseModel):
 async def extract_front(file: UploadFile = File(...)):
     """
     Upload the FRONT side of an Aadhaar card image.
     Pipeline:
-      1. nemoretriever-ocr-v1  →  raw OCR text
       2. nvidia-nemotron-nano-9b-v2  →  structured JSON
     Returns: name, dob, gender, aadhaar_no
     """
     ocr_text = await run_ocr(file)
@@ -250,11 +247,9 @@ async def extract_front(file: UploadFile = File(...)):
 async def extract_back(file: UploadFile = File(...)):
     """
     Upload the BACK side of an Aadhaar card image.
     Pipeline:
-      1. nemoretriever-ocr-v1  →  raw OCR text
       2. nvidia-nemotron-nano-9b-v2  →  structured JSON
     Returns: address, village_city, state, pincode
     """
     ocr_text = await run_ocr(file)

 app = FastAPI(
     title="Aadhaar Card OCR API",
+    description="Two-step RAG pipeline: nemotron-ocr-v1 → nvidia-nemotron-nano-9b-v2 for Aadhaar card extraction",
 )
 # ── CORS ──────────────────────────────────────────────────────────────────────
 # Set NVIDIA_API_KEY as a Secret in your HuggingFace Space settings
 NVIDIA_API_KEY = os.environ.get("NVIDIA_API_KEY", "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")
+# FIX 1: Corrected endpoint — matches the official reference (nemotron-ocr-v1, not nemoretriever-ocr-v1)
+OCR_URL = "https://ai.api.nvidia.com/v1/cv/nvidia/nemotron-ocr-v1"
 LLM_URL = "https://integrate.api.nvidia.com/v1/chat/completions"
 LLM_MODEL = "nvidia/nvidia-nemotron-nano-9b-v2"
 You will receive raw OCR text extracted from the FRONT side of an Indian Aadhaar card.
 Parse it carefully and return ONLY a valid JSON object.
 No markdown fences, no explanation, no preamble — just the raw JSON object.
 JSON schema (return exactly this structure):
 {
   "name": "full name of the card holder (string)",
   "gender": "gender Male, Female, or Other (string)",
   "aadhaar_no": "12-digit Aadhaar number as a string, digits only, yes spaces"
 }
 Rules:
 - name: the primary card holder's full name (usually in bold, after "Name:" or just prominently placed)
 - dob: look for "DOB", "Date of Birth", "जन्म तिथि" — output in DD/MM/YYYY format; if already in that format keep it
 You will receive raw OCR text extracted from the BACK side of an Indian Aadhaar card.
 Parse it carefully and return ONLY a valid JSON object.
 No markdown fences, no explanation, no preamble — just the raw JSON object.
 JSON schema (return exactly this structure):
 {
   "address": "door/flat number and street/locality/road name (string)",
   "state": "state name (string)",
   "pincode": "6-digit PIN code as a string"
 }
 Rules:
 - address: the first line(s) of the address — house/flat number, building name, street or locality; exclude city, district, state, and PIN
 - village_city: look for village name, town, or city; may also appear under "District" — prefer the more specific locality name over the district
     content = await file.read()
     image_b64 = base64.b64encode(content).decode()
+    # FIX 2: Matches reference limit — base64 must be < 180,000 characters
+    if len(image_b64) >= 180_000:
         raise HTTPException(
             status_code=413,
+            detail="Image too large (base64 must be < 180,000 chars). Resize the image and try again.",
         )
+    # Payload structure matches the official reference exactly
     payload = {
         "input": [
             {
 async def extract_front(file: UploadFile = File(...)):
     """
     Upload the FRONT side of an Aadhaar card image.
     Pipeline:
+      1. nemotron-ocr-v1  →  raw OCR text
       2. nvidia-nemotron-nano-9b-v2  →  structured JSON
     Returns: name, dob, gender, aadhaar_no
     """
     ocr_text = await run_ocr(file)
 async def extract_back(file: UploadFile = File(...)):
     """
     Upload the BACK side of an Aadhaar card image.
     Pipeline:
+      1. nemotron-ocr-v1  →  raw OCR text
       2. nvidia-nemotron-nano-9b-v2  →  structured JSON
     Returns: address, village_city, state, pincode
     """
     ocr_text = await run_ocr(file)