BlackSpire committed on
Commit
7a28e6a
·
verified ·
1 Parent(s): 23fbe07

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -14
app.py CHANGED
@@ -11,7 +11,7 @@ import os
11
 
12
  app = FastAPI(
13
  title="Aadhaar Card OCR API",
14
- description="Two-step RAG pipeline: nemoretriever-ocr-v1 → nvidia-nemotron-nano-9b-v2 for Aadhaar card extraction",
15
  )
16
 
17
  # ── CORS ──────────────────────────────────────────────────────────────────────
@@ -27,7 +27,8 @@ app.add_middleware(
27
  # Set NVIDIA_API_KEY as a Secret in your HuggingFace Space settings
28
  NVIDIA_API_KEY = os.environ.get("NVIDIA_API_KEY", "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")
29
 
30
- OCR_URL = "https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-ocr-v1"
 
31
  LLM_URL = "https://integrate.api.nvidia.com/v1/chat/completions"
32
  LLM_MODEL = "nvidia/nvidia-nemotron-nano-9b-v2"
33
 
@@ -47,7 +48,6 @@ FRONT_SYSTEM_PROMPT = """You are an Aadhaar card front-side data extraction assi
47
  You will receive raw OCR text extracted from the FRONT side of an Indian Aadhaar card.
48
  Parse it carefully and return ONLY a valid JSON object.
49
  No markdown fences, no explanation, no preamble — just the raw JSON object.
50
-
51
  JSON schema (return exactly this structure):
52
  {
53
  "name": "full name of the card holder (string)",
@@ -55,7 +55,6 @@ JSON schema (return exactly this structure):
55
  "gender": "gender Male, Female, or Other (string)",
56
  "aadhaar_no": "12-digit Aadhaar number as a string, digits only, no spaces"
57
  }
58
-
59
  Rules:
60
  - name: the primary card holder's full name (usually in bold, after "Name:" or just prominently placed)
61
  - dob: look for "DOB", "Date of Birth", "जन्म तिथि" — output in DD/MM/YYYY format; if already in that format keep it
@@ -68,7 +67,6 @@ BACK_SYSTEM_PROMPT = """You are an Aadhaar card back-side data extraction assist
68
  You will receive raw OCR text extracted from the BACK side of an Indian Aadhaar card.
69
  Parse it carefully and return ONLY a valid JSON object.
70
  No markdown fences, no explanation, no preamble — just the raw JSON object.
71
-
72
  JSON schema (return exactly this structure):
73
  {
74
  "address": "door/flat number and street/locality/road name (string)",
@@ -76,7 +74,6 @@ JSON schema (return exactly this structure):
76
  "state": "state name (string)",
77
  "pincode": "6-digit PIN code as a string"
78
  }
79
-
80
  Rules:
81
  - address: the first line(s) of the address — house/flat number, building name, street or locality; exclude city, district, state, and PIN
82
  - village_city: look for village name, town, or city; may also appear under "District" — prefer the more specific locality name over the district
@@ -93,12 +90,14 @@ async def run_ocr(file: UploadFile) -> str:
93
  content = await file.read()
94
  image_b64 = base64.b64encode(content).decode()
95
 
96
- if len(image_b64) >= 500_000:
 
97
  raise HTTPException(
98
  status_code=413,
99
- detail="Image too large (base64 > 180 KB). Resize the image and try again.",
100
  )
101
 
 
102
  payload = {
103
  "input": [
104
  {
@@ -221,11 +220,9 @@ class AadhaarBackData(BaseModel):
221
  async def extract_front(file: UploadFile = File(...)):
222
  """
223
  Upload the FRONT side of an Aadhaar card image.
224
-
225
  Pipeline:
226
- 1. nemoretriever-ocr-v1 → raw OCR text
227
  2. nvidia-nemotron-nano-9b-v2 → structured JSON
228
-
229
  Returns: name, dob, gender, aadhaar_no
230
  """
231
  ocr_text = await run_ocr(file)
@@ -250,11 +247,9 @@ async def extract_front(file: UploadFile = File(...)):
250
  async def extract_back(file: UploadFile = File(...)):
251
  """
252
  Upload the BACK side of an Aadhaar card image.
253
-
254
  Pipeline:
255
- 1. nemoretriever-ocr-v1 → raw OCR text
256
  2. nvidia-nemotron-nano-9b-v2 → structured JSON
257
-
258
  Returns: address, village_city, state, pincode
259
  """
260
  ocr_text = await run_ocr(file)
 
11
 
12
  app = FastAPI(
13
  title="Aadhaar Card OCR API",
14
+ description="Two-step RAG pipeline: nemotron-ocr-v1 → nvidia-nemotron-nano-9b-v2 for Aadhaar card extraction",
15
  )
16
 
17
  # ── CORS ──────────────────────────────────────────────────────────────────────
 
27
  # Set NVIDIA_API_KEY as a Secret in your HuggingFace Space settings
28
  NVIDIA_API_KEY = os.environ.get("NVIDIA_API_KEY", "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")
29
 
30
+ # FIX 1: Corrected endpoint — matches the official reference (nemotron-ocr-v1, not nemoretriever-ocr-v1)
31
+ OCR_URL = "https://ai.api.nvidia.com/v1/cv/nvidia/nemotron-ocr-v1"
32
  LLM_URL = "https://integrate.api.nvidia.com/v1/chat/completions"
33
  LLM_MODEL = "nvidia/nvidia-nemotron-nano-9b-v2"
34
 
 
48
  You will receive raw OCR text extracted from the FRONT side of an Indian Aadhaar card.
49
  Parse it carefully and return ONLY a valid JSON object.
50
  No markdown fences, no explanation, no preamble — just the raw JSON object.
 
51
  JSON schema (return exactly this structure):
52
  {
53
  "name": "full name of the card holder (string)",
 
55
  "gender": "gender Male, Female, or Other (string)",
56
  "aadhaar_no": "12-digit Aadhaar number as a string, digits only, no spaces"
57
  }
 
58
  Rules:
59
  - name: the primary card holder's full name (usually in bold, after "Name:" or just prominently placed)
60
  - dob: look for "DOB", "Date of Birth", "जन्म तिथि" — output in DD/MM/YYYY format; if already in that format keep it
 
67
  You will receive raw OCR text extracted from the BACK side of an Indian Aadhaar card.
68
  Parse it carefully and return ONLY a valid JSON object.
69
  No markdown fences, no explanation, no preamble — just the raw JSON object.
 
70
  JSON schema (return exactly this structure):
71
  {
72
  "address": "door/flat number and street/locality/road name (string)",
 
74
  "state": "state name (string)",
75
  "pincode": "6-digit PIN code as a string"
76
  }
 
77
  Rules:
78
  - address: the first line(s) of the address — house/flat number, building name, street or locality; exclude city, district, state, and PIN
79
  - village_city: look for village name, town, or city; may also appear under "District" — prefer the more specific locality name over the district
 
90
  content = await file.read()
91
  image_b64 = base64.b64encode(content).decode()
92
 
93
+ # FIX 2: Matches reference limit — base64 must be < 180,000 characters
94
+ if len(image_b64) >= 180_000:
95
  raise HTTPException(
96
  status_code=413,
97
+ detail="Image too large (base64 must be < 180,000 chars). Resize the image and try again.",
98
  )
99
 
100
+ # Payload structure matches the official reference exactly
101
  payload = {
102
  "input": [
103
  {
 
220
  async def extract_front(file: UploadFile = File(...)):
221
  """
222
  Upload the FRONT side of an Aadhaar card image.
 
223
  Pipeline:
224
+ 1. nemotron-ocr-v1 → raw OCR text
225
  2. nvidia-nemotron-nano-9b-v2 → structured JSON
 
226
  Returns: name, dob, gender, aadhaar_no
227
  """
228
  ocr_text = await run_ocr(file)
 
247
  async def extract_back(file: UploadFile = File(...)):
248
  """
249
  Upload the BACK side of an Aadhaar card image.
 
250
  Pipeline:
251
+ 1. nemotron-ocr-v1 → raw OCR text
252
  2. nvidia-nemotron-nano-9b-v2 → structured JSON
 
253
  Returns: address, village_city, state, pincode
254
  """
255
  ocr_text = await run_ocr(file)