BlackSpire committed on
Commit
4d6a332
·
verified ·
1 Parent(s): 1344de4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -8
app.py CHANGED
@@ -2,12 +2,10 @@
2
  Visiting Card & Letterhead OCR API
3
  ===================================
4
  Two-step pipeline: nemoretriever-ocr-v1 → nvidia-nemotron-nano-9b-v2
5
-
6
  Deploy on Hugging Face Spaces (Docker or Python SDK):
7
  - Set secret NVIDIA_API_KEY in Space settings → Variables and secrets
8
  - The app serves the HTML frontend at / and the API at /extract-card
9
  - HF Spaces exposes port 7860 by default (set via HF_PORT env var)
10
-
11
  Local usage:
12
  pip install fastapi uvicorn requests python-multipart
13
  NVIDIA_API_KEY=nvapi-xxx python visiting_card_api.py
@@ -57,7 +55,6 @@ CARD_SYSTEM_PROMPT = """You are a business card and letterhead data extraction a
57
  You will receive raw OCR text extracted from a visiting card, business card, or the header/footer of a business letter.
58
  Parse it carefully and return ONLY a valid JSON object.
59
  No markdown fences, no explanation, no preamble — just the raw JSON object.
60
-
61
  JSON schema (return exactly this structure):
62
  {
63
  "company_name": "full name of the company or firm (string)",
@@ -75,7 +72,6 @@ JSON schema (return exactly this structure):
75
  "website": "website URL if present (string)",
76
  "fax": "fax number if present (string)"
77
  }
78
-
79
  Rules:
80
  - company_name: usually the largest text or the text near a logo
81
  - contact_person: individual's personal name distinct from company name
@@ -102,10 +98,19 @@ async def run_ocr(file: UploadFile) -> str:
102
  content = await file.read()
103
  image_b64 = base64.b64encode(content).decode()
104
 
105
- if len(image_b64) >= 500_000:
106
- raise HTTPException(413, "Image too large. Resize and retry.")
 
107
 
108
- payload = {"input": [{"type": "image_url", "url": f"data:image/png;base64,{image_b64}"}]}
 
 
 
 
 
 
 
 
109
 
110
  try:
111
  r = requests.post(OCR_URL, headers=OCR_HEADERS, json=payload, timeout=30)
@@ -260,4 +265,4 @@ async def serve_ui():
260
  if __name__ == "__main__":
261
  import uvicorn
262
  port = int(os.environ.get("HF_PORT", 7860))
263
- uvicorn.run("app:app", host="0.0.0.0", port=port, reload=False)
 
2
  Visiting Card & Letterhead OCR API
3
  ===================================
4
  Two-step pipeline: nemoretriever-ocr-v1 → nvidia-nemotron-nano-9b-v2
 
5
  Deploy on Hugging Face Spaces (Docker or Python SDK):
6
  - Set secret NVIDIA_API_KEY in Space settings → Variables and secrets
7
  - The app serves the HTML frontend at / and the API at /extract-card
8
  - HF Spaces exposes port 7860 by default (set via HF_PORT env var)
 
9
  Local usage:
10
  pip install fastapi uvicorn requests python-multipart
11
  NVIDIA_API_KEY=nvapi-xxx python visiting_card_api.py
 
55
  You will receive raw OCR text extracted from a visiting card, business card, or the header/footer of a business letter.
56
  Parse it carefully and return ONLY a valid JSON object.
57
  No markdown fences, no explanation, no preamble — just the raw JSON object.
 
58
  JSON schema (return exactly this structure):
59
  {
60
  "company_name": "full name of the company or firm (string)",
 
72
  "website": "website URL if present (string)",
73
  "fax": "fax number if present (string)"
74
  }
 
75
  Rules:
76
  - company_name: usually the largest text or the text near a logo
77
  - contact_person: individual's personal name distinct from company name
 
98
  content = await file.read()
99
  image_b64 = base64.b64encode(content).decode()
100
 
101
+ # Matches reference: base64 must be < 180,000 characters
102
+ if len(image_b64) >= 180_000:
103
+ raise HTTPException(413, "Image too large (base64 must be < 180,000 chars). Resize and retry.")
104
 
105
+ # Payload structure matches the official reference exactly
106
+ payload = {
107
+ "input": [
108
+ {
109
+ "type": "image_url",
110
+ "url": f"data:image/png;base64,{image_b64}",
111
+ }
112
+ ]
113
+ }
114
 
115
  try:
116
  r = requests.post(OCR_URL, headers=OCR_HEADERS, json=payload, timeout=30)
 
265
  if __name__ == "__main__":
266
  import uvicorn
267
  port = int(os.environ.get("HF_PORT", 7860))
268
+ uvicorn.run("app:app", host="0.0.0.0", port=port, reload=False)