Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -2,12 +2,10 @@
|
|
| 2 |
Visiting Card & Letterhead OCR API
|
| 3 |
===================================
|
| 4 |
Two-step pipeline: nemoretriever-ocr-v1 → nvidia-nemotron-nano-9b-v2
|
| 5 |
-
|
| 6 |
Deploy on Hugging Face Spaces (Docker or Python SDK):
|
| 7 |
- Set secret NVIDIA_API_KEY in Space settings → Variables and secrets
|
| 8 |
- The app serves the HTML frontend at / and the API at /extract-card
|
| 9 |
- HF Spaces exposes port 7860 by default; the app listens on that port unless the HF_PORT env var overrides it
|
| 10 |
-
|
| 11 |
Local usage:
|
| 12 |
pip install fastapi uvicorn requests python-multipart
|
| 13 |
NVIDIA_API_KEY=nvapi-xxx python app.py
|
|
@@ -57,7 +55,6 @@ CARD_SYSTEM_PROMPT = """You are a business card and letterhead data extraction a
|
|
| 57 |
You will receive raw OCR text extracted from a visiting card, business card, or the header/footer of a business letter.
|
| 58 |
Parse it carefully and return ONLY a valid JSON object.
|
| 59 |
No markdown fences, no explanation, no preamble — just the raw JSON object.
|
| 60 |
-
|
| 61 |
JSON schema (return exactly this structure):
|
| 62 |
{
|
| 63 |
"company_name": "full name of the company or firm (string)",
|
|
@@ -75,7 +72,6 @@ JSON schema (return exactly this structure):
|
|
| 75 |
"website": "website URL if present (string)",
|
| 76 |
"fax": "fax number if present (string)"
|
| 77 |
}
|
| 78 |
-
|
| 79 |
Rules:
|
| 80 |
- company_name: usually the largest text or the text near a logo
|
| 81 |
- contact_person: individual's personal name distinct from company name
|
|
@@ -102,10 +98,19 @@ async def run_ocr(file: UploadFile) -> str:
|
|
| 102 |
content = await file.read()
|
| 103 |
image_b64 = base64.b64encode(content).decode()
|
| 104 |
|
| 105 |
-
|
| 106 |
-
|
|
|
|
| 107 |
|
| 108 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
|
| 110 |
try:
|
| 111 |
r = requests.post(OCR_URL, headers=OCR_HEADERS, json=payload, timeout=30)
|
|
@@ -260,4 +265,4 @@ async def serve_ui():
|
|
| 260 |
if __name__ == "__main__":
|
| 261 |
import uvicorn
|
| 262 |
port = int(os.environ.get("HF_PORT", 7860))
|
| 263 |
-
uvicorn.run("app:app", host="0.0.0.0", port=port, reload=False)
|
|
|
|
| 2 |
Visiting Card & Letterhead OCR API
|
| 3 |
===================================
|
| 4 |
Two-step pipeline: nemoretriever-ocr-v1 → nvidia-nemotron-nano-9b-v2
|
|
|
|
| 5 |
Deploy on Hugging Face Spaces (Docker or Python SDK):
|
| 6 |
- Set secret NVIDIA_API_KEY in Space settings → Variables and secrets
|
| 7 |
- The app serves the HTML frontend at / and the API at /extract-card
|
| 8 |
- HF Spaces exposes port 7860 by default; the app listens on that port unless the HF_PORT env var overrides it
|
|
|
|
| 9 |
Local usage:
|
| 10 |
pip install fastapi uvicorn requests python-multipart
|
| 11 |
NVIDIA_API_KEY=nvapi-xxx python app.py
|
|
|
|
| 55 |
You will receive raw OCR text extracted from a visiting card, business card, or the header/footer of a business letter.
|
| 56 |
Parse it carefully and return ONLY a valid JSON object.
|
| 57 |
No markdown fences, no explanation, no preamble — just the raw JSON object.
|
|
|
|
| 58 |
JSON schema (return exactly this structure):
|
| 59 |
{
|
| 60 |
"company_name": "full name of the company or firm (string)",
|
|
|
|
| 72 |
"website": "website URL if present (string)",
|
| 73 |
"fax": "fax number if present (string)"
|
| 74 |
}
|
|
|
|
| 75 |
Rules:
|
| 76 |
- company_name: usually the largest text or the text near a logo
|
| 77 |
- contact_person: individual's personal name distinct from company name
|
|
|
|
| 98 |
content = await file.read()
|
| 99 |
image_b64 = base64.b64encode(content).decode()
|
| 100 |
|
| 101 |
+
# Matches reference: base64 must be < 180,000 characters
|
| 102 |
+
if len(image_b64) >= 180_000:
|
| 103 |
+
raise HTTPException(413, "Image too large (base64 must be < 180,000 chars). Resize and retry.")
|
| 104 |
|
| 105 |
+
# Payload structure matches the official reference exactly
|
| 106 |
+
payload = {
|
| 107 |
+
"input": [
|
| 108 |
+
{
|
| 109 |
+
"type": "image_url",
|
| 110 |
+
"url": f"data:image/png;base64,{image_b64}",
|
| 111 |
+
}
|
| 112 |
+
]
|
| 113 |
+
}
|
| 114 |
|
| 115 |
try:
|
| 116 |
r = requests.post(OCR_URL, headers=OCR_HEADERS, json=payload, timeout=30)
|
|
|
|
| 265 |
if __name__ == "__main__":
|
| 266 |
import uvicorn
|
| 267 |
port = int(os.environ.get("HF_PORT", 7860))
|
| 268 |
+
uvicorn.run("app:app", host="0.0.0.0", port=port, reload=False)
|