import os
import json
import time
import gradio as gr
import google.generativeai as genai
from PIL import Image
# ============================================================
# Configuration
# ============================================================
# Model identifier handed to the Gemini client below.
MODEL_NAME = "gemini-2.5-flash"

# The API key is read from the environment; startup aborts when it is
# unset or empty so the app never runs without credentials.
API_KEY = os.environ.get("GEMINI_API_KEY")
if not API_KEY:
    raise RuntimeError("GEMINI_API_KEY missing in Hugging Face Secrets")

genai.configure(api_key=API_KEY)
model = genai.GenerativeModel(MODEL_NAME)

# Simple global rate limit (HF protection): minimum spacing between
# accepted calls, and the timestamp of the most recent accepted call.
MIN_INTERVAL = 3  # seconds
LAST_CALL_TS = 0
# ============================================================
# Core Extraction Logic (Doc-Agnostic)
# ============================================================
# Instruction prompt sent alongside every image. Kept at module level so the
# large literal is defined once and the handler body stays readable.
_EXTRACTION_PROMPT = """
You are a document intelligence system.
Your job is to analyze ANY document image and produce a
Docsumo-compatible structured JSON output.
DOCUMENT TYPES MAY INCLUDE (but are not limited to):
- Financial statements
- Invoices
- Forms
- Reports
- Letters
- Tables-only documents
--------------------------------
TASKS
--------------------------------
1. Identify document_type and document_subtype.
2. Extract all key-value fields visible in the document.
3. Extract ALL tables with exact row/column structure.
4. If charts/graphs exist, summarize insights textually.
5. Do NOT hallucinate missing data.
6. Preserve numbers exactly as shown.
--------------------------------
OUTPUT RULES
--------------------------------
- Output ONLY valid JSON
- No markdown
- No explanations
- Follow the schema EXACTLY
--------------------------------
DOCSUMO-COMPATIBLE JSON SCHEMA
--------------------------------
{
"document_metadata": {
"document_type": string,
"document_subtype": string,
"page_count": number,
"language": string
},
"extraction": {
"fields": {
"<field_name>": {
"value": string,
"normalized_value": string | null,
"type": "string" | "number" | "date" | "currency" | "enum"
}
},
"tables": {
"<table_id>": {
"table_label": string,
"headers": [string],
"rows": [
{ "<header>": string }
]
}
},
"derived_insights": {
"<insight_name>": {
"value": string
}
}
}
}
"""


def extract_document(image: "Image.Image | None"):
    """Extract structured JSON from a document image via the Gemini model.

    Args:
        image: The document image to analyze, or ``None`` when no image
            was supplied.

    Returns:
        The value parsed from the model's JSON reply on success. Otherwise a
        dict with a single ``"error"`` key: when no image was given, when the
        call arrives less than ``MIN_INTERVAL`` seconds after the previous
        accepted call, or when the model call or JSON parsing raises.
    """
    global LAST_CALL_TS

    # Reject empty submissions first so they neither consume the shared
    # rate-limit slot nor get forwarded to the model.
    if image is None:
        return {"error": "No image provided. Please upload a document image."}

    # ---- Rate limiting ----
    now = time.time()
    if now - LAST_CALL_TS < MIN_INTERVAL:
        return {"error": "Rate limited. Please wait a few seconds."}
    LAST_CALL_TS = now

    try:
        response = model.generate_content(
            [_EXTRACTION_PROMPT, image],
            generation_config={
                "temperature": 0,
                "response_mime_type": "application/json",
            },
        )
        return json.loads(response.text)
    except Exception as e:
        # UI boundary: report any failure as data instead of raising, so the
        # caller always receives a JSON-serializable result.
        return {"error": str(e)}
# ============================================================
# Gradio UI (HF)
# ============================================================
# Build the interface: an image upload, a trigger button, and a JSON viewer.
with gr.Blocks(title="DocAI — Docsumo Compatible") as demo:
    gr.Markdown("""
# 📄 DocAI — Docsumo-Compatible Document Intelligence
Upload **any document image** (invoice, statement, report, form).
This demo returns a **Docsumo-compatible JSON contract**:
- Document metadata
- Key-value fields
- Tables
- Derived insights
""")
    image_input = gr.Image(type="pil", label="Upload Document Image")
    extract_btn = gr.Button("Extract Document")
    output = gr.JSON(label="Docsumo-Compatible JSON Output")

    # Clicking the button passes the uploaded image to extract_document and
    # shows whatever it returns in the JSON component.
    extract_btn.click(
        fn=extract_document,
        inputs=image_input,
        outputs=output,
    )

# Launch only when run as a script, so importing this module (e.g. to reuse
# `demo` or `extract_document`) does not start a server.
if __name__ == "__main__":
    demo.launch()