"""Gradio demo that extracts Docsumo-compatible JSON from a document image.

The app sends an uploaded image together with a fixed instruction prompt to a
Gemini model and renders the parsed JSON response. A very small global rate
limit protects the hosting Space from bursts of requests.
"""

import json
import logging
import os
import time
from typing import Optional

import google.generativeai as genai
import gradio as gr
from PIL import Image

logger = logging.getLogger(__name__)

# ============================================================
# Configuration
# ============================================================

API_KEY = os.getenv("GEMINI_API_KEY")
if not API_KEY:
    raise RuntimeError("GEMINI_API_KEY missing in Hugging Face Secrets")

genai.configure(api_key=API_KEY)

MODEL_NAME = "gemini-2.5-flash"
model = genai.GenerativeModel(MODEL_NAME)

# Simple global rate limit (HF protection)
LAST_CALL_TS = 0
MIN_INTERVAL = 3  # seconds

# Instruction prompt sent with every image.
# NOTE(review): the dynamic-key placeholders in the schema (<field_name>,
# <table_name>, <header>, <insight_name>) were reconstructed; the previous text
# had empty-string keys there, which made the schema ambiguous. Confirm the
# names against the intended Docsumo contract.
EXTRACTION_PROMPT = """
You are a document intelligence system.

Your job is to analyze ANY document image and produce a
Docsumo-compatible structured JSON output.

DOCUMENT TYPES MAY INCLUDE (but are not limited to):
- Financial statements
- Invoices
- Forms
- Reports
- Letters
- Tables-only documents

--------------------------------
TASKS
--------------------------------
1. Identify document_type and document_subtype.
2. Extract all key-value fields visible in the document.
3. Extract ALL tables with exact row/column structure.
4. If charts/graphs exist, summarize insights textually.
5. Do NOT hallucinate missing data.
6. Preserve numbers exactly as shown.

--------------------------------
OUTPUT RULES
--------------------------------
- Output ONLY valid JSON
- No markdown
- No explanations
- Follow the schema EXACTLY

--------------------------------
DOCSUMO-COMPATIBLE JSON SCHEMA
--------------------------------
{
  "document_metadata": {
    "document_type": string,
    "document_subtype": string,
    "page_count": number,
    "language": string
  },
  "extraction": {
    "fields": {
      "<field_name>": {
        "value": string,
        "normalized_value": string | null,
        "type": "string" | "number" | "date" | "currency" | "enum"
      }
    },
    "tables": {
      "<table_name>": {
        "table_label": string,
        "headers": [string],
        "rows": [
          { "<header>": string }
        ]
      }
    },
    "derived_insights": {
      "<insight_name>": {
        "value": string
      }
    }
  }
}
"""

# Intro text rendered at the top of the UI.
UI_DESCRIPTION = """
# 📄 DocAI — Docsumo-Compatible Document Intelligence
Upload **any document image** (invoice, statement, report, form).

This demo returns a **Docsumo-compatible JSON contract**:
- Document metadata
- Key-value fields
- Tables
- Derived insights
"""


# ============================================================
# Core Extraction Logic (Doc-Agnostic)
# ============================================================

def extract_document(image: Optional[Image.Image]) -> dict:
    """Run the Gemini extraction prompt against *image* and return parsed JSON.

    Args:
        image: The uploaded document as a PIL image, or ``None`` when the
            user triggered extraction without uploading anything.

    Returns:
        The JSON object produced by the model, or ``{"error": <message>}``
        when no image was supplied, the call was rate limited, the model call
        failed, or the response was not valid JSON. The function never raises,
        so the result can always be rendered by the JSON output component.
    """
    global LAST_CALL_TS

    # Validate before touching the rate limiter so that an empty submission
    # does not consume the caller's slot.
    if image is None:
        return {"error": "No image provided. Please upload a document image."}

    # ---- Rate limiting ----
    now = time.time()
    if now - LAST_CALL_TS < MIN_INTERVAL:
        return {"error": "Rate limited. Please wait a few seconds."}
    LAST_CALL_TS = now

    try:
        response = model.generate_content(
            [EXTRACTION_PROMPT, image],
            generation_config={
                "temperature": 0,
                "response_mime_type": "application/json",
            },
        )
        return json.loads(response.text)
    except json.JSONDecodeError as exc:
        logger.exception("Model response was not valid JSON")
        return {"error": f"Model returned invalid JSON: {exc}"}
    except Exception as exc:
        # UI boundary: surface the failure to the user instead of crashing
        # the handler, but keep the traceback in the logs.
        logger.exception("Document extraction failed")
        return {"error": str(exc)}


# ============================================================
# Gradio UI (HF)
# ============================================================

with gr.Blocks(title="DocAI – Docsumo Compatible") as demo:
    gr.Markdown(UI_DESCRIPTION)

    image_input = gr.Image(type="pil", label="Upload Document Image")
    extract_btn = gr.Button("Extract Document")
    output = gr.JSON(label="Docsumo-Compatible JSON Output")

    extract_btn.click(
        fn=extract_document,
        inputs=image_input,
        outputs=output,
    )

if __name__ == "__main__":
    demo.launch()