Spaces:

akashraut
/

docAI

Sleeping

App Files Community

akashraut committed on Feb 10

Commit

25ca7ed

verified ·

1 Parent(s): 42f29a8

Update app.py

Browse files

Files changed (1) hide show

app.py +92 -54

app.py CHANGED Viewed

@@ -5,67 +5,102 @@ import gradio as gr
 import google.generativeai as genai
 from PIL import Image
-# -----------------------------
-# Gemini Configuration
-# -----------------------------
-GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
-if not GEMINI_API_KEY:
-    raise RuntimeError("GEMINI_API_KEY not found in Hugging Face Secrets")
-genai.configure(api_key=GEMINI_API_KEY)
-MODEL_NAME = "gemini-2.5-flash"
 model = genai.GenerativeModel(MODEL_NAME)
-# Simple rate limiter (protects your quota)
 LAST_CALL_TS = 0
 MIN_INTERVAL = 3  # seconds
-def extract_financial_document(image: Image.Image):
     global LAST_CALL_TS
-    # --- Rate limiting ---
     now = time.time()
     if now - LAST_CALL_TS < MIN_INTERVAL:
         return {"error": "Rate limited. Please wait a few seconds."}
     LAST_CALL_TS = now
     prompt = """
-You are a financial document intelligence system.
-TASKS:
-1. Identify the document type.
-2. Extract ALL tables exactly as they appear.
-3. Preserve row/column structure.
-4. Convert charts (pie/bar) into numeric insights.
-5. Do NOT hallucinate values.
-6. Numbers must be exact.
-OUTPUT RULES:
-- Return ONLY valid JSON
 - No markdown
 - No explanations
-JSON SCHEMA:
 {
-  "document_type": string,
-  "summary_fields": { "key": "value" },
-  "table_data": [
-    {
-      "table_name": string,
-      "headers": [string],
-      "rows": [[string]]
-    }
-  ],
-  "visual_insights": [
-    {
-      "chart_title": string,
-      "chart_type": string,
-      "trends": string
     }
-  ]
 }
 """
@@ -78,34 +113,37 @@ JSON SCHEMA:
             }
         )
-        # Ensure valid JSON
         return json.loads(response.text)
     except Exception as e:
         return {"error": str(e)}
-# -----------------------------
-# Gradio UI
-# -----------------------------
-with gr.Blocks(title="Financial DocAI (Gemini Vision)") as demo:
     gr.Markdown("""
-# 📄 Financial DocAI — Gemini Vision
-Upload a financial document image (portfolio report, MF statement, etc.)
-""")
-    with gr.Row():
-        image_input = gr.Image(type="pil", label="Upload Document Image")
-    with gr.Row():
-        extract_btn = gr.Button("Extract Data")
-    output_json = gr.JSON(label="Extracted Structured Data")
     extract_btn.click(
-        fn=extract_financial_document,
         inputs=image_input,
-        outputs=output_json
     )
 demo.launch()

 import google.generativeai as genai
 from PIL import Image
+# ============================================================
+# Configuration
+# ============================================================
+API_KEY = os.getenv("GEMINI_API_KEY")
+if not API_KEY:
+    raise RuntimeError("GEMINI_API_KEY missing in Hugging Face Secrets")
+genai.configure(api_key=API_KEY)
+MODEL_NAME = "gemini-2.5-flash"
 model = genai.GenerativeModel(MODEL_NAME)
+# Simple global rate limit (HF protection)
 LAST_CALL_TS = 0
 MIN_INTERVAL = 3  # seconds
+# ============================================================
+# Core Extraction Logic (Doc-Agnostic)
+# ============================================================
+def extract_document(image: Image.Image):
     global LAST_CALL_TS
+    # ---- Rate limiting ----
     now = time.time()
     if now - LAST_CALL_TS < MIN_INTERVAL:
         return {"error": "Rate limited. Please wait a few seconds."}
     LAST_CALL_TS = now
     prompt = """
+You are a document intelligence system.
+Your job is to analyze ANY document image and produce a
+Docsumo-compatible structured JSON output.
+DOCUMENT TYPES MAY INCLUDE (but are not limited to):
+- Financial statements
+- Invoices
+- Forms
+- Reports
+- Letters
+- Tables-only documents
+--------------------------------
+TASKS
+--------------------------------
+1. Identify document_type and document_subtype.
+2. Extract all key-value fields visible in the document.
+3. Extract ALL tables with exact row/column structure.
+4. If charts/graphs exist, summarize insights textually.
+5. Do NOT hallucinate missing data.
+6. Preserve numbers exactly as shown.
+--------------------------------
+OUTPUT RULES
+--------------------------------
+- Output ONLY valid JSON
 - No markdown
 - No explanations
+- Follow the schema EXACTLY
+--------------------------------
+DOCSUMO-COMPATIBLE JSON SCHEMA
+--------------------------------
 {
+  "document_metadata": {
+    "document_type": string,
+    "document_subtype": string,
+    "page_count": number,
+    "language": string
+  },
+  "extraction": {
+    "fields": {
+      "<field_name>": {
+        "value": string,
+        "normalized_value": string | null,
+        "type": "string" | "number" | "date" | "currency" | "enum"
+      }
+    },
+    "tables": {
+      "<table_id>": {
+        "table_label": string,
+        "headers": [string],
+        "rows": [
+          { "<header>": string }
+        ]
+      }
+    },
+    "derived_insights": {
+      "<insight_name>": {
+        "value": string
+      }
     }
+  }
 }
 """
             }
         )
         return json.loads(response.text)
     except Exception as e:
         return {"error": str(e)}
+# ============================================================
+# Gradio UI (HF)
+# ============================================================
+with gr.Blocks(title="DocAI – Docsumo Compatible") as demo:
     gr.Markdown("""
+# 📄 DocAI — Docsumo-Compatible Document Intelligence
+Upload **any document image** (invoice, statement, report, form).
+This demo returns a **Docsumo-compatible JSON contract**:
+- Document metadata
+- Key-value fields
+- Tables
+- Derived insights
+""")
+    image_input = gr.Image(type="pil", label="Upload Document Image")
+    extract_btn = gr.Button("Extract Document")
+    output = gr.JSON(label="Docsumo-Compatible JSON Output")
     extract_btn.click(
+        fn=extract_document,
         inputs=image_input,
+        outputs=output
     )
 demo.launch()