Spaces:

akashraut
/

docAI

Sleeping

App Files Community

akashraut committed on Feb 10

Commit

0d9ba16

verified ·

1 Parent(s): 555a02b

Update app.py

Browse files

Files changed (1) hide show

app.py +78 -97

app.py CHANGED Viewed

@@ -1,129 +1,110 @@
-import gradio as gr
-import requests
-import json
 import os
-import base64
 from PIL import Image
-from io import BytesIO
-OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
-MODEL_ID = "nvidia/nemotron-nano-12b-v2-vl:free"
-OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
-def image_to_base64(image: Image.Image) -> str:
-    buffered = BytesIO()
-    image.save(buffered, format="PNG")
-    return base64.b64encode(buffered.getvalue()).decode("utf-8")
-def extract_document(image: Image.Image):
-    if image is None:
-        return {"error": "Please upload an image"}
-    if not OPENROUTER_API_KEY:
-        return {"error": "OPENROUTER_API_KEY not set"}
-    img_b64 = image_to_base64(image)
     prompt = """
-You are a universal document understanding AI.
-Return ONLY valid JSON.
-Do NOT include explanations or markdown.
-Rules:
-- Be document-agnostic
-- Detect document_type if possible
-- Extract all visible key-value fields
-- Extract tables with full rows and columns
-- Preserve numbers exactly
-- Use null for missing values
-- Do not hallucinate
-JSON schema:
 {
-  "document_type": string | null,
-  "summary": string,
-  "fields": {
-    "<field_name>": "<value or null>"
-  },
-  "tables": [
     {
       "table_name": string,
-      "columns": [string],
-      "rows": [[string | number | null]]
     }
   ]
 }
 """
-    payload = {
-        "model": MODEL_ID,
-        "messages": [
-            {
-                "role": "user",
-                "content": [
-                    {"type": "text", "text": prompt},
-                    {
-                        "type": "image_url",
-                        "image_url": {
-                            "url": f"data:image/png;base64,{img_b64}"
-                        }
-                    }
-                ]
-            }
-        ],
-        "temperature": 0.0,
-        "max_tokens": 1200
-    }
-    headers = {
-        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
-        "Content-Type": "application/json",
-        "HTTP-Referer": "https://huggingface.co",
-        "X-Title": "DocAI"
-    }
     try:
-        response = requests.post(
-            OPENROUTER_URL,
-            headers=headers,
-            json=payload,
-            timeout=90
         )
-        response.raise_for_status()
-        content = response.json()["choices"][0]["message"]["content"]
-        start = content.find("{")
-        end = content.rfind("}") + 1
-        return json.loads(content[start:end])
     except Exception as e:
-        return {
-            "error": "Extraction failed",
-            "details": str(e)
-        }
-with gr.Blocks(title="DocAI – Universal Document Intelligence") as demo:
-    gr.Markdown(
-        """
-# 📄 DocAI – Universal Document Intelligence
-Vision-powered. No templates. Any document.
-**Model:** NVIDIA Nemotron Nano 12B VL (free)
-"""
-    )
     with gr.Row():
-        input_img = gr.Image(type="pil", label="Upload document")
-        output_json = gr.JSON(label="Extracted JSON")
-    extract_btn = gr.Button("Extract", variant="primary")
     extract_btn.click(
-        fn=extract_document,
-        inputs=input_img,
         outputs=output_json
     )

 import os
+import json
+import time
+import gradio as gr
+import google.generativeai as genai
 from PIL import Image
+# -----------------------------
+# Gemini Configuration
+# -----------------------------
+GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
+if not GEMINI_API_KEY:
+    raise RuntimeError("GEMINI_API_KEY not found in Hugging Face Secrets")
+genai.configure(api_key=GEMINI_API_KEY)
+MODEL_NAME = "gemini-1.5-flash-latest"
+model = genai.GenerativeModel(MODEL_NAME)
+# Simple rate limiter (protects your quota)
+LAST_CALL_TS = 0
+MIN_INTERVAL = 3  # seconds
+def extract_financial_document(image: Image.Image):
+    global LAST_CALL_TS
+    # --- Rate limiting ---
+    now = time.time()
+    if now - LAST_CALL_TS < MIN_INTERVAL:
+        return {"error": "Rate limited. Please wait a few seconds."}
+    LAST_CALL_TS = now
     prompt = """
+You are a financial document intelligence system.
+TASKS:
+1. Identify the document type.
+2. Extract ALL tables exactly as they appear.
+3. Preserve row/column structure.
+4. Convert charts (pie/bar) into numeric insights.
+5. Do NOT hallucinate values.
+6. Numbers must be exact.
+OUTPUT RULES:
+- Return ONLY valid JSON
+- No markdown
+- No explanations
+JSON SCHEMA:
 {
+  "document_type": string,
+  "summary_fields": { "key": "value" },
+  "table_data": [
     {
       "table_name": string,
+      "headers": [string],
+      "rows": [[string]]
+    }
+  ],
+  "visual_insights": [
+    {
+      "chart_title": string,
+      "chart_type": string,
+      "trends": string
     }
   ]
 }
 """
     try:
+        response = model.generate_content(
+            [prompt, image],
+            generation_config={
+                "temperature": 0,
+                "response_mime_type": "application/json"
+            }
         )
+        # Ensure valid JSON
+        return json.loads(response.text)
     except Exception as e:
+        return {"error": str(e)}
+# -----------------------------
+# Gradio UI
+# -----------------------------
+with gr.Blocks(title="Financial DocAI (Gemini Vision)") as demo:
+    gr.Markdown("""
+# 📄 Financial DocAI — Gemini Vision
+Upload a financial document image (portfolio report, MF statement, etc.)
+""")
     with gr.Row():
+        image_input = gr.Image(type="pil", label="Upload Document Image")
+    with gr.Row():
+        extract_btn = gr.Button("Extract Data")
+    output_json = gr.JSON(label="Extracted Structured Data")
     extract_btn.click(
+        fn=extract_financial_document,
+        inputs=image_input,
         outputs=output_json
     )