akashraut committed on
Commit
25ca7ed
·
verified ·
1 Parent(s): 42f29a8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +92 -54
app.py CHANGED
@@ -5,67 +5,102 @@ import gradio as gr
5
  import google.generativeai as genai
6
  from PIL import Image
7
 
8
- # -----------------------------
9
- # Gemini Configuration
10
- # -----------------------------
11
- GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
12
- if not GEMINI_API_KEY:
13
- raise RuntimeError("GEMINI_API_KEY not found in Hugging Face Secrets")
14
 
15
- genai.configure(api_key=GEMINI_API_KEY)
 
 
16
 
17
- MODEL_NAME = "gemini-2.5-flash"
18
 
 
19
  model = genai.GenerativeModel(MODEL_NAME)
20
 
21
- # Simple rate limiter (protects your quota)
22
  LAST_CALL_TS = 0
23
  MIN_INTERVAL = 3 # seconds
24
 
25
 
26
- def extract_financial_document(image: Image.Image):
 
 
 
 
27
  global LAST_CALL_TS
28
 
29
- # --- Rate limiting ---
30
  now = time.time()
31
  if now - LAST_CALL_TS < MIN_INTERVAL:
32
  return {"error": "Rate limited. Please wait a few seconds."}
33
  LAST_CALL_TS = now
34
 
35
  prompt = """
36
- You are a financial document intelligence system.
37
-
38
- TASKS:
39
- 1. Identify the document type.
40
- 2. Extract ALL tables exactly as they appear.
41
- 3. Preserve row/column structure.
42
- 4. Convert charts (pie/bar) into numeric insights.
43
- 5. Do NOT hallucinate values.
44
- 6. Numbers must be exact.
45
-
46
- OUTPUT RULES:
47
- - Return ONLY valid JSON
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  - No markdown
49
  - No explanations
 
50
 
51
- JSON SCHEMA:
 
 
52
  {
53
- "document_type": string,
54
- "summary_fields": { "key": "value" },
55
- "table_data": [
56
- {
57
- "table_name": string,
58
- "headers": [string],
59
- "rows": [[string]]
60
- }
61
- ],
62
- "visual_insights": [
63
- {
64
- "chart_title": string,
65
- "chart_type": string,
66
- "trends": string
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  }
68
- ]
69
  }
70
  """
71
 
@@ -78,34 +113,37 @@ JSON SCHEMA:
78
  }
79
  )
80
 
81
- # Ensure valid JSON
82
  return json.loads(response.text)
83
 
84
  except Exception as e:
85
  return {"error": str(e)}
86
 
87
 
88
- # -----------------------------
89
- # Gradio UI
90
- # -----------------------------
91
- with gr.Blocks(title="Financial DocAI (Gemini Vision)") as demo:
 
92
  gr.Markdown("""
93
- # 📄 Financial DocAI — Gemini Vision
94
- Upload a financial document image (portfolio report, MF statement, etc.)
95
- """)
96
 
97
- with gr.Row():
98
- image_input = gr.Image(type="pil", label="Upload Document Image")
99
 
100
- with gr.Row():
101
- extract_btn = gr.Button("Extract Data")
 
 
 
 
102
 
103
- output_json = gr.JSON(label="Extracted Structured Data")
 
 
104
 
105
  extract_btn.click(
106
- fn=extract_financial_document,
107
  inputs=image_input,
108
- outputs=output_json
109
  )
110
 
111
  demo.launch()
 
5
  import google.generativeai as genai
6
  from PIL import Image
7
 
8
+ # ============================================================
9
+ # Configuration
10
+ # ============================================================
 
 
 
11
 
12
+ API_KEY = os.getenv("GEMINI_API_KEY")
13
+ if not API_KEY:
14
+ raise RuntimeError("GEMINI_API_KEY missing in Hugging Face Secrets")
15
 
16
+ genai.configure(api_key=API_KEY)
17
 
18
+ MODEL_NAME = "gemini-2.5-flash"
19
  model = genai.GenerativeModel(MODEL_NAME)
20
 
21
+ # Simple global rate limit (HF protection)
22
  LAST_CALL_TS = 0
23
  MIN_INTERVAL = 3 # seconds
24
 
25
 
26
+ # ============================================================
27
+ # Core Extraction Logic (Doc-Agnostic)
28
+ # ============================================================
29
+
30
+ def extract_document(image: Image.Image):
31
  global LAST_CALL_TS
32
 
33
+ # ---- Rate limiting ----
34
  now = time.time()
35
  if now - LAST_CALL_TS < MIN_INTERVAL:
36
  return {"error": "Rate limited. Please wait a few seconds."}
37
  LAST_CALL_TS = now
38
 
39
  prompt = """
40
+ You are a document intelligence system.
41
+
42
+ Your job is to analyze ANY document image and produce a
43
+ Docsumo-compatible structured JSON output.
44
+
45
+ DOCUMENT TYPES MAY INCLUDE (but are not limited to):
46
+ - Financial statements
47
+ - Invoices
48
+ - Forms
49
+ - Reports
50
+ - Letters
51
+ - Tables-only documents
52
+
53
+ --------------------------------
54
+ TASKS
55
+ --------------------------------
56
+ 1. Identify document_type and document_subtype.
57
+ 2. Extract all key-value fields visible in the document.
58
+ 3. Extract ALL tables with exact row/column structure.
59
+ 4. If charts/graphs exist, summarize insights textually.
60
+ 5. Do NOT hallucinate missing data.
61
+ 6. Preserve numbers exactly as shown.
62
+
63
+ --------------------------------
64
+ OUTPUT RULES
65
+ --------------------------------
66
+ - Output ONLY valid JSON
67
  - No markdown
68
  - No explanations
69
+ - Follow the schema EXACTLY
70
 
71
+ --------------------------------
72
+ DOCSUMO-COMPATIBLE JSON SCHEMA
73
+ --------------------------------
74
  {
75
+ "document_metadata": {
76
+ "document_type": string,
77
+ "document_subtype": string,
78
+ "page_count": number,
79
+ "language": string
80
+ },
81
+ "extraction": {
82
+ "fields": {
83
+ "<field_name>": {
84
+ "value": string,
85
+ "normalized_value": string | null,
86
+ "type": "string" | "number" | "date" | "currency" | "enum"
87
+ }
88
+ },
89
+ "tables": {
90
+ "<table_id>": {
91
+ "table_label": string,
92
+ "headers": [string],
93
+ "rows": [
94
+ { "<header>": string }
95
+ ]
96
+ }
97
+ },
98
+ "derived_insights": {
99
+ "<insight_name>": {
100
+ "value": string
101
+ }
102
  }
103
+ }
104
  }
105
  """
106
 
 
113
  }
114
  )
115
 
 
116
  return json.loads(response.text)
117
 
118
  except Exception as e:
119
  return {"error": str(e)}
120
 
121
 
122
+ # ============================================================
123
+ # Gradio UI (HF)
124
+ # ============================================================
125
+
126
+ with gr.Blocks(title="DocAI – Docsumo Compatible") as demo:
127
  gr.Markdown("""
128
+ # 📄 DocAI — Docsumo-Compatible Document Intelligence
 
 
129
 
130
+ Upload **any document image** (invoice, statement, report, form).
 
131
 
132
+ This demo returns a **Docsumo-compatible JSON contract**:
133
+ - Document metadata
134
+ - Key-value fields
135
+ - Tables
136
+ - Derived insights
137
+ """)
138
 
139
+ image_input = gr.Image(type="pil", label="Upload Document Image")
140
+ extract_btn = gr.Button("Extract Document")
141
+ output = gr.JSON(label="Docsumo-Compatible JSON Output")
142
 
143
  extract_btn.click(
144
+ fn=extract_document,
145
  inputs=image_input,
146
+ outputs=output
147
  )
148
 
149
  demo.launch()