Spaces:

akashraut
/

docAI

Sleeping

App Files Files Community

docAI / app.py

akashraut

Update app.py

25ca7ed verified about 1 month ago

raw

history blame contribute delete

3.67 kB

	import os
	import json
	import time
	import gradio as gr
	import google.generativeai as genai
	from PIL import Image

	# ============================================================
	# Configuration
	# ============================================================

	# The Gemini credential is read from the environment; start-up is aborted
	# when it is absent or empty.
	API_KEY = os.environ.get("GEMINI_API_KEY")
	if not API_KEY:
	    raise RuntimeError("GEMINI_API_KEY missing in Hugging Face Secrets")

	# Shared model handle used by the extraction function.
	MODEL_NAME = "gemini-2.5-flash"
	genai.configure(api_key=API_KEY)
	model = genai.GenerativeModel(MODEL_NAME)

	# Simple global rate limit (HF protection)
	MIN_INTERVAL = 3  # minimum seconds between two accepted calls
	LAST_CALL_TS = 0  # time.time() of the last accepted call; 0 = none yet


	# ============================================================
	# Core Extraction Logic (Doc-Agnostic)
	# ============================================================

	def extract_document(image: Image.Image):
	    """Extract Docsumo-style structured JSON from a document image.

	    The image is sent to the module-level Gemini ``model`` together with a
	    fixed instruction prompt, and the JSON text of the reply is parsed.

	    Args:
	        image: PIL image of the document. ``None`` is tolerated (e.g. the
	            button was clicked with no upload) and reported as an error.

	    Returns:
	        The value parsed from the model's JSON reply, or a dict of the form
	        ``{"error": <message>}`` when no image was supplied, the call was
	        rate limited, or the request / JSON parsing raised.
	    """
	    global LAST_CALL_TS

	    # Reject an empty submission before touching the rate limiter, so that
	    # a click without an image does not block the next real request.
	    if image is None:
	        return {"error": "No image provided. Please upload a document image."}

	    # ---- Rate limiting ----
	    # One process-wide timestamp: any call arriving sooner than
	    # MIN_INTERVAL seconds after the last accepted one is refused.
	    now = time.time()
	    if now - LAST_CALL_TS < MIN_INTERVAL:
	        return {"error": "Rate limited. Please wait a few seconds."}
	    LAST_CALL_TS = now

	    # NOTE: the schema below uses a plain "|" for alternatives; a
	    # backslash-escaped pipe is an invalid escape sequence in a normal
	    # Python string and would leak a literal backslash into the prompt.
	    prompt = """
	You are a document intelligence system.

	Your job is to analyze ANY document image and produce a
	Docsumo-compatible structured JSON output.

	DOCUMENT TYPES MAY INCLUDE (but are not limited to):
	- Financial statements
	- Invoices
	- Forms
	- Reports
	- Letters
	- Tables-only documents

	--------------------------------
	TASKS
	--------------------------------
	1. Identify document_type and document_subtype.
	2. Extract all key-value fields visible in the document.
	3. Extract ALL tables with exact row/column structure.
	4. If charts/graphs exist, summarize insights textually.
	5. Do NOT hallucinate missing data.
	6. Preserve numbers exactly as shown.

	--------------------------------
	OUTPUT RULES
	--------------------------------
	- Output ONLY valid JSON
	- No markdown
	- No explanations
	- Follow the schema EXACTLY

	--------------------------------
	DOCSUMO-COMPATIBLE JSON SCHEMA
	--------------------------------
	{
	"document_metadata": {
	"document_type": string,
	"document_subtype": string,
	"page_count": number,
	"language": string
	},
	"extraction": {
	"fields": {
	"<field_name>": {
	"value": string,
	"normalized_value": string | null,
	"type": "string" | "number" | "date" | "currency" | "enum"
	}
	},
	"tables": {
	"<table_id>": {
	"table_label": string,
	"headers": [string],
	"rows": [
	{ "<header>": string }
	]
	}
	},
	"derived_insights": {
	"<insight_name>": {
	"value": string
	}
	}
	}
	}
	"""

	    # This function is the UI boundary: any failure (API error, blocked or
	    # empty response, malformed JSON) is reported to the user as an error
	    # payload instead of propagating.
	    try:
	        response = model.generate_content(
	            [prompt, image],
	            generation_config={
	                "temperature": 0,
	                "response_mime_type": "application/json",
	            },
	        )
	        return json.loads(response.text)
	    except Exception as e:
	        return {"error": str(e)}


	# ============================================================
	# Gradio UI (HF)
	# ============================================================

	# Introductory text rendered at the top of the page.
	_INTRO_MARKDOWN = """
	# 📄 DocAI — Docsumo-Compatible Document Intelligence

	Upload any document image (invoice, statement, report, form).

	This demo returns a Docsumo-compatible JSON contract:
	- Document metadata
	- Key-value fields
	- Tables
	- Derived insights
	"""

	with gr.Blocks(title="DocAI – Docsumo Compatible") as demo:
	    gr.Markdown(_INTRO_MARKDOWN)

	    # Components are created in display order: upload, trigger, result.
	    image_input = gr.Image(type="pil", label="Upload Document Image")
	    extract_btn = gr.Button("Extract Document")
	    output = gr.JSON(label="Docsumo-Compatible JSON Output")

	    # Clicking the button sends the uploaded PIL image to extract_document
	    # and shows whatever it returns in the JSON viewer.
	    extract_btn.click(extract_document, image_input, output)

	# Start the app as soon as the module is executed.
	demo.launch()