Spaces:

Liviu16
/

InvoiceRecon

Running on Zero

App Files Files Community

InvoiceRecon / app.py

Liviu16

Update app.py

d45f115 verified 7 days ago

raw

history blame contribute delete

6.52 kB

	import gradio as gr
	import torch
	import json
	import spaces
	import fitz # PyMuPDF for PDF handling
	from PIL import Image
	import io
	from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor, BitsAndBytesConfig
	from qwen_vl_utils import process_vision_info

	# --- DETAILED SCHEMAS ---
	# Per-vendor JSON templates. The selected template is serialized with
	# json.dumps and embedded in the extraction prompt, so key order and the
	# placeholder strings ("string", "number", ...) are part of the prompt text.
	SCHEMAS = {
	    # Vodafone Romania bills: header fields plus a nested totals object.
	    "VODAFONE": {
	        "vendor": "VODAFONE ROMANIA",
	        "invoice_number": "string",
	        "date": "string (DD-MM-YYYY)",
	        "client_name": "string",
	        "client_address": "string",
	        "account_id": "string",
	        "billing_period": "string",
	        "totals": {
	            "subtotal_no_vat": "number",
	            "vat_amount": "number",
	            "grand_total": "number",
	            "currency": "RON",
	        },
	    },
	    # DIGI (RCS & RDS) bills: flat structure.
	    "DIGI": {
	        "vendor": "DIGI (RCS & RDS)",
	        "invoice_number": "string",
	        "contract_id": "string",
	        "total_amount": "number",
	        "iban": "string",
	    },
	    # Fallback template for any other invoice.
	    "GENERAL": {
	        "vendor_name": "string",
	        "invoice_id": "string",
	        "date": "string",
	        "total_with_vat": "number",
	        "client_name": "string",
	    },
	}

	# --- MODEL LOADING ---
	MODEL_ID = "Qwen/Qwen2.5-VL-3B-Instruct"


	def load_model():
	    """Load the 4-bit quantized vision-language model and its processor.

	    Both objects are created from ``MODEL_ID``. The model is placed on the
	    ``"cuda"`` device map with a bitsandbytes NF4 configuration.

	    Returns:
	        A ``(model, processor)`` tuple.
	    """
	    # 4-bit NF4 weights with double quantization; float16 compute dtype.
	    four_bit_settings = BitsAndBytesConfig(
	        bnb_4bit_use_double_quant=True,
	        bnb_4bit_quant_type="nf4",
	        bnb_4bit_compute_dtype=torch.float16,
	        load_in_4bit=True,
	    )

	    vl_model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
	        MODEL_ID,
	        quantization_config=four_bit_settings,
	        device_map="cuda",
	        torch_dtype="auto",
	    )

	    # NOTE(review): max_pixels presumably caps the visual input size handed
	    # to the model -- confirm against the Qwen2.5-VL processor docs.
	    vl_processor = AutoProcessor.from_pretrained(
	        MODEL_ID,
	        max_pixels=1280 * 1280,
	    )
	    return vl_model, vl_processor

	# Load the model and processor once at module import; process_invoice reads
	# these module-level names on every call.
	model, processor = load_model()

	# --- PDF TO IMAGE HELPER ---
	def get_pdf_page_image(pdf_path):
	    """Render the first page of a PDF as a PIL image.

	    Args:
	        pdf_path: Path of the PDF file, passed straight to ``fitz.open``.

	    Returns:
	        A ``PIL.Image.Image`` decoded from the rendered pixmap bytes.

	    Raises:
	        Whatever PyMuPDF or Pillow raise when the file cannot be opened,
	        has no first page, or cannot be rendered/decoded.
	    """
	    # The document is used as a context manager so it is closed even when
	    # loading or rendering the page raises (the original leaked it then).
	    with fitz.open(pdf_path) as doc:
	        page = doc.load_page(0)
	        # Matrix(2, 2) scales the page by 2 on both axes for a sharper render.
	        pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))
	        # Copy the encoded image out of the pixmap before the document closes.
	        encoded = pix.tobytes()

	    return Image.open(io.BytesIO(encoded))

	# --- INFERENCE ---
	@spaces.GPU(duration=60)
	def process_invoice(file_info, progress=gr.Progress()):
	    """Validate an uploaded document, pick a vendor schema and extract JSON.

	    The model is queried twice on the same image: first to decide whether
	    the document is an invoice and which vendor it belongs to, then to fill
	    the matching entry of ``SCHEMAS``.

	    Args:
	        file_info: Uploaded file object exposing a ``.name`` path, or ``None``
	            when nothing was uploaded.
	        progress: Gradio progress tracker; the ``gr.Progress()`` default is
	            kept because it is part of the existing signature.

	    Returns:
	        A ``(preview, payload)`` tuple. ``preview`` is the PIL image (``None``
	        when no file was given). ``payload`` is the parsed JSON on success,
	        ``{"raw_output": ...}`` when the model output is not valid JSON, or an
	        ``{"error": ...}`` dict when there is no file or the document is
	        rejected as not being an invoice.
	    """
	    if file_info is None:
	        return None, {"error": "No file uploaded"}

	    # 1. Build the preview image: first page for PDFs, the file itself otherwise.
	    progress(0.1, desc="📄 Processing document...")
	    if file_info.name.lower().endswith(".pdf"):
	        image = get_pdf_page_image(file_info.name)
	    else:
	        image = Image.open(file_info.name)

	    # 2. Router & validation: ask the model for a vendor label or 'INVALID'.
	    progress(0.3, desc="🔍 Validating and Identifying Vendor...")

	    decision_prompt = """Analyze this image. Is it a financial invoice or receipt?
	- If NO (e.g. random photo, object, landscape): Reply 'INVALID'.
	- If YES: Reply ONLY with 'VODAFONE', 'DIGI', or 'GENERAL'."""

	    messages = [{"role": "user", "content": [{"type": "image", "image": image}, {"type": "text", "text": decision_prompt}]}]

	    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
	    image_inputs, _ = process_vision_info(messages)
	    inputs = processor(text=[text], images=image_inputs, padding=True, return_tensors="pt").to(model.device)

	    generated_ids = model.generate(**inputs, max_new_tokens=10)
	    # Slice off the prompt tokens so only the newly generated answer is decoded.
	    raw_choice = processor.batch_decode(generated_ids[:, inputs.input_ids.shape[1]:], skip_special_tokens=True)[0].strip().upper()

	    # Stop before extraction when the model rejects the document.
	    if "INVALID" in raw_choice:
	        progress(1.0, desc="❌ Invalid Document")
	        return image, {
	            "error": "Validation Failed",
	            "message": "The uploaded image does not appear to be an invoice. Extraction cancelled to prevent hallucinations."
	        }

	    # Any answer that names neither vendor falls back to the generic schema.
	    vendor_key = "VODAFONE" if "VODAFONE" in raw_choice else ("DIGI" if "DIGI" in raw_choice else "GENERAL")

	    # 3. Specialist: extract data with the vendor-specific schema.
	    progress(0.6, desc=f"🤖 Extracting {vendor_key} details...")
	    schema_json = json.dumps(SCHEMAS[vendor_key], indent=2)
	    extract_prompt = f"Extract details as JSON strictly following this schema: {schema_json}. Return ONLY valid JSON."

	    # Reuse the same message (and image) and swap only the text instruction.
	    messages[0]["content"][1]["text"] = extract_prompt
	    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
	    inputs = processor(text=[text], images=image_inputs, padding=True, return_tensors="pt").to(model.device)

	    generated_ids = model.generate(**inputs, max_new_tokens=1536)
	    result = processor.batch_decode(generated_ids[:, inputs.input_ids.shape[1]:], skip_special_tokens=True)[0]

	    progress(0.9, desc="⚙️ Finalizing result...")

	    # 4. Parse the answer after removing Markdown code-fence markers.
	    cleaned = result.strip().replace('```json', '').replace('```', '')
	    try:
	        data = json.loads(cleaned)
	    except (ValueError, RecursionError):
	        # ValueError covers json.JSONDecodeError; RecursionError covers
	        # pathologically nested output. Anything else (KeyboardInterrupt,
	        # SystemExit, real bugs) is no longer swallowed by a bare except.
	        progress(1.0, desc="⚠️ Extraction complete with formatting issues")
	        return image, {"raw_output": result}

	    progress(1.0, desc="✅ Success!")
	    return image, data

	# --- INTERFACE ---
	with gr.Blocks(title="InvoiceRecon", theme=gr.themes.Soft()) as demo:
	    gr.Markdown("# 📑 IntelliReceipt: Real-Time Invoice AI")
	    gr.Markdown("Upload an invoice (PDF or Image) to extract structured data using Qwen2.5-VL.")

	    with gr.Row():
	        # Left column: upload widget, document preview and action buttons.
	        with gr.Column(scale=1):
	            file_input = gr.File(label="1. Upload Invoice", file_types=[".pdf", ".png", ".jpg"])
	            preview_output = gr.Image(label="2. Document Preview", type="pil")
	            run_btn = gr.Button("🚀 Extract Data", variant="primary")

	            # json_output is not created yet at this point, so the button
	            # starts with the two components that exist; the JSON panel is
	            # attached afterwards through reset_btn.add().
	            reset_btn = gr.ClearButton(
	                value="🗑️ Reset All",
	                variant="secondary",
	                components=[file_input, preview_output],
	            )

	        # Right column: extracted JSON result.
	        with gr.Column(scale=1):
	            json_output = gr.JSON(label="3. Extracted JSON Result")

	    # Make "Reset All" clear the JSON panel too.
	    reset_btn.add(json_output)

	    # process_invoice takes the uploaded file and returns (preview image, JSON).
	    run_btn.click(
	        fn=process_invoice,
	        inputs=file_input,
	        outputs=[preview_output, json_output],
	    )

	demo.launch()