Spaces:

Liviu16
/

InvoiceRecon

Sleeping

App Files Community

Liviu16 committed on Feb 13

Commit

3a86ca3

verified ·

1 Parent(s): 8702b18

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -70

app.py CHANGED Viewed

@@ -1,94 +1,59 @@
 import gradio as gr
 import torch
 import json
-import spaces  # <--- CRITICAL: Required for ZeroGPU
 from PIL import Image
 from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
 from qwen_vl_utils import process_vision_info
-# --- MODEL LOADING (Optimized for ZeroGPU) ---
 MODEL_ID = "Qwen/Qwen2.5-VL-3B-Instruct"
-# We load in bfloat16 for max accuracy since ZeroGPU has 70GB VRAM
-model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
-    MODEL_ID,
-    torch_dtype=torch.bfloat16,
-    device_map="cuda"
-)
 processor = AutoProcessor.from_pretrained(MODEL_ID, max_pixels=1280*1280)
-SCHEMAS = {
-    "VODAFONE": {
-        "vendor": "VODAFONE ROMANIA",
-        "invoice_number": "string",
-        "date": "string (DD-MM-YYYY)",
-        "due_date": "string (DD-MM-YYYY)",
-        "client_name": "string",
-        "client_address": "string",
-        "account_id": "string",
-        "billing_period": "string",
-        "totals": {
-            "subtotal_no_vat": "number",
-            "vat_amount": "number",
-            "grand_total": "number",
-            "currency": "RON"
-        },
-        "details": [
-            {"phone_number": "string", "service_name": "string", "cost": "number"}
-        ]
-    },
-    "DIGI": {
-        "vendor": "DIGI (RCS & RDS)",
-        "invoice_number": "string",
-        "date": "string",
-        "contract_id": "string",
-        "client_name": "string",
-        "total_amount": "number",
-        "iban_code": "string"
-    },
-    "GENERAL": {
-        "vendor_name": "string",
-        "vendor_iban": "string",
-        "invoice_id": "string",
-        "date_issued": "string",
-        "total_gross_amount": "number",
-        "vat_total": "number",
-        "vat_rate": "string (e.g., 19%)",
-        "client_name": "string",
-        "service_description": "string",
-        "consumption_details": "string (e.g., 450 kWh or Period Nov-Dec)"
-    }
-}
-# --- THE DECORATED INFERENCE FUNCTION ---
-@spaces.GPU(duration=60)  # <--- CRITICAL: This triggers the GPU
-def process_invoice(image):
-    if image is None: return {"error": "No image uploaded"}
-    # 1. Router (Which vendor?)
     decision_prompt = "Identify vendor: VODAFONE, DIGI, or GENERAL. Reply with one word."
     messages = [{"role": "user", "content": [{"type": "image", "image": image}, {"type": "text", "text": decision_prompt}]}]
-    # Process for model
     text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     image_inputs, _ = process_vision_info(messages)
     inputs = processor(text=[text], images=image_inputs, padding=True, return_tensors="pt").to(model.device)
-    # Generate Choice
-    generated_ids = model.generate(**inputs, max_new_tokens=1536)
     raw_choice = processor.batch_decode(generated_ids[:, inputs.input_ids.shape[1]:], skip_special_tokens=True)[0].strip().upper()
     vendor_key = "VODAFONE" if "VODAFONE" in raw_choice else ("DIGI" if "DIGI" in raw_choice else "GENERAL")
-    # 2. Specialist (Extract Data)
-    schema_json = json.dumps(SCHEMAS[vendor_key], indent=2)
-    extract_prompt = f"Return ONLY valid JSON: {schema_json}"
     messages[0]["content"][1]["text"] = extract_prompt
     text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     inputs = processor(text=[text], images=image_inputs, padding=True, return_tensors="pt").to(model.device)
-    generated_ids = model.generate(**inputs, max_new_tokens=1024)
     result = processor.batch_decode(generated_ids[:, inputs.input_ids.shape[1]:], skip_special_tokens=True)[0]
     try:
@@ -97,15 +62,16 @@ def process_invoice(image):
         return {"raw_output": result}
 # --- TRANSKRIBUS LAYOUT ---
-with gr.Blocks() as demo:
     gr.Markdown("# 📑 IntelliReceipt: Real-Time Invoice AI")
     with gr.Row():
-        with gr.Column():
-            img_input = gr.File(label="1. Upload Invoice (Image or PDF)", file_types=[".pdf", ".png", ".jpg", ".jpeg"])
             run_btn = gr.Button("🚀 Extract Data", variant="primary")
-        with gr.Column():
             json_output = gr.JSON(label="Extracted Result")
-    run_btn.click(fn=process_invoice, inputs=img_input, outputs=json_output)
 demo.launch()

 import gradio as gr
 import torch
 import json
+import spaces
+import fitz  # PyMuPDF
 from PIL import Image
+import io
 from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
 from qwen_vl_utils import process_vision_info
+# --- MODEL LOADING ---
 MODEL_ID = "Qwen/Qwen2.5-VL-3B-Instruct"
+model = Qwen2_5_VLForConditionalGeneration.from_pretrained(MODEL_ID, torch_dtype=torch.bfloat16, device_map="cuda")
 processor = AutoProcessor.from_pretrained(MODEL_ID, max_pixels=1280*1280)
+# --- PDF HELPER ---
+def pdf_to_image(pdf_path):
+    """Converts the first page of a PDF to a PIL Image."""
+    doc = fitz.open(pdf_path)
+    page = doc.load_page(0)  # Extract only first page for demo
+    pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))  # 2x zoom for better OCR
+    img = Image.open(io.BytesIO(pix.tobytes()))
+    doc.close()
+    return img
+@spaces.GPU(duration=60)
+def process_invoice(file_info):
+    if file_info is None: return {"error": "No file uploaded"}
+    # 1. Handle PDF vs Image
+    file_path = file_info.name
+    if file_path.lower().endswith(".pdf"):
+        image = pdf_to_image(file_path)
+    else:
+        image = Image.open(file_path)
+    # 2. Identify Vendor (Router)
     decision_prompt = "Identify vendor: VODAFONE, DIGI, or GENERAL. Reply with one word."
     messages = [{"role": "user", "content": [{"type": "image", "image": image}, {"type": "text", "text": decision_prompt}]}]
     text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     image_inputs, _ = process_vision_info(messages)
     inputs = processor(text=[text], images=image_inputs, padding=True, return_tensors="pt").to(model.device)
+    generated_ids = model.generate(**inputs, max_new_tokens=10)
     raw_choice = processor.batch_decode(generated_ids[:, inputs.input_ids.shape[1]:], skip_special_tokens=True)[0].strip().upper()
+    # [Your Schema Logic Here...]
     vendor_key = "VODAFONE" if "VODAFONE" in raw_choice else ("DIGI" if "DIGI" in raw_choice else "GENERAL")
+    # 3. Extract Data (Specialist)
+    extract_prompt = f"Return ONLY valid JSON for {vendor_key} invoice."
     messages[0]["content"][1]["text"] = extract_prompt
     text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     inputs = processor(text=[text], images=image_inputs, padding=True, return_tensors="pt").to(model.device)
+    generated_ids = model.generate(**inputs, max_new_tokens=1536)
     result = processor.batch_decode(generated_ids[:, inputs.input_ids.shape[1]:], skip_special_tokens=True)[0]
     try:
         return {"raw_output": result}
 # --- TRANSKRIBUS LAYOUT ---
+with gr.Blocks(title="InvoiceRecon") as demo:
     gr.Markdown("# 📑 IntelliReceipt: Real-Time Invoice AI")
     with gr.Row():
+        with gr.Column(scale=1):
+            # gr.File supports the PDF preview you want to see
+            file_input = gr.File(label="Upload Invoice (PDF, PNG, JPG)", file_types=[".pdf", ".png", ".jpg"])
             run_btn = gr.Button("🚀 Extract Data", variant="primary")
+        with gr.Column(scale=1):
             json_output = gr.JSON(label="Extracted Result")
+    run_btn.click(fn=process_invoice, inputs=file_input, outputs=json_output)
 demo.launch()