Spaces:

Liviu16
/

InvoiceRecon

Sleeping

App Files Files Community

Liviu16 committed 21 days ago

Commit

7080e09

verified ·

1 Parent(s): 4de3590

Update app.py

Browse files

Files changed (1) hide show

app.py +72 -22

app.py CHANGED Viewed

@@ -2,39 +2,88 @@ import gradio as gr
 import torch
 import json
 import spaces
-import fitz  # PyMuPDF
 from PIL import Image
 import io
-from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
 from qwen_vl_utils import process_vision_info
 # --- MODEL LOADING ---
 MODEL_ID = "Qwen/Qwen2.5-VL-3B-Instruct"
-model = Qwen2_5_VLForConditionalGeneration.from_pretrained(MODEL_ID, torch_dtype=torch.bfloat16, device_map="cuda")
-processor = AutoProcessor.from_pretrained(MODEL_ID, max_pixels=1280*1280)
-# --- PDF HELPER ---
-def pdf_to_image(pdf_path):
-    """Converts the first page of a PDF to a PIL Image."""
     doc = fitz.open(pdf_path)
-    page = doc.load_page(0)  # Extract only first page for demo
-    pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))  # 2x zoom for better OCR
     img = Image.open(io.BytesIO(pix.tobytes()))
     doc.close()
     return img
 @spaces.GPU(duration=60)
 def process_invoice(file_info):
     if file_info is None: return {"error": "No file uploaded"}
-    # 1. Handle PDF vs Image
-    file_path = file_info.name
-    if file_path.lower().endswith(".pdf"):
-        image = pdf_to_image(file_path)
     else:
-        image = Image.open(file_path)
-    # 2. Identify Vendor (Router)
     decision_prompt = "Identify vendor: VODAFONE, DIGI, or GENERAL. Reply with one word."
     messages = [{"role": "user", "content": [{"type": "image", "image": image}, {"type": "text", "text": decision_prompt}]}]
     text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
@@ -44,11 +93,12 @@ def process_invoice(file_info):
     generated_ids = model.generate(**inputs, max_new_tokens=10)
     raw_choice = processor.batch_decode(generated_ids[:, inputs.input_ids.shape[1]:], skip_special_tokens=True)[0].strip().upper()
-    # [Your Schema Logic Here...]
     vendor_key = "VODAFONE" if "VODAFONE" in raw_choice else ("DIGI" if "DIGI" in raw_choice else "GENERAL")
-    # 3. Extract Data (Specialist)
-    extract_prompt = f"Return ONLY valid JSON for {vendor_key} invoice."
     messages[0]["content"][1]["text"] = extract_prompt
     text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     inputs = processor(text=[text], images=image_inputs, padding=True, return_tensors="pt").to(model.device)
@@ -61,13 +111,13 @@ def process_invoice(file_info):
     except:
         return {"raw_output": result}
-# --- TRANSKRIBUS LAYOUT ---
 with gr.Blocks(title="InvoiceRecon") as demo:
-    gr.Markdown("# 📑 IntelliReceipt: Real-Time Invoice AI")
     with gr.Row():
         with gr.Column(scale=1):
-            # gr.File supports the PDF preview you want to see
-            file_input = gr.File(label="Upload Invoice (PDF, PNG, JPG)", file_types=[".pdf", ".png", ".jpg"])
             run_btn = gr.Button("🚀 Extract Data", variant="primary")
         with gr.Column(scale=1):
             json_output = gr.JSON(label="Extracted Result")

 import torch
 import json
 import spaces
+import fitz  # PyMuPDF for PDF handling
 from PIL import Image
 import io
+from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor, BitsAndBytesConfig
 from qwen_vl_utils import process_vision_info
+# --- DETAILED SCHEMAS RESTORED ---
+# Per-vendor JSON templates, keyed by the vendor label that process_invoice
+# derives from the router reply (VODAFONE, DIGI, or GENERAL as the fallback).
+# The selected template is serialized with json.dumps and embedded verbatim
+# in the extraction prompt sent to the model.
+# Values such as "string" / "number" are type placeholders shown to the model;
+# the remaining values (vendor names, "RON") are presumably meant to be
+# echoed as-is in the output -- confirm against real model responses.
+SCHEMAS = {
+    "VODAFONE": {
+        "vendor": "VODAFONE ROMANIA",
+        "invoice_number": "string",
+        "date": "string (DD-MM-YYYY)",
+        "client_name": "string",
+        "client_address": "string",
+        "account_id": "string",
+        "billing_period": "string",
+        "totals": {
+            "subtotal_no_vat": "number",
+            "vat_amount": "number",
+            "grand_total": "number",
+            "currency": "RON"
+        }
+    },
+    "DIGI": {
+        "vendor": "DIGI (RCS & RDS)",
+        "invoice_number": "string",
+        "contract_id": "string",
+        "total_amount": "number",
+        "iban": "string"
+    },
+    "GENERAL": {
+        "vendor_name": "string",
+        "invoice_id": "string",
+        "date": "string",
+        "total_with_vat": "number",
+        "client_name": "string"
+    }
+}
 # --- MODEL LOADING ---
 MODEL_ID = "Qwen/Qwen2.5-VL-3B-Instruct"
+def load_model():
+    """Load the 4-bit quantized Qwen2.5-VL model and its processor.
+
+    Both objects are loaded from the module-level ``MODEL_ID``.
+
+    Returns:
+        A ``(model, processor)`` tuple.
+    """
+    # Keep 4-bit for speed even on ZeroGPU
+    # NF4 weights with double quantization; compute dtype is float16.
+    quant_config = BitsAndBytesConfig(
+        load_in_4bit=True,
+        bnb_4bit_compute_dtype=torch.float16,
+        bnb_4bit_quant_type="nf4",
+        bnb_4bit_use_double_quant=True
+    )
+    model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+        MODEL_ID,
+        torch_dtype="auto",
+        device_map="cuda", # Explicit for ZeroGPU
+        quantization_config=quant_config
+    )
+    # max_pixels is forwarded to the processor; presumably an upper bound on
+    # the pixels kept per input image -- confirm against the Qwen2.5-VL docs.
+    processor = AutoProcessor.from_pretrained(MODEL_ID, max_pixels=1280*1280)
+    return model, processor
+model, processor = load_model()
+# --- PDF TO IMAGE HELPER ---
+def get_pdf_page_image(pdf_path):
+    """Render the first page of the PDF at ``pdf_path`` and return it as a PIL image."""
+    # NOTE(review): doc.close() below is not reached if any call before it
+    # raises; a try/finally (or context manager) would guarantee the close.
     doc = fitz.open(pdf_path)
+    page = doc.load_page(0) # First page only
+    pix = page.get_pixmap(matrix=fitz.Matrix(2, 2)) # 2x zoom
+    # Pass the pixmap bytes through an in-memory buffer so PIL can decode them.
     img = Image.open(io.BytesIO(pix.tobytes()))
     doc.close()
     return img
+# --- INFERENCE ---
 @spaces.GPU(duration=60)
 def process_invoice(file_info):
     if file_info is None: return {"error": "No file uploaded"}
+    # Handle File Type
+    if file_info.name.lower().endswith(".pdf"):
+        image = get_pdf_page_image(file_info.name)
     else:
+        image = Image.open(file_info.name)
+    # 1. Router
     decision_prompt = "Identify vendor: VODAFONE, DIGI, or GENERAL. Reply with one word."
     messages = [{"role": "user", "content": [{"type": "image", "image": image}, {"type": "text", "text": decision_prompt}]}]
     text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     generated_ids = model.generate(**inputs, max_new_tokens=10)
     raw_choice = processor.batch_decode(generated_ids[:, inputs.input_ids.shape[1]:], skip_special_tokens=True)[0].strip().upper()
     vendor_key = "VODAFONE" if "VODAFONE" in raw_choice else ("DIGI" if "DIGI" in raw_choice else "GENERAL")
+    # 2. Specialist
+    schema_json = json.dumps(SCHEMAS[vendor_key], indent=2)
+    extract_prompt = f"Extract details as JSON strictly following this schema: {schema_json}. Return ONLY valid JSON."
     messages[0]["content"][1]["text"] = extract_prompt
     text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     inputs = processor(text=[text], images=image_inputs, padding=True, return_tensors="pt").to(model.device)
     except:
         return {"raw_output": result}
+# --- INTERFACE ---
 with gr.Blocks(title="InvoiceRecon") as demo:
+    gr.Markdown("# 📑 IntelliReceipt: Local AI Invoice Parser")
     with gr.Row():
         with gr.Column(scale=1):
+            # Using gr.File for the PDF preview experience
+            file_input = gr.File(label="Upload Invoice (PDF or Image)", file_types=[".pdf", ".png", ".jpg"])
             run_btn = gr.Button("🚀 Extract Data", variant="primary")
         with gr.Column(scale=1):
             json_output = gr.JSON(label="Extracted Result")