Spaces:

Liviu16
/

InvoiceRecon

Running on Zero

App Files Files Community

Liviu16 committed 7 days ago

Commit

d0c8e87

verified ·

1 Parent(s): 7080e09

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -18

app.py CHANGED Viewed

@@ -8,7 +8,7 @@ import io
 from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor, BitsAndBytesConfig
 from qwen_vl_utils import process_vision_info
-# --- DETAILED SCHEMAS RESTORED ---
 SCHEMAS = {
     "VODAFONE": {
         "vendor": "VODAFONE ROMANIA",
@@ -45,7 +45,6 @@ SCHEMAS = {
 MODEL_ID = "Qwen/Qwen2.5-VL-3B-Instruct"
 def load_model():
-    # Keep 4-bit for speed even on ZeroGPU
     quant_config = BitsAndBytesConfig(
         load_in_4bit=True,
         bnb_4bit_compute_dtype=torch.float16,
@@ -55,7 +54,7 @@ def load_model():
     model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
         MODEL_ID,
         torch_dtype="auto",
-        device_map="cuda", # Explicit for ZeroGPU
         quantization_config=quant_config
     )
     processor = AutoProcessor.from_pretrained(MODEL_ID, max_pixels=1280*1280)
@@ -66,26 +65,30 @@ model, processor = load_model()
 # --- PDF TO IMAGE HELPER ---
 def get_pdf_page_image(pdf_path):
     doc = fitz.open(pdf_path)
-    page = doc.load_page(0) # First page only
-    pix = page.get_pixmap(matrix=fitz.Matrix(2, 2)) # 2x zoom
     img = Image.open(io.BytesIO(pix.tobytes()))
     doc.close()
     return img
 # --- INFERENCE ---
 @spaces.GPU(duration=60)
-def process_invoice(file_info):
-    if file_info is None: return {"error": "No file uploaded"}
-    # Handle File Type
     if file_info.name.lower().endswith(".pdf"):
         image = get_pdf_page_image(file_info.name)
     else:
         image = Image.open(file_info.name)
-    # 1. Router
     decision_prompt = "Identify vendor: VODAFONE, DIGI, or GENERAL. Reply with one word."
     messages = [{"role": "user", "content": [{"type": "image", "image": image}, {"type": "text", "text": decision_prompt}]}]
     text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     image_inputs, _ = process_vision_info(messages)
     inputs = processor(text=[text], images=image_inputs, padding=True, return_tensors="pt").to(model.device)
@@ -95,7 +98,8 @@ def process_invoice(file_info):
     vendor_key = "VODAFONE" if "VODAFONE" in raw_choice else ("DIGI" if "DIGI" in raw_choice else "GENERAL")
-    # 2. Specialist
     schema_json = json.dumps(SCHEMAS[vendor_key], indent=2)
     extract_prompt = f"Extract details as JSON strictly following this schema: {schema_json}. Return ONLY valid JSON."
@@ -106,22 +110,37 @@ def process_invoice(file_info):
     generated_ids = model.generate(**inputs, max_new_tokens=1536)
     result = processor.batch_decode(generated_ids[:, inputs.input_ids.shape[1]:], skip_special_tokens=True)[0]
     try:
-        return json.loads(result.strip().replace('```json', '').replace('```', ''))
     except:
-        return {"raw_output": result}
 # --- INTERFACE ---
-with gr.Blocks(title="InvoiceRecon") as demo:
-    gr.Markdown("# 📑 IntelliReceipt: Local AI Invoice Parser")
     with gr.Row():
         with gr.Column(scale=1):
-            # Using gr.File for the PDF preview experience
-            file_input = gr.File(label="Upload Invoice (PDF or Image)", file_types=[".pdf", ".png", ".jpg"])
             run_btn = gr.Button("🚀 Extract Data", variant="primary")
         with gr.Column(scale=1):
-            json_output = gr.JSON(label="Extracted Result")
-    run_btn.click(fn=process_invoice, inputs=file_input, outputs=json_output)
 demo.launch()

 from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor, BitsAndBytesConfig
 from qwen_vl_utils import process_vision_info
+# --- DETAILED SCHEMAS ---
 SCHEMAS = {
     "VODAFONE": {
         "vendor": "VODAFONE ROMANIA",
 MODEL_ID = "Qwen/Qwen2.5-VL-3B-Instruct"
 def load_model():
     quant_config = BitsAndBytesConfig(
         load_in_4bit=True,
         bnb_4bit_compute_dtype=torch.float16,
     model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
         MODEL_ID,
         torch_dtype="auto",
+        device_map="cuda",
         quantization_config=quant_config
     )
     processor = AutoProcessor.from_pretrained(MODEL_ID, max_pixels=1280*1280)
 # --- PDF TO IMAGE HELPER ---
 def get_pdf_page_image(pdf_path):
     doc = fitz.open(pdf_path)
+    page = doc.load_page(0)
+    pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))
     img = Image.open(io.BytesIO(pix.tobytes()))
     doc.close()
     return img
 # --- INFERENCE ---
 @spaces.GPU(duration=60)
+def process_invoice(file_info, progress=gr.Progress()):
+    if file_info is None:
+        return None, {"error": "No file uploaded"}
+    # 1. Handle File Type and Preview
+    progress(0.1, desc="📄 Processing document...")
     if file_info.name.lower().endswith(".pdf"):
         image = get_pdf_page_image(file_info.name)
     else:
         image = Image.open(file_info.name)
+    # 2. Router (Identify Vendor)
+    progress(0.3, desc="🔍 Identifying vendor (Router)...")
     decision_prompt = "Identify vendor: VODAFONE, DIGI, or GENERAL. Reply with one word."
     messages = [{"role": "user", "content": [{"type": "image", "image": image}, {"type": "text", "text": decision_prompt}]}]
     text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     image_inputs, _ = process_vision_info(messages)
     inputs = processor(text=[text], images=image_inputs, padding=True, return_tensors="pt").to(model.device)
     vendor_key = "VODAFONE" if "VODAFONE" in raw_choice else ("DIGI" if "DIGI" in raw_choice else "GENERAL")
+    # 3. Specialist (Extract Data)
+    progress(0.6, desc=f"🤖 Extracting {vendor_key} details...")
     schema_json = json.dumps(SCHEMAS[vendor_key], indent=2)
     extract_prompt = f"Extract details as JSON strictly following this schema: {schema_json}. Return ONLY valid JSON."
     generated_ids = model.generate(**inputs, max_new_tokens=1536)
     result = processor.batch_decode(generated_ids[:, inputs.input_ids.shape[1]:], skip_special_tokens=True)[0]
+    progress(0.9, desc="⚙️ Finalizing result...")
+    # 4. Return Image for Preview and JSON for data
     try:
+        data = json.loads(result.strip().replace('```json', '').replace('```', ''))
+        progress(1.0, desc="✅ Success!")
+        return image, data
     except:
+        progress(1.0, desc="⚠️ Extraction complete with formatting issues")
+        return image, {"raw_output": result}
 # --- INTERFACE ---
+with gr.Blocks(title="InvoiceRecon", theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 📑 IntelliReceipt: Real-Time Invoice AI")
+    gr.Markdown("Upload a Romanian invoice (PDF or Image) to extract structured data using Qwen2.5-VL.")
     with gr.Row():
         with gr.Column(scale=1):
+            file_input = gr.File(label="1. Upload Invoice", file_types=[".pdf", ".png", ".jpg"])
+            # The preview component to show the first page
+            preview_output = gr.Image(label="2. Document Preview", type="pil")
             run_btn = gr.Button("🚀 Extract Data", variant="primary")
         with gr.Column(scale=1):
+            json_output = gr.JSON(label="3. Extracted JSON Result")
+    # Important: Ensure inputs and outputs match function signature
+    run_btn.click(
+        fn=process_invoice,
+        inputs=file_input,
+        outputs=[preview_output, json_output]
+    )
 demo.launch()