Spaces:

ashvin-savani
/

Invoice_parser

Running on Zero

App Files Files Community

ashvin-savani commited on Nov 14, 2025

Commit

294474c

1 Parent(s): 3889a50

Alpha

Browse files

Files changed (2) hide show

app.py +132 -4
requirements.txt +7 -0

app.py CHANGED Viewed

@@ -1,7 +1,135 @@
 import gradio as gr
-def greet(name):
-    return "Hello " + name + "!!"
-demo = gr.Interface(fn=greet, inputs="text", outputs="text")
-demo.launch()

+import os
+import time
+import base64
+import json
+import gc
+import torch
+import io
+from transformers import AutoProcessor, AutoModelForImageTextToText
+from qwen_vl_utils import process_vision_info
 import gradio as gr
+import spaces
+# Model setup
+MODEL_NAME = "numind/NuExtract-2.0-4B"
+device = "cuda"  # ZeroGPU provides GPU
+model = AutoModelForImageTextToText.from_pretrained(
+    MODEL_NAME,
+    trust_remote_code=True,
+    dtype=torch.bfloat16,
+    device_map=None,  # Load on CPU, move to GPU in function
+)
+processor = AutoProcessor.from_pretrained(
+    MODEL_NAME,
+    trust_remote_code=True,
+    padding_side='left',
+    use_fast=True,
+)
+# Invoice schema
+invoice_schema = {
+    "invoice_number": "",
+    "invoice_date": "",
+    "supplier_name": "",
+    "supplier_address": "",
+    "total_amount": "",
+    "currency": "",
+    "items": [
+        {
+            "description": "",
+            "quantity": "",
+            "unit_price": "",
+            "total_price": ""
+        }
+    ]
+}
+def encode_image_to_base64(image_path):
+    with open(image_path, "rb") as img_file:
+        return base64.b64encode(img_file.read()).decode("utf-8")
+def encode_image_from_pil(image):
+    buffer = io.BytesIO()
+    image.save(buffer, format="PNG")
+    return base64.b64encode(buffer.getvalue()).decode("utf-8")
+def prepare_prompt(image_path):
+    base64_image = encode_image_to_base64(image_path)
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {"type": "image", "image": f"data:image;base64,{base64_image}"}
+            ]
+        }
+    ]
+    text = processor.tokenizer.apply_chat_template(
+        messages,
+        template=json.dumps(invoice_schema, indent=4),
+        tokenize=False,
+        add_generation_prompt=True
+    )
+    return messages, text
+@spaces.GPU
+def process_image(image):
+    if image is None:
+        return "No image provided."
+    base64_str = encode_image_from_pil(image)
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {"type": "image", "image": f"data:image;base64,{base64_str}"}
+            ]
+        }
+    ]
+    text = processor.tokenizer.apply_chat_template(
+        messages,
+        template=json.dumps(invoice_schema, indent=4),
+        tokenize=False,
+        add_generation_prompt=True
+    )
+    image_inputs = process_vision_info(messages)[0] or []
+    inputs = processor(
+        text=[text],
+        images=image_inputs,
+        padding=True,
+        return_tensors="pt",
+    ).to(device)
+    generation_config = {
+        "do_sample": False,
+        "num_beams": 1,
+        "max_new_tokens": 2048,
+    }
+    generated_ids = model.generate(**inputs, **generation_config)
+    generated_ids_trimmed = [
+        out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
+    ]
+    output_text = processor.batch_decode(
+        generated_ids_trimmed,
+        skip_special_tokens=True,
+        clean_up_tokenization_spaces=False,
+    )[0]
+    return output_text
+# Gradio interface
+iface = gr.Interface(
+    fn=process_image,
+    inputs=gr.Image(type="pil", label="Upload Invoice Image"),
+    outputs=gr.Textbox(label="Extracted Invoice Data (JSON)"),
+    title="Invoice Parser with NuExtract",
+    description="Upload an invoice image to extract structured data using AI."
+)
+iface.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+torch
+transformers
+qwen-vl-utils
+gradio
+huggingface_hub[spaces]
+accelerate
+flashinfer-python