Liviu16 committed on
Commit
3a86ca3
·
verified ·
1 Parent(s): 8702b18

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -70
app.py CHANGED
@@ -1,94 +1,59 @@
1
  import gradio as gr
2
  import torch
3
  import json
4
- import spaces # <--- CRITICAL: Required for ZeroGPU
 
5
  from PIL import Image
 
6
  from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
7
  from qwen_vl_utils import process_vision_info
8
 
9
- # --- MODEL LOADING (Optimized for ZeroGPU) ---
10
  MODEL_ID = "Qwen/Qwen2.5-VL-3B-Instruct"
11
-
12
- # We load in bfloat16 for max accuracy since ZeroGPU has 70GB VRAM
13
- model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
14
- MODEL_ID,
15
- torch_dtype=torch.bfloat16,
16
- device_map="cuda"
17
- )
18
  processor = AutoProcessor.from_pretrained(MODEL_ID, max_pixels=1280*1280)
19
 
20
- SCHEMAS = {
21
- "VODAFONE": {
22
- "vendor": "VODAFONE ROMANIA",
23
- "invoice_number": "string",
24
- "date": "string (DD-MM-YYYY)",
25
- "due_date": "string (DD-MM-YYYY)",
26
- "client_name": "string",
27
- "client_address": "string",
28
- "account_id": "string",
29
- "billing_period": "string",
30
- "totals": {
31
- "subtotal_no_vat": "number",
32
- "vat_amount": "number",
33
- "grand_total": "number",
34
- "currency": "RON"
35
- },
36
- "details": [
37
- {"phone_number": "string", "service_name": "string", "cost": "number"}
38
- ]
39
- },
40
- "DIGI": {
41
- "vendor": "DIGI (RCS & RDS)",
42
- "invoice_number": "string",
43
- "date": "string",
44
- "contract_id": "string",
45
- "client_name": "string",
46
- "total_amount": "number",
47
- "iban_code": "string"
48
- },
49
- "GENERAL": {
50
- "vendor_name": "string",
51
- "vendor_iban": "string",
52
- "invoice_id": "string",
53
- "date_issued": "string",
54
- "total_gross_amount": "number",
55
- "vat_total": "number",
56
- "vat_rate": "string (e.g., 19%)",
57
- "client_name": "string",
58
- "service_description": "string",
59
- "consumption_details": "string (e.g., 450 kWh or Period Nov-Dec)"
60
- }
61
- }
62
 
63
- # --- THE DECORATED INFERENCE FUNCTION ---
64
- @spaces.GPU(duration=60) # <--- CRITICAL: This triggers the GPU
65
- def process_invoice(image):
66
- if image is None: return {"error": "No image uploaded"}
67
 
68
- # 1. Router (Which vendor?)
 
 
 
 
 
 
 
69
  decision_prompt = "Identify vendor: VODAFONE, DIGI, or GENERAL. Reply with one word."
70
  messages = [{"role": "user", "content": [{"type": "image", "image": image}, {"type": "text", "text": decision_prompt}]}]
71
-
72
- # Process for model
73
  text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
74
  image_inputs, _ = process_vision_info(messages)
75
  inputs = processor(text=[text], images=image_inputs, padding=True, return_tensors="pt").to(model.device)
76
 
77
- # Generate Choice
78
- generated_ids = model.generate(**inputs, max_new_tokens=1536)
79
  raw_choice = processor.batch_decode(generated_ids[:, inputs.input_ids.shape[1]:], skip_special_tokens=True)[0].strip().upper()
80
 
 
81
  vendor_key = "VODAFONE" if "VODAFONE" in raw_choice else ("DIGI" if "DIGI" in raw_choice else "GENERAL")
82
 
83
- # 2. Specialist (Extract Data)
84
- schema_json = json.dumps(SCHEMAS[vendor_key], indent=2)
85
- extract_prompt = f"Return ONLY valid JSON: {schema_json}"
86
-
87
  messages[0]["content"][1]["text"] = extract_prompt
88
  text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
89
  inputs = processor(text=[text], images=image_inputs, padding=True, return_tensors="pt").to(model.device)
90
 
91
- generated_ids = model.generate(**inputs, max_new_tokens=1024)
92
  result = processor.batch_decode(generated_ids[:, inputs.input_ids.shape[1]:], skip_special_tokens=True)[0]
93
 
94
  try:
@@ -97,15 +62,16 @@ def process_invoice(image):
97
  return {"raw_output": result}
98
 
99
  # --- TRANSKRIBUS LAYOUT ---
100
- with gr.Blocks() as demo:
101
  gr.Markdown("# πŸ“‘ IntelliReceipt: Real-Time Invoice AI")
102
  with gr.Row():
103
- with gr.Column():
104
- img_input = gr.File(label="1. Upload Invoice (Image or PDF)", file_types=[".pdf", ".png", ".jpg", ".jpeg"])
 
105
  run_btn = gr.Button("πŸš€ Extract Data", variant="primary")
106
- with gr.Column():
107
  json_output = gr.JSON(label="Extracted Result")
108
 
109
- run_btn.click(fn=process_invoice, inputs=img_input, outputs=json_output)
110
 
111
  demo.launch()
 
1
  import gradio as gr
2
  import torch
3
  import json
4
+ import spaces
5
+ import fitz # PyMuPDF
6
  from PIL import Image
7
+ import io
8
  from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
9
  from qwen_vl_utils import process_vision_info
10
 
11
# --- MODEL LOADING ---
MODEL_ID = "Qwen/Qwen2.5-VL-3B-Instruct"

# bfloat16 weights, placed straight onto the GPU via device_map.
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    device_map="cuda",
)

# max_pixels caps how large an input image the processor keeps,
# bounding preprocessing memory and latency.
processor = AutoProcessor.from_pretrained(MODEL_ID, max_pixels=1280*1280)
15
 
16
# --- PDF HELPER ---
def pdf_to_image(pdf_path):
    """Render the first page of a PDF as a PIL Image.

    Args:
        pdf_path: Filesystem path to the PDF file.

    Returns:
        PIL.Image.Image: the first page rendered at 2x zoom (sharper text
        helps the vision model read small print).

    Raises:
        Propagates fitz/PIL errors for unreadable or empty documents.
    """
    doc = fitz.open(pdf_path)
    try:
        page = doc.load_page(0)  # first page only — enough for the demo
        # Matrix(2, 2) doubles the effective DPI for better OCR-style reading.
        pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))
        # Pixmap.tobytes() defaults to PNG, which PIL opens directly;
        # the BytesIO buffer is independent of the document, so closing
        # the doc afterwards is safe.
        return Image.open(io.BytesIO(pix.tobytes()))
    finally:
        # BUGFIX: the original skipped doc.close() when rendering raised,
        # leaking the document handle. Always close it.
        doc.close()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
+ @spaces.GPU(duration=60)
27
+ def process_invoice(file_info):
28
+ if file_info is None: return {"error": "No file uploaded"}
 
29
 
30
+ # 1. Handle PDF vs Image
31
+ file_path = file_info.name
32
+ if file_path.lower().endswith(".pdf"):
33
+ image = pdf_to_image(file_path)
34
+ else:
35
+ image = Image.open(file_path)
36
+
37
+ # 2. Identify Vendor (Router)
38
  decision_prompt = "Identify vendor: VODAFONE, DIGI, or GENERAL. Reply with one word."
39
  messages = [{"role": "user", "content": [{"type": "image", "image": image}, {"type": "text", "text": decision_prompt}]}]
 
 
40
  text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
41
  image_inputs, _ = process_vision_info(messages)
42
  inputs = processor(text=[text], images=image_inputs, padding=True, return_tensors="pt").to(model.device)
43
 
44
+ generated_ids = model.generate(**inputs, max_new_tokens=10)
 
45
  raw_choice = processor.batch_decode(generated_ids[:, inputs.input_ids.shape[1]:], skip_special_tokens=True)[0].strip().upper()
46
 
47
+ # [Your Schema Logic Here...]
48
  vendor_key = "VODAFONE" if "VODAFONE" in raw_choice else ("DIGI" if "DIGI" in raw_choice else "GENERAL")
49
 
50
+ # 3. Extract Data (Specialist)
51
+ extract_prompt = f"Return ONLY valid JSON for {vendor_key} invoice."
 
 
52
  messages[0]["content"][1]["text"] = extract_prompt
53
  text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
54
  inputs = processor(text=[text], images=image_inputs, padding=True, return_tensors="pt").to(model.device)
55
 
56
+ generated_ids = model.generate(**inputs, max_new_tokens=1536)
57
  result = processor.batch_decode(generated_ids[:, inputs.input_ids.shape[1]:], skip_special_tokens=True)[0]
58
 
59
  try:
 
62
  return {"raw_output": result}
63
 
64
# --- TRANSKRIBUS LAYOUT ---
with gr.Blocks(title="InvoiceRecon") as demo:
    gr.Markdown("# πŸ“‘ IntelliReceipt: Real-Time Invoice AI")
    with gr.Row():
        with gr.Column(scale=1):
            # gr.File (rather than gr.Image) so PDFs can be uploaded and previewed.
            # ".jpeg" restored: the previous revision accepted it and the
            # handler opens any PIL-readable image, so dropping it was a
            # regression that blocked valid uploads.
            file_input = gr.File(
                label="Upload Invoice (PDF, PNG, JPG)",
                file_types=[".pdf", ".png", ".jpg", ".jpeg"],
            )
            run_btn = gr.Button("πŸš€ Extract Data", variant="primary")
        with gr.Column(scale=1):
            json_output = gr.JSON(label="Extracted Result")

    # Wire the button to the GPU-decorated inference function.
    run_btn.click(fn=process_invoice, inputs=file_input, outputs=json_output)

demo.launch()