Spaces:

Liviu16
/

InvoiceRecon

Sleeping

App Files Community

Liviu16 committed on Feb 13

Commit

d45f115

verified ·

1 Parent(s): 41e8072

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -4

app.py CHANGED Viewed

@@ -84,9 +84,14 @@ def process_invoice(file_info, progress=gr.Progress()):
     else:
         image = Image.open(file_info.name)
-    # 2. Router (Identify Vendor)
-    progress(0.3, desc="🔍 Identifying vendor (Router)...")
-    decision_prompt = "Identify vendor: VODAFONE, DIGI, or GENERAL. Reply with one word."
     messages = [{"role": "user", "content": [{"type": "image", "image": image}, {"type": "text", "text": decision_prompt}]}]
     text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
@@ -96,9 +101,17 @@ def process_invoice(file_info, progress=gr.Progress()):
     generated_ids = model.generate(**inputs, max_new_tokens=10)
     raw_choice = processor.batch_decode(generated_ids[:, inputs.input_ids.shape[1]:], skip_special_tokens=True)[0].strip().upper()
     vendor_key = "VODAFONE" if "VODAFONE" in raw_choice else ("DIGI" if "DIGI" in raw_choice else "GENERAL")
-    # 3. Specialist (Extract Data)
     progress(0.6, desc=f"🤖 Extracting {vendor_key} details...")
     schema_json = json.dumps(SCHEMAS[vendor_key], indent=2)
     extract_prompt = f"Extract details as JSON strictly following this schema: {schema_json}. Return ONLY valid JSON."

     else:
         image = Image.open(file_info.name)
+    # 2. Router & Validation (Identify Vendor or Reject)
+    progress(0.3, desc="🔍 Validating and Identifying Vendor...")
+    # Updated prompt to provide an 'INVALID' exit
+    decision_prompt = """Analyze this image. Is it a financial invoice or receipt?
+    - If NO (e.g. random photo, object, landscape): Reply 'INVALID'.
+    - If YES: Reply ONLY with 'VODAFONE', 'DIGI', or 'GENERAL'."""
     messages = [{"role": "user", "content": [{"type": "image", "image": image}, {"type": "text", "text": decision_prompt}]}]
     text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     generated_ids = model.generate(**inputs, max_new_tokens=10)
     raw_choice = processor.batch_decode(generated_ids[:, inputs.input_ids.shape[1]:], skip_special_tokens=True)[0].strip().upper()
+    # VALIDATION CHECK: If model says INVALID, stop here
+    if "INVALID" in raw_choice:
+        progress(1.0, desc="❌ Invalid Document")
+        return image, {
+            "error": "Validation Failed",
+            "message": "The uploaded image does not appear to be an invoice. Extraction cancelled to prevent hallucinations."
+        }
     vendor_key = "VODAFONE" if "VODAFONE" in raw_choice else ("DIGI" if "DIGI" in raw_choice else "GENERAL")
+    # 3. Specialist (Extract Data) - Only runs for valid documents
     progress(0.6, desc=f"🤖 Extracting {vendor_key} details...")
     schema_json = json.dumps(SCHEMAS[vendor_key], indent=2)
     extract_prompt = f"Extract details as JSON strictly following this schema: {schema_json}. Return ONLY valid JSON."