Spaces:

ngupta2026
/

Gen_AI_Project

Sleeping

App Files Files Community

ngupta2026 committed on Apr 29

Commit

d60e25f

verified ·

1 Parent(s): 8be9c6a

Update app.py

Browse files

Files changed (1) hide show

app.py +65 -28

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import gradio as gr
 import pytesseract
-from PIL import Image
 import torch
 import re
 import requests
@@ -12,9 +12,7 @@ from transformers import LayoutLMTokenizerFast, LayoutLMForTokenClassification
 # CONFIG
 # =====================================================
 RESEND_API_KEY = os.getenv("RESEND_API_KEY")
-# Use verified sender from Resend
-FROM_EMAIL = "AI Claims <claims@yudham.com>"
 MODEL_NAME = "ngupta2026/sroie-layoutlm"
@@ -39,7 +37,7 @@ model.to(device)
 model.eval()
 # =====================================================
-# NORMALIZE BOX
 # =====================================================
 def normalize(box, width, height):
     return [
@@ -50,26 +48,48 @@ def normalize(box, width, height):
     ]
 # =====================================================
-# AVG CONFIDENCE
 # =====================================================
-def avg_conf(values):
-    if len(values) == 0:
         return 0
-    return sum(values) / len(values)
 # =====================================================
-# OCR + EXTRACTION (IMPROVED ACCURACY)
 # =====================================================
 def extract_receipt(image):
     try:
-        # Keep quality high for OCR
-        image = image.convert("RGB")
         data = pytesseract.image_to_data(
             image,
             output_type=pytesseract.Output.DICT,
-            config="--oem 3 --psm 6"
         )
         words = []
@@ -77,16 +97,16 @@ def extract_receipt(image):
         for i in range(len(data["text"])):
-            text = data["text"][i].strip()
-            if text != "" and text != "|":
                 x = data["left"][i]
                 y = data["top"][i]
                 w = data["width"][i]
                 h = data["height"][i]
-                words.append(text)
                 boxes.append([x, y, x + w, y + h])
         if len(words) == 0:
@@ -95,7 +115,9 @@ def extract_receipt(image):
         width, height = image.size
         boxes = [normalize(b, width, height) for b in boxes]
-        # IMPORTANT: use 512 for better predictions
         encoding = tokenizer(
             words,
             boxes=boxes,
@@ -108,6 +130,9 @@ def extract_receipt(image):
         encoding = {k: v.to(device) for k, v in encoding.items()}
         with torch.no_grad():
             outputs = model(**encoding)
@@ -129,32 +154,39 @@ def extract_receipt(image):
         }
         # =================================================
-        # TOKEN LEVEL EXTRACTION
         # =================================================
         for word, pred, conf in zip(words, preds, confs):
             label = id2label[pred.item()]
             c = conf.item()
-            # COMPANY from model
             if label == "COMPANY":
                 result["company"].append(word)
                 conf_store["company"].append(c)
-            # DATE regex
             if re.search(r"\d{1,2}[/-]\d{1,2}[/-]\d{2,4}", word):
                 result["date"].append(word)
                 conf_store["date"].append(c)
-            # TOTAL numeric values
-            cleaned = word.replace(",", "").replace("₹", "")
             if re.fullmatch(r"\d+(\.\d{1,2})?", cleaned):
                 try:
                     value = float(cleaned)
-                    # Better range for totals
-                    if value >= 10:
                         result["total"].append(value)
                         conf_store["total"].append(c)
@@ -167,14 +199,19 @@ def extract_receipt(image):
         # COMPANY
         company = " ".join(result["company"][:6]).strip()
         if company == "":
-            company = "Not Found"
         # DATE
         date = result["date"][0] if result["date"] else "Not Found"
-        # TOTAL = highest amount (better than last token)
-        total = str(max(result["total"])) if result["total"] else "Not Found"
         # CONFIDENCE
         company_conf = avg_conf(conf_store["company"])
@@ -298,7 +335,7 @@ demo = gr.Interface(
     ],
     title="📄 AI Insurance Claim Generator",
-    description="Upload receipt → Better extraction → Confidence check → Auto Email"
 )
 demo.launch()

 import gradio as gr
 import pytesseract
+from PIL import Image, ImageFilter, ImageOps
 import torch
 import re
 import requests
 # CONFIG
 # =====================================================
 RESEND_API_KEY = os.getenv("RESEND_API_KEY")
+FROM_EMAIL = "AI Claims <claims@yudham.com>"   # verified sender
 MODEL_NAME = "ngupta2026/sroie-layoutlm"
 model.eval()
 # =====================================================
+# NORMALIZE BOUNDING BOXES
 # =====================================================
 def normalize(box, width, height):
     return [
     ]
 # =====================================================
+# IMAGE PREPROCESSING (VERY IMPORTANT)
+# =====================================================
+def preprocess_image(image):  # Return an upscaled, grayscale, sharpened, auto-contrasted copy of `image` for OCR.
+    image = image.convert("RGB")  # normalise the mode first; presumably the input is a PIL image from Gradio — TODO confirm
+    # Upscale 2x in both dimensions before OCR.
+    w, h = image.size
+    image = image.resize((w * 2, h * 2))
+    # Collapse to single-channel grayscale (Pillow mode "L").
+    image = image.convert("L")
+    # Apply Pillow's built-in SHARPEN filter to crisp up edges after the resize.
+    image = image.filter(ImageFilter.SHARPEN)
+    # Stretch the intensity range so the darkest/lightest pixels map to black/white.
+    image = ImageOps.autocontrast(image)
+    return image  # NOTE: result is 2x the input size; extract_receipt reads .size from this returned image when normalising boxes
+# =====================================================
+# CONFIDENCE AVG
 # =====================================================
+def avg_conf(lst):  # Arithmetic mean of the values in `lst`; used on the per-field confidence lists.
+    if len(lst) == 0:  # nothing was collected for this field
         return 0  # return 0 instead of dividing by zero below
+    return sum(lst) / len(lst)
 # =====================================================
+# OCR + EXTRACTION
 # =====================================================
 def extract_receipt(image):
     try:
+        image = preprocess_image(image)
+        # Better OCR mode for receipts
         data = pytesseract.image_to_data(
             image,
             output_type=pytesseract.Output.DICT,
+            config="--oem 3 --psm 4"
         )
         words = []
         for i in range(len(data["text"])):
+            txt = data["text"][i].strip()
+            if txt != "" and txt != "|":
                 x = data["left"][i]
                 y = data["top"][i]
                 w = data["width"][i]
                 h = data["height"][i]
+                words.append(txt)
                 boxes.append([x, y, x + w, y + h])
         if len(words) == 0:
         width, height = image.size
         boxes = [normalize(b, width, height) for b in boxes]
+        # =================================================
+        # TOKENIZER
+        # =================================================
         encoding = tokenizer(
             words,
             boxes=boxes,
         encoding = {k: v.to(device) for k, v in encoding.items()}
+        # =================================================
+        # MODEL PREDICTION
+        # =================================================
         with torch.no_grad():
             outputs = model(**encoding)
         }
         # =================================================
+        # EXTRACT ENTITIES
         # =================================================
         for word, pred, conf in zip(words, preds, confs):
             label = id2label[pred.item()]
             c = conf.item()
+            # -------------------------
+            # COMPANY
+            # -------------------------
             if label == "COMPANY":
                 result["company"].append(word)
                 conf_store["company"].append(c)
+            # -------------------------
+            # DATE
+            # -------------------------
             if re.search(r"\d{1,2}[/-]\d{1,2}[/-]\d{2,4}", word):
                 result["date"].append(word)
                 conf_store["date"].append(c)
+            # -------------------------
+            # TOTAL
+            # -------------------------
+            cleaned = word.replace(",", "").replace("₹", "").replace("$", "")
             if re.fullmatch(r"\d+(\.\d{1,2})?", cleaned):
                 try:
                     value = float(cleaned)
+                    # realistic receipt range
+                    if 1 <= value <= 10000:
                         result["total"].append(value)
                         conf_store["total"].append(c)
         # COMPANY
         company = " ".join(result["company"][:6]).strip()
         if company == "":
+            # fallback top words
+            company = " ".join(words[:3])
         # DATE
         date = result["date"][0] if result["date"] else "Not Found"
+        # TOTAL = best realistic amount
+        if result["total"]:
+            total = f"{max(result['total']):.2f}"
+        else:
+            total = "Not Found"
         # CONFIDENCE
         company_conf = avg_conf(conf_store["company"])
     ],
     title="📄 AI Insurance Claim Generator",
+    description="Upload receipt → Extract fields accurately → Confidence Check → Auto Email"
 )
 demo.launch()