Spaces:

ngupta2026
/

Gen_AI_Project

Sleeping

App Files Files Community

ngupta2026 committed on Apr 29

Commit

7f77d2e

verified ·

1 Parent(s): 2d9c5d6

Update app.py

Browse files

Files changed (1) hide show

app.py +132 -133

app.py CHANGED Viewed

@@ -5,11 +5,6 @@ import torch
 import re
 import requests
 import os
-import io
-import base64
-from reportlab.pdfgen import canvas
-from reportlab.lib.pagesizes import A4
 from transformers import LayoutLMTokenizerFast, LayoutLMForTokenClassification
@@ -18,8 +13,8 @@ from transformers import LayoutLMTokenizerFast, LayoutLMForTokenClassification
 # =====================================================
 RESEND_API_KEY = os.getenv("RESEND_API_KEY")
-# VERIFIED DOMAIN EMAIL (CHANGE THIS TO YOUR VERIFIED DOMAIN)
-FROM_EMAIL = "claims@send.yudham.com"
 MODEL_NAME = "ngupta2026/sroie-layoutlm"
@@ -44,7 +39,7 @@ model.to(device)
 model.eval()
 # =====================================================
-# NORMALIZE BOXES
 # =====================================================
 def normalize(box, width, height):
     return [
@@ -55,16 +50,26 @@ def normalize(box, width, height):
     ]
 # =====================================================
-# OCR + MODEL EXTRACTION
 # =====================================================
 def extract_receipt(image):
     try:
         image = image.convert("RGB")
         data = pytesseract.image_to_data(
             image,
-            output_type=pytesseract.Output.DICT
         )
         words = []
@@ -72,234 +77,228 @@ def extract_receipt(image):
         for i in range(len(data["text"])):
-            txt = data["text"][i].strip()
-            if txt != "":
                 x = data["left"][i]
                 y = data["top"][i]
                 w = data["width"][i]
                 h = data["height"][i]
-                words.append(txt)
-                boxes.append([x, y, x+w, y+h])
         if len(words) == 0:
-            return {"error": "No text found"}
         width, height = image.size
-        boxes = [normalize(box, width, height) for box in boxes]
-        # =================================================
-        # TOKENIZER
-        # =================================================
         encoding = tokenizer(
             words,
             boxes=boxes,
             return_tensors="pt",
             padding="max_length",
             truncation=True,
-            max_length=512,
-            is_split_into_words=True
         )
         encoding = {k: v.to(device) for k, v in encoding.items()}
-        # =================================================
-        # MODEL PREDICTION
-        # =================================================
         with torch.no_grad():
             outputs = model(**encoding)
         probs = torch.softmax(outputs.logits, dim=2)
         preds = torch.argmax(probs, dim=2)[0][:len(words)]
         # =================================================
-        # EXTRACTION STORE
         # =================================================
-        company_tokens = []
-        totals = []
-        dates = []
-        for word, pred in zip(words, preds):
             label = id2label[pred.item()]
-            # COMPANY
             if label == "COMPANY":
-                company_tokens.append(word)
-            # DATE via regex
             if re.search(r"\d{1,2}[/-]\d{1,2}[/-]\d{2,4}", word):
-                dates.append(word)
-            # MONEY
-            if re.search(r"^\d+[.,]?\d*$", word):
                 try:
-                    val = float(word.replace(",", ""))
-                    if val > 20:
-                        totals.append(val)
                 except:
                     pass
         # =================================================
         # FINAL CLEANUP
         # =================================================
-        company = " ".join(company_tokens[:6]).strip()
         if company == "":
             company = "Not Found"
-        date = dates[0] if len(dates) > 0 else "Not Found"
-        total = str(max(totals)) if len(totals) > 0 else "Not Found"
-        # =================================================
-        # ADDRESS HEURISTIC
-        # =================================================
-        address_lines = []
-        for w in words:
-            if (
-                w not in company_tokens
-                and w not in dates
-                and not re.search(r"^\d+[.,]?\d*$", w)
-            ):
-                if len(w) > 2:
-                    address_lines.append(w)
-        address = " ".join(address_lines[:10]).strip()
-        if address == "":
-            address = "Not Found"
         return {
             "company": company,
             "date": date,
             "total": total,
-            "address": address
         }
     except Exception as e:
         return {"error": str(e)}
 # =====================================================
-# PDF GENERATOR
 # =====================================================
-def create_pdf(extracted):
-    buffer = io.BytesIO()
-    c = canvas.Canvas(buffer, pagesize=A4)
-    width, height = A4
-    y = height - 60
-    c.setFont("Helvetica-Bold", 18)
-    c.drawString(50, y, "Insurance Claim Summary")
-    y -= 40
-    c.setFont("Helvetica", 12)
-    lines = [
-        f"Provider Name : {extracted['company']}",
-        f"Bill Date     : {extracted['date']}",
-        f"Claim Amount  : ₹{extracted['total']}",
-        f"Address       : {extracted['address']}",
-    ]
-    for line in lines:
-        c.drawString(50, y, line)
-        y -= 30
-    c.save()
-    pdf_bytes = buffer.getvalue()
-    buffer.close()
-    return pdf_bytes
 # =====================================================
-# EMAIL SEND VIA RESEND
 # =====================================================
-def send_email(to_email, extracted):
     if not RESEND_API_KEY:
-        return "❌ RESEND_API_KEY missing"
-    pdf_data = create_pdf(extracted)
-    pdf_b64 = base64.b64encode(pdf_data).decode()
-    html = f"""
     <h2>Insurance Claim Request</h2>
-    <p><b>Provider:</b> {extracted['company']}</p>
-    <p><b>Date:</b> {extracted['date']}</p>
-    <p><b>Amount:</b> ₹{extracted['total']}</p>
-    <p><b>Address:</b> {extracted['address']}</p>
-    <p>Please find attached PDF summary.</p>
-    """
-    payload = {
-        "from": FROM_EMAIL,
-        "to": [to_email],
-        "subject": "Insurance Claim Request",
-        "html": html,
-        "attachments": [
-            {
-                "filename": "claim_summary.pdf",
-                "content": pdf_b64
-            }
-        ]
-    }
-    headers = {
-        "Authorization": f"Bearer {RESEND_API_KEY}",
-        "Content-Type": "application/json"
-    }
     try:
-        r = requests.post(
             "https://api.resend.com/emails",
-            json=payload,
-            headers=headers,
             timeout=20
         )
-        if r.status_code in [200, 201]:
-            return f"✅ Email sent to {to_email}"
-        return f"❌ Email failed: {r.text}"
     except Exception as e:
         return f"❌ Email error: {str(e)}"
 # =====================================================
-# MAIN FUNCTION
 # =====================================================
-def process(image, email):
     extracted = extract_receipt(image)
     if "error" in extracted:
         return extracted, extracted["error"]
-    status = send_email(email, extracted)
-    return extracted, status
 # =====================================================
 # UI
 # =====================================================
 demo = gr.Interface(
-    fn=process,
     inputs=[
         gr.Image(type="pil", label="Upload Receipt"),
-        gr.Textbox(label="Enter Email ID")
     ],
     outputs=[
-        gr.JSON(label="Extracted Output"),
         gr.Textbox(label="Email Status")
     ],
     title="📄 AI Insurance Claim Generator",
-    description="Upload receipt → Extract details → Generate PDF → Send Email"
 )
 demo.launch()

 import re
 import requests
 import os
 from transformers import LayoutLMTokenizerFast, LayoutLMForTokenClassification
 # =====================================================
 RESEND_API_KEY = os.getenv("RESEND_API_KEY")
+# Use verified sender from Resend
+FROM_EMAIL = "AI Claims <claims@yudham.com>"
 MODEL_NAME = "ngupta2026/sroie-layoutlm"
 model.eval()
 # =====================================================
+# NORMALIZE BOX
 # =====================================================
 def normalize(box, width, height):
     return [
     ]
 # =====================================================
+# AVG CONFIDENCE
+# =====================================================
def avg_conf(values):
    """Return the arithmetic mean of *values*, or 0.0 when there are none.

    Args:
        values: Any iterable of numbers (list, tuple, generator, ...).

    Returns:
        The mean as a float. An empty iterable yields 0.0 so the result can
        be averaged with other scores without special-casing.
    """
    # Materialise once so one-shot iterables (generators, iterators) work;
    # calling len() on them directly raises TypeError.
    scores = list(values)
    if not scores:
        return 0.0
    return sum(scores) / len(scores)
+# =====================================================
+# OCR + EXTRACTION (IMPROVED ACCURACY)
 # =====================================================
 def extract_receipt(image):
     # OCR a receipt image with pytesseract, run the module-level token
     # classifier over the recognised words, and return a dict with the
     # keys "company", "date", "total" and "confidence".
     # Any exception raised inside the body is converted into
     # {"error": <message>} instead of propagating to the caller.
     try:
+        # Keep quality high for OCR
         image = image.convert("RGB")
         # The config string is handed to Tesseract unchanged; see the
         # Tesseract documentation for the meaning of --oem and --psm.
         data = pytesseract.image_to_data(
             image,
+            output_type=pytesseract.Output.DICT,
+            config="--oem 3 --psm 6"
         )
         words = []
         # NOTE(review): `boxes` is appended to below but its initialisation
         # is not visible in this excerpt (collapsed diff context) - confirm
         # it is created next to `words`.
         for i in range(len(data["text"])):
+            text = data["text"][i].strip()
             # Skip empty OCR cells and lone "|" tokens.
+            if text != "" and text != "|":
                 x = data["left"][i]
                 y = data["top"][i]
                 w = data["width"][i]
                 h = data["height"][i]
+                words.append(text)
                 # Store the box as [left, top, right, bottom] in pixels.
+                boxes.append([x, y, x + w, y + h])
         if len(words) == 0:
+            return {"error": "No text detected"}
         width, height = image.size
         # Pixel boxes are rescaled by normalize() using the image size.
+        boxes = [normalize(b, width, height) for b in boxes]
+        # IMPORTANT: use 512 for better predictions
         encoding = tokenizer(
             words,
             boxes=boxes,
             return_tensors="pt",
             padding="max_length",
             truncation=True,
+            is_split_into_words=True,
+            max_length=512
         )
         # Move every tensor in the encoding to the model's device.
         encoding = {k: v.to(device) for k, v in encoding.items()}
         with torch.no_grad():
             outputs = model(**encoding)
         probs = torch.softmax(outputs.logits, dim=2)
         # NOTE(review): the first len(words) sequence positions are paired
         # one-to-one with `words` below. That presumes position k belongs to
         # word k - confirm against the tokenizer output (special tokens and
         # sub-word splits would shift the alignment).
         preds = torch.argmax(probs, dim=2)[0][:len(words)]
         # Probability of the arg-max label at each of those positions.
+        confs = torch.max(probs, dim=2)[0][0][:len(words)]
         # Candidate values per field, with a parallel list of confidences.
+        result = {
+            "company": [],
+            "date": [],
+            "total": []
+        }
+        conf_store = {
+            "company": [],
+            "date": [],
+            "total": []
+        }
         # =================================================
+        # TOKEN LEVEL EXTRACTION
         # =================================================
+        for word, pred, conf in zip(words, preds, confs):
             label = id2label[pred.item()]
+            c = conf.item()
+            # COMPANY from model
             if label == "COMPANY":
+                result["company"].append(word)
+                conf_store["company"].append(c)
+            # DATE regex
             # Dates and totals are picked by pattern, not by predicted
             # label; the confidence recorded for them is still the model's
             # score for whatever label it assigned at that position.
             if re.search(r"\d{1,2}[/-]\d{1,2}[/-]\d{2,4}", word):
+                result["date"].append(word)
+                conf_store["date"].append(c)
+            # TOTAL numeric values
+            cleaned = word.replace(",", "").replace("₹", "")
+            if re.fullmatch(r"\d+(\.\d{1,2})?", cleaned):
                 try:
+                    value = float(cleaned)
+                    # Better range for totals
+                    if value >= 10:
+                        result["total"].append(value)
+                        conf_store["total"].append(c)
                 # NOTE(review): bare except silently drops any failure
                 # here; the fullmatch above already restricts `cleaned` to
                 # a plain decimal, so this looks purely defensive.
                 except:
                     pass
         # =================================================
         # FINAL CLEANUP
         # =================================================
+        # COMPANY
         # Only the first six COMPANY-labelled words are kept.
+        company = " ".join(result["company"][:6]).strip()
         if company == "":
             company = "Not Found"
+        # DATE
         # The first word matching the date pattern wins.
+        date = result["date"][0] if result["date"] else "Not Found"
+        # TOTAL = highest amount (better than last token)
+        total = str(max(result["total"])) if result["total"] else "Not Found"
+        # CONFIDENCE
         # avg_conf returns 0 for an empty list, so a field with no
         # candidates pulls the three-way mean below down by a third.
+        company_conf = avg_conf(conf_store["company"])
+        date_conf = avg_conf(conf_store["date"])
+        total_conf = avg_conf(conf_store["total"])
+        overall = (company_conf + date_conf + total_conf) / 3
         return {
             "company": company,
             "date": date,
             "total": total,
+            "confidence": round(overall, 3)
         }
     except Exception as e:
         return {"error": str(e)}
 # =====================================================
+# DECISION ENGINE
 # =====================================================
def decision_layer(conf, auto_send_threshold=0.80, review_threshold=0.60):
    """Map an extraction confidence score to a routing decision.

    Args:
        conf: Confidence score to classify.
        auto_send_threshold: Minimum score (inclusive) for "AUTO_SEND".
        review_threshold: Minimum score (inclusive) for "REVIEW".

    Returns:
        "AUTO_SEND", "REVIEW" or "REJECT". A score that compares false
        against both thresholds (for example NaN) is rejected.

    Raises:
        ValueError: If review_threshold is greater than
            auto_send_threshold, which would make "REVIEW" unreachable.
    """
    if review_threshold > auto_send_threshold:
        raise ValueError(
            "review_threshold must not exceed auto_send_threshold"
        )

    if conf >= auto_send_threshold:
        return "AUTO_SEND"
    if conf >= review_threshold:
        return "REVIEW"
    return "REJECT"
 # =====================================================
+# EMAIL SEND
 # =====================================================
def send_claim_email(to_email, extracted):
    """Email an insurance-claim summary through the Resend HTTP API.

    Args:
        to_email: Destination address as entered by the user.
        extracted: Mapping holding the "company", "date" and "total" values
            to include in the message.

    Returns:
        A human-readable status string. A blank recipient, a missing API
        key, a request failure and a non-200/201 response are all reported
        through this string (prefixed with ❌) rather than raised.

    Raises:
        KeyError: If *extracted* lacks one of the three keys above.
    """
    # Function-scope import so the block does not depend on a file-level
    # import; html is part of the standard library.
    from html import escape

    recipient = str(to_email or "").strip()
    if not recipient:
        # Checked first: it needs no configuration and avoids an API
        # round-trip that could only fail.
        return "❌ Missing destination email"

    if not RESEND_API_KEY:
        return "❌ Missing RESEND_API_KEY secret"

    subject = "Insurance Claim Request"

    # These values originate from OCR of a user-supplied image, so they are
    # escaped before being embedded in HTML; otherwise text such as "<" or
    # "&" on a receipt would be interpreted as markup by the mail client.
    provider = escape(str(extracted['company']))
    bill_date = escape(str(extracted['date']))
    amount = escape(str(extracted['total']))

    html_body = f"""
    <h2>Insurance Claim Request</h2>
    <p>Dear Claims Team,</p>
    <p>Please process reimbursement request.</p>
    <p><b>Provider Name:</b> {provider}</p>
    <p><b>Bill Date:</b> {bill_date}</p>
    <p><b>Claim Amount:</b> ₹{amount}</p>
    <p>Regards,<br>AI Claims System</p>
    """

    try:
        response = requests.post(
            "https://api.resend.com/emails",
            headers={
                "Authorization": f"Bearer {RESEND_API_KEY}",
                "Content-Type": "application/json"
            },
            json={
                "from": FROM_EMAIL,
                "to": [recipient],
                "subject": subject,
                "html": html_body
            },
            timeout=20
        )
    except Exception as e:
        # Best-effort boundary: the UI shows the returned string, so any
        # failure while sending is surfaced as text instead of raising.
        return f"❌ Email error: {str(e)}"

    if response.status_code in [200, 201]:
        return f"✅ Email sent successfully to {recipient}"
    return f"❌ Email failed: {response.text}"
 # =====================================================
+# MAIN PIPELINE
 # =====================================================
def process_and_send(image, email_id):
    """Extract receipt fields, route on confidence, and report the outcome.

    Args:
        image: Receipt image forwarded to extract_receipt.
        email_id: Destination address used when the claim is auto-sent.

    Returns:
        A (result, status) pair. When extraction reports an error, the
        result is that error dict and the status is its message. Otherwise
        the result is the extraction dict with a "decision" entry added,
        and the status describes what happened to the claim.
    """
    receipt = extract_receipt(image)
    if "error" in receipt:
        return receipt, receipt["error"]

    score = receipt["confidence"]
    verdict = decision_layer(score)
    receipt["decision"] = verdict

    # Only the AUTO_SEND verdict results in an email being sent.
    if verdict == "AUTO_SEND":
        return receipt, send_claim_email(email_id, receipt)
    if verdict == "REVIEW":
        return receipt, f"⚠️ Human review required (confidence={score})"
    return receipt, f"❌ Rejected (low confidence={score})"
 # =====================================================
 # UI
 # =====================================================
 demo = gr.Interface(
     # Inputs are listed in the same order as process_and_send's
     # parameters (image, email_id); outputs follow the order of the
     # (extracted, email_status) pair it returns.
+    fn=process_and_send,
     inputs=[
         gr.Image(type="pil", label="Upload Receipt"),
+        gr.Textbox(label="Enter Destination Email")
     ],
     outputs=[
+        gr.JSON(label="AI Extraction"),
         gr.Textbox(label="Email Status")
     ],
     title="📄 AI Insurance Claim Generator",
+    description="Upload receipt → Better extraction → Confidence check → Auto Email"
 )
 # Launched unconditionally at module level (no __main__ guard).
 demo.launch()