Spaces:

ngupta2026
/

Gen_AI_Project

Sleeping

App Files Files Community

ngupta2026 committed on Apr 29

Commit

2bd90f0

verified ·

1 Parent(s): f8fd50e

Update app.py

Browse files

Files changed (1) hide show

app.py +134 -42

app.py CHANGED Viewed

@@ -3,18 +3,24 @@ import pytesseract
 from PIL import Image
 import torch
 import re
 from transformers import LayoutLMTokenizerFast, LayoutLMForTokenClassification
-# =========================
 # LABELS
-# =========================
-label2id = {"O":0, "COMPANY":1, "DATE":2, "TOTAL":3}
-id2label = {v:k for k,v in label2id.items()}
-# =========================
-# LOAD YOUR TRAINED MODEL
-# =========================
-MODEL_NAME = "ngupta2026/sroie-layoutlm"   # 🔥 your new model
 model = LayoutLMForTokenClassification.from_pretrained(MODEL_NAME)
 tokenizer = LayoutLMTokenizerFast.from_pretrained(MODEL_NAME)
@@ -23,9 +29,18 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
 model.to(device)
 model.eval()
-# =========================
 # NORMALIZE BOXES
-# =========================
 def normalize(box, width, height):
     return [
         int(1000 * box[0] / width),
@@ -34,18 +49,21 @@ def normalize(box, width, height):
         int(1000 * box[3] / height),
     ]
-# =========================
-# MAIN FUNCTION
-# =========================
-def process(image):
-    # OCR
-    data = pytesseract.image_to_data(image, output_type=pytesseract.Output.DICT)
     words = []
     boxes = []
     for i in range(len(data["text"])):
         text = data["text"][i].strip()
         if text != "":
@@ -55,16 +73,14 @@ def process(image):
             h = data["height"][i]
             words.append(text)
-            boxes.append([x, y, x+w, y+h])
     if len(words) == 0:
         return {"error": "No text detected"}
-    # normalize boxes
     width, height = image.size
     boxes = [normalize(box, width, height) for box in boxes]
-    # tokenize
     encoding = tokenizer(
         words,
         boxes=boxes,
@@ -75,58 +91,134 @@ def process(image):
         max_length=512
     )
-    encoding = {k:v.to(device) for k,v in encoding.items()}
-    # model prediction
     with torch.no_grad():
         outputs = model(**encoding)
     predictions = torch.argmax(outputs.logits, dim=2)[0][:len(words)]
-    # =========================
-    # HYBRID EXTRACTION
-    # =========================
-    result = {"company": [], "date": [], "total": []}
     for word, pred in zip(words, predictions):
         label = id2label[pred.item()]
-        # 🧠 MODEL (company)
         if label == "COMPANY":
             result["company"].append(word)
-        # 📅 DATE (strong regex)
         if re.search(r"\d{2}[/-]\d{2}[/-]\d{2,4}", word):
             result["date"].append(word)
-        # 💰 TOTAL (better filtering)
         if re.search(r"\d+(\.\d{2})?", word):
             try:
                 value = float(word.replace(",", ""))
-                if value > 50:   # ignore small numbers
                     result["total"].append(word)
             except:
                 pass
-    # =========================
-    # CLEAN OUTPUT
-    # =========================
-    result["company"] = " ".join(result["company"]) if result["company"] else "Not Found"
-    result["date"] = result["date"][0] if result["date"] else "Not Found"
-    result["total"] = result["total"][-1] if result["total"] else "Not Found"
     return result
-# =========================
 # UI
-# =========================
 demo = gr.Interface(
-    fn=process,
-    inputs=gr.Image(type="pil"),
-    outputs="json",
-    title="📄 Document AI Extractor",
-    description="Upload invoice image → Extract Company, Date, Total"
 )
 demo.launch()

 from PIL import Image
 import torch
 import re
+import os
+import smtplib
+from email.mime.text import MIMEText
+from email.mime.multipart import MIMEMultipart
 from transformers import LayoutLMTokenizerFast, LayoutLMForTokenClassification
+# =====================================================
 # LABELS
+# =====================================================
+label2id = {"O": 0, "COMPANY": 1, "DATE": 2, "TOTAL": 3}
+id2label = {v: k for k, v in label2id.items()}
+# =====================================================
+# LOAD MODEL
+# =====================================================
+MODEL_NAME = "ngupta2026/sroie-layoutlm"
 model = LayoutLMForTokenClassification.from_pretrained(MODEL_NAME)
 tokenizer = LayoutLMTokenizerFast.from_pretrained(MODEL_NAME)
 model.to(device)
 model.eval()
+# =====================================================
+# EMAIL CONFIG
+# Add these in Hugging Face Space Secrets:
+# EMAIL_USER = yourgmail@gmail.com
+# EMAIL_PASS = your_app_password
+# =====================================================
+EMAIL_USER = os.getenv("EMAIL_USER")
+EMAIL_PASS = os.getenv("EMAIL_PASS")
+# =====================================================
 # NORMALIZE BOXES
+# =====================================================
 def normalize(box, width, height):
     return [
         int(1000 * box[0] / width),
         int(1000 * box[3] / height),
     ]
+# =====================================================
+# EXTRACT DATA
+# =====================================================
+def extract_receipt(image):
+    data = pytesseract.image_to_data(
+        image,
+        output_type=pytesseract.Output.DICT
+    )
     words = []
     boxes = []
     for i in range(len(data["text"])):
         text = data["text"][i].strip()
         if text != "":
             h = data["height"][i]
             words.append(text)
+            boxes.append([x, y, x + w, y + h])
     if len(words) == 0:
         return {"error": "No text detected"}
     width, height = image.size
     boxes = [normalize(box, width, height) for box in boxes]
     encoding = tokenizer(
         words,
         boxes=boxes,
         max_length=512
     )
+    encoding = {k: v.to(device) for k, v in encoding.items()}
     with torch.no_grad():
         outputs = model(**encoding)
     predictions = torch.argmax(outputs.logits, dim=2)[0][:len(words)]
+    result = {
+        "company": [],
+        "date": [],
+        "total": []
+    }
     for word, pred in zip(words, predictions):
         label = id2label[pred.item()]
+        # company from model
         if label == "COMPANY":
             result["company"].append(word)
+        # date from regex
         if re.search(r"\d{2}[/-]\d{2}[/-]\d{2,4}", word):
             result["date"].append(word)
+        # total from regex
         if re.search(r"\d+(\.\d{2})?", word):
             try:
                 value = float(word.replace(",", ""))
+                if value > 50:
                     result["total"].append(word)
             except:
                 pass
+    result["company"] = (
+        " ".join(result["company"])
+        if result["company"] else "Not Found"
+    )
+    result["date"] = (
+        result["date"][0]
+        if result["date"] else "Not Found"
+    )
+    result["total"] = (
+        result["total"][-1]
+        if result["total"] else "Not Found"
+    )
     return result
+# =====================================================
+# SEND EMAIL
+# =====================================================
+def send_claim_email(to_email, extracted):
+    """Email a reimbursement claim built from extracted receipt fields.
+
+    Args:
+        to_email: Recipient address for the claim message.
+        extracted: Mapping providing the "company", "date" and "total"
+            values interpolated into the message body.
+
+    Returns:
+        A human-readable status string. Failures are reported in the
+        returned text rather than raised.
+    """
+    if not EMAIL_USER or not EMAIL_PASS:
+        return "Email secrets not configured."
+    # Reject a blank recipient up front instead of opening an SMTP session
+    # that can only fail.
+    to_email = (to_email or "").strip()
+    if not to_email:
+        return "❌ Email failed: recipient address is empty"
+    subject = "Insurance Claim Request"
+    body = f"""
+Dear Claims Team,
+I would like to request reimbursement for an eligible expense.
+Provider Name: {extracted['company']}
+Bill Date: {extracted['date']}
+Claim Amount: ₹{extracted['total']}
+Please process the claim.
+Regards
+Customer
+"""
+    msg = MIMEMultipart()
+    msg["From"] = EMAIL_USER
+    msg["To"] = to_email
+    msg["Subject"] = subject
+    msg.attach(MIMEText(body, "plain"))
+    try:
+        # The context manager issues QUIT and closes the socket even when
+        # STARTTLS, login or delivery raises; the timeout stops a stalled
+        # connection from blocking the caller indefinitely.
+        with smtplib.SMTP("smtp.gmail.com", 587, timeout=30) as server:
+            server.starttls()
+            server.login(EMAIL_USER, EMAIL_PASS)
+            server.sendmail(EMAIL_USER, to_email, msg.as_string())
+    except Exception as e:
+        return f"❌ Email failed: {str(e)}"
+    return f"✅ Email sent successfully to {to_email}"
+# =====================================================
+# MAIN UI FUNCTION
+# =====================================================
+def process_and_send(image, email_id):
+    """Extract receipt fields from an image and email them as a claim.
+
+    Args:
+        image: Receipt image handed to extract_receipt.
+        email_id: Recipient address handed to send_claim_email.
+
+    Returns:
+        A two-item tuple: the extraction result, and a status string.
+        When extraction reports an error, the status is that error text
+        and no email is attempted.
+    """
+    # NOTE(review): the UI is expected to pass None when nothing was
+    # uploaded; report that in the same error shape instead of letting
+    # OCR fail on it.
+    if image is None:
+        message = "No image provided"
+        return {"error": message}, message
+    extracted = extract_receipt(image)
+    if "error" in extracted:
+        return extracted, extracted["error"]
+    return extracted, send_claim_email(email_id, extracted)
+# =====================================================
 # UI
+# =====================================================
 demo = gr.Interface(
+    fn=process_and_send,
+    inputs=[
+        gr.Image(type="pil", label="Upload Receipt"),
+        gr.Textbox(label="Insurance Email ID")
+    ],
+    outputs=[
+        gr.JSON(label="Extracted Data"),
+        gr.Textbox(label="Email Status")
+    ],
+    title="📄 AI Insurance Claim Generator",
+    description="Upload receipt → Extract details → Auto send claim email"
 )
 demo.launch()