Spaces:

ngupta2026
/

Gen_AI_Project

Sleeping

App Files Files Community

ngupta2026 committed on Apr 29

Commit

f5ff3d2

verified ·

1 Parent(s): 2f0bde3

Update app.py

Browse files

Files changed (1) hide show

app.py +105 -162

app.py CHANGED Viewed

@@ -6,78 +6,29 @@ import re
 import requests
 import os
-RESEND_API_KEY = os.getenv("RESEND_API_KEY")
-def send_claim_email(to_email, extracted):
-    if not RESEND_API_KEY:
-        return "❌ API key missing"
-    subject = "Insurance Claim Request"
-    html_body = f"""
-    <h2>Insurance Claim</h2>
-    <p><b>Provider:</b> {extracted['company']}</p>
-    <p><b>Date:</b> {extracted['date']}</p>
-    <p><b>Amount:</b> ₹{extracted['total']}</p>
-    """
-    try:
-        response = requests.post(
-            "https://api.resend.com/emails",
-            headers={
-                "Authorization": f"Bearer {RESEND_API_KEY}",
-                "Content-Type": "application/json",
-            },
-            json={
-                "from": "onboarding@resend.dev",
-                "to": [to_email],
-                "subject": subject,
-                "html": html_body,
-            },
-            timeout=10
-        )
-        if response.status_code == 200:
-            return f"✅ Email sent to {to_email}"
-        else:
-            return f"❌ Failed: {response.text}"
-    except Exception as e:
-        return f"❌ Error: {str(e)}"
-from email.mime.text import MIMEText
-from email.mime.multipart import MIMEMultipart
 from transformers import LayoutLMTokenizerFast, LayoutLMForTokenClassification
 # =====================================================
-# LABELS
 # =====================================================
 label2id = {"O": 0, "COMPANY": 1, "DATE": 2, "TOTAL": 3}
 id2label = {v: k for k, v in label2id.items()}
 # =====================================================
 # LOAD MODEL
 # =====================================================
-MODEL_NAME = "ngupta2026/sroie-layoutlm"
 model = LayoutLMForTokenClassification.from_pretrained(MODEL_NAME)
 tokenizer = LayoutLMTokenizerFast.from_pretrained(MODEL_NAME)
-device = "cuda" if torch.cuda.is_available() else "cpu"
 model.to(device)
 model.eval()
-# =====================================================
-# EMAIL CONFIG
-# Add these in Hugging Face Space Secrets:
-# EMAIL_USER = yourgmail@gmail.com
-# EMAIL_PASS = your_app_password
-# =====================================================
-EMAIL_USER = os.getenv("EMAIL_USER")
-EMAIL_PASS = os.getenv("EMAIL_PASS")
 # =====================================================
 # NORMALIZE BOXES
 # =====================================================
@@ -93,157 +44,149 @@ def normalize(box, width, height):
 # EXTRACT DATA
 # =====================================================
 def extract_receipt(image):
-    image = image.convert("RGB")
-    image.thumbnail((1200, 1200))
-    data = pytesseract.image_to_data(
-        image,
-        output_type=pytesseract.Output.DICT
-    )
-    words = []
-    boxes = []
-    for i in range(len(data["text"])):
-        text = data["text"][i].strip()
-        if text.strip() != "" and len(text) > 2:
-            x = data["left"][i]
-            y = data["top"][i]
-            w = data["width"][i]
-            h = data["height"][i]
-            words.append(text)
-            boxes.append([x, y, x + w, y + h])
-    if len(words) == 0:
-        return {"error": "No text detected"}
-    width, height = image.size
-    boxes = [normalize(box, width, height) for box in boxes]
-    encoding = tokenizer(
-        words,
-        boxes=boxes,
-        return_tensors="pt",
-        padding="max_length",
-        truncation=True,
-        is_split_into_words=True,
-        max_length=256
-    )
-    encoding = {k: v.to(device) for k, v in encoding.items()}
-    with torch.no_grad():
-        outputs = model(**encoding)
-    predictions = torch.argmax(outputs.logits, dim=2)[0][:len(words)]
-    result = {
-        "company": [],
-        "date": [],
-        "total": []
-    }
-    for word, pred in zip(words, predictions):
-        label = id2label[pred.item()]
-        # company from model
-        if label == "COMPANY":
-            result["company"].append(word)
-        # date from regex
-        if re.search(r"\d{2}[/-]\d{2}[/-]\d{2,4}", word):
-            result["date"].append(word)
-        # total from regex
-        if re.search(r"\d+(\.\d{2})?", word):
-            try:
-                value = float(word.replace(",", ""))
-                if value > 50:
-                    result["total"].append(word)
-            except:
-                pass
-    result["company"] = (
-        " ".join(result["company"])
-        if result["company"] else "Not Found"
-    )
-    result["date"] = (
-        result["date"][0]
-        if result["date"] else "Not Found"
-    )
-    result["total"] = (
-        result["total"][-1]
-        if result["total"] else "Not Found"
-    )
-    return result
 # =====================================================
-# SEND EMAIL
 # =====================================================
 def send_claim_email(to_email, extracted):
-    if not EMAIL_USER or not EMAIL_PASS:
-        return "Email secrets not configured."
     subject = "Insurance Claim Request"
-    body = f"""
-Dear Claims Team,
-I would like to request reimbursement for an eligible expense.
-Provider Name: {extracted['company']}
-Bill Date: {extracted['date']}
-Claim Amount: ₹{extracted['total']}
-Please process the claim.
-Regards
-Customer
-"""
-    msg = MIMEMultipart()
-    msg["From"] = EMAIL_USER
-    msg["To"] = to_email
-    msg["Subject"] = subject
-    msg.attach(MIMEText(body, "plain"))
     try:
-        server = smtplib.SMTP("smtp.gmail.com", 587)
-        server.starttls()
-        server.login(EMAIL_USER, EMAIL_PASS)
-        server.sendmail(
-            EMAIL_USER,
-            to_email,
-            msg.as_string()
         )
-        server.quit()
-        return f"✅ Email sent successfully to {to_email}"
     except Exception as e:
-        return f"❌ Email failed: {str(e)}"
 # =====================================================
-# MAIN UI FUNCTION
 # =====================================================
 def process_and_send(image, email_id):
     extracted = extract_receipt(image)
     if "error" in extracted:
         return extracted, extracted["error"]
     email_status = send_claim_email(email_id, extracted)
     return extracted, email_status
 # =====================================================
@@ -253,7 +196,7 @@ demo = gr.Interface(
     fn=process_and_send,
     inputs=[
         gr.Image(type="pil", label="Upload Receipt"),
-        gr.Textbox(label="Insurance Email ID")
     ],
     outputs=[
         gr.JSON(label="Extracted Data"),

 import requests
 import os
 from transformers import LayoutLMTokenizerFast, LayoutLMForTokenClassification
 # =====================================================
+# CONFIG
 # =====================================================
+RESEND_API_KEY = os.getenv("RESEND_API_KEY")
 label2id = {"O": 0, "COMPANY": 1, "DATE": 2, "TOTAL": 3}
 id2label = {v: k for k, v in label2id.items()}
+MODEL_NAME = "ngupta2026/sroie-layoutlm"
 # =====================================================
 # LOAD MODEL
 # =====================================================
+device = "cuda" if torch.cuda.is_available() else "cpu"
 model = LayoutLMForTokenClassification.from_pretrained(MODEL_NAME)
 tokenizer = LayoutLMTokenizerFast.from_pretrained(MODEL_NAME)
 model.to(device)
 model.eval()
 # =====================================================
 # NORMALIZE BOXES
 # =====================================================
 # EXTRACT DATA
 # =====================================================
 def extract_receipt(image):
     """Run OCR and LayoutLM over a receipt image and extract key fields.

     Args:
         image: PIL image of the receipt.

     Returns:
         A dict with string values under "company", "date" and "total"
         ("Not Found" when a field could not be extracted), or
         {"error": message} when no text was detected or any step failed.
     """
     try:
         # Downscale large uploads so OCR and inference stay fast.
         image = image.convert("RGB")
         image.thumbnail((1200, 1200))
         data = pytesseract.image_to_data(
             image,
             output_type=pytesseract.Output.DICT
         )
         words = []
         boxes = []
         for i, raw_text in enumerate(data["text"]):
             text = raw_text.strip()
             # Fragments of one or two characters are discarded.
             if len(text) > 2:
                 x = data["left"][i]
                 y = data["top"][i]
                 w = data["width"][i]
                 h = data["height"][i]
                 words.append(text)
                 boxes.append([x, y, x + w, y + h])
         if not words:
             return {"error": "No text detected"}
         width, height = image.size
         boxes = [normalize(box, width, height) for box in boxes]
         # NOTE(review): confirm that LayoutLMTokenizerFast really consumes
         # `boxes`; if it does not, the model receives no `bbox` input.
         encoding = tokenizer(
             words,
             boxes=boxes,
             return_tensors="pt",
             padding="max_length",
             truncation=True,
             is_split_into_words=True,
             max_length=256
         )
         # Capture the token -> word mapping before the BatchEncoding is
         # flattened into a plain dict (which has no word_ids()).
         word_ids = encoding.word_ids(batch_index=0)
         encoding = {k: v.to(device) for k, v in encoding.items()}
         with torch.no_grad():
             outputs = model(**encoding)
         token_predictions = torch.argmax(outputs.logits, dim=2)[0].tolist()
         # Predictions are per sub-token and include special/padding tokens,
         # so label each word from its first sub-token instead of assuming
         # that token i corresponds to word i.
         word_labels = {}
         for token_index, word_index in enumerate(word_ids):
             if word_index is not None and word_index not in word_labels:
                 word_labels[word_index] = id2label[token_predictions[token_index]]
         companies = []
         dates = []
         totals = []
         for word_index, word in enumerate(words):
             # Words cut off by truncation have no prediction; treat as "O".
             if word_labels.get(word_index, "O") == "COMPANY":
                 companies.append(word)
             # Date and total come from pattern matching, not from the model.
             if re.search(r"\d{2}[/-]\d{2}[/-]\d{2,4}", word):
                 dates.append(word)
             if re.search(r"\d+(\.\d{2})?", word):
                 try:
                     value = float(word.replace(",", ""))
                 except ValueError:
                     # Contains digits but is not a plain number.
                     continue
                 if value > 50:
                     totals.append(word)
         return {
             "company": " ".join(companies) if companies else "Not Found",
             # First date seen and last qualifying amount seen.
             "date": dates[0] if dates else "Not Found",
             "total": totals[-1] if totals else "Not Found",
         }
     except Exception as e:
         # UI boundary: report the failure as data instead of crashing.
         return {"error": str(e)}
 # =====================================================
+# SEND EMAIL (RESEND API - WORKING)
 # =====================================================
 def send_claim_email(to_email, extracted):
     """Send the extracted claim details by email through the Resend HTTP API.

     Args:
         to_email: Recipient email address.
         extracted: Dict providing the "company", "date" and "total" values.

     Returns:
         A human-readable status string describing success or failure.

     Raises:
         KeyError: If `extracted` lacks one of the expected keys.
     """
     # Function-scope import so this block does not depend on a new
     # module-level import.
     from html import escape
     if not RESEND_API_KEY:
         return "❌ RESEND_API_KEY missing in HuggingFace Secrets"
     subject = "Insurance Claim Request"
     # The values originate from OCR of an uploaded image, i.e. untrusted
     # text, so escape them before interpolating into HTML.
     provider = escape(str(extracted['company']))
     bill_date = escape(str(extracted['date']))
     amount = escape(str(extracted['total']))
     html_body = f"""
     <h2>Insurance Claim Request</h2>
     <p><b>Provider:</b> {provider}</p>
     <p><b>Date:</b> {bill_date}</p>
     <p><b>Amount:</b> ₹{amount}</p>
     <p>Please process this claim.</p>
     """
     try:
         response = requests.post(
             "https://api.resend.com/emails",
             headers={
                 "Authorization": f"Bearer {RESEND_API_KEY}",
                 "Content-Type": "application/json",
             },
             json={
                 "from": "onboarding@resend.dev",
                 "to": [to_email],
                 "subject": subject,
                 "html": html_body,
             },
             timeout=10
         )
         print("EMAIL RESPONSE:", response.status_code, response.text)
         if response.status_code in (200, 201):
             return f"✅ Email sent successfully to {to_email}"
         return f"❌ Email failed: {response.text}"
     except Exception as e:
         # Network errors, timeouts, etc. are reported as a status string.
         return f"❌ Email error: {str(e)}"
 # =====================================================
+# MAIN FUNCTION
 # =====================================================
 def process_and_send(image, email_id):
     """Extract claim fields from a receipt image and email them.

     Args:
         image: PIL image of the receipt, or None when nothing was uploaded.
         email_id: Recipient email address as typed by the user.

     Returns:
         A tuple (extracted, status): the extraction dict (or an
         {"error": ...} dict) and a human-readable status string.
     """
     print("Processing started...")
     # Guard: without an image there is nothing to OCR; report that clearly
     # instead of surfacing an attribute error from the extraction step.
     if image is None:
         message = "No image uploaded"
         return {"error": message}, message
     extracted = extract_receipt(image)
     print("Extracted:", extracted)
     if "error" in extracted:
         return extracted, extracted["error"]
     # Guard: still show the extracted data, but skip the API call when no
     # recipient was provided.
     recipient = (email_id or "").strip()
     if not recipient:
         return extracted, "❌ Email ID missing"
     print("Sending email to:", recipient)
     email_status = send_claim_email(recipient, extracted)
     print("Email status:", email_status)
     return extracted, email_status
 # =====================================================
     fn=process_and_send,
     inputs=[
         gr.Image(type="pil", label="Upload Receipt"),
+        gr.Textbox(label="Enter Email ID")
     ],
     outputs=[
         gr.JSON(label="Extracted Data"),