Spaces:
Sleeping
Sleeping
| # ===================================================== | |
| # AI INSURANCE CLAIM GENERATOR | |
| # FINAL VERSION | |
| # Accurate Extraction + Professional Claim PDF + Email | |
| # Hugging Face Space Ready | |
| # ===================================================== | |
| import gradio as gr | |
| import pytesseract | |
| from PIL import Image | |
| import torch | |
| import re | |
| import requests | |
| import os | |
| import io | |
| import base64 | |
| from datetime import datetime | |
| from transformers import LayoutLMTokenizerFast, LayoutLMForTokenClassification | |
| # PDF LIBRARY | |
| from reportlab.lib.pagesizes import A4 | |
| from reportlab.pdfgen import canvas | |
| from reportlab.lib.colors import HexColor | |
| # ===================================================== | |
| # CONFIG | |
| # ===================================================== | |
# Resend API key, read from the environment (None when the variable is unset).
RESEND_API_KEY = os.environ.get("RESEND_API_KEY")

# Sender shown on outgoing claim emails.
FROM_EMAIL = "AI Claims <claims@yudham.com>"

# Hugging Face model identifier passed to from_pretrained().
MODEL_NAME = "ngupta2026/sroie-layoutlm"

# Token-classification label names and their integer ids.
label2id = {"O": 0, "COMPANY": 1, "DATE": 2, "TOTAL": 3}

# Reverse lookup: integer id -> label name.
id2label = {index: name for name, index in label2id.items()}
| # ===================================================== | |
| # LOAD MODEL | |
| # ===================================================== | |
# Use the GPU when one is visible to torch, otherwise stay on the CPU.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"

# Load the token-classification model and its matching fast tokenizer.
model = LayoutLMForTokenClassification.from_pretrained(MODEL_NAME)
tokenizer = LayoutLMTokenizerFast.from_pretrained(MODEL_NAME)

# Move the weights to the chosen device and switch to inference mode.
model.to(device)
model.eval()
| # ===================================================== | |
| # HELPERS | |
| # ===================================================== | |
def normalize(box, width, height):
    """Scale a pixel-space bounding box onto a 0-1000 grid.

    Args:
        box: Sequence ``[x0, y0, x1, y1]`` in pixels.
        width: Width of the image the box was measured on, in pixels.
        height: Height of that image, in pixels.

    Returns:
        A list of four ints ``[x0, y0, x1, y1]``, each clamped to
        ``[0, 1000]``.

    Raises:
        ZeroDivisionError: If ``width`` or ``height`` is zero.
    """

    def _scale(value, extent):
        # Clamp so a box that pokes slightly outside the image (negative
        # offsets, rounding) can never yield a coordinate outside 0-1000.
        return max(0, min(1000, int(1000 * value / extent)))

    return [
        _scale(box[0], width),
        _scale(box[1], height),
        _scale(box[2], width),
        _scale(box[3], height),
    ]
def avg(lst):
    """Return the arithmetic mean of ``lst``, or 0 when it is empty."""
    count = len(lst)
    if count > 0:
        return sum(lst) / count
    return 0
| # ===================================================== | |
| # CLEAN COMPANY | |
| # ===================================================== | |
def clean_company(txt):
    """Sanitize a company-name string and return it upper-cased.

    Characters other than ASCII letters, digits, space and ``& ( ) . , - /``
    are removed, runs of whitespace are collapsed to one space, and the
    result is trimmed. Returns ``"Not Found"`` when fewer than two
    characters survive.
    """
    allowed_only = re.sub(r"[^A-Za-z0-9&().,\- /]", "", txt.strip())
    collapsed = re.sub(r"\s+", " ", allowed_only).strip()
    return collapsed.upper() if len(collapsed) >= 2 else "Not Found"
| # ===================================================== | |
| # DATE EXTRACTION | |
| # ===================================================== | |
def extract_date(words):
    """Return the first numeric date found in a list of OCR tokens.

    A date is ``D/M/Y`` or ``D-M-Y`` with 1-2 digit day and month and a
    2-4 digit year. The date may be embedded in a longer token (for example
    ``"Date:25/12/2018"`` or ``"25/12/2018,"``); only the date part is
    returned.

    Args:
        words: Iterable of string tokens.

    Returns:
        The matched date text, or ``"Not Found"`` when no token contains one.
    """
    # The digit look-arounds stop the pattern from matching a slice of a
    # longer digit run such as "123/45/6789".
    date_pattern = re.compile(r"(?<!\d)\d{1,2}[/-]\d{1,2}[/-]\d{2,4}(?!\d)")
    for token in words:
        found = date_pattern.search(token)
        if found:
            return found.group(0)
    return "Not Found"
| # ===================================================== | |
| # TOTAL EXTRACTION | |
| # ===================================================== | |
def clean_amount_token(txt):
    """Strip currency markers and thousands separators from an OCR token.

    The token is upper-cased, then the markers ``RM``, ``MYR``, ``RS.``,
    ``RS``, the rupee sign (U+20B9) and commas are removed, and surrounding
    whitespace is trimmed.

    Args:
        txt: Raw token text.

    Returns:
        The cleaned, upper-cased token.
    """
    cleaned = txt.upper()
    # "RS." is removed before "RS" so that "Rs.100.00" becomes "100.00"
    # rather than ".100.00". The rupee sign is written as an escape so the
    # literal cannot be corrupted by a bad file encoding.
    for marker in ("RM", "MYR", "RS.", "RS", "\u20b9", ","):
        cleaned = cleaned.replace(marker, "")
    return cleaned.strip()
def extract_total(words):
    """Return the largest plausible money amount among OCR tokens.

    Each token is passed through ``clean_amount_token``. A token counts as an
    amount when the cleaned text is digits, a dot and exactly two decimals,
    and its value lies between 0.5 and 100000 inclusive.

    Args:
        words: Iterable of string tokens.

    Returns:
        The maximum amount formatted with two decimals, or ``"Not Found"``
        when no token qualifies.
    """
    amounts = []
    for token in words:
        candidate = clean_amount_token(token)
        if not re.fullmatch(r"\d+\.\d{2}", candidate):
            continue
        # float() cannot fail on text that matched the pattern above, so no
        # exception handling is needed here.
        value = float(candidate)
        if 0.5 <= value <= 100000:
            amounts.append(value)

    if amounts:
        return f"{max(amounts):.2f}"
    return "Not Found"
| # ===================================================== | |
| # PROFESSIONAL CLAIM PDF | |
| # ===================================================== | |
def create_pdf_base64(extracted, currency="INR"):
    """Render a one-page claim form PDF and return it base64-encoded.

    Args:
        extracted: Mapping with the keys ``"company"``, ``"date"`` and
            ``"total"``. A missing key is rendered as ``"Not Found"``.
        currency: Plain-text currency label placed before the amount. The
            page is drawn with the built-in Helvetica font, which cannot
            draw the rupee sign, so an ASCII code is used instead.

    Returns:
        The PDF bytes encoded as a base64 ``str``.
    """
    # One timestamp for both the claim id and the "Generated On" row, so the
    # two can never disagree across a second/minute boundary.
    now = datetime.now()

    with io.BytesIO() as buffer:
        p = canvas.Canvas(buffer, pagesize=A4)
        width, height = A4

        # Colors
        blue = HexColor("#0B5ED7")
        dark = HexColor("#222222")
        gray = HexColor("#666666")
        light = HexColor("#F5F7FA")

        # Header bar
        p.setFillColor(blue)
        p.rect(0, height - 80, width, 80, fill=1, stroke=0)
        p.setFillColorRGB(1, 1, 1)
        p.setFont("Helvetica-Bold", 22)
        p.drawString(40, height - 50, "INSURANCE CLAIM FORM")
        p.setFont("Helvetica", 10)
        p.drawRightString(width - 40, height - 52, "AI Generated Report")

        # Body box
        p.setFillColor(light)
        p.roundRect(35, height - 430, width - 70, 290, 10, fill=1, stroke=0)

        y = height - 130
        p.setFillColor(dark)
        p.setFont("Helvetica-Bold", 14)
        p.drawString(50, y, "Claim Summary")
        y -= 35

        rows = [
            ("Claim ID", f"CLM-{now.strftime('%Y%m%d%H%M%S')}"),
            ("Provider Name", extracted.get("company", "Not Found")),
            ("Bill Date", extracted.get("date", "Not Found")),
            ("Claim Amount", f"{currency} {extracted.get('total', 'Not Found')}"),
            ("Status", "Submitted"),
            ("Generated On", now.strftime("%d-%m-%Y %H:%M")),
        ]
        for label, value in rows:
            # Label column in gray bold, value column in dark regular.
            p.setFillColor(gray)
            p.setFont("Helvetica-Bold", 11)
            p.drawString(55, y, label)
            p.setFillColor(dark)
            p.setFont("Helvetica", 11)
            p.drawString(220, y, str(value))
            y -= 32

        # Footer notes
        p.setFillColor(gray)
        p.setFont("Helvetica", 9)
        p.drawString(
            40,
            60,
            "This document was generated automatically by AI Insurance Claim Generator.",
        )
        p.drawRightString(width - 40, 60, "Confidential")

        p.showPage()
        p.save()
        pdf_bytes = buffer.getvalue()

    return base64.b64encode(pdf_bytes).decode()
| # ===================================================== | |
| # MAIN EXTRACTION | |
| # ===================================================== | |
def extract_receipt(image):
    """OCR a receipt image and extract company, date, total and a confidence.

    Steps: run Tesseract to get words and pixel boxes, tokenize the words,
    build a token-level ``bbox`` tensor, run the token-classification model,
    and read one prediction per word (from the word's first sub-token).
    Words labelled ``COMPANY`` form the company name; date and total come
    from ``extract_date`` / ``extract_total`` over the OCR words.

    Args:
        image: A PIL image, or ``None`` when nothing was uploaded.

    Returns:
        ``{"company", "date", "total", "confidence"}`` on success, or
        ``{"error": message}`` when no image/text is available or any step
        raises.
    """
    if image is None:
        return {"error": "No image provided"}

    try:
        image = image.convert("RGB")
        image.thumbnail((1500, 1500))
        width, height = image.size

        data = pytesseract.image_to_data(
            image,
            output_type=pytesseract.Output.DICT
        )

        words = []
        boxes = []
        ocr_rows = zip(
            data["text"], data["left"], data["top"],
            data["width"], data["height"],
        )
        for raw, x, y, w, h in ocr_rows:
            txt = raw.strip()
            # Single characters are dropped, as are empty OCR cells.
            if len(txt) > 1:
                words.append(txt)
                boxes.append(normalize([x, y, x + w, y + h], width, height))

        if not words:
            return {"error": "No text detected"}

        encoding = tokenizer(
            words,
            return_tensors="pt",
            truncation=True,
            padding="max_length",
            max_length=512,
            is_split_into_words=True
        )

        # The model output is per *token* ([CLS], sub-words, [SEP], padding),
        # not per word, so keep the token -> word mapping for alignment.
        word_ids = encoding.word_ids(batch_index=0)

        # Every sub-token inherits the box of its word. Special tokens get
        # the conventional LayoutLM placeholders: all-1000 for [SEP],
        # all-zero for [CLS] and padding.
        sep_id = tokenizer.sep_token_id
        token_boxes = []
        for token_id, word_id in zip(encoding["input_ids"][0].tolist(), word_ids):
            if word_id is not None:
                token_boxes.append(boxes[word_id])
            elif token_id == sep_id:
                token_boxes.append([1000, 1000, 1000, 1000])
            else:
                token_boxes.append([0, 0, 0, 0])

        inputs = {k: v.to(device) for k, v in encoding.items()}
        inputs["bbox"] = torch.tensor(
            [token_boxes], dtype=torch.long, device=device
        )

        with torch.no_grad():
            outputs = model(**inputs)

        probs = torch.softmax(outputs.logits, dim=2)[0]
        conf_values, pred_ids = probs.max(dim=1)
        conf_values = conf_values.tolist()
        pred_ids = pred_ids.tolist()

        company_tokens = []
        company_scores = []
        seen_words = set()
        for position, word_id in enumerate(word_ids):
            # Skip special/padding tokens and continuation sub-tokens so that
            # each word contributes exactly one prediction.
            if word_id is None or word_id in seen_words:
                continue
            seen_words.add(word_id)
            if id2label.get(pred_ids[position], "O") == "COMPANY":
                company_tokens.append(words[word_id])
                company_scores.append(conf_values[position])

        if company_tokens:
            company = " ".join(company_tokens[:8])
        else:
            # Fallback: use the first few OCR words as the company name.
            company = " ".join(words[:5])
        company = clean_company(company)

        date = extract_date(words)
        total = extract_total(words)

        # Confidence = mean COMPANY probability plus fixed bonuses for a
        # found date and total, capped at 0.99.
        score = avg(company_scores)
        if date != "Not Found":
            score += 0.12
        if total != "Not Found":
            score += 0.18
        score = min(score, 0.99)

        return {
            "company": company,
            "date": date,
            "total": total,
            "confidence": round(score, 3)
        }
    except Exception as e:
        # UI boundary: report any failure as an error payload instead of
        # letting it propagate to the caller.
        return {"error": str(e)}
| # ===================================================== | |
| # DECISION | |
| # ===================================================== | |
def decision_layer(conf):
    """Map a confidence score to a routing decision.

    Returns ``"AUTO_SEND"`` for scores of at least 0.80, ``"REVIEW"`` for
    scores of at least 0.60, and ``"REJECT"`` otherwise.
    """
    # Thresholds are checked from highest to lowest; the first hit wins.
    for threshold, verdict in ((0.80, "AUTO_SEND"), (0.60, "REVIEW")):
        if conf >= threshold:
            return verdict
    return "REJECT"
| # ===================================================== | |
| # EMAIL SEND | |
| # ===================================================== | |
def send_claim_email(to_email, extracted):
    """Email the claim summary with the generated PDF attached via Resend.

    Args:
        to_email: Destination address entered by the user.
        extracted: Mapping with ``"company"``, ``"date"`` and ``"total"``.

    Returns:
        A human-readable status string; this function reports failures in
        the returned text rather than raising for request errors.
    """
    # Local import keeps this block self-contained; the local variable that
    # holds the markup is named html_body so it does not shadow the module.
    from html import escape

    if not RESEND_API_KEY:
        return "❌ Missing RESEND_API_KEY"

    recipient = (to_email or "").strip()
    if "@" not in recipient:
        # Reject before building the PDF or calling the API.
        return "❌ Invalid destination email"

    pdf_b64 = create_pdf_base64(extracted)
    subject = "Insurance Claim Request"

    # These values come from OCR of an uploaded image, so escape them before
    # placing them in HTML.
    provider = escape(str(extracted["company"]))
    bill_date = escape(str(extracted["date"]))
    amount = escape(str(extracted["total"]))

    # &#8377; is the HTML entity for the rupee sign.
    html_body = f"""
<h2>Insurance Claim Request</h2>
<p><b>Provider:</b> {provider}</p>
<p><b>Date:</b> {bill_date}</p>
<p><b>Amount:</b> &#8377;{amount}</p>
<p>Attached: Professional Claim PDF</p>
"""

    try:
        r = requests.post(
            "https://api.resend.com/emails",
            headers={
                "Authorization": f"Bearer {RESEND_API_KEY}",
                "Content-Type": "application/json"
            },
            json={
                "from": FROM_EMAIL,
                "to": [recipient],
                "subject": subject,
                "html": html_body,
                "attachments": [
                    {
                        "filename": "claim_report.pdf",
                        "content": pdf_b64
                    }
                ]
            },
            timeout=20
        )
    except Exception as e:
        # Best-effort boundary: surface the failure as status text.
        return f"❌ Email error: {str(e)}"

    if r.status_code in (200, 201):
        return f"✅ Email + PDF sent to {recipient}"
    return f"❌ Email failed: {r.text}"
| # ===================================================== | |
| # MAIN PIPELINE | |
| # ===================================================== | |
def process_and_send(image, email_id):
    """Run extraction, decide how to route the claim, and email it if allowed.

    Args:
        image: Uploaded receipt image, passed to ``extract_receipt``.
        email_id: Destination email address.

    Returns:
        A tuple ``(extracted, status)``: the extraction dict (with a
        ``"decision"`` key added on success) and a status message. When
        extraction fails, the error dict and its message are returned.
    """
    extracted = extract_receipt(image)
    if "error" in extracted:
        return extracted, extracted["error"]

    conf = extracted["confidence"]
    decision = decision_layer(conf)
    extracted["decision"] = decision

    # Only AUTO_SEND triggers an email; the other outcomes just report.
    if decision == "AUTO_SEND":
        status = send_claim_email(email_id, extracted)
    elif decision == "REVIEW":
        status = f"⚠️ Human review required ({conf})"
    else:
        status = f"❌ Rejected ({conf})"

    return extracted, status
| # ===================================================== | |
| # UI | |
| # ===================================================== | |
# Gradio UI: receipt image + destination email in, extraction JSON + status out.
demo = gr.Interface(
    fn=process_and_send,
    inputs=[
        gr.Image(type="pil", label="Upload Receipt"),
        gr.Textbox(label="Destination Email")
    ],
    outputs=[
        gr.JSON(label="AI Extraction"),
        gr.Textbox(label="Email Status")
    ],
    title="📄 AI Insurance Claim Generator",
    description="Upload receipt → Extract fields → Generate Claim PDF → Auto Email"
)

# Start the server only when run as a script, so importing this module
# (for example to reuse `demo`) does not launch it as a side effect.
if __name__ == "__main__":
    demo.launch()