ngupta2026 committed on
Commit
3de649e
·
verified ·
1 Parent(s): d79e2a0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +81 -44
app.py CHANGED
@@ -13,11 +13,23 @@ from transformers import LayoutLMTokenizerFast, LayoutLMForTokenClassification
13
  # =====================================================
14
  RESEND_API_KEY = os.getenv("RESEND_API_KEY")
15
 
16
- label2id = {"O": 0, "COMPANY": 1, "DATE": 2, "TOTAL": 3}
17
- id2label = {v: k for k, v in label2id.items()}
 
 
 
18
 
19
  MODEL_NAME = "ngupta2026/sroie-layoutlm"
20
 
 
 
 
 
 
 
 
 
 
21
  # =====================================================
22
  # LOAD MODEL
23
  # =====================================================
@@ -30,7 +42,7 @@ model.to(device)
30
  model.eval()
31
 
32
  # =====================================================
33
- # NORMALIZE BOXES
34
  # =====================================================
35
  def normalize(box, width, height):
36
  return [
@@ -41,7 +53,7 @@ def normalize(box, width, height):
41
  ]
42
 
43
  # =====================================================
44
- # CONFIDENCE HELPER
45
  # =====================================================
46
  def avg_conf(lst):
47
  if len(lst) == 0:
@@ -49,9 +61,10 @@ def avg_conf(lst):
49
  return sum(lst) / len(lst)
50
 
51
  # =====================================================
52
- # EXTRACT DATA + CONFIDENCE
53
  # =====================================================
54
  def extract_receipt(image):
 
55
  try:
56
  image = image.convert("RGB")
57
  image.thumbnail((1200, 1200))
@@ -61,12 +74,15 @@ def extract_receipt(image):
61
  output_type=pytesseract.Output.DICT
62
  )
63
 
64
- words, boxes = [], []
 
65
 
66
  for i in range(len(data["text"])):
 
67
  text = data["text"][i].strip()
68
 
69
- if text != "" and len(text) > 2:
 
70
  x = data["left"][i]
71
  y = data["top"][i]
72
  w = data["width"][i]
@@ -79,7 +95,7 @@ def extract_receipt(image):
79
  return {"error": "No text detected"}
80
 
81
  width, height = image.size
82
- boxes = [normalize(box, width, height) for box in boxes]
83
 
84
  encoding = tokenizer(
85
  words,
@@ -96,8 +112,8 @@ def extract_receipt(image):
96
  with torch.no_grad():
97
  outputs = model(**encoding)
98
 
99
- # 🔥 CONFIDENCE LOGIC
100
  probs = torch.softmax(outputs.logits, dim=2)
 
101
  preds = torch.argmax(probs, dim=2)[0][:len(words)]
102
  confs = torch.max(probs, dim=2)[0][0][:len(words)]
103
 
@@ -114,39 +130,50 @@ def extract_receipt(image):
114
  }
115
 
116
  for word, pred, conf in zip(words, preds, confs):
 
117
  label = id2label[pred.item()]
118
- confidence = conf.item()
119
 
120
  if label == "COMPANY":
121
  result["company"].append(word)
122
- conf_store["company"].append(confidence)
123
 
124
  if re.search(r"\d{2}[/-]\d{2}[/-]\d{2,4}", word):
125
  result["date"].append(word)
126
- conf_store["date"].append(confidence)
127
 
128
  if re.search(r"\d+(\.\d{2})?", word):
129
  try:
130
  value = float(word.replace(",", ""))
131
  if value > 50:
132
  result["total"].append(word)
133
- conf_store["total"].append(confidence)
134
  except:
135
  pass
136
 
137
- # FINAL VALUES
138
- result["company"] = " ".join(result["company"]) if result["company"] else "Not Found"
139
- result["date"] = result["date"][0] if result["date"] else "Not Found"
140
- result["total"] = result["total"][-1] if result["total"] else "Not Found"
 
 
 
 
 
 
 
 
 
 
 
141
 
142
- # 🔥 FINAL CONFIDENCE
143
  company_conf = avg_conf(conf_store["company"])
144
  date_conf = avg_conf(conf_store["date"])
145
  total_conf = avg_conf(conf_store["total"])
146
 
147
- overall_conf = (company_conf + date_conf + total_conf) / 3
148
 
149
- result["confidence"] = round(overall_conf, 3)
150
 
151
  return result
152
 
@@ -156,32 +183,39 @@ def extract_receipt(image):
156
  # =====================================================
157
  # DECISION ENGINE
158
  # =====================================================
159
- def decision_layer(confidence):
160
 
161
- if confidence >= 0.80:
162
  return "AUTO_SEND"
163
 
164
- elif confidence >= 0.60:
165
  return "REVIEW"
166
 
167
  else:
168
  return "REJECT"
169
 
170
  # =====================================================
171
- # SEND EMAIL (RESEND)
172
  # =====================================================
173
  def send_claim_email(to_email, extracted):
174
 
175
  if not RESEND_API_KEY:
176
- return "❌ RESEND_API_KEY missing"
177
 
178
  subject = "Insurance Claim Request"
179
 
180
  html_body = f"""
181
  <h2>Insurance Claim Request</h2>
182
- <p><b>Provider:</b> {extracted['company']}</p>
183
- <p><b>Date:</b> {extracted['date']}</p>
184
- <p><b>Amount:</b> ₹{extracted['total']}</p>
 
 
 
 
 
 
 
185
  """
186
 
187
  try:
@@ -189,19 +223,20 @@ def send_claim_email(to_email, extracted):
189
  "https://api.resend.com/emails",
190
  headers={
191
  "Authorization": f"Bearer {RESEND_API_KEY}",
192
- "Content-Type": "application/json",
193
  },
194
  json={
195
- "from": "noreply@yourdomain.com", # change after domain verify
196
  "to": [to_email],
197
  "subject": subject,
198
- "html": html_body,
199
  },
200
- timeout=10
201
  )
202
 
203
  if response.status_code in [200, 201]:
204
- return f"✅ Email sent to {to_email}"
 
205
  else:
206
  return f"❌ Email failed: {response.text}"
207
 
@@ -218,20 +253,19 @@ def process_and_send(image, email_id):
218
  if "error" in extracted:
219
  return extracted, extracted["error"]
220
 
221
- confidence = extracted.get("confidence", 0)
222
- decision = decision_layer(confidence)
 
 
223
 
224
- # 🔥 DECISION CONTROL
225
  if decision == "AUTO_SEND":
226
  email_status = send_claim_email(email_id, extracted)
227
 
228
  elif decision == "REVIEW":
229
- email_status = f"⚠️ Needs human review (confidence={confidence})"
230
 
231
  else:
232
- email_status = f"❌ Rejected due to low confidence ({confidence})"
233
-
234
- extracted["decision"] = decision
235
 
236
  return extracted, email_status
237
 
@@ -240,16 +274,19 @@ def process_and_send(image, email_id):
240
  # =====================================================
241
  demo = gr.Interface(
242
  fn=process_and_send,
 
243
  inputs=[
244
  gr.Image(type="pil", label="Upload Receipt"),
245
- gr.Textbox(label="Enter Email ID")
246
  ],
 
247
  outputs=[
248
- gr.JSON(label="Extracted Data + Confidence + Decision"),
249
  gr.Textbox(label="Email Status")
250
  ],
251
- title="📄 AI Insurance Claim Generator (Enterprise Version)",
252
- description="Upload receipt → AI extracts → Confidence check → Safe decision → Email"
 
253
  )
254
 
255
  demo.launch()
 
13
  # =====================================================
14
  RESEND_API_KEY = os.getenv("RESEND_API_KEY")
15
 
16
+ # 🔥 IMPORTANT:
17
+ # Use YOUR VERIFIED DOMAIN EMAIL
18
+ # Example:
19
+ # claims@send.yudham.com
20
+ FROM_EMAIL = "AI Claims <claims@send.yudham.com>"
21
 
22
  MODEL_NAME = "ngupta2026/sroie-layoutlm"
23
 
24
+ label2id = {
25
+ "O": 0,
26
+ "COMPANY": 1,
27
+ "DATE": 2,
28
+ "TOTAL": 3
29
+ }
30
+
31
+ id2label = {v: k for k, v in label2id.items()}
32
+
33
  # =====================================================
34
  # LOAD MODEL
35
  # =====================================================
 
42
  model.eval()
43
 
44
  # =====================================================
45
+ # NORMALIZE BOX
46
  # =====================================================
47
  def normalize(box, width, height):
48
  return [
 
53
  ]
54
 
55
  # =====================================================
56
+ # CONFIDENCE AVG
57
  # =====================================================
58
  def avg_conf(lst):
59
  if len(lst) == 0:
 
61
  return sum(lst) / len(lst)
62
 
63
  # =====================================================
64
+ # OCR + EXTRACTION
65
  # =====================================================
66
  def extract_receipt(image):
67
+
68
  try:
69
  image = image.convert("RGB")
70
  image.thumbnail((1200, 1200))
 
74
  output_type=pytesseract.Output.DICT
75
  )
76
 
77
+ words = []
78
+ boxes = []
79
 
80
  for i in range(len(data["text"])):
81
+
82
  text = data["text"][i].strip()
83
 
84
+ if text != "" and len(text) > 1:
85
+
86
  x = data["left"][i]
87
  y = data["top"][i]
88
  w = data["width"][i]
 
95
  return {"error": "No text detected"}
96
 
97
  width, height = image.size
98
+ boxes = [normalize(b, width, height) for b in boxes]
99
 
100
  encoding = tokenizer(
101
  words,
 
112
  with torch.no_grad():
113
  outputs = model(**encoding)
114
 
 
115
  probs = torch.softmax(outputs.logits, dim=2)
116
+
117
  preds = torch.argmax(probs, dim=2)[0][:len(words)]
118
  confs = torch.max(probs, dim=2)[0][0][:len(words)]
119
 
 
130
  }
131
 
132
  for word, pred, conf in zip(words, preds, confs):
133
+
134
  label = id2label[pred.item()]
135
+ c = conf.item()
136
 
137
  if label == "COMPANY":
138
  result["company"].append(word)
139
+ conf_store["company"].append(c)
140
 
141
  if re.search(r"\d{2}[/-]\d{2}[/-]\d{2,4}", word):
142
  result["date"].append(word)
143
+ conf_store["date"].append(c)
144
 
145
  if re.search(r"\d+(\.\d{2})?", word):
146
  try:
147
  value = float(word.replace(",", ""))
148
  if value > 50:
149
  result["total"].append(word)
150
+ conf_store["total"].append(c)
151
  except:
152
  pass
153
 
154
+ # FINAL CLEAN
155
+ result["company"] = (
156
+ " ".join(result["company"])
157
+ if result["company"] else "Not Found"
158
+ )
159
+
160
+ result["date"] = (
161
+ result["date"][0]
162
+ if result["date"] else "Not Found"
163
+ )
164
+
165
+ result["total"] = (
166
+ result["total"][-1]
167
+ if result["total"] else "Not Found"
168
+ )
169
 
 
170
  company_conf = avg_conf(conf_store["company"])
171
  date_conf = avg_conf(conf_store["date"])
172
  total_conf = avg_conf(conf_store["total"])
173
 
174
+ overall = (company_conf + date_conf + total_conf) / 3
175
 
176
+ result["confidence"] = round(overall, 3)
177
 
178
  return result
179
 
 
183
  # =====================================================
184
  # DECISION ENGINE
185
  # =====================================================
186
+ def decision_layer(conf):
187
 
188
+ if conf >= 0.80:
189
  return "AUTO_SEND"
190
 
191
+ elif conf >= 0.60:
192
  return "REVIEW"
193
 
194
  else:
195
  return "REJECT"
196
 
197
  # =====================================================
198
+ # EMAIL SEND
199
  # =====================================================
200
  def send_claim_email(to_email, extracted):
201
 
202
  if not RESEND_API_KEY:
203
+ return "❌ Missing RESEND_API_KEY secret"
204
 
205
  subject = "Insurance Claim Request"
206
 
207
  html_body = f"""
208
  <h2>Insurance Claim Request</h2>
209
+
210
+ <p>Dear Claims Team,</p>
211
+
212
+ <p>Please process reimbursement request.</p>
213
+
214
+ <p><b>Provider Name:</b> {extracted['company']}</p>
215
+ <p><b>Bill Date:</b> {extracted['date']}</p>
216
+ <p><b>Claim Amount:</b> ₹{extracted['total']}</p>
217
+
218
+ <p>Regards,<br>AI Claims System</p>
219
  """
220
 
221
  try:
 
223
  "https://api.resend.com/emails",
224
  headers={
225
  "Authorization": f"Bearer {RESEND_API_KEY}",
226
+ "Content-Type": "application/json"
227
  },
228
  json={
229
+ "from": FROM_EMAIL,
230
  "to": [to_email],
231
  "subject": subject,
232
+ "html": html_body
233
  },
234
+ timeout=20
235
  )
236
 
237
  if response.status_code in [200, 201]:
238
+ return f"✅ Email sent successfully to {to_email}"
239
+
240
  else:
241
  return f"❌ Email failed: {response.text}"
242
 
 
253
  if "error" in extracted:
254
  return extracted, extracted["error"]
255
 
256
+ conf = extracted["confidence"]
257
+ decision = decision_layer(conf)
258
+
259
+ extracted["decision"] = decision
260
 
 
261
  if decision == "AUTO_SEND":
262
  email_status = send_claim_email(email_id, extracted)
263
 
264
  elif decision == "REVIEW":
265
+ email_status = f"⚠️ Human review required (confidence={conf})"
266
 
267
  else:
268
+ email_status = f"❌ Rejected (low confidence={conf})"
 
 
269
 
270
  return extracted, email_status
271
 
 
274
  # =====================================================
275
  demo = gr.Interface(
276
  fn=process_and_send,
277
+
278
  inputs=[
279
  gr.Image(type="pil", label="Upload Receipt"),
280
+ gr.Textbox(label="Enter Destination Email")
281
  ],
282
+
283
  outputs=[
284
+ gr.JSON(label="AI Extraction"),
285
  gr.Textbox(label="Email Status")
286
  ],
287
+
288
+ title="📄 AI Insurance Claim Generator",
289
+ description="Upload receipt → Extract fields → Confidence Check → Auto Email"
290
  )
291
 
292
  demo.launch()