carrief0908 committed on
Commit
722ceda
·
verified ·
1 Parent(s): d978147

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +9 -5
src/streamlit_app.py CHANGED
@@ -352,10 +352,12 @@ def matches_email_filters(info, sender_filter, target_filter):
352
  return sender_match and target_match, forwarded
353
 
354
 
355
- def is_rj(info):
356
  subj = (info.get("subject") or "").lower()
357
  html = info.get("body_html") or ""
358
  text = html_to_text(html).lower()
 
 
359
  if "rj emails--overview" in subj:
360
  return False
361
  has_fwd = 'id="divRplyFwdMsg"' in html or ("from:" in text and "sent:" in text)
@@ -377,8 +379,9 @@ def build_rj(messages, sender_filter, target_filter):
377
  matches_target, fwd = matches_email_filters(info, sender_filter, target_filter)
378
  if not matches_target:
379
  continue
380
- if not is_rj(info):
381
  continue
 
382
  rows.append({
383
  "email_id": info.get("id") or f"msg_{i}",
384
  "fw_subject": info.get("subject"),
@@ -388,7 +391,8 @@ def build_rj(messages, sender_filter, target_filter):
388
  "fw_received_time": info.get("received_time"),
389
  "fw_body_html": info.get("body_html", ""),
390
  **{k: fwd.get(k, "") for k in fwd},
391
- "original_body_html": reply_marker_html(info.get("body_html", "")),
 
392
  })
393
  return rows
394
 
@@ -407,7 +411,7 @@ def build_target_emails(messages, sender_filter, target_filter):
407
  "to": ", ".join(info.get("to", [])),
408
  "cc": ", ".join(info.get("cc", [])),
409
  "received_time": info.get("received_time"),
410
- "is_rj_forward": is_rj(info),
411
  "forwarded_from": forwarded.get("forwarded_from", ""),
412
  "forwarded_to": forwarded.get("forwarded_to", ""),
413
  "forwarded_cc": forwarded.get("forwarded_cc", ""),
@@ -621,7 +625,7 @@ if st.button("▶ Run", disabled=not st.session_state.token):
621
  with st.spinner("Extracting cash tables..."):
622
  cash_rows, wd_rows, dep_rows = [], [], []
623
  for email in st.session_state.rj_emails:
624
- for section, df in extract_tables(email.get("original_body_html", "")):
625
  rows = df.to_dict("records")
626
  for r in rows:
627
  r.update({
 
352
  return sender_match and target_match, forwarded
353
 
354
 
355
+ def is_rj(info, sender_filter=DEFAULT_SENDER):
356
  subj = (info.get("subject") or "").lower()
357
  html = info.get("body_html") or ""
358
  text = html_to_text(html).lower()
359
+ if normalize_email(info.get("from")) == normalize_email(sender_filter):
360
+ return True
361
  if "rj emails--overview" in subj:
362
  return False
363
  has_fwd = 'id="divRplyFwdMsg"' in html or ("from:" in text and "sent:" in text)
 
379
  matches_target, fwd = matches_email_filters(info, sender_filter, target_filter)
380
  if not matches_target:
381
  continue
382
+ if not is_rj(info, sender_filter):
383
  continue
384
+ original_body_html = reply_marker_html(info.get("body_html", ""))
385
  rows.append({
386
  "email_id": info.get("id") or f"msg_{i}",
387
  "fw_subject": info.get("subject"),
 
391
  "fw_received_time": info.get("received_time"),
392
  "fw_body_html": info.get("body_html", ""),
393
  **{k: fwd.get(k, "") for k in fwd},
394
+ "original_body_html": original_body_html,
395
+ "parse_body_html": original_body_html or info.get("body_html", ""),
396
  })
397
  return rows
398
 
 
411
  "to": ", ".join(info.get("to", [])),
412
  "cc": ", ".join(info.get("cc", [])),
413
  "received_time": info.get("received_time"),
414
+ "is_rj_forward": is_rj(info, sender_filter),
415
  "forwarded_from": forwarded.get("forwarded_from", ""),
416
  "forwarded_to": forwarded.get("forwarded_to", ""),
417
  "forwarded_cc": forwarded.get("forwarded_cc", ""),
 
625
  with st.spinner("Extracting cash tables..."):
626
  cash_rows, wd_rows, dep_rows = [], [], []
627
  for email in st.session_state.rj_emails:
628
+ for section, df in extract_tables(email.get("parse_body_html", "")):
629
  rows = df.to_dict("records")
630
  for r in rows:
631
  r.update({