Spaces:
Running
Running
Update src/streamlit_app.py
Browse files - src/streamlit_app.py (+9 -5)
src/streamlit_app.py
CHANGED
|
@@ -352,10 +352,12 @@ def matches_email_filters(info, sender_filter, target_filter):
|
|
| 352 |
return sender_match and target_match, forwarded
|
| 353 |
|
| 354 |
|
| 355 |
-
def is_rj(info):
|
| 356 |
subj = (info.get("subject") or "").lower()
|
| 357 |
html = info.get("body_html") or ""
|
| 358 |
text = html_to_text(html).lower()
|
|
|
|
|
|
|
| 359 |
if "rj emails--overview" in subj:
|
| 360 |
return False
|
| 361 |
has_fwd = 'id="divRplyFwdMsg"' in html or ("from:" in text and "sent:" in text)
|
|
@@ -377,8 +379,9 @@ def build_rj(messages, sender_filter, target_filter):
|
|
| 377 |
matches_target, fwd = matches_email_filters(info, sender_filter, target_filter)
|
| 378 |
if not matches_target:
|
| 379 |
continue
|
| 380 |
-
if not is_rj(info):
|
| 381 |
continue
|
|
|
|
| 382 |
rows.append({
|
| 383 |
"email_id": info.get("id") or f"msg_{i}",
|
| 384 |
"fw_subject": info.get("subject"),
|
|
@@ -388,7 +391,8 @@ def build_rj(messages, sender_filter, target_filter):
|
|
| 388 |
"fw_received_time": info.get("received_time"),
|
| 389 |
"fw_body_html": info.get("body_html", ""),
|
| 390 |
**{k: fwd.get(k, "") for k in fwd},
|
| 391 |
-
"original_body_html":
|
|
|
|
| 392 |
})
|
| 393 |
return rows
|
| 394 |
|
|
@@ -407,7 +411,7 @@ def build_target_emails(messages, sender_filter, target_filter):
|
|
| 407 |
"to": ", ".join(info.get("to", [])),
|
| 408 |
"cc": ", ".join(info.get("cc", [])),
|
| 409 |
"received_time": info.get("received_time"),
|
| 410 |
-
"is_rj_forward": is_rj(info),
|
| 411 |
"forwarded_from": forwarded.get("forwarded_from", ""),
|
| 412 |
"forwarded_to": forwarded.get("forwarded_to", ""),
|
| 413 |
"forwarded_cc": forwarded.get("forwarded_cc", ""),
|
|
@@ -621,7 +625,7 @@ if st.button("▶ Run", disabled=not st.session_state.token):
|
|
| 621 |
with st.spinner("Extracting cash tables..."):
|
| 622 |
cash_rows, wd_rows, dep_rows = [], [], []
|
| 623 |
for email in st.session_state.rj_emails:
|
| 624 |
-
for section, df in extract_tables(email.get("
|
| 625 |
rows = df.to_dict("records")
|
| 626 |
for r in rows:
|
| 627 |
r.update({
|
|
|
|
| 352 |
return sender_match and target_match, forwarded
|
| 353 |
|
| 354 |
|
| 355 |
+
def is_rj(info, sender_filter=DEFAULT_SENDER):
|
| 356 |
subj = (info.get("subject") or "").lower()
|
| 357 |
html = info.get("body_html") or ""
|
| 358 |
text = html_to_text(html).lower()
|
| 359 |
+
if normalize_email(info.get("from")) == normalize_email(sender_filter):
|
| 360 |
+
return True
|
| 361 |
if "rj emails--overview" in subj:
|
| 362 |
return False
|
| 363 |
has_fwd = 'id="divRplyFwdMsg"' in html or ("from:" in text and "sent:" in text)
|
|
|
|
| 379 |
matches_target, fwd = matches_email_filters(info, sender_filter, target_filter)
|
| 380 |
if not matches_target:
|
| 381 |
continue
|
| 382 |
+
if not is_rj(info, sender_filter):
|
| 383 |
continue
|
| 384 |
+
original_body_html = reply_marker_html(info.get("body_html", ""))
|
| 385 |
rows.append({
|
| 386 |
"email_id": info.get("id") or f"msg_{i}",
|
| 387 |
"fw_subject": info.get("subject"),
|
|
|
|
| 391 |
"fw_received_time": info.get("received_time"),
|
| 392 |
"fw_body_html": info.get("body_html", ""),
|
| 393 |
**{k: fwd.get(k, "") for k in fwd},
|
| 394 |
+
"original_body_html": original_body_html,
|
| 395 |
+
"parse_body_html": original_body_html or info.get("body_html", ""),
|
| 396 |
})
|
| 397 |
return rows
|
| 398 |
|
|
|
|
| 411 |
"to": ", ".join(info.get("to", [])),
|
| 412 |
"cc": ", ".join(info.get("cc", [])),
|
| 413 |
"received_time": info.get("received_time"),
|
| 414 |
+
"is_rj_forward": is_rj(info, sender_filter),
|
| 415 |
"forwarded_from": forwarded.get("forwarded_from", ""),
|
| 416 |
"forwarded_to": forwarded.get("forwarded_to", ""),
|
| 417 |
"forwarded_cc": forwarded.get("forwarded_cc", ""),
|
|
|
|
| 625 |
with st.spinner("Extracting cash tables..."):
|
| 626 |
cash_rows, wd_rows, dep_rows = [], [], []
|
| 627 |
for email in st.session_state.rj_emails:
|
| 628 |
+
for section, df in extract_tables(email.get("parse_body_html", "")):
|
| 629 |
rows = df.to_dict("records")
|
| 630 |
for r in rows:
|
| 631 |
r.update({
|