Emails / src /streamlit_app.py
carrief0908's picture
Update src/streamlit_app.py
ddb5dbb verified
import os
import json
import urllib.parse
import secrets
import requests
import streamlit as st
import pandas as pd
from bs4 import BeautifulSoup
from io import BytesIO, StringIO
import re
from datetime import datetime, timedelta
# ── Page config ────────────────────────────────────────────────────────────────
st.set_page_config(
page_title="RJ Email Processor",
page_icon="πŸ“¬",
layout="wide",
)
st.markdown("""
<style>
@import url('https://fonts.googleapis.com/css2?family=DM+Serif+Display&family=DM+Sans:wght@400;500;600&display=swap');
html, body, [class*="css"] { font-family: 'DM Sans', sans-serif; }
h1, h2, h3 { font-family: 'DM Serif Display', serif; }
.block-container { padding-top: 2rem; max-width: 960px; }
.status-card {
background: white;
border: 1px solid #e5e0d8;
border-radius: 12px;
padding: 1.5rem 2rem;
margin-bottom: 1.2rem;
box-shadow: 0 1px 4px rgba(0,0,0,0.05);
}
.step-label {
font-size: 0.72rem; font-weight: 600;
letter-spacing: 0.1em; text-transform: uppercase;
color: #9a8f82; margin-bottom: 0.25rem;
}
.step-title { font-size: 1.1rem; font-weight: 600; color: #1a1612; }
.badge-success {
background: #d4edda; color: #155724;
padding: 2px 10px; border-radius: 20px;
font-size: 0.78rem; font-weight: 600;
}
.badge-pending {
background: #fff3cd; color: #856404;
padding: 2px 10px; border-radius: 20px;
font-size: 0.78rem; font-weight: 600;
}
.stButton > button {
background: #1a1612 !important; color: white !important;
border: none !important; border-radius: 8px !important;
font-family: 'DM Sans', sans-serif !important;
font-weight: 600 !important;
padding: 0.55rem 1.4rem !important;
font-size: 0.9rem !important;
}
.stButton > button:hover { background: #3d3530 !important; }
.login-btn a {
display: inline-block;
background: #0078d4;
color: white !important;
text-decoration: none;
padding: 0.55rem 1.6rem;
border-radius: 8px;
font-weight: 600;
font-size: 0.95rem;
font-family: 'DM Sans', sans-serif;
}
.login-btn a:hover { background: #106ebe; }
</style>
""", unsafe_allow_html=True)
# ── Config (from HF Secrets) ───────────────────────────────────────────────────
CLIENT_ID = os.environ.get("AZURE_CLIENT_ID", "bfcbb298-4cc1-496e-9d9b-ff8c2d967a3a")
CLIENT_SECRET = os.environ.get("AZURE_CLIENT_SECRET", "") # set in HF Secrets
TENANT_ID = os.environ.get("AZURE_TENANT_ID", "5dac2bf2-8842-4788-ae07-33fb103b55d6")
REDIRECT_URI = os.environ.get("REDIRECT_URI", "") # e.g. https://yourspace.hf.space/
AUTHORITY = f"https://login.microsoftonline.com/{TENANT_ID}"
AUTH_ENDPOINT = f"{AUTHORITY}/oauth2/v2.0/authorize"
TOKEN_ENDPOINT= f"{AUTHORITY}/oauth2/v2.0/token"
SCOPES = "Mail.Read offline_access"
DEFAULT_SENDER = "AMSTradingAdmin@RaymondJames.com"
DEFAULT_TARGET = "dutytrader@newfrontieradvisors.com"
# ── Session defaults ───────────────────────────────────────────────────────────
for k, v in {
"token": None,
"oauth_state": None,
"messages": [],
"messages_df": None,
"target_emails": [],
"target_emails_df": None,
"rj_emails": [],
"cash_df": None,
"withdrawals_df": None,
"deposits_df": None,
"notice_df": None,
}.items():
if k not in st.session_state:
st.session_state[k] = v
# ── OAuth helpers ──────────────────────────────────────────────────────────────
def build_auth_url():
state = secrets.token_urlsafe(16)
st.session_state.oauth_state = state
params = {
"client_id": CLIENT_ID,
"response_type": "code",
"redirect_uri": REDIRECT_URI,
"response_mode": "query",
"scope": SCOPES,
"state": state,
}
return AUTH_ENDPOINT + "?" + urllib.parse.urlencode(params)
def exchange_code_for_token(code: str) -> str:
resp = requests.post(TOKEN_ENDPOINT, data={
"client_id": CLIENT_ID,
"client_secret": CLIENT_SECRET,
"code": code,
"redirect_uri": REDIRECT_URI,
"grant_type": "authorization_code",
}, timeout=30)
resp.raise_for_status()
return resp.json()["access_token"]
# ── Check for OAuth callback (code in URL query params) ───────────────────────
query_params = st.query_params
if not st.session_state.token and "code" in query_params:
code = query_params["code"]
state = query_params.get("state", "")
if state == st.session_state.oauth_state or not st.session_state.oauth_state:
try:
with st.spinner("Completing sign-in..."):
st.session_state.token = exchange_code_for_token(code)
# Clear the code from the URL
st.query_params.clear()
st.rerun()
except Exception as e:
st.error(f"Sign-in failed: {e}")
# ── Email helpers ──────────────────────────────────────────────────────────────
def get_emails(token, top=50):
url = "https://graph.microsoft.com/v1.0/me/mailFolders/inbox/messages"
headers = {
"Authorization": f"Bearer {token}",
"Prefer": 'outlook.body-content-type="html"',
}
params = {
"$top": top,
"$select": "id,subject,from,toRecipients,ccRecipients,receivedDateTime,body",
"$orderby": "receivedDateTime desc",
}
r = requests.get(url, headers=headers, params=params, timeout=60)
r.raise_for_status()
return r.json().get("value", [])
def extract_info(msg):
def addrs(lst):
return [(r.get("emailAddress") or {}).get("address") for r in (lst or [])
if (r.get("emailAddress") or {}).get("address")]
body_html = (msg.get("body") or {}).get("content", "")
return {
"id": msg.get("id"),
"subject": msg.get("subject"),
"from": ((msg.get("from") or {}).get("emailAddress") or {}).get("address"),
"to": addrs(msg.get("toRecipients")),
"cc": addrs(msg.get("ccRecipients")),
"received_time": msg.get("receivedDateTime"),
"body_html": body_html,
}
def html_to_text(html):
return BeautifulSoup(html, "html.parser").get_text("\n", strip=True) if html else ""
def reply_marker_html(body_html):
if not body_html:
return ""
soup = BeautifulSoup(body_html, "html.parser")
marker = soup.find(id="divRplyFwdMsg")
if not marker:
return ""
return "".join(str(n) for n in marker.next_siblings).strip()
def fwd_headers(body_html):
text = html_to_text(body_html)
def grab(label):
m = re.search(rf"{label}:\s*(.*)", text)
return m.group(1).strip() if m else ""
return {k: grab(v) for k, v in {
"forwarded_from": "From",
"forwarded_sent_time": "Sent",
"forwarded_to": "To",
"forwarded_cc": "Cc",
"forwarded_subject": "Subject",
}.items()}
def normalize_email(value):
if not value:
return ""
match = re.search(r'[\w.+-]+@[\w.-]+\.\w+', str(value))
return match.group(0).lower() if match else str(value).strip().lower()
def split_header_addresses(value):
if not value:
return []
matches = re.findall(r'[\w.+-]+@[\w.-]+\.\w+', str(value))
if matches:
return [m.lower() for m in matches]
cleaned = str(value).strip().lower()
return [cleaned] if cleaned else []
def matches_email_filters(info, sender_filter, target_filter):
sender_filter = normalize_email(sender_filter)
target_filter = normalize_email(target_filter)
forwarded = fwd_headers(info.get("body_html", ""))
body_text = html_to_text(info.get("body_html", "")).lower()
body_html = (info.get("body_html", "") or "").lower()
outer_from = normalize_email(info.get("from"))
outer_recipients = [normalize_email(a) for a in info.get("to", []) + info.get("cc", [])]
forwarded_from = split_header_addresses(forwarded.get("forwarded_from", ""))
forwarded_recipients = (
split_header_addresses(forwarded.get("forwarded_to", "")) +
split_header_addresses(forwarded.get("forwarded_cc", ""))
)
sender_match = (
sender_filter in [outer_from]
or sender_filter in forwarded_from
or sender_filter in body_text
or sender_filter in body_html
)
target_match = (
target_filter in outer_recipients
or target_filter in forwarded_recipients
or target_filter in body_text
or target_filter in body_html
)
return sender_match and target_match, forwarded
def is_rj(info):
subj = (info.get("subject") or "").lower()
html = info.get("body_html") or ""
text = html_to_text(html).lower()
if "rj emails--overview" in subj:
return False
has_fwd = 'id="divRplyFwdMsg"' in html or ("from:" in text and "sent:" in text)
if not has_fwd:
return False
return any([
"raymond james" in subj, "raymond james" in text,
"amstradingadmin@raymondjames.com" in text,
"ams managed operations" in subj, "ams managed operations" in text,
"direct asset transfers" in subj,
"distribution & cash balance check" in subj,
])
def build_rj(messages, sender_filter, target_filter):
rows = []
for i, msg in enumerate(messages, 1):
info = extract_info(msg)
matches_target, fwd = matches_email_filters(info, sender_filter, target_filter)
if not matches_target:
continue
if not is_rj(info):
continue
rows.append({
"email_id": info.get("id") or f"msg_{i}",
"fw_subject": info.get("subject"),
"fw_from": info.get("from"),
"fw_to": ", ".join(info.get("to", [])),
"fw_cc": ", ".join(info.get("cc", [])),
"fw_received_time": info.get("received_time"),
"fw_body_html": info.get("body_html", ""),
**{k: fwd.get(k, "") for k in fwd},
"original_body_html": reply_marker_html(info.get("body_html", "")),
})
return rows
def build_target_emails(messages, sender_filter, target_filter):
rows = []
for i, msg in enumerate(messages, 1):
info = extract_info(msg)
matches_target, forwarded = matches_email_filters(info, sender_filter, target_filter)
if not matches_target:
continue
rows.append({
"email_id": info.get("id") or f"msg_{i}",
"subject": info.get("subject"),
"from": info.get("from"),
"to": ", ".join(info.get("to", [])),
"cc": ", ".join(info.get("cc", [])),
"received_time": info.get("received_time"),
"is_rj_forward": is_rj(info),
"forwarded_from": forwarded.get("forwarded_from", ""),
"forwarded_to": forwarded.get("forwarded_to", ""),
"forwarded_cc": forwarded.get("forwarded_cc", ""),
"forwarded_subject": forwarded.get("forwarded_subject", ""),
"body_preview": html_to_text(info.get("body_html", ""))[:300],
})
return rows
def build_fetched_emails(messages):
rows = []
for i, msg in enumerate(messages, 1):
info = extract_info(msg)
forwarded = fwd_headers(info.get("body_html", ""))
rows.append({
"email_id": info.get("id") or f"msg_{i}",
"subject": info.get("subject"),
"from": info.get("from"),
"to": ", ".join(info.get("to", [])),
"cc": ", ".join(info.get("cc", [])),
"received_time": info.get("received_time"),
"is_rj_forward": is_rj(info),
"forwarded_from": forwarded.get("forwarded_from", ""),
"forwarded_to": forwarded.get("forwarded_to", ""),
"forwarded_cc": forwarded.get("forwarded_cc", ""),
"forwarded_subject": forwarded.get("forwarded_subject", ""),
"body_preview": html_to_text(info.get("body_html", ""))[:300],
})
return rows
def extract_tables(body_html):
if not body_html:
return []
soup = BeautifulSoup(body_html, "html.parser")
out = []
for node in soup.find_all(string=True):
label = node.strip()
if label not in {"Cash Only Transactions", "Withdrawals", "Deposits"}:
continue
table = node.find_next("table")
if not table:
continue
try:
dfs = pd.read_html(StringIO(str(table)))
except ValueError:
continue
for df in dfs:
df.columns = [str(c).strip() for c in df.columns]
df = df.dropna(how="all").reset_index(drop=True)
out.append((label, df))
return out
def parse_date(v):
if not v:
return None
for fmt in ("%m/%d/%Y", "%m/%d/%y", "%A, %B %d, %Y %I:%M %p"):
try:
return datetime.strptime(v.strip(), fmt)
except ValueError:
pass
return None
def build_notice(row):
section = row.get("source_section")
account = row.get("Account") or row.get("customer_account")
amount = row.get("Cash Amount") or row.get("Amount")
if section == "Withdrawals":
input_date = row.get("Input date")
deadline = parse_date(input_date)
response_time = (
(deadline + timedelta(days=8)).strftime("%Y-%m-%d")
if deadline else f"8 calendar days after {input_date}"
)
action = (
f"Enter withdrawal in portal for account {account} for {amount}. "
f"Schedule 8 calendar days after input date {input_date}."
)
elif section == "Deposits" or (row.get("Type") or "").lower() == "deposit":
response_time = "As soon as practical after email receipt"
action = (
f"Message dutytrader in Teams: contribution/deposit for account {account} "
f"({amount}) approved to invest in model."
)
else:
response_time = "ASAP"
action = (
f"Halt account and advise of pending trades on {account}. "
f"Immediate cash movement: {amount}."
)
return {
"email_id": row.get("email_id"),
"sent_time": row.get("fw_received_time"),
"response_time_needed": response_time,
"account": account,
"action": action,
"source_section": section,
}
def to_excel(dfs):
buf = BytesIO()
with pd.ExcelWriter(buf, engine="openpyxl") as w:
for name, df in dfs.items():
if df is not None and not df.empty:
df.to_excel(w, sheet_name=name, index=False)
return buf.getvalue()
# ── UI ─────────────────────────────────────────────────────────────────────────
st.markdown("## πŸ“¬ Raymond James Email Processor")
st.markdown("Pulls forwarded RJ emails, extracts cash movement tables, and generates action notices.")
st.divider()
# Step 1 β€” Sign in
st.markdown('<div class="step-label">Step 1</div>', unsafe_allow_html=True)
col1, col2 = st.columns([5, 1])
with col1:
st.markdown('<div class="step-title">Sign in to Microsoft</div>', unsafe_allow_html=True)
with col2:
if st.session_state.token:
st.markdown('<span class="badge-success">βœ“ Signed in</span>', unsafe_allow_html=True)
else:
st.markdown('<span class="badge-pending">Not signed in</span>', unsafe_allow_html=True)
if not st.session_state.token:
auth_url = build_auth_url()
st.markdown(
f'<div class="login-btn"><a href="{auth_url}" target="_self">πŸ” Sign in with Microsoft</a></div>',
unsafe_allow_html=True,
)
st.caption("You'll be redirected to Microsoft's login page and back automatically.")
else:
st.markdown("You are signed in. βœ“")
if st.button("Sign out"):
st.session_state.token = None
st.rerun()
# Step 2 β€” Config
st.markdown('<div class="step-label">Step 2</div>', unsafe_allow_html=True)
st.markdown('<div class="step-title">Configure</div>', unsafe_allow_html=True)
col_a, col_b, col_c = st.columns(3)
with col_a:
sender_email = st.text_input("Sender email (who forwards RJ emails)", value=DEFAULT_SENDER)
with col_b:
target_email = st.text_input("Recipient email to filter for", value=DEFAULT_TARGET)
with col_c:
top_n = st.number_input("Max emails to fetch", min_value=5, max_value=200, value=100, step=5)
# Step 3 β€” Run
st.markdown('<div class="step-label">Step 3</div>', unsafe_allow_html=True)
st.markdown('<div class="step-title">Fetch & Process</div>', unsafe_allow_html=True)
if st.button("β–Ά Run", disabled=not st.session_state.token):
with st.spinner("Fetching emails from Outlook..."):
try:
raw = get_emails(st.session_state.token, int(top_n))
st.session_state.messages = raw
st.session_state.messages_df = pd.DataFrame(build_fetched_emails(raw)) if raw else pd.DataFrame()
except requests.HTTPError as e:
if e.response.status_code == 401:
st.error("Session expired. Please sign in again.")
st.session_state.token = None
else:
st.error(f"API error: {e}")
st.stop()
with st.spinner("Collecting all emails from sender A to recipient B..."):
st.session_state.target_emails = build_target_emails(raw, sender_email, target_email)
st.session_state.target_emails_df = (
pd.DataFrame(st.session_state.target_emails)
if st.session_state.target_emails else pd.DataFrame()
)
with st.spinner("Filtering Raymond James emails..."):
st.session_state.rj_emails = build_rj(raw, sender_email, target_email)
with st.spinner("Extracting cash tables..."):
cash_rows, wd_rows, dep_rows = [], [], []
for email in st.session_state.rj_emails:
for section, df in extract_tables(email.get("original_body_html", "")):
rows = df.to_dict("records")
for r in rows:
r.update({
"email_id": email.get("email_id"),
"fw_subject": email.get("fw_subject"),
"fw_received_time": email.get("fw_received_time"),
"original_subject": email.get("forwarded_subject", ""),
"original_from": email.get("forwarded_from", ""),
"original_sent_time": email.get("forwarded_sent_time", ""),
"source_section": section,
})
if section == "Cash Only Transactions": cash_rows.extend(rows)
elif section == "Withdrawals": wd_rows.extend(rows)
elif section == "Deposits": dep_rows.extend(rows)
st.session_state.cash_df = pd.DataFrame(cash_rows) if cash_rows else pd.DataFrame()
st.session_state.withdrawals_df = pd.DataFrame(wd_rows) if wd_rows else pd.DataFrame()
st.session_state.deposits_df = pd.DataFrame(dep_rows) if dep_rows else pd.DataFrame()
with st.spinner("Building action notices..."):
all_rows = cash_rows + wd_rows + dep_rows
st.session_state.notice_df = (
pd.DataFrame([build_notice(r) for r in all_rows])
if all_rows else pd.DataFrame()
)
st.success(
f"Done! Fetched **{len(raw)}** recent inbox emails and found "
f"**{len(st.session_state.rj_emails)}** RJ forwarded emails."
)
# ── Results ────────────────────────────────────────────────────────────────────
st.markdown('<div class="step-label">Step 4</div>', unsafe_allow_html=True)
st.markdown('<div class="step-title">Debug: All Fetched Inbox Emails</div>', unsafe_allow_html=True)
fetched_df = st.session_state.messages_df
if fetched_df is not None:
if not fetched_df.empty:
st.dataframe(fetched_df, width="stretch", hide_index=True)
elif st.session_state.messages == []:
st.caption("Run the fetch step to inspect all fetched inbox emails.")
st.markdown('<div class="step-label">Step 5</div>', unsafe_allow_html=True)
st.markdown('<div class="step-title">Debug: All Emails from A to B</div>', unsafe_allow_html=True)
debug_df = st.session_state.target_emails_df
if debug_df is not None:
if not debug_df.empty:
st.dataframe(debug_df, width="stretch", hide_index=True)
elif st.session_state.messages:
st.info("Fetched emails from the inbox, but none matched the sender + recipient filter.")
else:
st.caption("Run the fetch step to inspect all emails from sender A to recipient B.")
if st.session_state.rj_emails:
st.divider()
st.markdown("### Results")
tab1, tab2, tab3, tab4 = st.tabs([
"πŸ“‹ Action Notices", "πŸ’΅ Cash Transactions", "⬆ Withdrawals", "⬇ Deposits"
])
for tab, key, label in [
(tab1, "notice_df", "No action notices generated."),
(tab2, "cash_df", "No cash transactions found."),
(tab3, "withdrawals_df", "No withdrawals found."),
(tab4, "deposits_df", "No deposits found."),
]:
with tab:
df = st.session_state[key]
if df is not None and not df.empty:
# Hide HTML columns
display_df = df.drop(columns=[c for c in df.columns if "html" in c.lower()], errors="ignore")
st.dataframe(display_df, width="stretch", hide_index=True)
else:
st.info(label)
st.divider()
excel_bytes = to_excel({
"Action Notices": st.session_state.notice_df,
"Cash Transactions": st.session_state.cash_df,
"Withdrawals": st.session_state.withdrawals_df,
"Deposits": st.session_state.deposits_df,
})
st.download_button(
label="⬇ Download Excel Report",
data=excel_bytes,
file_name=f"rj_emails_{datetime.now().strftime('%Y%m%d_%H%M')}.xlsx",
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
)