import os import json import urllib.parse import secrets import requests import streamlit as st import pandas as pd from bs4 import BeautifulSoup from io import BytesIO, StringIO import re from datetime import datetime, timedelta # ── Page config ──────────────────────────────────────────────────────────────── st.set_page_config( page_title="RJ Email Processor", page_icon="📬", layout="wide", ) st.markdown(""" """, unsafe_allow_html=True) # ── Config (from HF Secrets) ─────────────────────────────────────────────────── CLIENT_ID = os.environ.get("AZURE_CLIENT_ID", "bfcbb298-4cc1-496e-9d9b-ff8c2d967a3a") CLIENT_SECRET = os.environ.get("AZURE_CLIENT_SECRET", "") # set in HF Secrets TENANT_ID = os.environ.get("AZURE_TENANT_ID", "5dac2bf2-8842-4788-ae07-33fb103b55d6") REDIRECT_URI = os.environ.get("REDIRECT_URI", "") # e.g. https://yourspace.hf.space/ AUTHORITY = f"https://login.microsoftonline.com/{TENANT_ID}" AUTH_ENDPOINT = f"{AUTHORITY}/oauth2/v2.0/authorize" TOKEN_ENDPOINT= f"{AUTHORITY}/oauth2/v2.0/token" SCOPES = "Mail.Read offline_access" DEFAULT_SENDER = "AMSTradingAdmin@RaymondJames.com" DEFAULT_TARGET = "dutytrader@newfrontieradvisors.com" # ── Session defaults ─────────────────────────────────────────────────────────── for k, v in { "token": None, "oauth_state": None, "messages": [], "messages_df": None, "target_emails": [], "target_emails_df": None, "rj_emails": [], "cash_df": None, "withdrawals_df": None, "deposits_df": None, "notice_df": None, }.items(): if k not in st.session_state: st.session_state[k] = v # ── OAuth helpers ────────────────────────────────────────────────────────────── def build_auth_url(): state = secrets.token_urlsafe(16) st.session_state.oauth_state = state params = { "client_id": CLIENT_ID, "response_type": "code", "redirect_uri": REDIRECT_URI, "response_mode": "query", "scope": SCOPES, "state": state, } return AUTH_ENDPOINT + "?" + urllib.parse.urlencode(params) def exchange_code_for_token(code: str) -> str: resp = requests.post(TOKEN_ENDPOINT, data={ "client_id": CLIENT_ID, "client_secret": CLIENT_SECRET, "code": code, "redirect_uri": REDIRECT_URI, "grant_type": "authorization_code", }, timeout=30) resp.raise_for_status() return resp.json()["access_token"] # ── Check for OAuth callback (code in URL query params) ─────────────────────── query_params = st.query_params if not st.session_state.token and "code" in query_params: code = query_params["code"] state = query_params.get("state", "") if state == st.session_state.oauth_state or not st.session_state.oauth_state: try: with st.spinner("Completing sign-in..."): st.session_state.token = exchange_code_for_token(code) # Clear the code from the URL st.query_params.clear() st.rerun() except Exception as e: st.error(f"Sign-in failed: {e}") # ── Email helpers ────────────────────────────────────────────────────────────── def get_emails(token, top=50): url = "https://graph.microsoft.com/v1.0/me/mailFolders/inbox/messages" headers = { "Authorization": f"Bearer {token}", "Prefer": 'outlook.body-content-type="html"', } params = { "$top": top, "$select": "id,subject,from,toRecipients,ccRecipients,receivedDateTime,body", "$orderby": "receivedDateTime desc", } r = requests.get(url, headers=headers, params=params, timeout=60) r.raise_for_status() return r.json().get("value", []) def extract_info(msg): def addrs(lst): return [(r.get("emailAddress") or {}).get("address") for r in (lst or []) if (r.get("emailAddress") or {}).get("address")] body_html = (msg.get("body") or {}).get("content", "") return { "id": msg.get("id"), "subject": msg.get("subject"), "from": ((msg.get("from") or {}).get("emailAddress") or {}).get("address"), "to": addrs(msg.get("toRecipients")), "cc": addrs(msg.get("ccRecipients")), "received_time": msg.get("receivedDateTime"), "body_html": body_html, } def html_to_text(html): return BeautifulSoup(html, "html.parser").get_text("\n", strip=True) if html else "" def reply_marker_html(body_html): if not body_html: return "" soup = BeautifulSoup(body_html, "html.parser") marker = soup.find(id="divRplyFwdMsg") if not marker: return "" return "".join(str(n) for n in marker.next_siblings).strip() def fwd_headers(body_html): text = html_to_text(body_html) def grab(label): m = re.search(rf"{label}:\s*(.*)", text) return m.group(1).strip() if m else "" return {k: grab(v) for k, v in { "forwarded_from": "From", "forwarded_sent_time": "Sent", "forwarded_to": "To", "forwarded_cc": "Cc", "forwarded_subject": "Subject", }.items()} def normalize_email(value): if not value: return "" match = re.search(r'[\w.+-]+@[\w.-]+\.\w+', str(value)) return match.group(0).lower() if match else str(value).strip().lower() def split_header_addresses(value): if not value: return [] matches = re.findall(r'[\w.+-]+@[\w.-]+\.\w+', str(value)) if matches: return [m.lower() for m in matches] cleaned = str(value).strip().lower() return [cleaned] if cleaned else [] def matches_email_filters(info, sender_filter, target_filter): sender_filter = normalize_email(sender_filter) target_filter = normalize_email(target_filter) forwarded = fwd_headers(info.get("body_html", "")) body_text = html_to_text(info.get("body_html", "")).lower() body_html = (info.get("body_html", "") or "").lower() outer_from = normalize_email(info.get("from")) outer_recipients = [normalize_email(a) for a in info.get("to", []) + info.get("cc", [])] forwarded_from = split_header_addresses(forwarded.get("forwarded_from", "")) forwarded_recipients = ( split_header_addresses(forwarded.get("forwarded_to", "")) + split_header_addresses(forwarded.get("forwarded_cc", "")) ) sender_match = ( sender_filter in [outer_from] or sender_filter in forwarded_from or sender_filter in body_text or sender_filter in body_html ) target_match = ( target_filter in outer_recipients or target_filter in forwarded_recipients or target_filter in body_text or target_filter in body_html ) return sender_match and target_match, forwarded def is_rj(info): subj = (info.get("subject") or "").lower() html = info.get("body_html") or "" text = html_to_text(html).lower() if "rj emails--overview" in subj: return False has_fwd = 'id="divRplyFwdMsg"' in html or ("from:" in text and "sent:" in text) if not has_fwd: return False return any([ "raymond james" in subj, "raymond james" in text, "amstradingadmin@raymondjames.com" in text, "ams managed operations" in subj, "ams managed operations" in text, "direct asset transfers" in subj, "distribution & cash balance check" in subj, ]) def build_rj(messages, sender_filter, target_filter): rows = [] for i, msg in enumerate(messages, 1): info = extract_info(msg) matches_target, fwd = matches_email_filters(info, sender_filter, target_filter) if not matches_target: continue if not is_rj(info): continue rows.append({ "email_id": info.get("id") or f"msg_{i}", "fw_subject": info.get("subject"), "fw_from": info.get("from"), "fw_to": ", ".join(info.get("to", [])), "fw_cc": ", ".join(info.get("cc", [])), "fw_received_time": info.get("received_time"), "fw_body_html": info.get("body_html", ""), **{k: fwd.get(k, "") for k in fwd}, "original_body_html": reply_marker_html(info.get("body_html", "")), }) return rows def build_target_emails(messages, sender_filter, target_filter): rows = [] for i, msg in enumerate(messages, 1): info = extract_info(msg) matches_target, forwarded = matches_email_filters(info, sender_filter, target_filter) if not matches_target: continue rows.append({ "email_id": info.get("id") or f"msg_{i}", "subject": info.get("subject"), "from": info.get("from"), "to": ", ".join(info.get("to", [])), "cc": ", ".join(info.get("cc", [])), "received_time": info.get("received_time"), "is_rj_forward": is_rj(info), "forwarded_from": forwarded.get("forwarded_from", ""), "forwarded_to": forwarded.get("forwarded_to", ""), "forwarded_cc": forwarded.get("forwarded_cc", ""), "forwarded_subject": forwarded.get("forwarded_subject", ""), "body_preview": html_to_text(info.get("body_html", ""))[:300], }) return rows def build_fetched_emails(messages): rows = [] for i, msg in enumerate(messages, 1): info = extract_info(msg) forwarded = fwd_headers(info.get("body_html", "")) rows.append({ "email_id": info.get("id") or f"msg_{i}", "subject": info.get("subject"), "from": info.get("from"), "to": ", ".join(info.get("to", [])), "cc": ", ".join(info.get("cc", [])), "received_time": info.get("received_time"), "is_rj_forward": is_rj(info), "forwarded_from": forwarded.get("forwarded_from", ""), "forwarded_to": forwarded.get("forwarded_to", ""), "forwarded_cc": forwarded.get("forwarded_cc", ""), "forwarded_subject": forwarded.get("forwarded_subject", ""), "body_preview": html_to_text(info.get("body_html", ""))[:300], }) return rows def extract_tables(body_html): if not body_html: return [] soup = BeautifulSoup(body_html, "html.parser") out = [] for node in soup.find_all(string=True): label = node.strip() if label not in {"Cash Only Transactions", "Withdrawals", "Deposits"}: continue table = node.find_next("table") if not table: continue try: dfs = pd.read_html(StringIO(str(table))) except ValueError: continue for df in dfs: df.columns = [str(c).strip() for c in df.columns] df = df.dropna(how="all").reset_index(drop=True) out.append((label, df)) return out def parse_date(v): if not v: return None for fmt in ("%m/%d/%Y", "%m/%d/%y", "%A, %B %d, %Y %I:%M %p"): try: return datetime.strptime(v.strip(), fmt) except ValueError: pass return None def build_notice(row): section = row.get("source_section") account = row.get("Account") or row.get("customer_account") amount = row.get("Cash Amount") or row.get("Amount") if section == "Withdrawals": input_date = row.get("Input date") deadline = parse_date(input_date) response_time = ( (deadline + timedelta(days=8)).strftime("%Y-%m-%d") if deadline else f"8 calendar days after {input_date}" ) action = ( f"Enter withdrawal in portal for account {account} for {amount}. " f"Schedule 8 calendar days after input date {input_date}." ) elif section == "Deposits" or (row.get("Type") or "").lower() == "deposit": response_time = "As soon as practical after email receipt" action = ( f"Message dutytrader in Teams: contribution/deposit for account {account} " f"({amount}) approved to invest in model." ) else: response_time = "ASAP" action = ( f"Halt account and advise of pending trades on {account}. " f"Immediate cash movement: {amount}." ) return { "email_id": row.get("email_id"), "sent_time": row.get("fw_received_time"), "response_time_needed": response_time, "account": account, "action": action, "source_section": section, } def to_excel(dfs): buf = BytesIO() with pd.ExcelWriter(buf, engine="openpyxl") as w: for name, df in dfs.items(): if df is not None and not df.empty: df.to_excel(w, sheet_name=name, index=False) return buf.getvalue() # ── UI ───────────────────────────────────────────────────────────────────────── st.markdown("## 📬 Raymond James Email Processor") st.markdown("Pulls forwarded RJ emails, extracts cash movement tables, and generates action notices.") st.divider() # Step 1 — Sign in st.markdown('
Step 1
', unsafe_allow_html=True) col1, col2 = st.columns([5, 1]) with col1: st.markdown('
Sign in to Microsoft
', unsafe_allow_html=True) with col2: if st.session_state.token: st.markdown('✓ Signed in', unsafe_allow_html=True) else: st.markdown('Not signed in', unsafe_allow_html=True) if not st.session_state.token: auth_url = build_auth_url() st.markdown( f'
🔐 Sign in with Microsoft
', unsafe_allow_html=True, ) st.caption("You'll be redirected to Microsoft's login page and back automatically.") else: st.markdown("You are signed in. ✓") if st.button("Sign out"): st.session_state.token = None st.rerun() # Step 2 — Config st.markdown('
Step 2
', unsafe_allow_html=True) st.markdown('
Configure
', unsafe_allow_html=True) col_a, col_b, col_c = st.columns(3) with col_a: sender_email = st.text_input("Sender email (who forwards RJ emails)", value=DEFAULT_SENDER) with col_b: target_email = st.text_input("Recipient email to filter for", value=DEFAULT_TARGET) with col_c: top_n = st.number_input("Max emails to fetch", min_value=5, max_value=200, value=100, step=5) # Step 3 — Run st.markdown('
Step 3
', unsafe_allow_html=True) st.markdown('
Fetch & Process
', unsafe_allow_html=True) if st.button("▶ Run", disabled=not st.session_state.token): with st.spinner("Fetching emails from Outlook..."): try: raw = get_emails(st.session_state.token, int(top_n)) st.session_state.messages = raw st.session_state.messages_df = pd.DataFrame(build_fetched_emails(raw)) if raw else pd.DataFrame() except requests.HTTPError as e: if e.response.status_code == 401: st.error("Session expired. Please sign in again.") st.session_state.token = None else: st.error(f"API error: {e}") st.stop() with st.spinner("Collecting all emails from sender A to recipient B..."): st.session_state.target_emails = build_target_emails(raw, sender_email, target_email) st.session_state.target_emails_df = ( pd.DataFrame(st.session_state.target_emails) if st.session_state.target_emails else pd.DataFrame() ) with st.spinner("Filtering Raymond James emails..."): st.session_state.rj_emails = build_rj(raw, sender_email, target_email) with st.spinner("Extracting cash tables..."): cash_rows, wd_rows, dep_rows = [], [], [] for email in st.session_state.rj_emails: for section, df in extract_tables(email.get("original_body_html", "")): rows = df.to_dict("records") for r in rows: r.update({ "email_id": email.get("email_id"), "fw_subject": email.get("fw_subject"), "fw_received_time": email.get("fw_received_time"), "original_subject": email.get("forwarded_subject", ""), "original_from": email.get("forwarded_from", ""), "original_sent_time": email.get("forwarded_sent_time", ""), "source_section": section, }) if section == "Cash Only Transactions": cash_rows.extend(rows) elif section == "Withdrawals": wd_rows.extend(rows) elif section == "Deposits": dep_rows.extend(rows) st.session_state.cash_df = pd.DataFrame(cash_rows) if cash_rows else pd.DataFrame() st.session_state.withdrawals_df = pd.DataFrame(wd_rows) if wd_rows else pd.DataFrame() st.session_state.deposits_df = pd.DataFrame(dep_rows) if dep_rows else pd.DataFrame() with st.spinner("Building action notices..."): all_rows = cash_rows + wd_rows + dep_rows st.session_state.notice_df = ( pd.DataFrame([build_notice(r) for r in all_rows]) if all_rows else pd.DataFrame() ) st.success( f"Done! Fetched **{len(raw)}** recent inbox emails and found " f"**{len(st.session_state.rj_emails)}** RJ forwarded emails." ) # ── Results ──────────────────────────────────────────────────────────────────── st.markdown('
Step 4
', unsafe_allow_html=True) st.markdown('
Debug: All Fetched Inbox Emails
', unsafe_allow_html=True) fetched_df = st.session_state.messages_df if fetched_df is not None: if not fetched_df.empty: st.dataframe(fetched_df, width="stretch", hide_index=True) elif st.session_state.messages == []: st.caption("Run the fetch step to inspect all fetched inbox emails.") st.markdown('
Step 5
', unsafe_allow_html=True) st.markdown('
Debug: All Emails from A to B
', unsafe_allow_html=True) debug_df = st.session_state.target_emails_df if debug_df is not None: if not debug_df.empty: st.dataframe(debug_df, width="stretch", hide_index=True) elif st.session_state.messages: st.info("Fetched emails from the inbox, but none matched the sender + recipient filter.") else: st.caption("Run the fetch step to inspect all emails from sender A to recipient B.") if st.session_state.rj_emails: st.divider() st.markdown("### Results") tab1, tab2, tab3, tab4 = st.tabs([ "📋 Action Notices", "💵 Cash Transactions", "⬆ Withdrawals", "⬇ Deposits" ]) for tab, key, label in [ (tab1, "notice_df", "No action notices generated."), (tab2, "cash_df", "No cash transactions found."), (tab3, "withdrawals_df", "No withdrawals found."), (tab4, "deposits_df", "No deposits found."), ]: with tab: df = st.session_state[key] if df is not None and not df.empty: # Hide HTML columns display_df = df.drop(columns=[c for c in df.columns if "html" in c.lower()], errors="ignore") st.dataframe(display_df, width="stretch", hide_index=True) else: st.info(label) st.divider() excel_bytes = to_excel({ "Action Notices": st.session_state.notice_df, "Cash Transactions": st.session_state.cash_df, "Withdrawals": st.session_state.withdrawals_df, "Deposits": st.session_state.deposits_df, }) st.download_button( label="⬇ Download Excel Report", data=excel_bytes, file_name=f"rj_emails_{datetime.now().strftime('%Y%m%d_%H%M')}.xlsx", mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", )