import os import json import urllib.parse import secrets import requests import streamlit as st import pandas as pd from bs4 import BeautifulSoup from io import BytesIO, StringIO import re from datetime import datetime, timedelta # ── Page config ──────────────────────────────────────────────────────────────── st.set_page_config( page_title="RJ Email Processor", page_icon="📬", layout="wide", ) st.markdown(""" """, unsafe_allow_html=True) # ── Config (from HF Secrets) ─────────────────────────────────────────────────── CLIENT_ID = os.environ.get("AZURE_CLIENT_ID", "bfcbb298-4cc1-496e-9d9b-ff8c2d967a3a") CLIENT_SECRET = os.environ.get("AZURE_CLIENT_SECRET", "") # set in HF Secrets TENANT_ID = os.environ.get("AZURE_TENANT_ID", "5dac2bf2-8842-4788-ae07-33fb103b55d6") REDIRECT_URI = os.environ.get("REDIRECT_URI", "") # e.g. https://yourspace.hf.space/ AUTHORITY = f"https://login.microsoftonline.com/{TENANT_ID}" AUTH_ENDPOINT = f"{AUTHORITY}/oauth2/v2.0/authorize" TOKEN_ENDPOINT= f"{AUTHORITY}/oauth2/v2.0/token" SCOPES = "Mail.Read offline_access" DEFAULT_SENDER = "AMSTradingAdmin@RaymondJames.com" DEFAULT_TARGET = "dutytrader@newfrontieradvisors.com" # ── Session defaults ─────────────────────────────────────────────────────────── for k, v in { "token": None, "oauth_state": None, "messages": [], "messages_df": None, "target_emails": [], "target_emails_df": None, "rj_emails": [], "cash_df": None, "withdrawals_df": None, "deposits_df": None, "notice_df": None, }.items(): if k not in st.session_state: st.session_state[k] = v # ── OAuth helpers ────────────────────────────────────────────────────────────── def build_auth_url(): state = secrets.token_urlsafe(16) st.session_state.oauth_state = state params = { "client_id": CLIENT_ID, "response_type": "code", "redirect_uri": REDIRECT_URI, "response_mode": "query", "scope": SCOPES, "state": state, } return AUTH_ENDPOINT + "?" + urllib.parse.urlencode(params) def exchange_code_for_token(code: str) -> str: resp = requests.post(TOKEN_ENDPOINT, data={ "client_id": CLIENT_ID, "client_secret": CLIENT_SECRET, "code": code, "redirect_uri": REDIRECT_URI, "grant_type": "authorization_code", }, timeout=30) resp.raise_for_status() return resp.json()["access_token"] # ── Check for OAuth callback (code in URL query params) ─────────────────────── query_params = st.query_params if not st.session_state.token and "code" in query_params: code = query_params["code"] state = query_params.get("state", "") if state == st.session_state.oauth_state or not st.session_state.oauth_state: try: with st.spinner("Completing sign-in..."): st.session_state.token = exchange_code_for_token(code) # Clear the code from the URL st.query_params.clear() st.rerun() except Exception as e: st.error(f"Sign-in failed: {e}") # ── Email helpers ────────────────────────────────────────────────────────────── def get_emails(token, top=50): url = "https://graph.microsoft.com/v1.0/me/mailFolders/inbox/messages" headers = { "Authorization": f"Bearer {token}", "Prefer": 'outlook.body-content-type="html"', } params = { "$top": top, "$select": "id,subject,from,toRecipients,ccRecipients,receivedDateTime,body", "$orderby": "receivedDateTime desc", } r = requests.get(url, headers=headers, params=params, timeout=60) r.raise_for_status() return r.json().get("value", []) def extract_info(msg): def addrs(lst): return [(r.get("emailAddress") or {}).get("address") for r in (lst or []) if (r.get("emailAddress") or {}).get("address")] body_html = (msg.get("body") or {}).get("content", "") return { "id": msg.get("id"), "subject": msg.get("subject"), "from": ((msg.get("from") or {}).get("emailAddress") or {}).get("address"), "to": addrs(msg.get("toRecipients")), "cc": addrs(msg.get("ccRecipients")), "received_time": msg.get("receivedDateTime"), "body_html": body_html, } def html_to_text(html): return BeautifulSoup(html, "html.parser").get_text("\n", strip=True) if html else "" def reply_marker_html(body_html): if not body_html: return "" soup = BeautifulSoup(body_html, "html.parser") marker = soup.find(id="divRplyFwdMsg") if not marker: return "" return "".join(str(n) for n in marker.next_siblings).strip() def fwd_headers(body_html): text = html_to_text(body_html) def grab(label): m = re.search(rf"{label}:\s*(.*)", text) return m.group(1).strip() if m else "" return {k: grab(v) for k, v in { "forwarded_from": "From", "forwarded_sent_time": "Sent", "forwarded_to": "To", "forwarded_cc": "Cc", "forwarded_subject": "Subject", }.items()} def normalize_email(value): if not value: return "" match = re.search(r'[\w.+-]+@[\w.-]+\.\w+', str(value)) return match.group(0).lower() if match else str(value).strip().lower() def split_header_addresses(value): if not value: return [] matches = re.findall(r'[\w.+-]+@[\w.-]+\.\w+', str(value)) if matches: return [m.lower() for m in matches] cleaned = str(value).strip().lower() return [cleaned] if cleaned else [] def matches_email_filters(info, sender_filter, target_filter): sender_filter = normalize_email(sender_filter) target_filter = normalize_email(target_filter) forwarded = fwd_headers(info.get("body_html", "")) body_text = html_to_text(info.get("body_html", "")).lower() body_html = (info.get("body_html", "") or "").lower() outer_from = normalize_email(info.get("from")) outer_recipients = [normalize_email(a) for a in info.get("to", []) + info.get("cc", [])] forwarded_from = split_header_addresses(forwarded.get("forwarded_from", "")) forwarded_recipients = ( split_header_addresses(forwarded.get("forwarded_to", "")) + split_header_addresses(forwarded.get("forwarded_cc", "")) ) sender_match = ( sender_filter in [outer_from] or sender_filter in forwarded_from or sender_filter in body_text or sender_filter in body_html ) target_match = ( target_filter in outer_recipients or target_filter in forwarded_recipients or target_filter in body_text or target_filter in body_html ) return sender_match and target_match, forwarded def is_rj(info): subj = (info.get("subject") or "").lower() html = info.get("body_html") or "" text = html_to_text(html).lower() if "rj emails--overview" in subj: return False has_fwd = 'id="divRplyFwdMsg"' in html or ("from:" in text and "sent:" in text) if not has_fwd: return False return any([ "raymond james" in subj, "raymond james" in text, "amstradingadmin@raymondjames.com" in text, "ams managed operations" in subj, "ams managed operations" in text, "direct asset transfers" in subj, "distribution & cash balance check" in subj, ]) def build_rj(messages, sender_filter, target_filter): rows = [] for i, msg in enumerate(messages, 1): info = extract_info(msg) matches_target, fwd = matches_email_filters(info, sender_filter, target_filter) if not matches_target: continue if not is_rj(info): continue rows.append({ "email_id": info.get("id") or f"msg_{i}", "fw_subject": info.get("subject"), "fw_from": info.get("from"), "fw_to": ", ".join(info.get("to", [])), "fw_cc": ", ".join(info.get("cc", [])), "fw_received_time": info.get("received_time"), "fw_body_html": info.get("body_html", ""), **{k: fwd.get(k, "") for k in fwd}, "original_body_html": reply_marker_html(info.get("body_html", "")), }) return rows def build_target_emails(messages, sender_filter, target_filter): rows = [] for i, msg in enumerate(messages, 1): info = extract_info(msg) matches_target, forwarded = matches_email_filters(info, sender_filter, target_filter) if not matches_target: continue rows.append({ "email_id": info.get("id") or f"msg_{i}", "subject": info.get("subject"), "from": info.get("from"), "to": ", ".join(info.get("to", [])), "cc": ", ".join(info.get("cc", [])), "received_time": info.get("received_time"), "is_rj_forward": is_rj(info), "forwarded_from": forwarded.get("forwarded_from", ""), "forwarded_to": forwarded.get("forwarded_to", ""), "forwarded_cc": forwarded.get("forwarded_cc", ""), "forwarded_subject": forwarded.get("forwarded_subject", ""), "body_preview": html_to_text(info.get("body_html", ""))[:300], }) return rows def build_fetched_emails(messages): rows = [] for i, msg in enumerate(messages, 1): info = extract_info(msg) forwarded = fwd_headers(info.get("body_html", "")) rows.append({ "email_id": info.get("id") or f"msg_{i}", "subject": info.get("subject"), "from": info.get("from"), "to": ", ".join(info.get("to", [])), "cc": ", ".join(info.get("cc", [])), "received_time": info.get("received_time"), "is_rj_forward": is_rj(info), "forwarded_from": forwarded.get("forwarded_from", ""), "forwarded_to": forwarded.get("forwarded_to", ""), "forwarded_cc": forwarded.get("forwarded_cc", ""), "forwarded_subject": forwarded.get("forwarded_subject", ""), "body_preview": html_to_text(info.get("body_html", ""))[:300], }) return rows def extract_tables(body_html): if not body_html: return [] soup = BeautifulSoup(body_html, "html.parser") out = [] for node in soup.find_all(string=True): label = node.strip() if label not in {"Cash Only Transactions", "Withdrawals", "Deposits"}: continue table = node.find_next("table") if not table: continue try: dfs = pd.read_html(StringIO(str(table))) except ValueError: continue for df in dfs: df.columns = [str(c).strip() for c in df.columns] df = df.dropna(how="all").reset_index(drop=True) out.append((label, df)) return out def parse_date(v): if not v: return None for fmt in ("%m/%d/%Y", "%m/%d/%y", "%A, %B %d, %Y %I:%M %p"): try: return datetime.strptime(v.strip(), fmt) except ValueError: pass return None def build_notice(row): section = row.get("source_section") account = row.get("Account") or row.get("customer_account") amount = row.get("Cash Amount") or row.get("Amount") if section == "Withdrawals": input_date = row.get("Input date") deadline = parse_date(input_date) response_time = ( (deadline + timedelta(days=8)).strftime("%Y-%m-%d") if deadline else f"8 calendar days after {input_date}" ) action = ( f"Enter withdrawal in portal for account {account} for {amount}. " f"Schedule 8 calendar days after input date {input_date}." ) elif section == "Deposits" or (row.get("Type") or "").lower() == "deposit": response_time = "As soon as practical after email receipt" action = ( f"Message dutytrader in Teams: contribution/deposit for account {account} " f"({amount}) approved to invest in model." ) else: response_time = "ASAP" action = ( f"Halt account and advise of pending trades on {account}. " f"Immediate cash movement: {amount}." ) return { "email_id": row.get("email_id"), "sent_time": row.get("fw_received_time"), "response_time_needed": response_time, "account": account, "action": action, "source_section": section, } def to_excel(dfs): buf = BytesIO() with pd.ExcelWriter(buf, engine="openpyxl") as w: for name, df in dfs.items(): if df is not None and not df.empty: df.to_excel(w, sheet_name=name, index=False) return buf.getvalue() # ── UI ───────────────────────────────────────────────────────────────────────── st.markdown("## 📬 Raymond James Email Processor") st.markdown("Pulls forwarded RJ emails, extracts cash movement tables, and generates action notices.") st.divider() # Step 1 — Sign in st.markdown('