Spaces:
Sleeping
Sleeping
| import os | |
| import json | |
| import urllib.parse | |
| import secrets | |
| import requests | |
| import streamlit as st | |
| import pandas as pd | |
| from bs4 import BeautifulSoup | |
| from io import BytesIO, StringIO | |
| import re | |
| from datetime import datetime, timedelta | |
| # ββ Page config ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| st.set_page_config( | |
| page_title="RJ Email Processor", | |
| page_icon="π¬", | |
| layout="wide", | |
| ) | |
| st.markdown(""" | |
| <style> | |
| @import url('https://fonts.googleapis.com/css2?family=DM+Serif+Display&family=DM+Sans:wght@400;500;600&display=swap'); | |
| html, body, [class*="css"] { font-family: 'DM Sans', sans-serif; } | |
| h1, h2, h3 { font-family: 'DM Serif Display', serif; } | |
| .block-container { padding-top: 2rem; max-width: 960px; } | |
| .status-card { | |
| background: white; | |
| border: 1px solid #e5e0d8; | |
| border-radius: 12px; | |
| padding: 1.5rem 2rem; | |
| margin-bottom: 1.2rem; | |
| box-shadow: 0 1px 4px rgba(0,0,0,0.05); | |
| } | |
| .step-label { | |
| font-size: 0.72rem; font-weight: 600; | |
| letter-spacing: 0.1em; text-transform: uppercase; | |
| color: #9a8f82; margin-bottom: 0.25rem; | |
| } | |
| .step-title { font-size: 1.1rem; font-weight: 600; color: #1a1612; } | |
| .badge-success { | |
| background: #d4edda; color: #155724; | |
| padding: 2px 10px; border-radius: 20px; | |
| font-size: 0.78rem; font-weight: 600; | |
| } | |
| .badge-pending { | |
| background: #fff3cd; color: #856404; | |
| padding: 2px 10px; border-radius: 20px; | |
| font-size: 0.78rem; font-weight: 600; | |
| } | |
| .stButton > button { | |
| background: #1a1612 !important; color: white !important; | |
| border: none !important; border-radius: 8px !important; | |
| font-family: 'DM Sans', sans-serif !important; | |
| font-weight: 600 !important; | |
| padding: 0.55rem 1.4rem !important; | |
| font-size: 0.9rem !important; | |
| } | |
| .stButton > button:hover { background: #3d3530 !important; } | |
| .login-btn a { | |
| display: inline-block; | |
| background: #0078d4; | |
| color: white !important; | |
| text-decoration: none; | |
| padding: 0.55rem 1.6rem; | |
| border-radius: 8px; | |
| font-weight: 600; | |
| font-size: 0.95rem; | |
| font-family: 'DM Sans', sans-serif; | |
| } | |
| .login-btn a:hover { background: #106ebe; } | |
| </style> | |
| """, unsafe_allow_html=True) | |
| # ββ Config (from HF Secrets) βββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| CLIENT_ID = os.environ.get("AZURE_CLIENT_ID", "bfcbb298-4cc1-496e-9d9b-ff8c2d967a3a") | |
| CLIENT_SECRET = os.environ.get("AZURE_CLIENT_SECRET", "") # set in HF Secrets | |
| TENANT_ID = os.environ.get("AZURE_TENANT_ID", "5dac2bf2-8842-4788-ae07-33fb103b55d6") | |
| REDIRECT_URI = os.environ.get("REDIRECT_URI", "") # e.g. https://yourspace.hf.space/ | |
| AUTHORITY = f"https://login.microsoftonline.com/{TENANT_ID}" | |
| AUTH_ENDPOINT = f"{AUTHORITY}/oauth2/v2.0/authorize" | |
| TOKEN_ENDPOINT= f"{AUTHORITY}/oauth2/v2.0/token" | |
| SCOPES = "Mail.Read offline_access" | |
| DEFAULT_SENDER = "AMSTradingAdmin@RaymondJames.com" | |
| DEFAULT_TARGET = "dutytrader@newfrontieradvisors.com" | |
| # ββ Session defaults βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| for k, v in { | |
| "token": None, | |
| "oauth_state": None, | |
| "messages": [], | |
| "messages_df": None, | |
| "target_emails": [], | |
| "target_emails_df": None, | |
| "rj_emails": [], | |
| "cash_df": None, | |
| "withdrawals_df": None, | |
| "deposits_df": None, | |
| "notice_df": None, | |
| }.items(): | |
| if k not in st.session_state: | |
| st.session_state[k] = v | |
| # ββ OAuth helpers ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| def build_auth_url(): | |
| state = secrets.token_urlsafe(16) | |
| st.session_state.oauth_state = state | |
| params = { | |
| "client_id": CLIENT_ID, | |
| "response_type": "code", | |
| "redirect_uri": REDIRECT_URI, | |
| "response_mode": "query", | |
| "scope": SCOPES, | |
| "state": state, | |
| } | |
| return AUTH_ENDPOINT + "?" + urllib.parse.urlencode(params) | |
| def exchange_code_for_token(code: str) -> str: | |
| resp = requests.post(TOKEN_ENDPOINT, data={ | |
| "client_id": CLIENT_ID, | |
| "client_secret": CLIENT_SECRET, | |
| "code": code, | |
| "redirect_uri": REDIRECT_URI, | |
| "grant_type": "authorization_code", | |
| }, timeout=30) | |
| resp.raise_for_status() | |
| return resp.json()["access_token"] | |
| # ββ Check for OAuth callback (code in URL query params) βββββββββββββββββββββββ | |
| query_params = st.query_params | |
| if not st.session_state.token and "code" in query_params: | |
| code = query_params["code"] | |
| state = query_params.get("state", "") | |
| if state == st.session_state.oauth_state or not st.session_state.oauth_state: | |
| try: | |
| with st.spinner("Completing sign-in..."): | |
| st.session_state.token = exchange_code_for_token(code) | |
| # Clear the code from the URL | |
| st.query_params.clear() | |
| st.rerun() | |
| except Exception as e: | |
| st.error(f"Sign-in failed: {e}") | |
| # ββ Email helpers ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| def get_emails(token, top=50): | |
| url = "https://graph.microsoft.com/v1.0/me/mailFolders/inbox/messages" | |
| headers = { | |
| "Authorization": f"Bearer {token}", | |
| "Prefer": 'outlook.body-content-type="html"', | |
| } | |
| params = { | |
| "$top": top, | |
| "$select": "id,subject,from,toRecipients,ccRecipients,receivedDateTime,body", | |
| "$orderby": "receivedDateTime desc", | |
| } | |
| r = requests.get(url, headers=headers, params=params, timeout=60) | |
| r.raise_for_status() | |
| return r.json().get("value", []) | |
| def extract_info(msg): | |
| def addrs(lst): | |
| return [(r.get("emailAddress") or {}).get("address") for r in (lst or []) | |
| if (r.get("emailAddress") or {}).get("address")] | |
| body_html = (msg.get("body") or {}).get("content", "") | |
| return { | |
| "id": msg.get("id"), | |
| "subject": msg.get("subject"), | |
| "from": ((msg.get("from") or {}).get("emailAddress") or {}).get("address"), | |
| "to": addrs(msg.get("toRecipients")), | |
| "cc": addrs(msg.get("ccRecipients")), | |
| "received_time": msg.get("receivedDateTime"), | |
| "body_html": body_html, | |
| } | |
| def html_to_text(html): | |
| return BeautifulSoup(html, "html.parser").get_text("\n", strip=True) if html else "" | |
| def reply_marker_html(body_html): | |
| if not body_html: | |
| return "" | |
| soup = BeautifulSoup(body_html, "html.parser") | |
| marker = soup.find(id="divRplyFwdMsg") | |
| if not marker: | |
| return "" | |
| return "".join(str(n) for n in marker.next_siblings).strip() | |
| def fwd_headers(body_html): | |
| text = html_to_text(body_html) | |
| def grab(label): | |
| m = re.search(rf"{label}:\s*(.*)", text) | |
| return m.group(1).strip() if m else "" | |
| return {k: grab(v) for k, v in { | |
| "forwarded_from": "From", | |
| "forwarded_sent_time": "Sent", | |
| "forwarded_to": "To", | |
| "forwarded_cc": "Cc", | |
| "forwarded_subject": "Subject", | |
| }.items()} | |
| def normalize_email(value): | |
| if not value: | |
| return "" | |
| match = re.search(r'[\w.+-]+@[\w.-]+\.\w+', str(value)) | |
| return match.group(0).lower() if match else str(value).strip().lower() | |
| def split_header_addresses(value): | |
| if not value: | |
| return [] | |
| matches = re.findall(r'[\w.+-]+@[\w.-]+\.\w+', str(value)) | |
| if matches: | |
| return [m.lower() for m in matches] | |
| cleaned = str(value).strip().lower() | |
| return [cleaned] if cleaned else [] | |
| def matches_email_filters(info, sender_filter, target_filter): | |
| sender_filter = normalize_email(sender_filter) | |
| target_filter = normalize_email(target_filter) | |
| forwarded = fwd_headers(info.get("body_html", "")) | |
| body_text = html_to_text(info.get("body_html", "")).lower() | |
| body_html = (info.get("body_html", "") or "").lower() | |
| outer_from = normalize_email(info.get("from")) | |
| outer_recipients = [normalize_email(a) for a in info.get("to", []) + info.get("cc", [])] | |
| forwarded_from = split_header_addresses(forwarded.get("forwarded_from", "")) | |
| forwarded_recipients = ( | |
| split_header_addresses(forwarded.get("forwarded_to", "")) + | |
| split_header_addresses(forwarded.get("forwarded_cc", "")) | |
| ) | |
| sender_match = ( | |
| sender_filter in [outer_from] | |
| or sender_filter in forwarded_from | |
| or sender_filter in body_text | |
| or sender_filter in body_html | |
| ) | |
| target_match = ( | |
| target_filter in outer_recipients | |
| or target_filter in forwarded_recipients | |
| or target_filter in body_text | |
| or target_filter in body_html | |
| ) | |
| return sender_match and target_match, forwarded | |
| def is_rj(info): | |
| subj = (info.get("subject") or "").lower() | |
| html = info.get("body_html") or "" | |
| text = html_to_text(html).lower() | |
| if "rj emails--overview" in subj: | |
| return False | |
| has_fwd = 'id="divRplyFwdMsg"' in html or ("from:" in text and "sent:" in text) | |
| if not has_fwd: | |
| return False | |
| return any([ | |
| "raymond james" in subj, "raymond james" in text, | |
| "amstradingadmin@raymondjames.com" in text, | |
| "ams managed operations" in subj, "ams managed operations" in text, | |
| "direct asset transfers" in subj, | |
| "distribution & cash balance check" in subj, | |
| ]) | |
| def build_rj(messages, sender_filter, target_filter): | |
| rows = [] | |
| for i, msg in enumerate(messages, 1): | |
| info = extract_info(msg) | |
| matches_target, fwd = matches_email_filters(info, sender_filter, target_filter) | |
| if not matches_target: | |
| continue | |
| if not is_rj(info): | |
| continue | |
| rows.append({ | |
| "email_id": info.get("id") or f"msg_{i}", | |
| "fw_subject": info.get("subject"), | |
| "fw_from": info.get("from"), | |
| "fw_to": ", ".join(info.get("to", [])), | |
| "fw_cc": ", ".join(info.get("cc", [])), | |
| "fw_received_time": info.get("received_time"), | |
| "fw_body_html": info.get("body_html", ""), | |
| **{k: fwd.get(k, "") for k in fwd}, | |
| "original_body_html": reply_marker_html(info.get("body_html", "")), | |
| }) | |
| return rows | |
| def build_target_emails(messages, sender_filter, target_filter): | |
| rows = [] | |
| for i, msg in enumerate(messages, 1): | |
| info = extract_info(msg) | |
| matches_target, forwarded = matches_email_filters(info, sender_filter, target_filter) | |
| if not matches_target: | |
| continue | |
| rows.append({ | |
| "email_id": info.get("id") or f"msg_{i}", | |
| "subject": info.get("subject"), | |
| "from": info.get("from"), | |
| "to": ", ".join(info.get("to", [])), | |
| "cc": ", ".join(info.get("cc", [])), | |
| "received_time": info.get("received_time"), | |
| "is_rj_forward": is_rj(info), | |
| "forwarded_from": forwarded.get("forwarded_from", ""), | |
| "forwarded_to": forwarded.get("forwarded_to", ""), | |
| "forwarded_cc": forwarded.get("forwarded_cc", ""), | |
| "forwarded_subject": forwarded.get("forwarded_subject", ""), | |
| "body_preview": html_to_text(info.get("body_html", ""))[:300], | |
| }) | |
| return rows | |
| def build_fetched_emails(messages): | |
| rows = [] | |
| for i, msg in enumerate(messages, 1): | |
| info = extract_info(msg) | |
| forwarded = fwd_headers(info.get("body_html", "")) | |
| rows.append({ | |
| "email_id": info.get("id") or f"msg_{i}", | |
| "subject": info.get("subject"), | |
| "from": info.get("from"), | |
| "to": ", ".join(info.get("to", [])), | |
| "cc": ", ".join(info.get("cc", [])), | |
| "received_time": info.get("received_time"), | |
| "is_rj_forward": is_rj(info), | |
| "forwarded_from": forwarded.get("forwarded_from", ""), | |
| "forwarded_to": forwarded.get("forwarded_to", ""), | |
| "forwarded_cc": forwarded.get("forwarded_cc", ""), | |
| "forwarded_subject": forwarded.get("forwarded_subject", ""), | |
| "body_preview": html_to_text(info.get("body_html", ""))[:300], | |
| }) | |
| return rows | |
| def extract_tables(body_html): | |
| if not body_html: | |
| return [] | |
| soup = BeautifulSoup(body_html, "html.parser") | |
| out = [] | |
| for node in soup.find_all(string=True): | |
| label = node.strip() | |
| if label not in {"Cash Only Transactions", "Withdrawals", "Deposits"}: | |
| continue | |
| table = node.find_next("table") | |
| if not table: | |
| continue | |
| try: | |
| dfs = pd.read_html(StringIO(str(table))) | |
| except ValueError: | |
| continue | |
| for df in dfs: | |
| df.columns = [str(c).strip() for c in df.columns] | |
| df = df.dropna(how="all").reset_index(drop=True) | |
| out.append((label, df)) | |
| return out | |
| def parse_date(v): | |
| if not v: | |
| return None | |
| for fmt in ("%m/%d/%Y", "%m/%d/%y", "%A, %B %d, %Y %I:%M %p"): | |
| try: | |
| return datetime.strptime(v.strip(), fmt) | |
| except ValueError: | |
| pass | |
| return None | |
| def build_notice(row): | |
| section = row.get("source_section") | |
| account = row.get("Account") or row.get("customer_account") | |
| amount = row.get("Cash Amount") or row.get("Amount") | |
| if section == "Withdrawals": | |
| input_date = row.get("Input date") | |
| deadline = parse_date(input_date) | |
| response_time = ( | |
| (deadline + timedelta(days=8)).strftime("%Y-%m-%d") | |
| if deadline else f"8 calendar days after {input_date}" | |
| ) | |
| action = ( | |
| f"Enter withdrawal in portal for account {account} for {amount}. " | |
| f"Schedule 8 calendar days after input date {input_date}." | |
| ) | |
| elif section == "Deposits" or (row.get("Type") or "").lower() == "deposit": | |
| response_time = "As soon as practical after email receipt" | |
| action = ( | |
| f"Message dutytrader in Teams: contribution/deposit for account {account} " | |
| f"({amount}) approved to invest in model." | |
| ) | |
| else: | |
| response_time = "ASAP" | |
| action = ( | |
| f"Halt account and advise of pending trades on {account}. " | |
| f"Immediate cash movement: {amount}." | |
| ) | |
| return { | |
| "email_id": row.get("email_id"), | |
| "sent_time": row.get("fw_received_time"), | |
| "response_time_needed": response_time, | |
| "account": account, | |
| "action": action, | |
| "source_section": section, | |
| } | |
| def to_excel(dfs): | |
| buf = BytesIO() | |
| with pd.ExcelWriter(buf, engine="openpyxl") as w: | |
| for name, df in dfs.items(): | |
| if df is not None and not df.empty: | |
| df.to_excel(w, sheet_name=name, index=False) | |
| return buf.getvalue() | |
| # ββ UI βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| st.markdown("## π¬ Raymond James Email Processor") | |
| st.markdown("Pulls forwarded RJ emails, extracts cash movement tables, and generates action notices.") | |
| st.divider() | |
| # Step 1 β Sign in | |
| st.markdown('<div class="step-label">Step 1</div>', unsafe_allow_html=True) | |
| col1, col2 = st.columns([5, 1]) | |
| with col1: | |
| st.markdown('<div class="step-title">Sign in to Microsoft</div>', unsafe_allow_html=True) | |
| with col2: | |
| if st.session_state.token: | |
| st.markdown('<span class="badge-success">β Signed in</span>', unsafe_allow_html=True) | |
| else: | |
| st.markdown('<span class="badge-pending">Not signed in</span>', unsafe_allow_html=True) | |
| if not st.session_state.token: | |
| auth_url = build_auth_url() | |
| st.markdown( | |
| f'<div class="login-btn"><a href="{auth_url}" target="_self">π Sign in with Microsoft</a></div>', | |
| unsafe_allow_html=True, | |
| ) | |
| st.caption("You'll be redirected to Microsoft's login page and back automatically.") | |
| else: | |
| st.markdown("You are signed in. β") | |
| if st.button("Sign out"): | |
| st.session_state.token = None | |
| st.rerun() | |
| # Step 2 β Config | |
| st.markdown('<div class="step-label">Step 2</div>', unsafe_allow_html=True) | |
| st.markdown('<div class="step-title">Configure</div>', unsafe_allow_html=True) | |
| col_a, col_b, col_c = st.columns(3) | |
| with col_a: | |
| sender_email = st.text_input("Sender email (who forwards RJ emails)", value=DEFAULT_SENDER) | |
| with col_b: | |
| target_email = st.text_input("Recipient email to filter for", value=DEFAULT_TARGET) | |
| with col_c: | |
| top_n = st.number_input("Max emails to fetch", min_value=5, max_value=200, value=100, step=5) | |
| # Step 3 β Run | |
| st.markdown('<div class="step-label">Step 3</div>', unsafe_allow_html=True) | |
| st.markdown('<div class="step-title">Fetch & Process</div>', unsafe_allow_html=True) | |
| if st.button("βΆ Run", disabled=not st.session_state.token): | |
| with st.spinner("Fetching emails from Outlook..."): | |
| try: | |
| raw = get_emails(st.session_state.token, int(top_n)) | |
| st.session_state.messages = raw | |
| st.session_state.messages_df = pd.DataFrame(build_fetched_emails(raw)) if raw else pd.DataFrame() | |
| except requests.HTTPError as e: | |
| if e.response.status_code == 401: | |
| st.error("Session expired. Please sign in again.") | |
| st.session_state.token = None | |
| else: | |
| st.error(f"API error: {e}") | |
| st.stop() | |
| with st.spinner("Collecting all emails from sender A to recipient B..."): | |
| st.session_state.target_emails = build_target_emails(raw, sender_email, target_email) | |
| st.session_state.target_emails_df = ( | |
| pd.DataFrame(st.session_state.target_emails) | |
| if st.session_state.target_emails else pd.DataFrame() | |
| ) | |
| with st.spinner("Filtering Raymond James emails..."): | |
| st.session_state.rj_emails = build_rj(raw, sender_email, target_email) | |
| with st.spinner("Extracting cash tables..."): | |
| cash_rows, wd_rows, dep_rows = [], [], [] | |
| for email in st.session_state.rj_emails: | |
| for section, df in extract_tables(email.get("original_body_html", "")): | |
| rows = df.to_dict("records") | |
| for r in rows: | |
| r.update({ | |
| "email_id": email.get("email_id"), | |
| "fw_subject": email.get("fw_subject"), | |
| "fw_received_time": email.get("fw_received_time"), | |
| "original_subject": email.get("forwarded_subject", ""), | |
| "original_from": email.get("forwarded_from", ""), | |
| "original_sent_time": email.get("forwarded_sent_time", ""), | |
| "source_section": section, | |
| }) | |
| if section == "Cash Only Transactions": cash_rows.extend(rows) | |
| elif section == "Withdrawals": wd_rows.extend(rows) | |
| elif section == "Deposits": dep_rows.extend(rows) | |
| st.session_state.cash_df = pd.DataFrame(cash_rows) if cash_rows else pd.DataFrame() | |
| st.session_state.withdrawals_df = pd.DataFrame(wd_rows) if wd_rows else pd.DataFrame() | |
| st.session_state.deposits_df = pd.DataFrame(dep_rows) if dep_rows else pd.DataFrame() | |
| with st.spinner("Building action notices..."): | |
| all_rows = cash_rows + wd_rows + dep_rows | |
| st.session_state.notice_df = ( | |
| pd.DataFrame([build_notice(r) for r in all_rows]) | |
| if all_rows else pd.DataFrame() | |
| ) | |
| st.success( | |
| f"Done! Fetched **{len(raw)}** recent inbox emails and found " | |
| f"**{len(st.session_state.rj_emails)}** RJ forwarded emails." | |
| ) | |
| # ββ Results ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| st.markdown('<div class="step-label">Step 4</div>', unsafe_allow_html=True) | |
| st.markdown('<div class="step-title">Debug: All Fetched Inbox Emails</div>', unsafe_allow_html=True) | |
| fetched_df = st.session_state.messages_df | |
| if fetched_df is not None: | |
| if not fetched_df.empty: | |
| st.dataframe(fetched_df, width="stretch", hide_index=True) | |
| elif st.session_state.messages == []: | |
| st.caption("Run the fetch step to inspect all fetched inbox emails.") | |
| st.markdown('<div class="step-label">Step 5</div>', unsafe_allow_html=True) | |
| st.markdown('<div class="step-title">Debug: All Emails from A to B</div>', unsafe_allow_html=True) | |
| debug_df = st.session_state.target_emails_df | |
| if debug_df is not None: | |
| if not debug_df.empty: | |
| st.dataframe(debug_df, width="stretch", hide_index=True) | |
| elif st.session_state.messages: | |
| st.info("Fetched emails from the inbox, but none matched the sender + recipient filter.") | |
| else: | |
| st.caption("Run the fetch step to inspect all emails from sender A to recipient B.") | |
| if st.session_state.rj_emails: | |
| st.divider() | |
| st.markdown("### Results") | |
| tab1, tab2, tab3, tab4 = st.tabs([ | |
| "π Action Notices", "π΅ Cash Transactions", "β¬ Withdrawals", "β¬ Deposits" | |
| ]) | |
| for tab, key, label in [ | |
| (tab1, "notice_df", "No action notices generated."), | |
| (tab2, "cash_df", "No cash transactions found."), | |
| (tab3, "withdrawals_df", "No withdrawals found."), | |
| (tab4, "deposits_df", "No deposits found."), | |
| ]: | |
| with tab: | |
| df = st.session_state[key] | |
| if df is not None and not df.empty: | |
| # Hide HTML columns | |
| display_df = df.drop(columns=[c for c in df.columns if "html" in c.lower()], errors="ignore") | |
| st.dataframe(display_df, width="stretch", hide_index=True) | |
| else: | |
| st.info(label) | |
| st.divider() | |
| excel_bytes = to_excel({ | |
| "Action Notices": st.session_state.notice_df, | |
| "Cash Transactions": st.session_state.cash_df, | |
| "Withdrawals": st.session_state.withdrawals_df, | |
| "Deposits": st.session_state.deposits_df, | |
| }) | |
| st.download_button( | |
| label="β¬ Download Excel Report", | |
| data=excel_bytes, | |
| file_name=f"rj_emails_{datetime.now().strftime('%Y%m%d_%H%M')}.xlsx", | |
| mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", | |
| ) | |