File size: 30,022 Bytes
e59832f
 
 
 
c2bc9c9
e59832f
c2bc9c9
 
 
e59832f
c2bc9c9
 
 
 
 
 
 
 
 
 
 
 
16cc817
 
c2bc9c9
 
 
 
 
 
 
 
 
 
16cc817
 
 
c2bc9c9
16cc817
c2bc9c9
 
 
 
 
 
 
 
 
 
 
16cc817
 
c2bc9c9
 
 
 
 
16cc817
 
 
 
 
 
 
 
c2bc9c9
16cc817
 
c2bc9c9
16cc817
c2bc9c9
 
 
dc56dcd
 
 
 
 
 
 
 
 
0a7560f
dc56dcd
e8eb98d
 
2fb6dc8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dc56dcd
 
 
 
 
38dc8fc
ddb5dbb
38dc8fc
 
dc56dcd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d978147
 
 
 
 
 
 
 
dc56dcd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4dc485a
dc56dcd
 
 
 
 
 
 
 
4dc485a
dc56dcd
 
 
 
 
 
0a7560f
 
 
 
 
 
d978147
 
 
 
 
 
 
 
0a7560f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d978147
 
 
 
 
0a7560f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2fb6dc8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dc56dcd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4dc485a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
722ceda
dc56dcd
 
 
722ceda
 
dc56dcd
 
 
 
 
 
 
 
 
 
 
 
 
 
4dc485a
dc56dcd
 
 
4dc485a
 
dc56dcd
722ceda
dc56dcd
722ceda
dc56dcd
 
 
 
 
 
 
 
 
722ceda
 
dc56dcd
 
 
 
38dc8fc
 
 
 
 
 
 
 
 
 
 
 
 
 
722ceda
38dc8fc
 
 
 
 
 
 
 
 
ddb5dbb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dc56dcd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8bdb9e6
 
 
 
dc56dcd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c2bc9c9
e59832f
c2bc9c9
 
 
 
 
 
 
 
 
e59832f
 
 
 
 
 
c2bc9c9
 
e59832f
 
 
c2bc9c9
e59832f
 
c2bc9c9
 
0a7560f
 
 
 
 
c2bc9c9
 
e59832f
c2bc9c9
e59832f
c2bc9c9
2fb6dc8
 
 
 
 
 
 
 
c2bc9c9
2fb6dc8
0a7560f
2fb6dc8
 
 
 
 
0a7560f
2fb6dc8
 
 
 
0a7560f
c2bc9c9
 
e59832f
c2bc9c9
 
 
 
0a7560f
2fb6dc8
0a7560f
2fb6dc8
0a7560f
2fb6dc8
0a7560f
 
 
38dc8fc
ddb5dbb
0a7560f
 
 
d978147
 
 
e59832f
 
 
 
 
 
c2bc9c9
 
38dc8fc
 
 
 
 
 
 
e59832f
4dc485a
c2bc9c9
e59832f
c2bc9c9
 
722ceda
c2bc9c9
e59832f
 
c2bc9c9
 
 
e59832f
 
 
c2bc9c9
 
8bdb9e6
 
 
 
 
 
 
 
 
 
 
 
 
c2bc9c9
e59832f
 
 
c2bc9c9
 
 
 
 
 
 
 
0a7560f
e8eb98d
0a7560f
e8eb98d
 
e59832f
c2bc9c9
 
38dc8fc
0a7560f
ddb5dbb
 
 
 
 
 
0a7560f
ddb5dbb
 
38dc8fc
 
 
 
 
 
 
0a7560f
38dc8fc
 
 
c2bc9c9
 
 
 
e59832f
c2bc9c9
e59832f
 
 
 
 
 
 
 
 
 
 
16cc817
e59832f
 
c2bc9c9
 
 
e59832f
c2bc9c9
e59832f
 
c2bc9c9
 
 
 
 
 
 
0a7560f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
import os
import urllib.parse
import secrets

import requests
import streamlit as st
import pandas as pd
from bs4 import BeautifulSoup
from io import BytesIO, StringIO
import re
from datetime import datetime, timedelta

# ── Page config ────────────────────────────────────────────────────────────────
st.set_page_config(
    page_title="RJ Email Processor",
    page_icon="πŸ“¬",
    layout="wide",
)

st.markdown("""
<style>
    @import url('https://fonts.googleapis.com/css2?family=DM+Serif+Display&family=DM+Sans:wght@400;500;600&display=swap');
    html, body, [class*="css"] { font-family: 'DM Sans', sans-serif; }
    h1, h2, h3 { font-family: 'DM Serif Display', serif; }
    .block-container { padding-top: 2rem; max-width: 960px; }
    .status-card {
        background: white;
        border: 1px solid #e5e0d8;
        border-radius: 12px;
        padding: 1.5rem 2rem;
        margin-bottom: 1.2rem;
        box-shadow: 0 1px 4px rgba(0,0,0,0.05);
    }
    .step-label {
        font-size: 0.72rem; font-weight: 600;
        letter-spacing: 0.1em; text-transform: uppercase;
        color: #9a8f82; margin-bottom: 0.25rem;
    }
    .step-title { font-size: 1.1rem; font-weight: 600; color: #1a1612; }
    .badge-success {
        background: #d4edda; color: #155724;
        padding: 2px 10px; border-radius: 20px;
        font-size: 0.78rem; font-weight: 600;
    }
    .badge-pending {
        background: #fff3cd; color: #856404;
        padding: 2px 10px; border-radius: 20px;
        font-size: 0.78rem; font-weight: 600;
    }
    .stButton > button {
        background: #1a1612 !important; color: white !important;
        border: none !important; border-radius: 8px !important;
        font-family: 'DM Sans', sans-serif !important;
        font-weight: 600 !important;
        padding: 0.55rem 1.4rem !important;
        font-size: 0.9rem !important;
    }
    .stButton > button:hover { background: #3d3530 !important; }
    .login-btn a {
        display: inline-block;
        background: #0078d4;
        color: white !important;
        text-decoration: none;
        padding: 0.55rem 1.6rem;
        border-radius: 8px;
        font-weight: 600;
        font-size: 0.95rem;
        font-family: 'DM Sans', sans-serif;
    }
    .login-btn a:hover { background: #106ebe; }
</style>
""", unsafe_allow_html=True)

# ── Config (from HF Secrets) ───────────────────────────────────────────────────
CLIENT_ID     = os.environ.get("AZURE_CLIENT_ID",     "bfcbb298-4cc1-496e-9d9b-ff8c2d967a3a")
CLIENT_SECRET = os.environ.get("AZURE_CLIENT_SECRET", "")   # set in HF Secrets
TENANT_ID     = os.environ.get("AZURE_TENANT_ID",     "5dac2bf2-8842-4788-ae07-33fb103b55d6")
REDIRECT_URI  = os.environ.get("REDIRECT_URI",        "")   # e.g. https://yourspace.hf.space/

AUTHORITY     = f"https://login.microsoftonline.com/{TENANT_ID}"
AUTH_ENDPOINT = f"{AUTHORITY}/oauth2/v2.0/authorize"
TOKEN_ENDPOINT= f"{AUTHORITY}/oauth2/v2.0/token"
SCOPES        = "Mail.Read Group-Conversation.Read.All offline_access"

DEFAULT_SENDER = "AMSTradingAdmin@RaymondJames.com"
DEFAULT_TARGET = "dutytrader@newfrontieradvisors.com"
GROUP_FETCH_LIMIT = 100


def configured_group_ids():
    raw_values = [
        os.environ.get("GROUP_ID_1", ""),
        os.environ.get("GROUP_ID_2", ""),
        os.environ.get("GROUP_ID", ""),
        os.environ.get("GROUP_IDS", ""),
    ]
    group_ids = []
    for raw in raw_values:
        for value in re.split(r"[\s,;]+", raw.strip()):
            if value and value not in group_ids:
                group_ids.append(value)
    return group_ids


DEFAULT_GROUP_IDS = configured_group_ids()

# ── Session defaults ───────────────────────────────────────────────────────────
for k, v in {
    "token": None,
    "oauth_state": None,
    "messages": [],
    "messages_df": None,
    "target_emails": [],
    "target_emails_df": None,
    "rj_emails": [],
    "cash_df": None,
    "withdrawals_df": None,
    "deposits_df": None,
    "notice_df": None,
}.items():
    if k not in st.session_state:
        st.session_state[k] = v

# ── OAuth helpers ──────────────────────────────────────────────────────────────

def build_auth_url():
    state = secrets.token_urlsafe(16)
    st.session_state.oauth_state = state
    params = {
        "client_id": CLIENT_ID,
        "response_type": "code",
        "redirect_uri": REDIRECT_URI,
        "response_mode": "query",
        "scope": SCOPES,
        "state": state,
    }
    return AUTH_ENDPOINT + "?" + urllib.parse.urlencode(params)


def exchange_code_for_token(code: str) -> str:
    resp = requests.post(TOKEN_ENDPOINT, data={
        "client_id":     CLIENT_ID,
        "client_secret": CLIENT_SECRET,
        "code":          code,
        "redirect_uri":  REDIRECT_URI,
        "grant_type":    "authorization_code",
    }, timeout=30)
    try:
        payload = resp.json()
    except ValueError:
        payload = {"error": "token_request_failed", "error_description": resp.text}
    if not resp.ok:
        detail = payload.get("error_description") or payload.get("error") or resp.text
        raise RuntimeError(f"Token exchange failed: {detail}")
    return payload["access_token"]


# ── Check for OAuth callback (code in URL query params) ───────────────────────
query_params = st.query_params
if not st.session_state.token and "code" in query_params:
    code  = query_params["code"]
    state = query_params.get("state", "")
    if state == st.session_state.oauth_state or not st.session_state.oauth_state:
        try:
            with st.spinner("Completing sign-in..."):
                st.session_state.token = exchange_code_for_token(code)
            # Clear the code from the URL
            st.query_params.clear()
            st.rerun()
        except Exception as e:
            st.error(f"Sign-in failed: {e}")

# ── Email helpers ──────────────────────────────────────────────────────────────

def get_emails(token, top=50):
    url = "https://graph.microsoft.com/v1.0/me/mailFolders/inbox/messages"
    headers = {
        "Authorization": f"Bearer {token}",
        "Prefer": 'outlook.body-content-type="html"',
    }
    params = {
        "$top": top,
        "$select": "id,subject,from,toRecipients,ccRecipients,receivedDateTime,body",
        "$orderby": "receivedDateTime desc",
    }
    r = requests.get(url, headers=headers, params=params, timeout=60)
    r.raise_for_status()
    return r.json().get("value", [])


def graph_get_json(token, url, params=None):
    headers = {
        "Authorization": f"Bearer {token}",
        "Prefer": 'outlook.body-content-type="html"',
    }
    r = requests.get(url, headers=headers, params=params, timeout=60)
    try:
        payload = r.json()
    except ValueError:
        payload = {"error": {"message": r.text}}
    if not r.ok:
        detail = (payload.get("error") or {}).get("message") or r.text
        raise RuntimeError(f"Graph API error ({r.status_code}): {detail}")
    return payload


def graph_get_all(token, url, params=None, max_items=None):
    items = []
    next_url = url
    next_params = params
    while next_url:
        data = graph_get_json(token, next_url, next_params)
        batch = data.get("value", [])
        items.extend(batch)
        if max_items and len(items) >= max_items:
            return items[:max_items]
        next_url = data.get("@odata.nextLink")
        next_params = None
    return items


def group_post_to_message(post, thread=None, group_email=""):
    email_address = (
        ((post.get("from") or {}).get("emailAddress") or {})
        or ((post.get("sender") or {}).get("emailAddress") or {})
    )
    body = post.get("body") or {}
    topic = (thread or {}).get("topic") or post.get("conversationThreadId") or "Group post"
    return {
        "id": post.get("id"),
        "subject": topic,
        "from": {"emailAddress": email_address},
        "toRecipients": [{"emailAddress": {"address": group_email}}] if group_email else [],
        "ccRecipients": [],
        "receivedDateTime": post.get("receivedDateTime") or (thread or {}).get("lastDeliveredDateTime"),
        "body": {
            "contentType": body.get("contentType", "html"),
            "content": body.get("content", ""),
        },
    }


def get_group_emails(token, group_id, group_email="", top=50):
    group_id = (group_id or "").strip()
    if not group_id:
        raise ValueError("Group ID is required for Microsoft 365 group mode.")
    if group_id.lower() == CLIENT_ID.lower():
        raise ValueError(
            "The Group ID field currently contains the Azure app Client ID. "
            "Paste the Microsoft 365 group Object ID instead."
        )

    threads = graph_get_all(
        token,
        f"https://graph.microsoft.com/v1.0/groups/{group_id}/threads",
        params={
            "$top": min(int(top), 50),
            "$select": "id,topic,lastDeliveredDateTime",
        },
        max_items=int(top),
    )

    messages = []
    for thread in threads:
        posts = graph_get_all(
            token,
            f"https://graph.microsoft.com/v1.0/groups/{group_id}/threads/{thread['id']}/posts",
            params={
                "$select": "id,body,from,sender,receivedDateTime,conversationThreadId",
                "$top": 50,
            },
        )
        for post in posts:
            messages.append(group_post_to_message(post, thread, group_email))

    messages.sort(key=lambda m: m.get("receivedDateTime") or "", reverse=True)
    return messages[:int(top)]


def get_group_emails_from_groups(token, group_ids, group_email="", top_per_group=GROUP_FETCH_LIMIT):
    cleaned_group_ids = []
    for group_id in group_ids:
        group_id = (group_id or "").strip()
        if group_id and group_id not in cleaned_group_ids:
            cleaned_group_ids.append(group_id)

    if not cleaned_group_ids:
        raise ValueError("At least one Microsoft 365 group ID is required for group mode.")

    messages = []
    for group_id in cleaned_group_ids:
        group_messages = get_group_emails(
            token,
            group_id,
            group_email=group_email,
            top=top_per_group,
        )
        for msg in group_messages:
            msg["source_group_id"] = group_id
        messages.extend(group_messages)

    messages.sort(key=lambda m: m.get("receivedDateTime") or "", reverse=True)
    return messages


def extract_info(msg):
    def addrs(lst):
        return [(r.get("emailAddress") or {}).get("address") for r in (lst or [])
                if (r.get("emailAddress") or {}).get("address")]
    body_html = (msg.get("body") or {}).get("content", "")
    return {
        "id": msg.get("id"),
        "subject": msg.get("subject"),
        "from": ((msg.get("from") or {}).get("emailAddress") or {}).get("address"),
        "to": addrs(msg.get("toRecipients")),
        "cc": addrs(msg.get("ccRecipients")),
        "received_time": msg.get("receivedDateTime"),
        "body_html": body_html,
    }


def html_to_text(html):
    return BeautifulSoup(html, "html.parser").get_text("\n", strip=True) if html else ""


def reply_marker_html(body_html):
    if not body_html:
        return ""
    soup = BeautifulSoup(body_html, "html.parser")
    marker = soup.find(id="divRplyFwdMsg")
    if not marker:
        return ""
    return "".join(str(n) for n in marker.next_siblings).strip()


def fwd_headers(body_html):
    text = html_to_text(body_html)
    def grab(label):
        m = re.search(rf"{label}:\s*(.*)", text)
        return m.group(1).strip() if m else ""
    return {k: grab(v) for k, v in {
        "forwarded_from": "From",
        "forwarded_sent_time": "Sent",
        "forwarded_to": "To",
        "forwarded_cc": "Cc",
        "forwarded_subject": "Subject",
    }.items()}


def normalize_email(value):
    if not value:
        return ""
    match = re.search(r'[\w.+-]+@[\w.-]+\.\w+', str(value))
    return match.group(0).lower() if match else str(value).strip().lower()


def split_header_addresses(value):
    if not value:
        return []
    matches = re.findall(r'[\w.+-]+@[\w.-]+\.\w+', str(value))
    if matches:
        return [m.lower() for m in matches]
    cleaned = str(value).strip().lower()
    return [cleaned] if cleaned else []


def matches_email_filters(info, sender_filter, target_filter):
    sender_filter = normalize_email(sender_filter)
    target_filter = normalize_email(target_filter)

    forwarded = fwd_headers(info.get("body_html", ""))
    body_text = html_to_text(info.get("body_html", "")).lower()
    body_html = (info.get("body_html", "") or "").lower()

    outer_from = normalize_email(info.get("from"))
    outer_recipients = [normalize_email(a) for a in info.get("to", []) + info.get("cc", [])]

    forwarded_from = split_header_addresses(forwarded.get("forwarded_from", ""))
    forwarded_recipients = (
        split_header_addresses(forwarded.get("forwarded_to", "")) +
        split_header_addresses(forwarded.get("forwarded_cc", ""))
    )

    sender_match = (
        sender_filter in [outer_from]
        or sender_filter in forwarded_from
        or sender_filter in body_text
        or sender_filter in body_html
    )
    target_match = (
        target_filter in outer_recipients
        or target_filter in forwarded_recipients
        or target_filter in body_text
        or target_filter in body_html
    )
    return sender_match and target_match, forwarded


def is_rj(info, sender_filter=DEFAULT_SENDER):
    subj = (info.get("subject") or "").lower()
    html = info.get("body_html") or ""
    text = html_to_text(html).lower()
    if normalize_email(info.get("from")) == normalize_email(sender_filter):
        return True
    if "rj emails--overview" in subj:
        return False
    has_fwd = 'id="divRplyFwdMsg"' in html or ("from:" in text and "sent:" in text)
    if not has_fwd:
        return False
    return any([
        "raymond james" in subj, "raymond james" in text,
        "amstradingadmin@raymondjames.com" in text,
        "ams managed operations" in subj, "ams managed operations" in text,
        "direct asset transfers" in subj,
        "distribution & cash balance check" in subj,
    ])


def build_rj(messages, sender_filter, target_filter):
    rows = []
    for i, msg in enumerate(messages, 1):
        info = extract_info(msg)
        matches_target, fwd = matches_email_filters(info, sender_filter, target_filter)
        if not matches_target:
            continue
        if not is_rj(info, sender_filter):
            continue
        original_body_html = reply_marker_html(info.get("body_html", ""))
        rows.append({
            "email_id": info.get("id") or f"msg_{i}",
            "fw_subject": info.get("subject"),
            "fw_from": info.get("from"),
            "fw_to": ", ".join(info.get("to", [])),
            "fw_cc": ", ".join(info.get("cc", [])),
            "fw_received_time": info.get("received_time"),
            "fw_body_html": info.get("body_html", ""),
            **{k: fwd.get(k, "") for k in fwd},
            "original_body_html": original_body_html,
            "parse_body_html": original_body_html or info.get("body_html", ""),
        })
    return rows


def build_target_emails(messages, sender_filter, target_filter):
    rows = []
    for i, msg in enumerate(messages, 1):
        info = extract_info(msg)
        matches_target, forwarded = matches_email_filters(info, sender_filter, target_filter)
        if not matches_target:
            continue
        rows.append({
            "email_id": info.get("id") or f"msg_{i}",
            "subject": info.get("subject"),
            "from": info.get("from"),
            "to": ", ".join(info.get("to", [])),
            "cc": ", ".join(info.get("cc", [])),
            "received_time": info.get("received_time"),
            "is_rj_forward": is_rj(info, sender_filter),
            "forwarded_from": forwarded.get("forwarded_from", ""),
            "forwarded_to": forwarded.get("forwarded_to", ""),
            "forwarded_cc": forwarded.get("forwarded_cc", ""),
            "forwarded_subject": forwarded.get("forwarded_subject", ""),
            "body_preview": html_to_text(info.get("body_html", ""))[:300],
        })
    return rows


def build_fetched_emails(messages):
    rows = []
    for i, msg in enumerate(messages, 1):
        info = extract_info(msg)
        forwarded = fwd_headers(info.get("body_html", ""))
        rows.append({
            "email_id": info.get("id") or f"msg_{i}",
            "subject": info.get("subject"),
            "from": info.get("from"),
            "to": ", ".join(info.get("to", [])),
            "cc": ", ".join(info.get("cc", [])),
            "received_time": info.get("received_time"),
            "is_rj_forward": is_rj(info),
            "forwarded_from": forwarded.get("forwarded_from", ""),
            "forwarded_to": forwarded.get("forwarded_to", ""),
            "forwarded_cc": forwarded.get("forwarded_cc", ""),
            "forwarded_subject": forwarded.get("forwarded_subject", ""),
            "body_preview": html_to_text(info.get("body_html", ""))[:300],
        })
    return rows


def extract_tables(body_html):
    if not body_html:
        return []
    soup = BeautifulSoup(body_html, "html.parser")
    out = []
    for node in soup.find_all(string=True):
        label = node.strip()
        if label not in {"Cash Only Transactions", "Withdrawals", "Deposits"}:
            continue
        table = node.find_next("table")
        if not table:
            continue
        try:
            dfs = pd.read_html(StringIO(str(table)))
        except ValueError:
            continue
        for df in dfs:
            df.columns = [str(c).strip() for c in df.columns]
            df = df.dropna(how="all").reset_index(drop=True)
            out.append((label, df))
    return out


def cash_movement_type(row):
    return re.sub(r"\s+", " ", str(row.get("Type", ""))).strip().lower()


def parse_date(v):
    if not v:
        return None
    for fmt in ("%m/%d/%Y", "%m/%d/%y", "%A, %B %d, %Y %I:%M %p"):
        try:
            return datetime.strptime(v.strip(), fmt)
        except ValueError:
            pass
    return None


def build_notice(row):
    section = row.get("source_section")
    account = row.get("Account") or row.get("customer_account")
    amount  = row.get("Cash Amount") or row.get("Amount")
    if section == "Withdrawals":
        input_date = row.get("Input date")
        deadline   = parse_date(input_date)
        response_time = (
            (deadline + timedelta(days=8)).strftime("%Y-%m-%d")
            if deadline else f"8 calendar days after {input_date}"
        )
        action = (
            f"Enter withdrawal in portal for account {account} for {amount}. "
            f"Schedule 8 calendar days after input date {input_date}."
        )
    elif section == "Deposits" or (row.get("Type") or "").lower() == "deposit":
        response_time = "As soon as practical after email receipt"
        action = (
            f"Message dutytrader in Teams: contribution/deposit for account {account} "
            f"({amount}) approved to invest in model."
        )
    else:
        response_time = "ASAP"
        action = (
            f"Halt account and advise of pending trades on {account}. "
            f"Immediate cash movement: {amount}."
        )
    return {
        "email_id":            row.get("email_id"),
        "sent_time":           row.get("fw_received_time"),
        "response_time_needed": response_time,
        "account":             account,
        "action":              action,
        "source_section":      section,
    }


def to_excel(dfs):
    buf = BytesIO()
    with pd.ExcelWriter(buf, engine="openpyxl") as w:
        for name, df in dfs.items():
            if df is not None and not df.empty:
                df.to_excel(w, sheet_name=name, index=False)
    return buf.getvalue()


# ── UI ─────────────────────────────────────────────────────────────────────────

st.markdown("## πŸ“¬ Raymond James Email Processor")
st.markdown("Pulls forwarded RJ emails, extracts cash movement tables, and generates action notices.")
st.divider()

# Step 1 β€” Sign in
st.markdown('<div class="step-label">Step 1</div>', unsafe_allow_html=True)
col1, col2 = st.columns([5, 1])
with col1:
    st.markdown('<div class="step-title">Sign in to Microsoft</div>', unsafe_allow_html=True)
with col2:
    if st.session_state.token:
        st.markdown('<span class="badge-success">βœ“ Signed in</span>', unsafe_allow_html=True)
    else:
        st.markdown('<span class="badge-pending">Not signed in</span>', unsafe_allow_html=True)

if not st.session_state.token:
    auth_url = build_auth_url()
    st.markdown(
        f'<div class="login-btn"><a href="{auth_url}" target="_self">πŸ” Sign in with Microsoft</a></div>',
        unsafe_allow_html=True,
    )
    st.caption("You'll be redirected to Microsoft's login page and back automatically.")
else:
    st.markdown("You are signed in. βœ“")
    if st.button("Sign out"):
        st.session_state.token = None
        st.rerun()


# Step 2 β€” Config
st.markdown('<div class="step-label">Step 2</div>', unsafe_allow_html=True)
st.markdown('<div class="step-title">Configure</div>', unsafe_allow_html=True)
source_mode = st.radio(
    "Email source",
    ["Microsoft 365 group", "My inbox"],
    horizontal=True,
)
col_a, col_b, col_c = st.columns(3)
with col_a:
    sender_email = st.text_input("Sender email (who forwards RJ emails)", value=DEFAULT_SENDER)
with col_b:
    target_email = st.text_input("Recipient email to filter for", value=DEFAULT_TARGET)
with col_c:
    top_n = st.number_input(
        "Max inbox emails to fetch",
        min_value=5,
        max_value=200,
        value=100,
        step=5,
        disabled=source_mode == "Microsoft 365 group",
    )

group_ids = []
if source_mode == "Microsoft 365 group":
    group_ids_text = st.text_area(
        "Group IDs",
        value="\n".join(DEFAULT_GROUP_IDS),
        placeholder="Set GROUP_ID_1 and GROUP_ID_2 in HF Secrets, or paste one group object id per line",
        height=90,
    )
    group_ids = [v for v in re.split(r"[\s,;]+", group_ids_text.strip()) if v]
    st.caption(f"Group mode fetches the latest {GROUP_FETCH_LIMIT} posts from each group and combines them.")
    if not DEFAULT_GROUP_IDS:
        st.caption("Tip: add GROUP_ID_1 and GROUP_ID_2 in Hugging Face Space Secrets to prefill this field.")

# Step 3 β€” Run
st.markdown('<div class="step-label">Step 3</div>', unsafe_allow_html=True)
st.markdown('<div class="step-title">Fetch & Process</div>', unsafe_allow_html=True)

if st.button("β–Ά Run", disabled=not st.session_state.token):
    with st.spinner("Fetching emails from Outlook..."):
        try:
            if source_mode == "Microsoft 365 group":
                raw = get_group_emails_from_groups(
                    st.session_state.token,
                    group_ids,
                    group_email=target_email,
                    top_per_group=GROUP_FETCH_LIMIT,
                )
            else:
                raw = get_emails(st.session_state.token, int(top_n))
            st.session_state.messages = raw
            st.session_state.messages_df = pd.DataFrame(build_fetched_emails(raw)) if raw else pd.DataFrame()
        except ValueError as e:
            st.error(str(e))
            st.stop()
        except RuntimeError as e:
            st.error(str(e))
            st.stop()
        except requests.HTTPError as e:
            if e.response.status_code == 401:
                st.error("Session expired. Please sign in again.")
                st.session_state.token = None
            else:
                st.error(f"API error: {e}")
            st.stop()

    with st.spinner("Collecting all emails from sender A to recipient B..."):
        st.session_state.target_emails = build_target_emails(raw, sender_email, target_email)
        st.session_state.target_emails_df = (
            pd.DataFrame(st.session_state.target_emails)
            if st.session_state.target_emails else pd.DataFrame()
        )

    with st.spinner("Filtering Raymond James emails..."):
        st.session_state.rj_emails = build_rj(raw, sender_email, target_email)

    with st.spinner("Extracting cash tables..."):
        cash_rows, wd_rows, dep_rows = [], [], []
        for email in st.session_state.rj_emails:
            for section, df in extract_tables(email.get("parse_body_html", "")):
                rows = df.to_dict("records")
                for r in rows:
                    r.update({
                        "email_id": email.get("email_id"),
                        "fw_subject": email.get("fw_subject"),
                        "fw_received_time": email.get("fw_received_time"),
                        "original_subject": email.get("forwarded_subject", ""),
                        "original_from": email.get("forwarded_from", ""),
                        "original_sent_time": email.get("forwarded_sent_time", ""),
                        "source_section": section,
                    })
                if section == "Cash Only Transactions":
                    for r in rows:
                        row_type = cash_movement_type(r)
                        if row_type == "deposit":
                            dep_rows.append(r)
                        elif row_type == "withdrawal":
                            wd_rows.append(r)
                        else:
                            cash_rows.append(r)
                elif section == "Withdrawals":
                    wd_rows.extend(rows)
                elif section == "Deposits":
                    dep_rows.extend(rows)

        st.session_state.cash_df        = pd.DataFrame(cash_rows) if cash_rows else pd.DataFrame()
        st.session_state.withdrawals_df = pd.DataFrame(wd_rows)   if wd_rows   else pd.DataFrame()
        st.session_state.deposits_df    = pd.DataFrame(dep_rows)  if dep_rows  else pd.DataFrame()

    with st.spinner("Building action notices..."):
        all_rows = cash_rows + wd_rows + dep_rows
        st.session_state.notice_df = (
            pd.DataFrame([build_notice(r) for r in all_rows])
            if all_rows else pd.DataFrame()
        )

    source_label = "group posts" if source_mode == "Microsoft 365 group" else "inbox emails"
    st.success(
        f"Done! Fetched **{len(raw)}** recent {source_label} and found "
        f"**{len(st.session_state.rj_emails)}** RJ forwarded emails."
    )


# ── Results ────────────────────────────────────────────────────────────────────
st.markdown('<div class="step-label">Step 4</div>', unsafe_allow_html=True)
st.markdown('<div class="step-title">Debug: All Fetched Emails</div>', unsafe_allow_html=True)

fetched_df = st.session_state.messages_df
if fetched_df is not None:
    if not fetched_df.empty:
        st.dataframe(fetched_df, width="stretch", hide_index=True)
    elif st.session_state.messages == []:
        st.caption("Run the fetch step to inspect all fetched emails.")

st.markdown('<div class="step-label">Step 5</div>', unsafe_allow_html=True)
st.markdown('<div class="step-title">Debug: All Emails from A to B</div>', unsafe_allow_html=True)

debug_df = st.session_state.target_emails_df
if debug_df is not None:
    if not debug_df.empty:
        st.dataframe(debug_df, width="stretch", hide_index=True)
    elif st.session_state.messages:
        st.info("Fetched emails, but none matched the sender + recipient filter.")
    else:
        st.caption("Run the fetch step to inspect all emails from sender A to recipient B.")

if st.session_state.rj_emails:
    st.divider()
    st.markdown("### Results")
    tab1, tab2, tab3, tab4 = st.tabs([
        "πŸ“‹ Action Notices", "πŸ’΅ Cash Transactions", "⬆ Withdrawals", "⬇ Deposits"
    ])
    for tab, key, label in [
        (tab1, "notice_df",        "No action notices generated."),
        (tab2, "cash_df",          "No cash transactions found."),
        (tab3, "withdrawals_df",   "No withdrawals found."),
        (tab4, "deposits_df",      "No deposits found."),
    ]:
        with tab:
            df = st.session_state[key]
            if df is not None and not df.empty:
                # Hide HTML columns
                display_df = df.drop(columns=[c for c in df.columns if "html" in c.lower()], errors="ignore")
                st.dataframe(display_df, width="stretch", hide_index=True)
            else:
                st.info(label)

    st.divider()
    excel_bytes = to_excel({
        "Action Notices":    st.session_state.notice_df,
        "Cash Transactions": st.session_state.cash_df,
        "Withdrawals":       st.session_state.withdrawals_df,
        "Deposits":          st.session_state.deposits_df,
    })
    st.download_button(
        label="⬇ Download Excel Report",
        data=excel_bytes,
        file_name=f"rj_emails_{datetime.now().strftime('%Y%m%d_%H%M')}.xlsx",
        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    )