File size: 24,233 Bytes
68629e1
5204355
68629e1
 
 
5204355
10e844f
 
5204355
68629e1
 
 
 
 
 
5204355
 
 
 
 
 
 
 
 
 
 
 
 
68629e1
 
 
 
 
5204355
 
 
68629e1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5204355
 
 
 
 
 
 
 
68629e1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5204355
68629e1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ebdf502
 
 
5204355
ebdf502
68629e1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ebdf502
68629e1
 
 
ebdf502
68629e1
ebdf502
 
 
68629e1
 
 
 
 
ebdf502
 
 
68629e1
 
 
 
ebdf502
 
68629e1
ebdf502
 
68629e1
 
 
ebdf502
 
 
 
 
 
 
 
 
 
 
 
 
68629e1
 
5204355
 
68629e1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5204355
 
 
 
 
 
 
 
 
ebdf502
68629e1
ebdf502
 
10e844f
ebdf502
 
 
10e844f
 
 
 
 
 
 
 
 
 
 
 
5204355
 
 
 
 
 
 
 
 
 
10e844f
 
68629e1
 
 
5204355
68629e1
 
 
 
 
 
 
 
 
 
5204355
 
 
 
 
 
 
 
 
68629e1
 
 
 
 
 
 
5204355
 
68629e1
5204355
 
 
 
 
 
 
 
 
 
 
68629e1
 
 
 
 
10e844f
68629e1
 
 
5204355
68629e1
5204355
ebdf502
68629e1
 
 
 
 
 
 
10e844f
68629e1
 
 
 
 
 
 
 
5204355
68629e1
 
 
 
 
 
 
 
 
 
 
 
5204355
68629e1
 
 
 
 
10e844f
ebdf502
68629e1
10e844f
ebdf502
 
68629e1
 
5204355
68629e1
5204355
 
 
 
68629e1
10e844f
68629e1
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
import datetime
import html
import json
import os
import re
import tempfile
import time
from concurrent.futures import ThreadPoolExecutor, as_completed

import feedparser
import gradio as gr
import numpy as np
import pandas as pd
import plotly.express as px
import requests

# local modules (flat files in the root)
from search import get_news
from llm import summarize
from huggingface_nlp import analyze_sentiment, analyze_entities, extract_keywords
from aws import s3_upload, ses_send_email
from cache import get_cache, set_cache

# ---------------------- Time helpers ----------------------
def ts_now_utc():
    """Return the current moment as a timezone-aware UTC datetime."""
    return datetime.datetime.now(datetime.timezone.utc)

def human_ago(dt_utc):
    """Describe how long ago *dt_utc* (aware UTC datetime) was, e.g. '5m ago'.

    Granularity is seconds below a minute, minutes below an hour, hours otherwise.
    """
    elapsed = int((ts_now_utc() - dt_utc).total_seconds())
    if elapsed < 60:
        return f"{elapsed}s ago"
    minutes = elapsed // 60
    if minutes < 60:
        return f"{minutes}m ago"
    return f"{minutes // 60}h ago"

# ---------------------- Presets ----------------------
# One-click analysis presets: each UI mode maps to a default topic plus extra
# query keywords ("query_hint") that bias the news search toward that domain.
ONE_CLICK = {
    "Healthcare AI":            {"topic": "Healthcare AI", "query_hint": "hospital AI diagnostics EMR"},
    "Drug Discovery":           {"topic": "Drug discovery", "query_hint": "clinical trials FDA approvals biotech"},
    "Hospital Staffing Trends": {"topic": "Hospital staffing", "query_hint": "nurse shortage hospital layoffs hiring"},
    "Finance (Earnings/Stocks)": {"topic": "Earnings season", "query_hint": "earnings guidance revenue EPS"},
    "Tech R&D (Patents/AI)":    {"topic": "AI research", "query_hint": "foundation models patents transformer LLM"},
    "General":                  {"topic": "", "query_hint": ""}
}

# Alphabetized company-picker options. The set literal de-duplicates; sorted()
# accepts any iterable, so the redundant list() wrapper (ruff C414) is dropped.
H1B_TECH_PRESETS = sorted({
    "Google","Apple","Meta","Amazon","Microsoft","Netflix","NVIDIA","Tesla","Oracle","Salesforce",
    "IBM","Intel","Qualcomm","Cisco","Adobe","Uber","Airbnb","ServiceNow","Snowflake",
    "Databricks","OpenAI","Palantir","Zoom","Workday","Stripe","Block","Atlassian","DoorDash",
    "eBay","LinkedIn","Lyft","Reddit","Shopify","Pinterest","Cloudflare","Twilio","Splunk",
    "AMD","MongoDB","HashiCorp","GitHub","GitLab","Coinbase","TikTok","Bytedance"
})

# ---------------------- Styling ----------------------
# Badge color + emoji per sentiment label; keys match the upper-cased labels
# produced by the sentiment model and stored in each row's "Sentiment" field.
SENTI = {
    "POSITIVE": {"color": "#10b981", "emoji": "🟒"},
    "NEGATIVE": {"color": "#ef4444", "emoji": "πŸ”΄"},
    "NEUTRAL":  {"color": "#f59e0b", "emoji": "🟑"},
    "MIXED":    {"color": "#06b6d4", "emoji": "πŸ”΅"},
}
# Shared light-theme stylesheet; injected both into the rendered card grid
# (render_cards) and the hero header in the Gradio UI. Content is emitted
# verbatim into <style> tags, so it must remain valid CSS.
CSS_BASE = """
:root{
  --bg:#f7f8fb; --panel:#ffffff; --text:#0f172a; --muted:#475569;
  --card:#ffffff; --chip:#eef2ff; --shadow: 0 10px 24px rgba(2,6,23,.08);
}
* { font-family: ui-sans-serif, system-ui, -apple-system, Segoe UI, Roboto, Helvetica, Arial; }
body { background: var(--bg); color: var(--text); }
#root, .gradio-container { background: var(--bg); }
.container { max-width: 1200px; margin: 0 auto; }
.hero { display:flex; align-items:center; gap:14px; margin: 8px 0 18px; }
.title { font-weight:800; font-size: 28px; line-height:1.1; }
.subtitle { color: var(--muted); font-size:14px; }
.grid { display:grid; grid-template-columns: repeat(2, minmax(0,1fr)); gap:14px; }
@media (max-width: 900px){ .grid { grid-template-columns: 1fr; } }
.card { background: var(--card); padding:16px; border-radius:16px; box-shadow: var(--shadow); transition: transform .12s ease; border: 1px solid #e5e7eb; }
.card:hover { transform: translateY(-2px); }
.ctitle { font-weight:800; font-size:18px; margin-bottom:6px; }
.ctitle a { color:#0f172a; }
.ctitle a:hover { text-decoration:underline; }
.csummary { font-size:14px; line-height:1.55; margin:8px 0 10px; color:#334155; }
.row { display:flex; align-items:center; justify-content:space-between; gap:8px; flex-wrap: wrap; }
.badge { padding:4px 10px; border-radius:999px; color:white; font-weight:700; font-size:12px; display:inline-flex; gap:6px; align-items:center; box-shadow: 0 1px 0 rgba(15,23,42,.06); }
.tags { display:flex; gap:8px; flex-wrap: wrap; }
.tag { background: var(--chip); color: var(--text); opacity:.9; padding:4px 10px; border-radius:999px; font-size:12px; }
a { color: #0b5dd7; text-decoration: none; }
a:hover { text-decoration: underline; }
.small { color: var(--muted); font-size:12px; margin: 6px 0 0; }
"""

# ---------------------- Render helpers ----------------------
def format_summary_html(s: str) -> str:
    """Bold the three bullet labels so recruiters can scan quickly."""
    if not s:
        return ""
    # Each bullet prefix ("1)", "2.", ...) at a line start becomes a bold label.
    replacements = (
        (r'^\s*1[\)\.]\s*', '<b>What happened:</b> '),
        (r'^\s*2[\)\.]\s*', '<b>Business impact:</b> '),
        (r'^\s*3[\)\.]\s*', '<b>Risk or opportunity:</b> '),
    )
    for pattern, label in replacements:
        s = re.sub(pattern, label, s, flags=re.IGNORECASE | re.MULTILINE)
    return s

def render_cards(rows: list[dict], entity_filter: str | None = None, sentiment_filter: str | None = None) -> str:
    """Render article rows as an HTML card grid, optionally filtered.

    Security fix: titles, URLs, entities and summaries originate from external
    news feeds, so they are now HTML-escaped before interpolation into markup
    (previously raw, allowing markup/script injection). The summary is escaped
    *before* format_summary_html runs, so the injected <b> labels survive.

    Args:
        rows: article dicts with Title/URL/Summary/Sentiment/Entities keys.
        entity_filter: keep only rows whose Entities string contains this
            substring (case-insensitive); falsy disables the filter.
        sentiment_filter: keep only rows with this sentiment label; "ALL" or
            falsy disables the filter.
    Returns:
        A single HTML string (stylesheet + card grid).
    """
    if entity_filter:
        rows = [r for r in rows if entity_filter.lower() in (r.get("Entities", "").lower())]
    if sentiment_filter and sentiment_filter != "ALL":
        rows = [r for r in rows if r.get("Sentiment", "").upper() == sentiment_filter]
    out = [f"<style>{CSS_BASE}</style><div class='container'><div class='grid'>"]
    if not rows:
        out.append("<i>No results.</i>")
    for r in rows:
        # Unknown labels fall back to the neutral badge style.
        senti = SENTI.get(r["Sentiment"].upper(), SENTI["NEUTRAL"])
        badge_style = f"background:{senti['color']}"
        url = html.escape(r['URL'], quote=True)
        title = html.escape(r['Title'])
        summary = format_summary_html(html.escape(r['Summary']))
        ents = [e for e in (r.get('Entities') or '').split(', ') if e][:4]
        tag_html = "".join(f"<span class='tag'>{html.escape(x)}</span>" for x in ents) or "<span class='tag'>No entities</span>"
        source_html = f"<div class='tags'><a target='_blank' href='{url}'>Source β†—</a></div>"
        out.append(f"""
        <div class="card">
          <div class="ctitle"><a target="_blank" href="{url}">{title}</a></div>
          <div class="csummary">{summary}</div>
          <div class="row">
            <span class="badge" style="{badge_style}">{senti['emoji']} {r['Sentiment'].title()}</span>
            <div class="tags">{tag_html}</div>
          </div>
          {source_html}
        </div>
        """)
    out.append("</div></div>")
    return "\n".join(out)

def make_sentiment_chart(df: pd.DataFrame):
    """Bar chart of article counts per sentiment bucket (empty chart if no data)."""
    if df.empty:
        return px.bar()
    bucket_order = ["POSITIVE", "NEUTRAL", "NEGATIVE", "MIXED"]
    counts = (
        df["Sentiment"]
        .value_counts()
        .reindex(bucket_order)
        .fillna(0)
        .reset_index()
    )
    counts.columns = ["Sentiment", "Count"]
    fig = px.bar(counts, x="Sentiment", y="Count", text="Count", height=340, title="Sentiment distribution")
    fig.update_traces(textposition="outside")
    fig.update_layout(margin=dict(l=10, r=10, t=40, b=10), template="plotly_white", xaxis_title=None, yaxis_title=None)
    return fig

def make_trend_chart(df: pd.DataFrame):
    """Line chart of mean daily sentiment score (POSITIVE=+1, NEGATIVE=-1, else 0)."""
    if df.empty or "Date" not in df.columns:
        return px.line()
    score_map = {"POSITIVE": 1, "NEUTRAL": 0, "NEGATIVE": -1, "MIXED": 0}
    daily = df.copy()
    daily["Score"] = daily["Sentiment"].map(score_map).fillna(0)
    daily = daily.groupby("Date", as_index=False)["Score"].mean()
    fig = px.line(daily, x="Date", y="Score", title="Avg sentiment over time (by day)")
    fig.update_layout(margin=dict(l=10, r=10, t=40, b=10), template="plotly_white", yaxis_range=[-1, 1])
    return fig

def make_forecast_chart(df: pd.DataFrame):
    """Linear fit on daily average sentiment -> 7-day projection."""
    if df.empty or "Date" not in df.columns:
        return px.line(title="Forecast (insufficient data)")
    score_map = {"POSITIVE": 1, "NEUTRAL": 0, "NEGATIVE": -1, "MIXED": 0}
    history = df.copy()
    history["Score"] = history["Sentiment"].map(score_map).fillna(0)
    daily = history.groupby("Date", as_index=False)["Score"].mean().sort_values("Date")
    if len(daily) < 3:
        # Too few points for a meaningful fit; just plot the history.
        return px.line(daily, x="Date", y="Score", title="Forecast (needs β‰₯3 days)", template="plotly_white")
    # Fit score = slope * ordinal(date) + intercept over the observed days.
    ordinals = pd.to_datetime(daily["Date"]).map(pd.Timestamp.toordinal).to_numpy()
    slope, intercept = np.polyfit(ordinals, daily["Score"].to_numpy(), 1)
    last_day = pd.to_datetime(daily["Date"]).max()
    future_days = [last_day + pd.Timedelta(days=offset) for offset in range(1, 8)]
    projected = slope * np.array([d.toordinal() for d in future_days]) + intercept
    fig = px.line(daily, x="Date", y="Score", title="Sentiment: history & 7-day linear forecast", markers=True)
    fig.add_scatter(x=future_days, y=projected, mode="lines+markers", name="Forecast")
    fig.update_layout(margin=dict(l=10, r=10, t=40, b=10), template="plotly_white", yaxis_range=[-1, 1])
    return fig

# ---------------------- Extra sources (simple + free) ----------------------
# Sent with every outbound HTTP request so the polled endpoints can identify us.
HEADERS = {"User-Agent": "NewsIntel/1.0"}

def fetch_press_releases(topic: str, limit: int = 5):
    """Fetch up to *limit* press-release headlines for *topic* via Google News RSS.

    Returns a list of {"title", "link"} dicts (empty strings for missing fields).
    """
    quoted = requests.utils.quote(f"{topic} press release")
    feed = feedparser.parse(f"https://news.google.com/rss/search?q={quoted}")
    releases = []
    for entry in feed.entries[:limit]:
        releases.append({"title": entry.get("title", ""), "link": entry.get("link", "")})
    return releases

def _try_greenhouse(board: str):
    """Query the public Greenhouse board API for *board*; return [] on any failure.

    Best-effort by design: network/HTTP/parse errors all yield an empty list so
    the caller can fall through to another job source.
    """
    api = f"https://boards-api.greenhouse.io/v1/boards/{board}/jobs"
    try:
        resp = requests.get(api, timeout=20, headers=HEADERS)
        if resp.status_code == 200:
            postings = []
            for job in resp.json().get("jobs", []):
                postings.append({
                    "title": job.get("title", ""),
                    "location": (job.get("location") or {}).get("name", ""),
                    "url": job.get("absolute_url", ""),
                })
            return postings
    except Exception:
        pass
    return []

def _try_lever(board: str):
    """Query the public Lever postings API for *board*; return [] on any failure.

    Robustness fix: a posting's "categories" field can be null in the JSON, and
    the previous `j.get("categories",{})` guard only covers a *missing* key —
    a null value raised AttributeError inside the try block, silently discarding
    the entire result set. `(... or {})` now matches _try_greenhouse's guard.
    """
    api = f"https://api.lever.co/v0/postings/{board}?mode=json"
    try:
        r = requests.get(api, timeout=20, headers=HEADERS)
        if r.status_code == 200:
            data = r.json()
            return [{"title": j.get("text",""),
                     "location": (j.get("categories") or {}).get("location",""),
                     "url": j.get("hostedUrl","")} for j in data]
    except Exception:
        pass
    return []

def fetch_jobs(topic: str, limit: int = 8):
    """Guess a job-board slug from *topic* and return up to *limit* open roles.

    Tries Greenhouse first; falls back to Lever when Greenhouse yields nothing.
    """
    slug = topic.lower().replace(" ", "")
    postings = _try_greenhouse(slug)
    if not postings:
        postings = _try_lever(slug)
    return postings[:limit]

def grounded_summary(news_text: str, context: str = "") -> str:
    """Summarize *news_text* into three labeled bullets, grounded only in *context*."""
    sections = [
        "You are an analyst. Ground your bullets ONLY in the provided context. "
        "If the context is insufficient, say 'insufficient context'.",
        "",
        f"Context:\n{(context or '').strip()}",
        "",
        "Task: Summarize the following news into 3 bullets:\n"
        "1) What happened  2) Business impact  3) Risk or opportunity\n"
        "Limit ~90 words.",
        "",
        f"News:\n{news_text}",
    ]
    return summarize("\n".join(sections))

def make_briefing(topic: str, rows: list[dict], press: list[dict], jobs: list[dict], timestamp_str: str) -> str:
    """Compose the interview-briefing prompt from news, press and jobs, then run the LLM."""
    news_lines  = [f"- {r['Title']} ({r['URL']})" for r in rows[:6]]
    press_lines = [f"- {p['title']} ({p['link']})" for p in press[:5]]
    job_lines   = [f"- {j['title']}  ({j.get('location','')}) β€” {j['url']}" for j in jobs[:5]]
    news_bits  = "\n".join(news_lines) or "β€”"
    press_bits = "\n".join(press_lines) or "β€”"
    jobs_bits  = "\n".join(job_lines) or "No jobs found."
    prompt = (
        f"You are preparing an interview briefing about '{topic}'. "
        "Synthesize:\n"
        f"Recent news:\n{news_bits}\n\n"
        f"Press releases:\n{press_bits}\n\n"
        f"Open roles snapshot:\n{jobs_bits}\n\n"
        "Output:\n- 3 bullets: momentum (facts)\n- 3 bullets: risks\n- 3 bullets: opportunities\n"
        "- 3 bullets: interview talking points with 1–2 citations.\nKeep it under 220 words.\n"
        f"(Data last updated: {timestamp_str})"
    )
    return summarize(prompt)

# ---------------------- Core pipeline with caching ----------------------
def agentic_get_news(topic: str, days: int, k: int, query_hint: str = ""):
    """Fetch up to *k* news articles for *topic* with caching and a widening retry.

    Bug fix: when *query_hint* is empty the old code produced the clause
    '"{topic}" AND ' — .strip() removes only whitespace, so a dangling 'AND'
    survived into the OR-joined query, sending a malformed search string.
    The AND clause is now included only when a non-blank hint is present.

    Args:
        topic: subject/company to search for.
        days: lookback window in days for the first attempt.
        k: desired number of articles.
        query_hint: extra keywords from the one-click preset (may be empty).
    """
    cache_obj = get_cache("news", topic, days, k, query_hint)
    if cache_obj:
        return cache_obj
    clauses = [f"{topic} {query_hint}".strip()]
    if query_hint.strip():
        clauses.append(f'"{topic}" AND {query_hint}')
    clauses.append(f"{topic} AI")
    query = " OR ".join(clauses)
    res = get_news(query, days, k)
    if len(res) < k:
        # Not enough hits: widen the lookback window (capped at 30 days) and retry.
        res = get_news(query, min(days + 7, 30), k)
    set_cache(res, "news", topic, days, k, query_hint)
    return res

def cached_press(topic: str):
    """Press releases for *topic*, memoized through the shared cache layer."""
    cached = get_cache("press", topic)
    if cached:
        return cached
    fresh = fetch_press_releases(topic, 6)
    set_cache(fresh, "press", topic)
    return fresh

def cached_jobs(topic: str):
    """Open-roles snapshot for *topic*, memoized through the shared cache layer."""
    cached = get_cache("jobs", topic)
    if cached:
        return cached
    fresh = fetch_jobs(topic, 8)
    set_cache(fresh, "jobs", topic)
    return fresh

def run_pipeline(topic, days, k, query_hint="", fast=True):
    """Run the full analysis: fetch news/press/jobs, summarize each article in a
    thread pool, score sentiment, and build the briefing plus metrics frames.

    Args:
        topic: subject/company to analyze.
        days: news lookback window in days.
        k: number of articles to process.
        query_hint: extra search keywords from the one-click preset.
        fast: when True, skip entity/keyword extraction for each article.

    Returns:
        (rows, df, mdf, rollup, briefing, press, jobs, timestamp_str) where
        rows is the list of article dicts, df/mdf are article/metric DataFrames,
        rollup is a one-row run-summary DataFrame, and briefing is the LLM text.
    """
    articles = agentic_get_news(topic, int(days), int(k), query_hint=query_hint)
    press = cached_press(topic)
    jobs  = cached_jobs(topic)

    today = datetime.date.today()
    rows, metrics = [], []

    def _process(a):
        # Per-article worker: summarize, time the LLM call, then score sentiment.
        base_text = f"{a['title']} β€” {a['snippet']}"
        t0 = time.time()
        summary = grounded_summary(base_text, context=a.get("snippet",""))
        latency = time.time() - t0
        sent = analyze_sentiment(summary)
        # Fast mode skips the expensive NLP passes entirely.
        ents = [] if fast else analyze_entities(summary)
        kws  = [] if fast else extract_keywords(summary, top_n=6)
        row = {
            "Title": a["title"],
            "URL": a["url"],
            "Summary": summary,
            "Sentiment": sent["label"].upper(),
            "Entities": "" if fast else ", ".join({e["word"] for e in ents[:6]}),
            "Key Phrases": "" if fast else ", ".join({k["keyword"] for k in kws[:6]}),
            # Fall back to today's date when the feed omits a publish date.
            "Date": a.get("published_date") or today,
        }
        met = {
            "title": a["title"], "latency_sec": round(latency,3),
            "summary_tokens": len(summary.split()),
            "sentiment": sent["label"].upper(),
            "entity_count": 0 if fast else len(ents)
        }
        return row, met

    # Fan out per-article work; cap workers at 4 regardless of k.
    with ThreadPoolExecutor(max_workers=min(4, max(1, k))) as ex:
        futures = [ex.submit(_process, a) for a in articles]
        for fut in as_completed(futures):
            r, m = fut.result()
            rows.append(r); metrics.append(m)

    # as_completed yields in finish order, so sort for a deterministic display.
    rows.sort(key=lambda x: x["Title"])
    metrics.sort(key=lambda x: x["title"])

    df = pd.DataFrame(rows)
    mdf = pd.DataFrame(metrics)
    now = ts_now_utc()
    timestamp_str = f"{now.strftime('%b %d, %Y %I:%M %p')} UTC β€’ {human_ago(now)}"
    briefing = make_briefing(topic, rows, press, jobs, timestamp_str)
    rollup = pd.DataFrame([{
        "articles": len(rows),
        "jobs_found": len(jobs),
        "press_releases": len(press),
        "avg_latency_sec": round(mdf["latency_sec"].mean(),3) if not mdf.empty else 0.0,
        "updated_at": timestamp_str
    }])
    return rows, df, mdf, rollup, briefing, press, jobs, timestamp_str

# ---------------------- Exporters (with branding) ----------------------
def export_briefing_html(topic: str, briefing_md: str, timestamp_str: str):
    """Write a branded, self-contained HTML briefing to the temp dir; return its path."""
    document = f"""<!doctype html>
<html><head><meta charset="utf-8"><title>{topic} β€” Briefing</title></head>
<body style="font-family:Arial,Helvetica,sans-serif;max-width:760px;margin:24px auto;line-height:1.5">
<h2>{topic} β€” Interview Briefing</h2>
<div style="color:#64748b;font-size:12px;margin-bottom:10px">Data last updated: {timestamp_str}</div>
<pre style="white-space:pre-wrap">{briefing_md}</pre>
<hr/>
<div style="color:#94a3b8;font-size:12px">Generated by NewsIntel Agent β€” Hasitha Varada</div>
</body></html>
"""
    out_path = os.path.join(tempfile.gettempdir(), f"{topic}_briefing.html")
    with open(out_path, "w", encoding="utf-8") as fh:
        fh.write(document)
    return out_path

# Optional dependency probe: PDF export is enabled only when reportlab imports.
try:
    import reportlab  # optional
    HAS_PDF = True
except Exception:
    HAS_PDF = False

def export_briefing_pdf(topic: str, briefing_md: str, timestamp_str: str):
    """Render the briefing text to a simple paginated PDF in the temp dir.

    Returns the file path, or None when reportlab is not installed.
    Lines are drawn one by one, truncated to 115 chars; a footer is stamped
    at each page break and once more after the loop (note: if the loop ends
    exactly at a page break this can leave a footer on an otherwise blank page).
    """
    if not HAS_PDF: return None
    from reportlab.lib.pagesizes import letter
    from reportlab.pdfgen import canvas
    from reportlab.lib.units import inch
    path = os.path.join(tempfile.gettempdir(), f"{topic}_briefing.pdf")
    c = canvas.Canvas(path, pagesize=letter)
    width, height = letter
    # Cursor starts 1 inch below the top, with a 0.75 inch left margin.
    x, y = 0.75*inch, height - 1*inch
    c.setFont("Helvetica-Bold", 14); c.drawString(x, y, f"{topic} β€” Interview Briefing")
    c.setFont("Helvetica", 9); y -= 0.25*inch; c.drawString(x, y, f"Data last updated: {timestamp_str}")
    c.setFont("Times-Roman", 11); y -= 0.35*inch
    for line in briefing_md.splitlines():
        # Blank lines just advance the cursor.
        if not line.strip(): y -= 0.18*inch; continue
        c.drawString(x, y, line[:115]); y -= 0.18*inch
        if y < 1*inch:
            # Page full: stamp the footer, start a new page, reset the cursor.
            c.setFont("Helvetica", 9)
            c.drawString(x, 0.7*inch, "Generated by NewsIntel Agent β€” Hasitha Varada")
            c.showPage()
            x, y = 0.75*inch, height - 1*inch
            c.setFont("Times-Roman", 11)
    c.setFont("Helvetica", 9)
    c.drawString(x, 0.7*inch, "Generated by NewsIntel Agent β€” Hasitha Varada")
    c.save()
    return path

# ---------------------- Gradio callbacks ----------------------
def estimate_eta_secs(k:int, fast:bool) -> int:
    """Rough run-time estimate: per-article cost (1s fast, 3s full) plus 2s overhead, floored at 3s."""
    per_article = 1 if fast else 3
    return max(3, per_article * int(k) + 2)

def start_banner(k, fast_mode):
    """Banner text shown in the header while the analysis pipeline runs."""
    eta = estimate_eta_secs(k, bool(fast_mode))
    return f"⏳ Running analysis (~{eta}s). Models are warm-started; first run may take longer..."

def analyze_news(mode, preset_company, topic, days, k, entity_filter, sentiment_filter, fast_mode):
    """Gradio callback: run the pipeline and produce every output widget's value.

    Returns a 10-tuple matching the outputs wired to run_btn.click:
    (header markdown, cards HTML, sentiment/trend/forecast plots, article df,
    metrics df, rollup df, briefing text, entity-dropdown choices update).
    """
    # Presets contribute extra search keywords; unknown modes contribute none.
    query_hint = ONE_CLICK.get(mode, ONE_CLICK["General"])["query_hint"] if mode in ONE_CLICK else ""
    # Append the preset company unless the topic already mentions it.
    if preset_company and preset_company.lower() not in (topic or "").lower():
        topic = f"{topic} {preset_company}".strip()

    rows, df, mdf, rollup, briefing, press, jobs, ts = run_pipeline(
        topic, days, k, query_hint=query_hint, fast=bool(fast_mode)
    )

    cards_html = render_cards(rows, entity_filter or None, sentiment_filter or None)
    # Collect distinct entity names (capped at 50) to refresh the filter dropdown.
    all_ents = sorted(set(
        e.strip()
        for r in rows
        for e in (r.get("Entities", "").split(", "))
        if e.strip()
    ))[:50]

    header = f"πŸ—žοΈ NewsIntel β€” Data last updated: {ts}"

    return (
        header,
        cards_html,
        make_sentiment_chart(df),
        make_trend_chart(df),
        make_forecast_chart(df),
        df,
        mdf if not mdf.empty else pd.DataFrame([{"note": "No per-article metrics yet"}]),
        rollup,
        briefing,
        gr.update(choices=all_ents)
    )

def export_cb(topic, briefing_md, timestamp_str):
    """Create the HTML (and optional PDF) briefing files, upload them to S3,
    and return (html_path, pdf_path_or_None, links_html) for the UI widgets."""
    html_path = export_briefing_html(topic, briefing_md, timestamp_str)
    pdf_path = export_briefing_pdf(topic, briefing_md, timestamp_str)
    anchors = []
    html_url = s3_upload(html_path)
    if html_url:
        anchors.append(f"<a href='{html_url}' target='_blank'>View HTML on S3</a>")
    pdf_url = s3_upload(pdf_path) if pdf_path else None
    if pdf_url:
        anchors.append(f"<a href='{pdf_url}' target='_blank'>View PDF on S3</a>")
    links_html = "<br/>".join(anchors) if anchors else "<i>(S3 links will appear here if configured)</i>"
    return html_path, (pdf_path or None), links_html

def email_weekly_cb(topic, email, briefing_md, timestamp_str):
    """Validate inputs, then email the current briefing via SES; return a status string."""
    if not email:
        return "Enter your email first."
    if not (briefing_md or "").strip():
        return "No briefing yet β€” run analysis first."
    body = (
        f"<h3>{topic} – Weekly Briefing</h3>"
        f"<div style='color:#64748b;font-size:12px'>Data last updated: {timestamp_str}</div>"
        f"<pre style='white-space:pre-wrap'>{briefing_md}</pre>"
    )
    sent = ses_send_email(email, f"Weekly Briefing – {topic}", body)
    return "Email sent via SES βœ…" if sent else "SES not configured or send failed ❌"

# ---------------------- UI ----------------------
# Top-level Gradio app definition: layout (inputs, tabbed outputs) and event wiring.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="violet", neutral_hue="slate")) as demo:
    # Hero header with the shared stylesheet and feature chips.
    gr.HTML(f"""
    <style>{CSS_BASE}</style>
    <div class="container">
      <div class="hero" style="gap:18px">
        <div style="font-size:30px">πŸ—žοΈ</div>
        <div>
          <div class="title" style="
            background: linear-gradient(90deg,#1e3a8a,#7c3aed);
            -webkit-background-clip: text; background-clip:text; color:transparent;">
            NewsIntel Agent β€” Job Briefings & Hiring Signals
          </div>
          <div class="subtitle" style="display:flex;flex-wrap:wrap;gap:8px;margin-top:6px">
            <span class="tag">One‑click modes</span>
            <span class="tag">Cached results</span>
            <span class="tag">Branded HTML/PDF</span>
            <span class="tag">Optional S3/SES</span>
          </div>
        </div>
      </div>
    </div>
    """)

    # ---------- inputs ----------
    with gr.Row():
        mode  = gr.Dropdown(choices=list(ONE_CLICK.keys()), value="General", label="One-Click Mode")
        preset_company = gr.Dropdown(choices=H1B_TECH_PRESETS, label="Company Presets (H-1B Tech)", allow_custom_value=True)
        topic  = gr.Textbox(label="Topic / Company", value="", placeholder="e.g., AMD, Healthcare AI, EV market India")
        days   = gr.Slider(1, 30, value=7, step=1, label="Lookback (days)")
        k      = gr.Slider(3, 12, value=4, step=1, label="Articles")
        fast_mode = gr.Checkbox(value=True, label="⚑ Fast mode (skip Entities & Key Phrases)")

    with gr.Row():
        # entity_filter choices are populated dynamically by analyze_news.
        entity_filter = gr.Dropdown(choices=[], label="Filter by Mentioned Company/Person", value=None)
        sentiment_filter = gr.Dropdown(choices=["ALL","POSITIVE","NEUTRAL","NEGATIVE","MIXED"], value="ALL", label="Sentiment filter")

    run_btn = gr.Button("Run Analysis", variant="primary")

    # ---------- outputs ----------
    header_bar = gr.Markdown(value="πŸ—žοΈ NewsIntel β€” Data last updated: β€”")
    with gr.Tab("Insights"):
        tip_md = gr.Markdown("πŸ’‘ **Tip:** *Entities* are detected names of companies/people/places (e.g., β€œTSMC”, β€œARM”). Use the filters to focus the feed.")
        cards = gr.HTML()
    with gr.Tab("Charts"):
        plot_sent = gr.Plot(label="Sentiment distribution")
        plot_trend = gr.Plot(label="Trend (avg sentiment by day)")
    with gr.Tab("Forecast"):
        gr.Markdown("ℹ️ *The forecast projects the **average daily sentiment** trend 7 days ahead using a simple linear fit. It’s a quick momentum signal, not a trading model.*")
        plot_forecast = gr.Plot(label="7-day sentiment forecast")
    with gr.Tab("Table"):
        table = gr.Dataframe(wrap=True)
    with gr.Tab("Metrics"):
        per_article = gr.Dataframe(wrap=True, label="Per-article metrics")
        rollup = gr.Dataframe(wrap=True, label="Run summary")
    with gr.Tab("Briefing"):
        briefing_md = gr.Markdown()
        timestamp_str = gr.Textbox(label="Timestamp", interactive=False)
        export_html = gr.File(label="Download HTML")
        export_pdf  = gr.File(label="Download PDF (optional)")
        s3_links    = gr.HTML(value="<i>(S3 links will appear here if configured)</i>")
        export_btn  = gr.Button("Export Briefing (creates files)")
        with gr.Row():
            weekly_email = gr.Textbox(label="Email (SES)", placeholder="name@example.com")
            email_btn = gr.Button("Email Weekly Briefing (SES)")
            email_status = gr.Markdown()

    # ---------- helpers & wiring ----------
    def _apply_mode(m, current_topic):
        # Fill the topic box with the preset's default topic, but never clobber
        # a topic the user already typed.
        cfg = ONE_CLICK.get(m, ONE_CLICK["General"])
        return gr.update(value=current_topic or cfg.get("topic",""))

    mode.change(_apply_mode, inputs=[mode, topic], outputs=[topic])
    # Selecting a preset company replaces the topic box contents.
    preset_company.change(lambda x: x or "", inputs=preset_company, outputs=topic)

    # Show banner -> run analysis -> stamp timestamp box
    run_btn.click(
        start_banner,
        inputs=[k, fast_mode],
        outputs=[header_bar]
    ).then(
        analyze_news,
        inputs=[mode, preset_company, topic, days, k, entity_filter, sentiment_filter, fast_mode],
        outputs=[header_bar, cards, plot_sent, plot_trend, plot_forecast, table, per_article, rollup, briefing_md, entity_filter]
    ).then(
        lambda: datetime.datetime.now().strftime("%b %d, %Y %I:%M %p"),
        inputs=[], outputs=[timestamp_str]
    )

    export_btn.click(export_cb, inputs=[topic, briefing_md, timestamp_str], outputs=[export_html, export_pdf, s3_links])
    email_btn.click(email_weekly_cb, inputs=[topic, weekly_email, briefing_md, timestamp_str], outputs=[email_status])

# Script entry point: launch the Gradio app when run directly.
if __name__ == "__main__":
    print("πŸš€ Launching NewsIntel (light-only UI + caching + one-click modes + forecast)")
    demo.launch()