import os, time, json, tempfile, datetime, requests, feedparser
import re
import gradio as gr
import pandas as pd
import plotly.express as px
import numpy as np
from concurrent.futures import ThreadPoolExecutor, as_completed
# local modules (flat files in the root)
from search import get_news
from llm import summarize
from huggingface_nlp import analyze_sentiment, analyze_entities, extract_keywords
from aws import s3_upload, ses_send_email
from cache import get_cache, set_cache
# ---------------------- Time helpers ----------------------
def ts_now_utc():
    """Return the current time as a timezone-aware UTC datetime."""
    return datetime.datetime.now(tz=datetime.timezone.utc)


def human_ago(dt_utc):
    """Compact '<n>s/m/h ago' label for an aware UTC datetime in the past."""
    elapsed = int((ts_now_utc() - dt_utc).total_seconds())
    if elapsed < 60:
        return f"{elapsed}s ago"
    minutes = elapsed // 60
    if minutes < 60:
        return f"{minutes}m ago"
    # No day unit: hours keep counting up past 24h, same as the original.
    return f"{minutes // 60}h ago"
# ---------------------- Presets ----------------------
# One-click analysis modes: maps a UI label to a default topic plus extra
# query keywords that get blended into the news search query.
ONE_CLICK = {
"Healthcare AI": {"topic": "Healthcare AI", "query_hint": "hospital AI diagnostics EMR"},
"Drug Discovery": {"topic": "Drug discovery", "query_hint": "clinical trials FDA approvals biotech"},
"Hospital Staffing Trends": {"topic": "Hospital staffing", "query_hint": "nurse shortage hospital layoffs hiring"},
"Finance (Earnings/Stocks)": {"topic": "Earnings season", "query_hint": "earnings guidance revenue EPS"},
"Tech R&D (Patents/AI)": {"topic": "AI research", "query_hint": "foundation models patents transformer LLM"},
"General": {"topic": "", "query_hint": ""}
}
# De-duplicated, alphabetized company presets for the dropdown.
# sorted() already returns a list, so the set literal is passed directly
# (the original wrapped it in a redundant list()).
H1B_TECH_PRESETS = sorted({
    "Google", "Apple", "Meta", "Amazon", "Microsoft", "Netflix", "NVIDIA", "Tesla", "Oracle", "Salesforce",
    "IBM", "Intel", "Qualcomm", "Cisco", "Adobe", "Uber", "Airbnb", "ServiceNow", "Snowflake",
    "Databricks", "OpenAI", "Palantir", "Zoom", "Workday", "Stripe", "Block", "Atlassian", "DoorDash",
    "eBay", "LinkedIn", "Lyft", "Reddit", "Shopify", "Pinterest", "Cloudflare", "Twilio", "Splunk",
    "AMD", "MongoDB", "HashiCorp", "GitHub", "GitLab", "Coinbase", "TikTok", "Bytedance"
})
# ---------------------- Styling ----------------------
# Sentiment label -> badge color + emoji for the card UI.
# NOTE(review): the emoji glyphs were mojibake in the recovered source;
# restored to the colored circles implied by the badge colors
# (green/red/amber/cyan) — confirm against the live UI.
SENTI = {
    "POSITIVE": {"color": "#10b981", "emoji": "🟢"},
    "NEGATIVE": {"color": "#ef4444", "emoji": "🔴"},
    "NEUTRAL": {"color": "#f59e0b", "emoji": "🟡"},
    "MIXED": {"color": "#06b6d4", "emoji": "🔵"},
}
# Light-theme stylesheet for the rendered card grid and hero header.
# Class names here (.grid, .card, .ctitle, .badge, .tag, ...) are the ones
# emitted by render_cards and the header HTML.
CSS_BASE = """
:root{
--bg:#f7f8fb; --panel:#ffffff; --text:#0f172a; --muted:#475569;
--card:#ffffff; --chip:#eef2ff; --shadow: 0 10px 24px rgba(2,6,23,.08);
}
* { font-family: ui-sans-serif, system-ui, -apple-system, Segoe UI, Roboto, Helvetica, Arial; }
body { background: var(--bg); color: var(--text); }
#root, .gradio-container { background: var(--bg); }
.container { max-width: 1200px; margin: 0 auto; }
.hero { display:flex; align-items:center; gap:14px; margin: 8px 0 18px; }
.title { font-weight:800; font-size: 28px; line-height:1.1; }
.subtitle { color: var(--muted); font-size:14px; }
.grid { display:grid; grid-template-columns: repeat(2, minmax(0,1fr)); gap:14px; }
@media (max-width: 900px){ .grid { grid-template-columns: 1fr; } }
.card { background: var(--card); padding:16px; border-radius:16px; box-shadow: var(--shadow); transition: transform .12s ease; border: 1px solid #e5e7eb; }
.card:hover { transform: translateY(-2px); }
.ctitle { font-weight:800; font-size:18px; margin-bottom:6px; }
.ctitle a { color:#0f172a; }
.ctitle a:hover { text-decoration:underline; }
.csummary { font-size:14px; line-height:1.55; margin:8px 0 10px; color:#334155; }
.row { display:flex; align-items:center; justify-content:space-between; gap:8px; flex-wrap: wrap; }
.badge { padding:4px 10px; border-radius:999px; color:white; font-weight:700; font-size:12px; display:inline-flex; gap:6px; align-items:center; box-shadow: 0 1px 0 rgba(15,23,42,.06); }
.tags { display:flex; gap:8px; flex-wrap: wrap; }
.tag { background: var(--chip); color: var(--text); opacity:.9; padding:4px 10px; border-radius:999px; font-size:12px; }
a { color: #0b5dd7; text-decoration: none; }
a:hover { text-decoration: underline; }
.small { color: var(--muted); font-size:12px; margin: 6px 0 0; }
"""
# ---------------------- Render helpers ----------------------
def format_summary_html(s: str) -> str:
    """Bold the three bullet labels so recruiters can scan quickly.

    Replaces leading "1)", "2.", "3)" markers (per line) with bolded
    labels. NOTE(review): the <b> tags were stripped in extraction; the
    docstring promises bolding, so they are restored here.
    """
    if not s:
        return ""
    s = re.sub(r'^\s*1[\)\.]\s*', '<b>What happened:</b> ', s, flags=re.IGNORECASE | re.MULTILINE)
    s = re.sub(r'^\s*2[\)\.]\s*', '<b>Business impact:</b> ', s, flags=re.IGNORECASE | re.MULTILINE)
    s = re.sub(r'^\s*3[\)\.]\s*', '<b>Risk or opportunity:</b> ', s, flags=re.IGNORECASE | re.MULTILINE)
    return s
def render_cards(rows: list[dict], entity_filter: str | None = None, sentiment_filter: str | None = None) -> str:
    """Render analyzed articles as an HTML card grid.

    Args:
        rows: pipeline rows with Title/URL/Summary/Sentiment/Entities keys.
        entity_filter: keep only rows whose Entities contain this substring
            (case-insensitive); falsy disables the filter.
        sentiment_filter: keep only rows with this sentiment label;
            "ALL" or falsy disables the filter.

    Returns:
        An HTML string using the .grid/.card/.badge/... classes from CSS_BASE.

    NOTE(review): the original inline markup was lost in extraction; this
    HTML is a reconstruction based on the class names defined in CSS_BASE.
    """
    if entity_filter:
        rows = [r for r in rows if entity_filter.lower() in (r.get("Entities", "").lower())]
    if sentiment_filter and sentiment_filter != "ALL":
        rows = [r for r in rows if r.get("Sentiment", "").upper() == sentiment_filter]
    html = ['<div class="grid">']
    if not rows:
        html.append('<div class="card">No results.</div>')
    for r in rows:
        # Unknown labels fall back to the NEUTRAL badge styling.
        senti = SENTI.get(r["Sentiment"].upper(), SENTI["NEUTRAL"])
        badge_style = f"background:{senti['color']}"
        ents = [e for e in (r.get('Entities') or '').split(', ') if e][:4]
        tag_html = "".join(f'<span class="tag">{x}</span>' for x in ents) or '<span class="tag">No entities</span>'
        source_html = f'<p class="small"><a href="{r["URL"]}" target="_blank" rel="noopener">Source</a></p>'
        html.append(f"""<div class="card">
  <div class="ctitle"><a href="{r['URL']}" target="_blank" rel="noopener">{r['Title']}</a></div>
  <div class="csummary">{format_summary_html(r['Summary'])}</div>
  <div class="row">
    <span class="badge" style="{badge_style}">{senti['emoji']} {r['Sentiment'].title()}</span>
    <div class="tags">{tag_html}</div>
  </div>
  {source_html}
</div>""")
    html.append("</div>")
    return "\n".join(html)
def make_sentiment_chart(df: pd.DataFrame):
    """Bar chart of article counts per sentiment label, in a fixed order."""
    if df.empty:
        return px.bar()
    label_order = ["POSITIVE", "NEUTRAL", "NEGATIVE", "MIXED"]
    counts = (
        df["Sentiment"]
        .value_counts()
        .reindex(label_order)
        .fillna(0)
        .reset_index()
    )
    counts.columns = ["Sentiment", "Count"]
    fig = px.bar(counts, x="Sentiment", y="Count", text="Count", height=340,
                 title="Sentiment distribution")
    fig.update_traces(textposition="outside")
    fig.update_layout(margin=dict(l=10, r=10, t=40, b=10), template="plotly_white",
                      xaxis_title=None, yaxis_title=None)
    return fig
def make_trend_chart(df: pd.DataFrame):
    """Line chart of mean numeric sentiment per day (+1/0/-1 mapping)."""
    if df.empty or "Date" not in df.columns:
        return px.line()
    score_map = {"POSITIVE": 1, "NEUTRAL": 0, "NEGATIVE": -1, "MIXED": 0}
    daily = df.copy()
    daily["Score"] = daily["Sentiment"].map(score_map).fillna(0)
    daily = daily.groupby("Date", as_index=False)["Score"].mean()
    fig = px.line(daily, x="Date", y="Score", title="Avg sentiment over time (by day)")
    fig.update_layout(margin=dict(l=10, r=10, t=40, b=10), template="plotly_white",
                      yaxis_range=[-1, 1])
    return fig
def make_forecast_chart(df: pd.DataFrame):
    """History of daily mean sentiment plus a 7-day linear extrapolation.

    A first-degree polyfit over day ordinals gives slope/intercept; the
    projection is plotted as a second trace on the same axes.
    """
    if df.empty or "Date" not in df.columns:
        return px.line(title="Forecast (insufficient data)")
    score_map = {"POSITIVE": 1, "NEUTRAL": 0, "NEGATIVE": -1, "MIXED": 0}
    history = df.copy()
    history["Score"] = history["Sentiment"].map(score_map).fillna(0)
    daily = history.groupby("Date", as_index=False)["Score"].mean().sort_values("Date")
    if len(daily) < 3:
        return px.line(daily, x="Date", y="Score", title="Forecast (needs β₯3 days)", template="plotly_white")
    day_ordinals = pd.to_datetime(daily["Date"]).map(pd.Timestamp.toordinal).to_numpy()
    slope, intercept = np.polyfit(day_ordinals, daily["Score"].to_numpy(), 1)
    last_day = pd.to_datetime(daily["Date"]).max()
    future_days = [last_day + pd.Timedelta(days=offset) for offset in range(1, 8)]
    projected = slope * np.array([d.toordinal() for d in future_days]) + intercept
    fig = px.line(daily, x="Date", y="Score",
                  title="Sentiment: history & 7-day linear forecast", markers=True)
    fig.add_scatter(x=future_days, y=projected, mode="lines+markers", name="Forecast")
    fig.update_layout(margin=dict(l=10, r=10, t=40, b=10), template="plotly_white",
                      yaxis_range=[-1, 1])
    return fig
# ---------------------- Extra sources (simple + free) ----------------------
# Shared User-Agent for the public HTTP endpoints polled below.
HEADERS = {"User-Agent": "NewsIntel/1.0"}


def fetch_press_releases(topic: str, limit: int = 5):
    """Fetch up to `limit` '<topic> press release' items from Google News RSS."""
    query = requests.utils.quote(f"{topic} press release")
    feed = feedparser.parse(f"https://news.google.com/rss/search?q={query}")
    return [
        {"title": entry.get("title", ""), "link": entry.get("link", "")}
        for entry in feed.entries[:limit]
    ]
def _try_greenhouse(board: str):
    """Fetch public postings from a Greenhouse board; [] on any failure."""
    url = f"https://boards-api.greenhouse.io/v1/boards/{board}/jobs"
    try:
        resp = requests.get(url, timeout=20, headers=HEADERS)
        if resp.status_code == 200:
            return [
                {
                    "title": job.get("title", ""),
                    # `location` can be present-but-null; `or {}` guards it.
                    "location": (job.get("location") or {}).get("name", ""),
                    "url": job.get("absolute_url", ""),
                }
                for job in resp.json().get("jobs", [])
            ]
    except Exception:
        # Best-effort source: network/JSON errors just mean "no data".
        pass
    return []
def _try_lever(board: str):
    """Fetch public postings from a Lever board; [] on any failure.

    Best-effort, never raises — mirrors _try_greenhouse.
    """
    api = f"https://api.lever.co/v0/postings/{board}?mode=json"
    try:
        r = requests.get(api, timeout=20, headers=HEADERS)
        if r.status_code == 200:
            data = r.json()
            return [
                {
                    "title": j.get("text", ""),
                    # `categories` can be present-but-null in Lever payloads;
                    # the original `j.get("categories",{})` returned None in
                    # that case and crashed on `.get` — use `or {}` instead
                    # (same pattern as _try_greenhouse's location guard).
                    "location": (j.get("categories") or {}).get("location", ""),
                    "url": j.get("hostedUrl", ""),
                }
                for j in data
            ]
    except Exception:
        pass
    return []
def fetch_jobs(topic: str, limit: int = 8):
    """Guess an ATS board slug from the topic and probe Greenhouse, then Lever."""
    slug = topic.lower().replace(" ", "")
    postings = _try_greenhouse(slug) or _try_lever(slug)
    return postings[:limit]
def grounded_summary(news_text: str, context: str = "") -> str:
    """Summarize `news_text` into three labeled bullets, grounded in `context`.

    The LLM is instructed to answer 'insufficient context' rather than
    invent facts when the context does not cover the news item.
    """
    sections = [
        "You are an analyst. Ground your bullets ONLY in the provided context. "
        "If the context is insufficient, say 'insufficient context'.\n\n",
        f"Context:\n{(context or '').strip()}\n\n",
        "Task: Summarize the following news into 3 bullets:\n",
        "1) What happened 2) Business impact 3) Risk or opportunity\n",
        "Limit ~90 words.\n\n",
        f"News:\n{news_text}",
    ]
    return summarize("".join(sections))
def make_briefing(topic: str, rows: list[dict], press: list[dict], jobs: list[dict], timestamp_str: str) -> str:
    """Build an interview-briefing prompt from news/press/jobs and summarize it."""
    news_lines = [f"- {r['Title']} ({r['URL']})" for r in rows[:6]]
    press_lines = [f"- {p['title']} ({p['link']})" for p in press[:5]]
    job_lines = [f"- {j['title']} ({j.get('location','')}) β {j['url']}" for j in jobs[:5]]
    news_bits = "\n".join(news_lines) or "β"
    press_bits = "\n".join(press_lines) or "β"
    jobs_bits = "\n".join(job_lines) or "No jobs found."
    prompt = (
        f"You are preparing an interview briefing about '{topic}'. "
        "Synthesize:\n"
        f"Recent news:\n{news_bits}\n\n"
        f"Press releases:\n{press_bits}\n\n"
        f"Open roles snapshot:\n{jobs_bits}\n\n"
        "Output:\n- 3 bullets: momentum (facts)\n- 3 bullets: risks\n- 3 bullets: opportunities\n"
        "- 3 bullets: interview talking points with 1β2 citations.\nKeep it under 220 words.\n"
        f"(Data last updated: {timestamp_str})"
    )
    return summarize(prompt)
# ---------------------- Core pipeline with caching ----------------------
def agentic_get_news(topic: str, days: int, k: int, query_hint: str = ""):
    """Fetch news for `topic` with caching and a widened-window retry.

    Builds an OR-query of three phrasings; if the first pass returns fewer
    than `k` articles, retries once with the lookback extended by 7 days
    (capped at 30).
    """
    cached = get_cache("news", topic, days, k, query_hint)
    if cached:
        return cached
    variants = [
        f"{topic} {query_hint}".strip(),
        f'"{topic}" AND {query_hint}'.strip(),
        f"{topic} AI",
    ]
    query = " OR ".join(variants)
    results = get_news(query, days, k)
    if len(results) < k:
        # Under-filled: widen the window once and take whatever comes back.
        results = get_news(query, min(days + 7, 30), k)
    set_cache(results, "news", topic, days, k, query_hint)
    return results
def cached_press(topic: str):
    """Press-release lookup with a cache in front (6 items on a miss)."""
    hit = get_cache("press", topic)
    if hit:
        return hit
    releases = fetch_press_releases(topic, 6)
    set_cache(releases, "press", topic)
    return releases
def cached_jobs(topic: str):
    """Job-postings lookup with a cache in front (8 items on a miss)."""
    hit = get_cache("jobs", topic)
    if hit:
        return hit
    postings = fetch_jobs(topic, 8)
    set_cache(postings, "jobs", topic)
    return postings
def run_pipeline(topic, days, k, query_hint="", fast=True):
    """End-to-end run: fetch news/press/jobs, summarize and score each
    article in parallel, then assemble dataframes, metrics, and a briefing.

    Args:
        topic: company or theme to analyze.
        days: lookback window in days.
        k: number of articles to process.
        query_hint: extra keywords appended to the news search.
        fast: when True, skip entity and keyword extraction.

    Returns:
        (rows, df, metrics_df, rollup_df, briefing, press, jobs, timestamp_str)
    """
    # Gradio sliders may hand back floats; coerce once up front so the
    # ThreadPoolExecutor worker count below is a valid int (the original
    # passed the raw slider value into max_workers).
    days, k = int(days), int(k)
    articles = agentic_get_news(topic, days, k, query_hint=query_hint)
    press = cached_press(topic)
    jobs = cached_jobs(topic)
    today = datetime.date.today()
    rows, metrics = [], []

    def _process(a):
        # Summarize one article and attach sentiment (+ optional NLP extras).
        base_text = f"{a['title']} β {a['snippet']}"
        t0 = time.time()
        summary = grounded_summary(base_text, context=a.get("snippet", ""))
        latency = time.time() - t0
        sent = analyze_sentiment(summary)
        ents = [] if fast else analyze_entities(summary)
        kws = [] if fast else extract_keywords(summary, top_n=6)
        row = {
            "Title": a["title"],
            "URL": a["url"],
            "Summary": summary,
            "Sentiment": sent["label"].upper(),
            "Entities": "" if fast else ", ".join({e["word"] for e in ents[:6]}),
            "Key Phrases": "" if fast else ", ".join({kw["keyword"] for kw in kws[:6]}),
            # NOTE(review): published_date may not be a datetime.date like
            # `today`; downstream Date grouping assumes they mix — confirm
            # against search.get_news.
            "Date": a.get("published_date") or today,
        }
        met = {
            "title": a["title"], "latency_sec": round(latency, 3),
            "summary_tokens": len(summary.split()),
            "sentiment": sent["label"].upper(),
            "entity_count": 0 if fast else len(ents),
        }
        return row, met

    # Articles are independent; fan out across a small thread pool.
    with ThreadPoolExecutor(max_workers=min(4, max(1, k))) as ex:
        futures = [ex.submit(_process, a) for a in articles]
        for fut in as_completed(futures):
            r, m = fut.result()
            rows.append(r)
            metrics.append(m)
    # as_completed yields in finish order; sort for a stable presentation.
    rows.sort(key=lambda x: x["Title"])
    metrics.sort(key=lambda x: x["title"])
    df = pd.DataFrame(rows)
    mdf = pd.DataFrame(metrics)
    now = ts_now_utc()
    timestamp_str = f"{now.strftime('%b %d, %Y %I:%M %p')} UTC β’ {human_ago(now)}"
    briefing = make_briefing(topic, rows, press, jobs, timestamp_str)
    rollup = pd.DataFrame([{
        "articles": len(rows),
        "jobs_found": len(jobs),
        "press_releases": len(press),
        "avg_latency_sec": round(mdf["latency_sec"].mean(), 3) if not mdf.empty else 0.0,
        "updated_at": timestamp_str,
    }])
    return rows, df, mdf, rollup, briefing, press, jobs, timestamp_str
# ---------------------- Exporters (with branding) ----------------------
def export_briefing_html(topic: str, briefing_md: str, timestamp_str: str):
    """Write the briefing as a standalone branded HTML file; return its path.

    NOTE(review): the original HTML template was lost in extraction; this
    markup is a minimal reconstruction of the same visible content
    (title, timestamp line, briefing body, branding footer).
    """
    html = f"""<!doctype html>
<html>
<head><meta charset="utf-8"><title>{topic} — Briefing</title></head>
<body>
<h1>{topic} — Interview Briefing</h1>
<p><em>Data last updated: {timestamp_str}</em></p>
<div>{briefing_md}</div>
<hr>
<footer>Generated by NewsIntel Agent — Hasitha Varada</footer>
</body>
</html>
"""
    # Topics can contain path separators (e.g. "Tech R&D (Patents/AI)");
    # the original used the raw topic in the filename, which breaks the
    # temp path. Sanitize to a safe slug first.
    safe_topic = re.sub(r"[^\w.-]+", "_", topic) or "briefing"
    path = os.path.join(tempfile.gettempdir(), f"{safe_topic}_briefing.html")
    with open(path, "w", encoding="utf-8") as f:
        f.write(html)
    return path
# ReportLab is an optional dependency: PDF export is offered only when it
# is installed; the HTML export path works either way.
try:
    import reportlab # optional
    HAS_PDF = True
except Exception:
    HAS_PDF = False
def export_briefing_pdf(topic: str, briefing_md: str, timestamp_str: str):
    """Render the briefing to a simple one-column PDF; return its path,
    or None when reportlab is not installed.

    Layout: lines are hard-truncated at 115 characters (no word wrap),
    a new page starts when the cursor drops below the 1-inch bottom
    margin, and a branding footer is drawn on every page.
    """
    if not HAS_PDF: return None
    # Imported lazily so the module loads without reportlab installed.
    from reportlab.lib.pagesizes import letter
    from reportlab.pdfgen import canvas
    from reportlab.lib.units import inch
    path = os.path.join(tempfile.gettempdir(), f"{topic}_briefing.pdf")
    c = canvas.Canvas(path, pagesize=letter)
    width, height = letter
    # Cursor starts at the top-left content corner (0.75" left, 1" top margin).
    x, y = 0.75*inch, height - 1*inch
    # Title and timestamp header.
    c.setFont("Helvetica-Bold", 14); c.drawString(x, y, f"{topic} β Interview Briefing")
    c.setFont("Helvetica", 9); y -= 0.25*inch; c.drawString(x, y, f"Data last updated: {timestamp_str}")
    c.setFont("Times-Roman", 11); y -= 0.35*inch
    for line in briefing_md.splitlines():
        # Blank lines become vertical spacing only.
        if not line.strip(): y -= 0.18*inch; continue
        c.drawString(x, y, line[:115]); y -= 0.18*inch
        if y < 1*inch:
            # Page full: stamp the footer, break the page, reset the cursor
            # and restore the body font for the next page.
            c.setFont("Helvetica", 9)
            c.drawString(x, 0.7*inch, "Generated by NewsIntel Agent β Hasitha Varada")
            c.showPage()
            x, y = 0.75*inch, height - 1*inch
            c.setFont("Times-Roman", 11)
    # Footer on the final (possibly only) page.
    c.setFont("Helvetica", 9)
    c.drawString(x, 0.7*inch, "Generated by NewsIntel Agent β Hasitha Varada")
    c.save()
    return path
# ---------------------- Gradio callbacks ----------------------
def estimate_eta_secs(k: int, fast: bool) -> int:
    """Rough ETA in seconds: per-article cost (1s fast / 3s full) plus 2s
    fixed overhead, floored at 3s."""
    per_article = 1 if fast else 3
    return max(3, per_article * int(k) + 2)


def start_banner(k, fast_mode):
    """Status banner shown while the analysis pipeline runs."""
    eta_secs = estimate_eta_secs(k, bool(fast_mode))
    return f"β³ Running analysis (~{eta_secs}s). Models are warm-started; first run may take longer..."
def analyze_news(mode, preset_company, topic, days, k, entity_filter, sentiment_filter, fast_mode):
    """Main Gradio callback: run the pipeline and fan results out to widgets.

    Returns the tuple expected by the run_btn wiring: header markdown,
    cards HTML, three plots, two dataframes, rollup, briefing text, and
    an update refreshing the entity-filter choices.
    """
    if mode in ONE_CLICK:
        query_hint = ONE_CLICK[mode]["query_hint"]
    else:
        query_hint = ""
    # Fold the preset company into the topic unless it is already mentioned.
    if preset_company and preset_company.lower() not in (topic or "").lower():
        topic = f"{topic} {preset_company}".strip()
    rows, df, mdf, rollup, briefing, press, jobs, ts = run_pipeline(
        topic, days, k, query_hint=query_hint, fast=bool(fast_mode)
    )
    cards_html = render_cards(rows, entity_filter or None, sentiment_filter or None)
    # Collect up to 50 distinct entity names for the filter dropdown.
    entity_names = set()
    for r in rows:
        for ent in r.get("Entities", "").split(", "):
            ent = ent.strip()
            if ent:
                entity_names.add(ent)
    all_ents = sorted(entity_names)[:50]
    header = f"ποΈ NewsIntel β Data last updated: {ts}"
    metrics_df = mdf if not mdf.empty else pd.DataFrame([{"note": "No per-article metrics yet"}])
    return (
        header,
        cards_html,
        make_sentiment_chart(df),
        make_trend_chart(df),
        make_forecast_chart(df),
        df,
        metrics_df,
        rollup,
        briefing,
        gr.update(choices=all_ents),
    )
def export_cb(topic, briefing_md, timestamp_str):
    """Create HTML/PDF briefing files, upload to S3 when configured, and
    return (html_path, pdf_path_or_None, links_html) for the UI.

    NOTE(review): the anchor markup was lost in extraction; reconstructed
    as plain target=_blank links joined with <br>.
    """
    html_path = export_briefing_html(topic, briefing_md, timestamp_str)
    pdf_path = export_briefing_pdf(topic, briefing_md, timestamp_str)
    html_url = s3_upload(html_path)
    pdf_url = s3_upload(pdf_path) if pdf_path else None
    links = []
    if html_url:
        links.append(f'<a href="{html_url}" target="_blank">View HTML on S3</a>')
    if pdf_url:
        links.append(f'<a href="{pdf_url}" target="_blank">View PDF on S3</a>')
    links_html = "<br>".join(links) if links else "(S3 links will appear here if configured)"
    return html_path, (pdf_path or None), links_html
def email_weekly_cb(topic, email, briefing_md, timestamp_str):
    """Send the current briefing via SES and return a status message.

    NOTE(review): the HTML email body and the status emoji were lost/garbled
    in extraction; reconstructed minimally (heading, timestamp, body).
    """
    if not email:
        return "Enter your email first."
    if not (briefing_md or "").strip():
        return "No briefing yet β run analysis first."
    body_html = (
        f"<h2>{topic} — Weekly Briefing</h2>"
        f"<p><em>Data last updated: {timestamp_str}</em></p>"
        f"<div>{briefing_md}</div>"
    )
    ok = ses_send_email(
        email,
        f"Weekly Briefing β {topic}",
        body_html,
    )
    return "Email sent via SES ✅" if ok else "SES not configured or send failed ❌"
# ---------------------- UI ----------------------
# Application UI. NOTE(review): the hero's inline HTML was lost in
# extraction and is reconstructed below from the .hero/.title/.subtitle
# classes; CSS_BASE was defined but never referenced in the recovered
# source, so it is wired in via the Blocks `css=` parameter — confirm
# against the original deployment.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="violet", neutral_hue="slate"), css=CSS_BASE) as demo:
    gr.HTML(
        """
        <div class="hero">
          <div style="font-size:32px">🗞️</div>
          <div>
            <div class="title">NewsIntel Agent — Job Briefings & Hiring Signals</div>
            <div class="subtitle">One-click modes · Cached results · Branded HTML/PDF · Optional S3/SES</div>
          </div>
        </div>
        """
    )
    # ---------- inputs ----------
    with gr.Row():
        mode = gr.Dropdown(choices=list(ONE_CLICK.keys()), value="General", label="One-Click Mode")
        preset_company = gr.Dropdown(choices=H1B_TECH_PRESETS, label="Company Presets (H-1B Tech)", allow_custom_value=True)
        topic = gr.Textbox(label="Topic / Company", value="", placeholder="e.g., AMD, Healthcare AI, EV market India")
        days = gr.Slider(1, 30, value=7, step=1, label="Lookback (days)")
        k = gr.Slider(3, 12, value=4, step=1, label="Articles")
        fast_mode = gr.Checkbox(value=True, label="β‘ Fast mode (skip Entities & Key Phrases)")
    with gr.Row():
        entity_filter = gr.Dropdown(choices=[], label="Filter by Mentioned Company/Person", value=None)
        sentiment_filter = gr.Dropdown(choices=["ALL", "POSITIVE", "NEUTRAL", "NEGATIVE", "MIXED"], value="ALL", label="Sentiment filter")
    run_btn = gr.Button("Run Analysis", variant="primary")
    # ---------- outputs ----------
    header_bar = gr.Markdown(value="ποΈ NewsIntel β Data last updated: β")
    with gr.Tab("Insights"):
        tip_md = gr.Markdown("π‘ **Tip:** *Entities* are detected names of companies/people/places (e.g., βTSMCβ, βARMβ). Use the filters to focus the feed.")
        cards = gr.HTML()
    with gr.Tab("Charts"):
        plot_sent = gr.Plot(label="Sentiment distribution")
        plot_trend = gr.Plot(label="Trend (avg sentiment by day)")
    with gr.Tab("Forecast"):
        gr.Markdown("βΉοΈ *The forecast projects the **average daily sentiment** trend 7 days ahead using a simple linear fit. Itβs a quick momentum signal, not a trading model.*")
        plot_forecast = gr.Plot(label="7-day sentiment forecast")
    with gr.Tab("Table"):
        table = gr.Dataframe(wrap=True)
    with gr.Tab("Metrics"):
        per_article = gr.Dataframe(wrap=True, label="Per-article metrics")
        rollup = gr.Dataframe(wrap=True, label="Run summary")
    with gr.Tab("Briefing"):
        briefing_md = gr.Markdown()
        timestamp_str = gr.Textbox(label="Timestamp", interactive=False)
        export_html = gr.File(label="Download HTML")
        export_pdf = gr.File(label="Download PDF (optional)")
        s3_links = gr.HTML(value="(S3 links will appear here if configured)")
        export_btn = gr.Button("Export Briefing (creates files)")
        with gr.Row():
            weekly_email = gr.Textbox(label="Email (SES)", placeholder="name@example.com")
            email_btn = gr.Button("Email Weekly Briefing (SES)")
        email_status = gr.Markdown()

    # ---------- helpers & wiring ----------
    def _apply_mode(m, current_topic):
        # Only fill the topic box from the preset when the user left it empty.
        cfg = ONE_CLICK.get(m, ONE_CLICK["General"])
        return gr.update(value=current_topic or cfg.get("topic", ""))

    mode.change(_apply_mode, inputs=[mode, topic], outputs=[topic])
    preset_company.change(lambda x: x or "", inputs=preset_company, outputs=topic)
    # Show banner -> run analysis -> stamp timestamp box
    run_btn.click(
        start_banner,
        inputs=[k, fast_mode],
        outputs=[header_bar]
    ).then(
        analyze_news,
        inputs=[mode, preset_company, topic, days, k, entity_filter, sentiment_filter, fast_mode],
        outputs=[header_bar, cards, plot_sent, plot_trend, plot_forecast, table, per_article, rollup, briefing_md, entity_filter]
    ).then(
        # Use UTC to stay consistent with the pipeline's UTC-labeled
        # timestamps (the original used naive local time here).
        lambda: datetime.datetime.now(datetime.timezone.utc).strftime("%b %d, %Y %I:%M %p"),
        inputs=[], outputs=[timestamp_str]
    )
    export_btn.click(export_cb, inputs=[topic, briefing_md, timestamp_str], outputs=[export_html, export_pdf, s3_links])
    email_btn.click(email_weekly_cb, inputs=[topic, weekly_email, briefing_md, timestamp_str], outputs=[email_status])
# Script entry point: launch the Gradio server (blocks until it stops).
if __name__ == "__main__":
    print("π Launching NewsIntel (light-only UI + caching + one-click modes + forecast)")
    demo.launch()