Hasitha16 committed on
Commit
ebdf502
·
verified ·
1 Parent(s): 4eb834c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -33
app.py CHANGED
@@ -20,7 +20,7 @@ ONE_CLICK = {
20
  # Other domains
21
  "Finance (Earnings/Stocks)": {"topic": "Earnings season", "query_hint": "earnings guidance revenue EPS"},
22
  "Tech R&D (Patents/AI)": {"topic": "AI research", "query_hint": "foundation models patents transformer LLM"},
23
- "General": {"topic": "NVIDIA", "query_hint": ""}
24
  }
25
 
26
  H1B_TECH_PRESETS = sorted(list({
@@ -207,7 +207,12 @@ def agentic_get_news(topic: str, days: int, k: int, query_hint: str = ""):
207
  cache_obj = get_cache("news", topic, days, k, query_hint)
208
  if cache_obj:
209
  return cache_obj
210
- query = f"{topic} {query_hint}".strip()
 
 
 
 
 
211
  res = get_news(query, days, k)
212
  if len(res) < k:
213
  res = get_news(query, min(days + 7, 30), k)
@@ -228,35 +233,52 @@ def cached_jobs(topic: str):
228
  set_cache(j, "jobs", topic)
229
  return j
230
 
231
- def run_pipeline(topic, days, k, query_hint=""):
 
 
232
  articles = agentic_get_news(topic, int(days), int(k), query_hint=query_hint)
233
  press = cached_press(topic)
234
  jobs = cached_jobs(topic)
235
- rows, metrics = [], []
236
  today = datetime.date.today()
237
- for a in articles:
 
 
238
  base_text = f"{a['title']} — {a['snippet']}"
239
  t0 = time.time()
240
  summary = grounded_summary(base_text, context=a.get("snippet",""))
241
  latency = time.time() - t0
242
  sent = analyze_sentiment(summary)
243
- ents = analyze_entities(summary)
244
- kws = extract_keywords(summary, top_n=6)
245
- rows.append({
246
  "Title": a["title"],
247
  "URL": a["url"],
248
  "Summary": summary,
249
  "Sentiment": sent["label"].upper(),
250
- "Entities": ", ".join({e["word"] for e in ents[:6]}),
251
- "Key Phrases": ", ".join({k["keyword"] for k in kws[:6]}),
252
  "Date": a.get("published_date") or today,
253
- })
254
- metrics.append({
255
  "title": a["title"], "latency_sec": round(latency,3),
256
  "summary_tokens": len(summary.split()),
257
  "sentiment": sent["label"].upper(),
258
- "entity_count": len(ents)
259
- })
 
 
 
 
 
 
 
 
 
 
 
 
 
260
  df = pd.DataFrame(rows)
261
  mdf = pd.DataFrame(metrics)
262
  timestamp_str = datetime.datetime.now().strftime("%b %d, %Y %I:%M %p")
@@ -270,6 +292,8 @@ def run_pipeline(topic, days, k, query_hint=""):
270
  }])
271
  return rows, df, mdf, rollup, briefing, press, jobs, timestamp_str
272
 
 
 
273
  # ---------------------- Exporters (with branding) ----------------------
274
  def export_briefing_html(topic: str, briefing_md: str, timestamp_str: str):
275
  html = f"""<!doctype html>
@@ -320,22 +344,31 @@ def export_briefing_pdf(topic: str, briefing_md: str, timestamp_str: str):
320
  return path
321
 
322
  # ---------------------- Gradio callbacks ----------------------
323
- def analyze_news(mode, preset_company, topic, days, k, entity_filter, sentiment_filter):
324
  query_hint = ONE_CLICK.get(mode, ONE_CLICK["General"])["query_hint"] if mode in ONE_CLICK else ""
325
- rows, df, mdf, rollup, briefing, press, jobs, ts = run_pipeline(topic, days, k, query_hint=query_hint)
326
-
327
- cards_html = render_cards(rows, entity_filter or None, sentiment_filter or None)
328
- all_ents = sorted(set([e.strip() for r in rows for e in (r.get("Entities","").split(", ")) if e.strip()]))[:50]
329
- header = f"🗞️ NewsIntel — Data last updated: {ts}"
330
- return (header, cards_html,
331
- make_sentiment_chart(df),
332
- make_trend_chart(df),
333
- make_forecast_chart(df),
334
- df,
335
- mdf if not mdf.empty else pd.DataFrame([{"note":"No per-article metrics yet"}]),
336
- rollup,
337
- briefing,
338
- gr.update(choices=all_ents))
 
 
 
 
 
 
 
 
 
339
 
340
  def export_cb(topic, briefing_md, timestamp_str):
341
  html_path = export_briefing_html(topic, briefing_md, timestamp_str)
@@ -373,9 +406,12 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="violet", neutral_hue="slate"))
373
  with gr.Row():
374
  mode = gr.Dropdown(choices=list(ONE_CLICK.keys()), value="General", label="One-Click Mode")
375
  preset_company = gr.Dropdown(choices=H1B_TECH_PRESETS, label="Company Presets (H-1B Tech)", allow_custom_value=True)
376
- topic = gr.Textbox(label="Topic / Company", value="NVIDIA", placeholder="e.g., Apple, Healthcare AI, EV Market India")
 
 
377
  days = gr.Slider(1, 30, value=7, step=1, label="Lookback (days)")
378
  k = gr.Slider(3, 20, value=8, step=1, label="Articles")
 
379
 
380
  with gr.Row():
381
  entity_filter = gr.Dropdown(choices=[], label="Filter by Mentioned Company/Person", value=None)
@@ -410,11 +446,14 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="violet", neutral_hue="slate"))
410
  email_status = gr.Markdown()
411
 
412
  # Wiring
413
- def _apply_mode(m):
414
  cfg = ONE_CLICK.get(m, ONE_CLICK["General"])
415
- return gr.update(value=cfg["topic"])
 
 
 
 
416
 
417
- mode.change(_apply_mode, inputs=[mode], outputs=[topic])
418
  preset_company.change(lambda x: x or "", inputs=preset_company, outputs=topic)
419
 
420
  run_btn.click(
 
20
  # Other domains
21
  "Finance (Earnings/Stocks)": {"topic": "Earnings season", "query_hint": "earnings guidance revenue EPS"},
22
  "Tech R&D (Patents/AI)": {"topic": "AI research", "query_hint": "foundation models patents transformer LLM"},
23
+ "General": {"topic": "", "query_hint": ""}
24
  }
25
 
26
  H1B_TECH_PRESETS = sorted(list({
 
207
  cache_obj = get_cache("news", topic, days, k, query_hint)
208
  if cache_obj:
209
  return cache_obj
210
+ query = " OR ".join([
211
+ f"{topic} {query_hint}".strip(),
212
+ f'"{topic}" AND {query_hint}'.strip(),
213
+ f'{topic} AI',
214
+ ])
215
+
216
  res = get_news(query, days, k)
217
  if len(res) < k:
218
  res = get_news(query, min(days + 7, 30), k)
 
233
  set_cache(j, "jobs", topic)
234
  return j
235
 
236
+ from concurrent.futures import ThreadPoolExecutor, as_completed
237
+
238
+ def run_pipeline(topic, days, k, query_hint="", fast=True):
239
  articles = agentic_get_news(topic, int(days), int(k), query_hint=query_hint)
240
  press = cached_press(topic)
241
  jobs = cached_jobs(topic)
242
+
243
  today = datetime.date.today()
244
+ rows, metrics = [], []
245
+
246
+ def _process(a):
247
  base_text = f"{a['title']} — {a['snippet']}"
248
  t0 = time.time()
249
  summary = grounded_summary(base_text, context=a.get("snippet",""))
250
  latency = time.time() - t0
251
  sent = analyze_sentiment(summary)
252
+ ents = [] if fast else analyze_entities(summary)
253
+ kws = [] if fast else extract_keywords(summary, top_n=6)
254
+ row = {
255
  "Title": a["title"],
256
  "URL": a["url"],
257
  "Summary": summary,
258
  "Sentiment": sent["label"].upper(),
259
+ "Entities": "" if fast else ", ".join({e["word"] for e in ents[:6]}),
260
+ "Key Phrases": "" if fast else ", ".join({k["keyword"] for k in kws[:6]}),
261
  "Date": a.get("published_date") or today,
262
+ }
263
+ met = {
264
  "title": a["title"], "latency_sec": round(latency,3),
265
  "summary_tokens": len(summary.split()),
266
  "sentiment": sent["label"].upper(),
267
+ "entity_count": 0 if fast else len(ents)
268
+ }
269
+ return row, met
270
+
271
+ # Small pool keeps CPU Spaces happy
272
+ with ThreadPoolExecutor(max_workers=min(4, max(1, k))) as ex:
273
+ futures = [ex.submit(_process, a) for a in articles]
274
+ for fut in as_completed(futures):
275
+ r, m = fut.result()
276
+ rows.append(r); metrics.append(m)
277
+
278
+ # Keep ordering stable by original article title
279
+ rows.sort(key=lambda x: x["Title"])
280
+ metrics.sort(key=lambda x: x["title"])
281
+
282
  df = pd.DataFrame(rows)
283
  mdf = pd.DataFrame(metrics)
284
  timestamp_str = datetime.datetime.now().strftime("%b %d, %Y %I:%M %p")
 
292
  }])
293
  return rows, df, mdf, rollup, briefing, press, jobs, timestamp_str
294
 
295
+
296
+
297
  # ---------------------- Exporters (with branding) ----------------------
298
  def export_briefing_html(topic: str, briefing_md: str, timestamp_str: str):
299
  html = f"""<!doctype html>
 
344
  return path
345
 
346
  # ---------------------- Gradio callbacks ----------------------
347
def analyze_news(mode, preset_company, topic, days, k, entity_filter, sentiment_filter, fast_mode=True):
    """Gradio callback: run the news pipeline and build every dashboard output.

    Args:
        mode: Selected One-Click mode; its ``query_hint`` is looked up in
            ``ONE_CLICK`` (a mode that is not a key yields an empty hint).
        preset_company: Optional company preset, appended to ``topic`` when the
            topic does not already mention it (case-insensitive).
        topic: Free-text topic / company; ``None`` is treated as empty.
        days: Lookback window, forwarded unchanged to ``run_pipeline``.
        k: Requested article count, forwarded unchanged to ``run_pipeline``.
        entity_filter: Entity to filter the cards by; falsy means no filter.
        sentiment_filter: Sentiment to filter the cards by; falsy means no filter.
        fast_mode: Truthy asks ``run_pipeline`` to skip entity and key-phrase
            extraction. Defaults to ``True`` so callers that pass only the
            first seven arguments keep working.

    Returns:
        A 10-tuple: header text, cards HTML, sentiment chart, trend chart,
        forecast chart, article DataFrame, per-article metrics DataFrame (or a
        one-row placeholder when it is empty), rollup, briefing, and a
        ``gr.update`` carrying the entity choices.
    """
    query_hint = ONE_CLICK[mode]["query_hint"] if mode in ONE_CLICK else ""

    # Normalise before combining: topic may be None, and formatting it straight
    # into the f-string would yield the literal text "None <preset>".
    topic = (topic or "").strip()
    preset = (preset_company or "").strip()
    if preset and preset.lower() not in topic.lower():
        topic = f"{topic} {preset}".strip()

    # press and jobs are returned by the pipeline but not shown by this callback.
    rows, df, mdf, rollup, briefing, _press, _jobs, ts = run_pipeline(
        topic, days, k, query_hint=query_hint, fast=bool(fast_mode)
    )

    cards_html = render_cards(rows, entity_filter or None, sentiment_filter or None)

    # Unique entity names across all rows, capped at 50 dropdown choices.
    # In fast mode "Entities" is an empty string, so this is simply [].
    all_ents = sorted({
        name.strip()
        for row in rows
        for name in row.get("Entities", "").split(", ")
        if name.strip()
    })[:50]

    header = f"🗞️ NewsIntel — Data last updated: {ts}"

    # Show a one-row note instead of an empty metrics table.
    per_article = mdf if not mdf.empty else pd.DataFrame([{"note": "No per-article metrics yet"}])

    return (
        header,
        cards_html,
        make_sentiment_chart(df),
        make_trend_chart(df),
        make_forecast_chart(df),
        df,
        per_article,
        rollup,
        briefing,
        gr.update(choices=all_ents),
    )
361
+
362
+ # wiring: include fast_mode in inputs
363
+ run_btn.click(
364
+ analyze_news,
365
+ inputs=[mode, preset_company, topic, days, k, entity_filter, sentiment_filter, fast_mode],
366
+ outputs=[header_bar, cards, plot_sent, plot_trend, plot_forecast, table, per_article, rollup, briefing_md, entity_filter]
367
+ ).then(
368
+ lambda: datetime.datetime.now().strftime("%b %d, %Y %I:%M %p"),
369
+ inputs=[], outputs=[timestamp_str]
370
+ )
371
+
372
 
373
  def export_cb(topic, briefing_md, timestamp_str):
374
  html_path = export_briefing_html(topic, briefing_md, timestamp_str)
 
406
  with gr.Row():
407
  mode = gr.Dropdown(choices=list(ONE_CLICK.keys()), value="General", label="One-Click Mode")
408
  preset_company = gr.Dropdown(choices=H1B_TECH_PRESETS, label="Company Presets (H-1B Tech)", allow_custom_value=True)
409
+ topic = gr.Textbox(label="Topic / Company",
410
+ value="", # was "NVIDIA"
411
+ placeholder="e.g., AMD, Healthcare AI, EV market India")
412
  days = gr.Slider(1, 30, value=7, step=1, label="Lookback (days)")
413
  k = gr.Slider(3, 20, value=8, step=1, label="Articles")
414
+ fast_mode = gr.Checkbox(value=True, label="⚡ Fast mode (skip Entities & Key Phrases)")
415
 
416
  with gr.Row():
417
  entity_filter = gr.Dropdown(choices=[], label="Filter by Mentioned Company/Person", value=None)
 
446
  email_status = gr.Markdown()
447
 
448
  # Wiring
449
def _apply_mode(m, current_topic):
    """Fill the topic box from the selected One-Click mode when it is blank.

    Args:
        m: Selected mode name; names missing from ``ONE_CLICK`` fall back to
            ``ONE_CLICK["General"]``.
        current_topic: Current contents of the topic textbox (may be ``None``).

    Returns:
        A ``gr.update`` whose value is ``current_topic`` unchanged when it
        holds non-whitespace text, otherwise the mode's preset topic
        (``""`` when the mode defines none).
    """
    cfg = ONE_CLICK.get(m, ONE_CLICK["General"])
    # Whitespace-only input counts as empty, so a stray space cannot block
    # the preset from being applied.
    has_text = bool(current_topic and current_topic.strip())
    new_value = current_topic if has_text else cfg.get("topic", "")
    return gr.update(value=new_value)
454
+
455
+ mode.change(_apply_mode, inputs=[mode, topic], outputs=[topic])
456
 
 
457
  preset_company.change(lambda x: x or "", inputs=preset_company, outputs=topic)
458
 
459
  run_btn.click(