Spaces:

Hasitha16
/

newsintel-agent

Sleeping

App Files Files Community

Hasitha16 committed on Aug 19, 2025

Commit

ebdf502

verified ·

1 Parent(s): 4eb834c

Update app.py

Browse files

Files changed (1) hide show

app.py +72 -33

app.py CHANGED Viewed

@@ -20,7 +20,7 @@ ONE_CLICK = {
     # Other domains
     "Finance (Earnings/Stocks)":       {"topic": "Earnings season", "query_hint": "earnings guidance revenue EPS"},
     "Tech R&D (Patents/AI)":           {"topic": "AI research", "query_hint": "foundation models patents transformer LLM"},
-    "General":                         {"topic": "NVIDIA", "query_hint": ""}
 }
 H1B_TECH_PRESETS = sorted(list({
@@ -207,7 +207,12 @@ def agentic_get_news(topic: str, days: int, k: int, query_hint: str = ""):
     cache_obj = get_cache("news", topic, days, k, query_hint)
     if cache_obj:
         return cache_obj
-    query = f"{topic} {query_hint}".strip()
     res = get_news(query, days, k)
     if len(res) < k:
         res = get_news(query, min(days + 7, 30), k)
@@ -228,35 +233,52 @@ def cached_jobs(topic: str):
     set_cache(j, "jobs", topic)
     return j
-def run_pipeline(topic, days, k, query_hint=""):
     articles = agentic_get_news(topic, int(days), int(k), query_hint=query_hint)
     press = cached_press(topic)
     jobs  = cached_jobs(topic)
-    rows, metrics = [], []
     today = datetime.date.today()
-    for a in articles:
         base_text = f"{a['title']} — {a['snippet']}"
         t0 = time.time()
         summary = grounded_summary(base_text, context=a.get("snippet",""))
         latency = time.time() - t0
         sent = analyze_sentiment(summary)
-        ents = analyze_entities(summary)
-        kws  = extract_keywords(summary, top_n=6)
-        rows.append({
             "Title": a["title"],
             "URL": a["url"],
             "Summary": summary,
             "Sentiment": sent["label"].upper(),
-            "Entities": ", ".join({e["word"] for e in ents[:6]}),
-            "Key Phrases": ", ".join({k["keyword"] for k in kws[:6]}),
             "Date": a.get("published_date") or today,
-        })
-        metrics.append({
             "title": a["title"], "latency_sec": round(latency,3),
             "summary_tokens": len(summary.split()),
             "sentiment": sent["label"].upper(),
-            "entity_count": len(ents)
-        })
     df = pd.DataFrame(rows)
     mdf = pd.DataFrame(metrics)
     timestamp_str = datetime.datetime.now().strftime("%b %d, %Y %I:%M %p")
@@ -270,6 +292,8 @@ def run_pipeline(topic, days, k, query_hint=""):
     }])
     return rows, df, mdf, rollup, briefing, press, jobs, timestamp_str
 # ---------------------- Exporters (with branding) ----------------------
 def export_briefing_html(topic: str, briefing_md: str, timestamp_str: str):
     html = f"""<!doctype html>
@@ -320,22 +344,31 @@ def export_briefing_pdf(topic: str, briefing_md: str, timestamp_str: str):
     return path
 # ---------------------- Gradio callbacks ----------------------
-def analyze_news(mode, preset_company, topic, days, k, entity_filter, sentiment_filter):
     query_hint = ONE_CLICK.get(mode, ONE_CLICK["General"])["query_hint"] if mode in ONE_CLICK else ""
-    rows, df, mdf, rollup, briefing, press, jobs, ts = run_pipeline(topic, days, k, query_hint=query_hint)
-    cards_html = render_cards(rows, entity_filter or None, sentiment_filter or None)
-    all_ents = sorted(set([e.strip() for r in rows for e in (r.get("Entities","").split(", ")) if e.strip()]))[:50]
-    header = f"🗞️ NewsIntel — Data last updated: {ts}"
-    return (header, cards_html,
-            make_sentiment_chart(df),
-            make_trend_chart(df),
-            make_forecast_chart(df),
-            df,
-            mdf if not mdf.empty else pd.DataFrame([{"note":"No per-article metrics yet"}]),
-            rollup,
-            briefing,
-            gr.update(choices=all_ents))
 def export_cb(topic, briefing_md, timestamp_str):
     html_path = export_briefing_html(topic, briefing_md, timestamp_str)
@@ -373,9 +406,12 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="violet", neutral_hue="slate"))
     with gr.Row():
         mode  = gr.Dropdown(choices=list(ONE_CLICK.keys()), value="General", label="One-Click Mode")
         preset_company = gr.Dropdown(choices=H1B_TECH_PRESETS, label="Company Presets (H-1B Tech)", allow_custom_value=True)
-        topic  = gr.Textbox(label="Topic / Company", value="NVIDIA", placeholder="e.g., Apple, Healthcare AI, EV Market India")
         days   = gr.Slider(1, 30, value=7, step=1, label="Lookback (days)")
         k      = gr.Slider(3, 20, value=8, step=1, label="Articles")
     with gr.Row():
         entity_filter = gr.Dropdown(choices=[], label="Filter by Mentioned Company/Person", value=None)
@@ -410,11 +446,14 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="violet", neutral_hue="slate"))
             email_status = gr.Markdown()
     # Wiring
-    def _apply_mode(m):
         cfg = ONE_CLICK.get(m, ONE_CLICK["General"])
-        return gr.update(value=cfg["topic"])
-    mode.change(_apply_mode, inputs=[mode], outputs=[topic])
     preset_company.change(lambda x: x or "", inputs=preset_company, outputs=topic)
     run_btn.click(

     # Other domains
     "Finance (Earnings/Stocks)":       {"topic": "Earnings season", "query_hint": "earnings guidance revenue EPS"},
     "Tech R&D (Patents/AI)":           {"topic": "AI research", "query_hint": "foundation models patents transformer LLM"},
+    "General": {"topic": "", "query_hint": ""}
 }
 H1B_TECH_PRESETS = sorted(list({
     cache_obj = get_cache("news", topic, days, k, query_hint)
     if cache_obj:
         return cache_obj
+    query = " OR ".join([
+        f"{topic} {query_hint}".strip(),
+        f'"{topic}" AND {query_hint}'.strip(),
+        f'{topic} AI',
+    ])
     res = get_news(query, days, k)
     if len(res) < k:
         res = get_news(query, min(days + 7, 30), k)
     set_cache(j, "jobs", topic)
     return j
+from concurrent.futures import ThreadPoolExecutor, as_completed
+def run_pipeline(topic, days, k, query_hint="", fast=True):
     articles = agentic_get_news(topic, int(days), int(k), query_hint=query_hint)
     press = cached_press(topic)
     jobs  = cached_jobs(topic)
     today = datetime.date.today()
+    rows, metrics = [], []
+    def _process(a):
         base_text = f"{a['title']} — {a['snippet']}"
         t0 = time.time()
         summary = grounded_summary(base_text, context=a.get("snippet",""))
         latency = time.time() - t0
         sent = analyze_sentiment(summary)
+        ents = [] if fast else analyze_entities(summary)
+        kws  = [] if fast else extract_keywords(summary, top_n=6)
+        row = {
             "Title": a["title"],
             "URL": a["url"],
             "Summary": summary,
             "Sentiment": sent["label"].upper(),
+            "Entities": "" if fast else ", ".join({e["word"] for e in ents[:6]}),
+            "Key Phrases": "" if fast else ", ".join({k["keyword"] for k in kws[:6]}),
             "Date": a.get("published_date") or today,
+        }
+        met = {
             "title": a["title"], "latency_sec": round(latency,3),
             "summary_tokens": len(summary.split()),
             "sentiment": sent["label"].upper(),
+            "entity_count": 0 if fast else len(ents)
+        }
+        return row, met
+    # Small pool keeps CPU Spaces happy
+    with ThreadPoolExecutor(max_workers=min(4, max(1, k))) as ex:
+        futures = [ex.submit(_process, a) for a in articles]
+        for fut in as_completed(futures):
+            r, m = fut.result()
+            rows.append(r); metrics.append(m)
+    # as_completed yields results in completion order, so sort alphabetically by title for a repeatable order (this is not the original article order)
+    rows.sort(key=lambda x: x["Title"])
+    metrics.sort(key=lambda x: x["title"])
     df = pd.DataFrame(rows)
     mdf = pd.DataFrame(metrics)
     timestamp_str = datetime.datetime.now().strftime("%b %d, %Y %I:%M %p")
     }])
     return rows, df, mdf, rollup, briefing, press, jobs, timestamp_str
 # ---------------------- Exporters (with branding) ----------------------
 def export_briefing_html(topic: str, briefing_md: str, timestamp_str: str):
     html = f"""<!doctype html>
     return path
 # ---------------------- Gradio callbacks ----------------------
+def analyze_news(mode, preset_company, topic, days, k, entity_filter, sentiment_filter, fast_mode):
+    """Gradio callback: run the news pipeline and build every dashboard output.
+
+    Looks up the query hint for ``mode`` in ONE_CLICK, appends
+    ``preset_company`` to ``topic`` when the topic does not already mention
+    it, then calls ``run_pipeline`` (``fast_mode`` is passed as ``fast``).
+
+    Returns a 10-tuple: header text, cards HTML, sentiment chart, trend
+    chart, forecast chart, article DataFrame, per-article metrics (or a
+    one-row placeholder when empty), rollup, briefing, and a ``gr.update``
+    carrying the entity choices collected from the rows.
+    """
     query_hint = ONE_CLICK.get(mode, ONE_CLICK["General"])["query_hint"] if mode in ONE_CLICK else ""
+    # add preset company into topic if missing
+    if preset_company and preset_company.lower() not in (topic or "").lower():
+        # `topic or ''` keeps a None topic from being rendered as the text "None"
+        topic = f"{topic or ''} {preset_company}".strip()
+    rows, df, mdf, rollup, briefing, press, jobs, ts = run_pipeline(
+        topic, days, k, query_hint=query_hint, fast=bool(fast_mode)
+    )
+    # These three values feed the return tuple; without them the callback
+    # raises NameError on header / cards_html / all_ents.
+    cards_html = render_cards(rows, entity_filter or None, sentiment_filter or None)
+    # Unique, non-empty entity names across all rows, capped at 50 choices
+    all_ents = sorted({e.strip() for r in rows for e in r.get("Entities", "").split(", ") if e.strip()})[:50]
+    header = f"🗞️ NewsIntel — Data last updated: {ts}"
+    return (header, cards_html, make_sentiment_chart(df), make_trend_chart(df),
+            make_forecast_chart(df), df, mdf if not mdf.empty else pd.DataFrame([{"note":"No per-article metrics yet"}]),
+            rollup, briefing, gr.update(choices=all_ents))
+# wiring: include fast_mode in inputs. NOTE(review): fast_mode (and presumably run_btn and the output components) is created inside the gr.Blocks layout further down — confirm this call lives in that wiring section rather than at module level
+run_btn.click(
+    analyze_news,
+    inputs=[mode, preset_company, topic, days, k, entity_filter, sentiment_filter, fast_mode],
+    outputs=[header_bar, cards, plot_sent, plot_trend, plot_forecast, table, per_article, rollup, briefing_md, entity_filter]
+).then(
+    lambda: datetime.datetime.now().strftime("%b %d, %Y %I:%M %p"),
+    inputs=[], outputs=[timestamp_str]
+)
 def export_cb(topic, briefing_md, timestamp_str):
     html_path = export_briefing_html(topic, briefing_md, timestamp_str)
     with gr.Row():
         mode  = gr.Dropdown(choices=list(ONE_CLICK.keys()), value="General", label="One-Click Mode")
         preset_company = gr.Dropdown(choices=H1B_TECH_PRESETS, label="Company Presets (H-1B Tech)", allow_custom_value=True)
+        topic  = gr.Textbox(label="Topic / Company",
+                            value="",               # was "NVIDIA"
+                            placeholder="e.g., AMD, Healthcare AI, EV market India")
         days   = gr.Slider(1, 30, value=7, step=1, label="Lookback (days)")
         k      = gr.Slider(3, 20, value=8, step=1, label="Articles")
+        fast_mode = gr.Checkbox(value=True, label="⚡ Fast mode (skip Entities & Key Phrases)")
     with gr.Row():
         entity_filter = gr.Dropdown(choices=[], label="Filter by Mentioned Company/Person", value=None)
             email_status = gr.Markdown()
     # Wiring
+    def _apply_mode(m, current_topic):  # callback for mode changes: returns a gr.update carrying the topic value to show
         cfg = ONE_CLICK.get(m, ONE_CLICK["General"])
+        # only set the topic if the box is empty; a non-empty current topic is kept as-is
+        new_value = current_topic or cfg.get("topic", "")
+        return gr.update(value=new_value)
+    mode.change(_apply_mode, inputs=[mode, topic], outputs=[topic])
     preset_company.change(lambda x: x or "", inputs=preset_company, outputs=topic)
     run_btn.click(