Dmitry Beresnev committed on
Commit
d9a7411
·
1 Parent(s): 4642f8e

add summary cache, fix summarizer, etc

Browse files
app/pages/05_Dashboard.py CHANGED
@@ -26,7 +26,7 @@ from components.news import (
26
  display_economic_calendar_widget
27
  )
28
  from utils.breaking_news_scorer import get_breaking_news_scorer
29
- from utils.llm_summarizer import OpenAICompatSummarizer
30
 
31
  # Import news scrapers
32
  try:
@@ -552,34 +552,16 @@ ai_summary_dfs = [
552
  predictions_df,
553
  ]
554
 
555
- summarizer = OpenAICompatSummarizer()
556
- if summarizer.enabled:
557
- all_items = []
558
- for df in ai_summary_dfs:
559
- if df.empty:
560
- continue
561
- if "summary_raw" not in df.columns:
562
- df["summary_raw"] = df.get("summary", "")
563
- records = df.to_dict("records")
564
- for record in records:
565
- if "summary_raw" not in record:
566
- record["summary_raw"] = record.get("summary", "")
567
- all_items.extend(records)
568
-
569
- if all_items:
570
- with st.spinner("Summarizing news with AI..."):
571
- summarizer.summarize_items(all_items, source="dashboard")
572
-
573
- ai_map = {
574
- item.get("id"): item.get("summary_ai")
575
- for item in all_items
576
- if item.get("id") is not None
577
- }
578
- for df in ai_summary_dfs:
579
- if df.empty or "id" not in df.columns:
580
- continue
581
- df["summary_ai"] = df["id"].map(ai_map)
582
- df["summary"] = df["summary_ai"].fillna(df["summary"])
583
 
584
  # Clear force refresh flag after fetching is complete
585
  if force_refresh:
@@ -891,19 +873,43 @@ for df in ai_summary_dfs:
891
  ai_summary_pct = (ai_summarized / total_items * 100) if total_items else 0.0
892
 
893
  st.markdown("---")
894
- st.markdown("## 🤖 AI Summary")
895
- st.markdown(
896
- f"""
897
- <div style="background: linear-gradient(135deg, #1E222D 0%, #131722 100%); border: 1px solid #2A2E39; border-radius: 8px; padding: 20px; margin-bottom: 12px;">
898
- <div style="color: #E0E3EB; font-size: 16px; font-weight: 600; margin-bottom: 6px;">Current AI Summarizations</div>
899
- <div style="color: #D1D4DC; font-size: 14px; line-height: 1.6;">
900
- {ai_summarized} / {total_items} items summarized
901
- <span style="color: #787B86; font-size: 12px; margin-left: 8px;">({ai_summary_pct:.1f}% coverage)</span>
 
 
 
 
 
 
902
  </div>
903
- </div>
904
- """,
905
- unsafe_allow_html=True,
906
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
907
 
908
  # Auto-refresh logic
909
  if auto_refresh:
 
26
  display_economic_calendar_widget
27
  )
28
  from utils.breaking_news_scorer import get_breaking_news_scorer
29
+ from utils.ai_summary_cache import ai_summary_cache
30
 
31
  # Import news scrapers
32
  try:
 
552
  predictions_df,
553
  ]
554
 
555
# Collect every record from the dashboard's summary-eligible frames and hand
# them to the shared AI summary cache. The cache dedupes and batches, so this
# can run on every Streamlit rerun without re-summarizing items.
all_items = []
for df in ai_summary_dfs:
    if df.empty:
        continue
    # to_dict("records") yields one plain dict per row — the shape
    # AISummaryCache.buffer_items expects.
    records = df.to_dict("records")
    all_items.extend(records)

if all_items:
    # buffer_items queues new items; maybe_flush only calls the LLM once the
    # buffering window has elapsed, so this is cheap on most reruns.
    ai_summary_cache.buffer_items(all_items)
    ai_summary_cache.maybe_flush()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
565
 
566
  # Clear force refresh flag after fetching is complete
567
  if force_refresh:
 
873
  ai_summary_pct = (ai_summarized / total_items * 100) if total_items else 0.0
874
 
875
  st.markdown("---")
876
@st.fragment(run_every=60)
def render_ai_summary_section():
    """Render the AI Summary panel; re-runs every 60s as a Streamlit fragment.

    Reads the shared in-memory summary cache and the page-level coverage
    counters (ai_summarized / total_items / ai_summary_pct) computed above.
    NOTE(review): summary/title text is interpolated into raw HTML with
    unsafe_allow_html — assumes upstream content is trusted; confirm.
    """
    summaries, last_update = ai_summary_cache.get_summaries()
    # Fall back to "N/A" until the cache has completed its first flush.
    last_update_text = last_update.strftime("%Y-%m-%d %H:%M:%S") if last_update else "N/A"
    st.markdown("## 🤖 AI Summary")
    st.markdown(
        f"""
        <div style="background: linear-gradient(135deg, #1E222D 0%, #131722 100%); border: 1px solid #2A2E39; border-radius: 8px; padding: 20px; margin-bottom: 12px;">
            <div style="color: #E0E3EB; font-size: 16px; font-weight: 600; margin-bottom: 6px;">Current AI Summarizations</div>
            <div style="color: #D1D4DC; font-size: 14px; line-height: 1.6;">
                {ai_summarized} / {total_items} items summarized
                <span style="color: #787B86; font-size: 12px; margin-left: 8px;">({ai_summary_pct:.1f}% coverage)</span>
            </div>
            <div style="color: #787B86; font-size: 12px; margin-top: 6px;">Last update: {last_update_text}</div>
        </div>
        """,
        unsafe_allow_html=True,
    )

    if summaries:
        # Cap the list at the 50 most recent entries (get_summaries returns
        # newest-first) to keep the page light.
        for item in summaries[:50]:
            source = item.get("source", "")
            summary = item.get("summary", "")
            title = item.get("title", "")
            st.markdown(
                f"""
                <div style="background: #131722; border: 1px solid #2A2E39; border-radius: 6px; padding: 10px; margin-bottom: 8px;">
                    <div style="color: #E0E3EB; font-size: 13px; font-weight: 600;">{source} — {title}</div>
                    <div style="color: #D1D4DC; font-size: 13px; margin-top: 4px;">{summary}</div>
                </div>
                """,
                unsafe_allow_html=True,
            )
    else:
        st.info("AI summaries will appear after the 2-minute buffering window completes.")


render_ai_summary_section()
913
 
914
  # Auto-refresh logic
915
  if auto_refresh:
app/utils/ai_summary_cache.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Shared in-memory AI summary cache with buffering and batching."""
2
+
3
+ import os
4
+ import threading
5
+ from datetime import datetime, timedelta
6
+ from typing import Dict, List, Optional, Tuple
7
+
8
+ from utils.llm_summarizer import OpenAICompatSummarizer
9
+
10
+ # Approx 4 chars per token -> 600 tokens ~= 2400 chars
11
+ DEFAULT_BATCH_MAX_CHARS = int(os.getenv("LLM_SUMMARY_BATCH_MAX_CHARS", "2400"))
12
+ BUFFER_SECONDS = int(os.getenv("LLM_SUMMARY_BUFFER_SECONDS", "120"))
13
+
14
+
15
+ class AISummaryCache:
16
+ def __init__(self):
17
+ self._lock = threading.Lock()
18
+ self._buffer: List[Dict] = []
19
+ self._buffer_start: Optional[datetime] = None
20
+ self._summaries: Dict[str, Dict] = {}
21
+ self._last_update: Optional[datetime] = None
22
+
23
+ def buffer_items(self, items: List[Dict]):
24
+ if not items:
25
+ return
26
+ with self._lock:
27
+ for item in items:
28
+ key = self._item_key(item)
29
+ if not key or key in self._summaries:
30
+ continue
31
+ self._buffer.append(item)
32
+ if self._buffer and self._buffer_start is None:
33
+ self._buffer_start = datetime.now()
34
+
35
+ def maybe_flush(self):
36
+ with self._lock:
37
+ if not self._buffer or self._buffer_start is None:
38
+ return
39
+ if datetime.now() - self._buffer_start < timedelta(seconds=BUFFER_SECONDS):
40
+ return
41
+ items = self._buffer
42
+ self._buffer = []
43
+ self._buffer_start = None
44
+
45
+ summarizer = OpenAICompatSummarizer()
46
+ if not summarizer.enabled:
47
+ return
48
+
49
+ batches = self._batch_items(items, DEFAULT_BATCH_MAX_CHARS)
50
+ for batch in batches:
51
+ texts = [self._build_input_text(item) for item in batch]
52
+ texts = [t for t in texts if t]
53
+ if not texts:
54
+ continue
55
+ summaries = summarizer._summarize_chunk(texts, source="dashboard")
56
+ if not summaries:
57
+ continue
58
+ with self._lock:
59
+ for item, summary in zip(batch, summaries):
60
+ key = self._item_key(item)
61
+ if not key:
62
+ continue
63
+ self._summaries[key] = {
64
+ "id": item.get("id", key),
65
+ "title": item.get("title", ""),
66
+ "source": item.get("source", ""),
67
+ "summary": summary,
68
+ "timestamp": datetime.now(),
69
+ }
70
+ self._last_update = datetime.now()
71
+
72
+ def get_summaries(self) -> Tuple[List[Dict], Optional[datetime]]:
73
+ with self._lock:
74
+ summaries = list(self._summaries.values())
75
+ last_update = self._last_update
76
+ summaries.sort(key=lambda x: x.get("timestamp", datetime.min), reverse=True)
77
+ return summaries, last_update
78
+
79
+ def _item_key(self, item: Dict) -> str:
80
+ if item.get("id") is not None:
81
+ return str(item.get("id"))
82
+ title = str(item.get("title", "")).strip()
83
+ source = str(item.get("source", "")).strip()
84
+ if not title:
85
+ return ""
86
+ return f"{source}|{title}".lower()
87
+
88
+ def _build_input_text(self, item: Dict) -> str:
89
+ title = str(item.get("title", "")).strip()
90
+ source = str(item.get("source", "")).strip()
91
+ if not title:
92
+ return ""
93
+ if source:
94
+ return f"Source: {source}\nTitle: {title}"
95
+ return f"Title: {title}"
96
+
97
+ def _batch_items(self, items: List[Dict], max_chars_total: int) -> List[List[Dict]]:
98
+ if max_chars_total <= 0:
99
+ return [items]
100
+ batches: List[List[Dict]] = []
101
+ current: List[Dict] = []
102
+ current_chars = 0
103
+ for item in items:
104
+ text = self._build_input_text(item)
105
+ if not text:
106
+ continue
107
+ text_len = len(text)
108
+ if current and current_chars + text_len > max_chars_total:
109
+ batches.append(current)
110
+ current = []
111
+ current_chars = 0
112
+ current.append(item)
113
+ current_chars += text_len
114
+ if current:
115
+ batches.append(current)
116
+ return batches
117
+
118
+
119
+ ai_summary_cache = AISummaryCache()
app/utils/llm_summarizer.py CHANGED
@@ -74,8 +74,11 @@ class OpenAICompatSummarizer:
74
  def _build_input_text(self, item: Dict) -> str:
75
  title = str(item.get("title", "")).strip()
76
  if title:
 
77
  if len(title) > self.max_chars_per_item:
78
  title = title[: self.max_chars_per_item].rstrip()
 
 
79
  return f"Title: {title}"
80
  return ""
81
 
 
74
    def _build_input_text(self, item: Dict) -> str:
        """Build the LLM input text for one news item.

        Returns "Source: <source>" and "Title: <title>" on two lines when a
        source is present, just the title line otherwise, and an empty string
        when the item has no title. Titles longer than
        self.max_chars_per_item are truncated to that budget.
        """
        title = str(item.get("title", "")).strip()
        if title:
            source = str(item.get("source", "")).strip()
            # Enforce the per-item character budget before formatting.
            if len(title) > self.max_chars_per_item:
                title = title[: self.max_chars_per_item].rstrip()
            if source:
                return f"Source: {source}\nTitle: {title}"
            return f"Title: {title}"
        return ""
84