Dmitry Beresnev committed on
Commit ·
d9a7411
1
Parent(s): 4642f8e
add summary cache, fix summarizer, etc
Browse files- app/pages/05_Dashboard.py +47 -41
- app/utils/ai_summary_cache.py +119 -0
- app/utils/llm_summarizer.py +3 -0
app/pages/05_Dashboard.py
CHANGED
|
@@ -26,7 +26,7 @@ from components.news import (
|
|
| 26 |
display_economic_calendar_widget
|
| 27 |
)
|
| 28 |
from utils.breaking_news_scorer import get_breaking_news_scorer
|
| 29 |
-
from utils.
|
| 30 |
|
| 31 |
# Import news scrapers
|
| 32 |
try:
|
|
@@ -552,34 +552,16 @@ ai_summary_dfs = [
|
|
| 552 |
predictions_df,
|
| 553 |
]
|
| 554 |
|
| 555 |
-
|
| 556 |
-
|
| 557 |
-
|
| 558 |
-
|
| 559 |
-
|
| 560 |
-
|
| 561 |
-
|
| 562 |
-
|
| 563 |
-
|
| 564 |
-
|
| 565 |
-
if "summary_raw" not in record:
|
| 566 |
-
record["summary_raw"] = record.get("summary", "")
|
| 567 |
-
all_items.extend(records)
|
| 568 |
-
|
| 569 |
-
if all_items:
|
| 570 |
-
with st.spinner("Summarizing news with AI..."):
|
| 571 |
-
summarizer.summarize_items(all_items, source="dashboard")
|
| 572 |
-
|
| 573 |
-
ai_map = {
|
| 574 |
-
item.get("id"): item.get("summary_ai")
|
| 575 |
-
for item in all_items
|
| 576 |
-
if item.get("id") is not None
|
| 577 |
-
}
|
| 578 |
-
for df in ai_summary_dfs:
|
| 579 |
-
if df.empty or "id" not in df.columns:
|
| 580 |
-
continue
|
| 581 |
-
df["summary_ai"] = df["id"].map(ai_map)
|
| 582 |
-
df["summary"] = df["summary_ai"].fillna(df["summary"])
|
| 583 |
|
| 584 |
# Clear force refresh flag after fetching is complete
|
| 585 |
if force_refresh:
|
|
@@ -891,19 +873,43 @@ for df in ai_summary_dfs:
|
|
| 891 |
ai_summary_pct = (ai_summarized / total_items * 100) if total_items else 0.0
|
| 892 |
|
| 893 |
st.markdown("---")
|
| 894 |
-
st.
|
| 895 |
-
|
| 896 |
-
|
| 897 |
-
|
| 898 |
-
|
| 899 |
-
|
| 900 |
-
|
| 901 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 902 |
</div>
|
| 903 |
-
|
| 904 |
-
|
| 905 |
-
|
| 906 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 907 |
|
| 908 |
# Auto-refresh logic
|
| 909 |
if auto_refresh:
|
|
|
|
| 26 |
display_economic_calendar_widget
|
| 27 |
)
|
| 28 |
from utils.breaking_news_scorer import get_breaking_news_scorer
|
| 29 |
+
from utils.ai_summary_cache import ai_summary_cache
|
| 30 |
|
| 31 |
# Import news scrapers
|
| 32 |
try:
|
|
|
|
| 552 |
predictions_df,
|
| 553 |
]
|
| 554 |
|
| 555 |
+
all_items = []
|
| 556 |
+
for df in ai_summary_dfs:
|
| 557 |
+
if df.empty:
|
| 558 |
+
continue
|
| 559 |
+
records = df.to_dict("records")
|
| 560 |
+
all_items.extend(records)
|
| 561 |
+
|
| 562 |
+
if all_items:
|
| 563 |
+
ai_summary_cache.buffer_items(all_items)
|
| 564 |
+
ai_summary_cache.maybe_flush()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 565 |
|
| 566 |
# Clear force refresh flag after fetching is complete
|
| 567 |
if force_refresh:
|
|
|
|
| 873 |
ai_summary_pct = (ai_summarized / total_items * 100) if total_items else 0.0
|
| 874 |
|
| 875 |
st.markdown("---")
|
| 876 |
+
@st.fragment(run_every=60)
|
| 877 |
+
def render_ai_summary_section():
|
| 878 |
+
summaries, last_update = ai_summary_cache.get_summaries()
|
| 879 |
+
last_update_text = last_update.strftime("%Y-%m-%d %H:%M:%S") if last_update else "N/A"
|
| 880 |
+
st.markdown("## 🤖 AI Summary")
|
| 881 |
+
st.markdown(
|
| 882 |
+
f"""
|
| 883 |
+
<div style=\"background: linear-gradient(135deg, #1E222D 0%, #131722 100%); border: 1px solid #2A2E39; border-radius: 8px; padding: 20px; margin-bottom: 12px;\">
|
| 884 |
+
<div style=\"color: #E0E3EB; font-size: 16px; font-weight: 600; margin-bottom: 6px;\">Current AI Summarizations</div>
|
| 885 |
+
<div style=\"color: #D1D4DC; font-size: 14px; line-height: 1.6;\">
|
| 886 |
+
{ai_summarized} / {total_items} items summarized
|
| 887 |
+
<span style=\"color: #787B86; font-size: 12px; margin-left: 8px;\">({ai_summary_pct:.1f}% coverage)</span>
|
| 888 |
+
</div>
|
| 889 |
+
<div style=\"color: #787B86; font-size: 12px; margin-top: 6px;\">Last update: {last_update_text}</div>
|
| 890 |
</div>
|
| 891 |
+
""",
|
| 892 |
+
unsafe_allow_html=True,
|
| 893 |
+
)
|
| 894 |
+
|
| 895 |
+
if summaries:
|
| 896 |
+
for item in summaries[:50]:
|
| 897 |
+
source = item.get("source", "")
|
| 898 |
+
summary = item.get("summary", "")
|
| 899 |
+
title = item.get("title", "")
|
| 900 |
+
st.markdown(
|
| 901 |
+
f"""
|
| 902 |
+
<div style=\"background: #131722; border: 1px solid #2A2E39; border-radius: 6px; padding: 10px; margin-bottom: 8px;\">
|
| 903 |
+
<div style=\"color: #E0E3EB; font-size: 13px; font-weight: 600;\">{source} — {title}</div>
|
| 904 |
+
<div style=\"color: #D1D4DC; font-size: 13px; margin-top: 4px;\">{summary}</div>
|
| 905 |
+
</div>
|
| 906 |
+
""",
|
| 907 |
+
unsafe_allow_html=True,
|
| 908 |
+
)
|
| 909 |
+
else:
|
| 910 |
+
st.info("AI summaries will appear after the 2-minute buffering window completes.")
|
| 911 |
+
|
| 912 |
+
render_ai_summary_section()
|
| 913 |
|
| 914 |
# Auto-refresh logic
|
| 915 |
if auto_refresh:
|
app/utils/ai_summary_cache.py
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Shared in-memory AI summary cache with buffering and batching."""
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
import threading
|
| 5 |
+
from datetime import datetime, timedelta
|
| 6 |
+
from typing import Dict, List, Optional, Tuple
|
| 7 |
+
|
| 8 |
+
from utils.llm_summarizer import OpenAICompatSummarizer
|
| 9 |
+
|
| 10 |
+
# Approx 4 chars per token -> 600 tokens ~= 2400 chars
|
| 11 |
+
DEFAULT_BATCH_MAX_CHARS = int(os.getenv("LLM_SUMMARY_BATCH_MAX_CHARS", "2400"))
|
| 12 |
+
BUFFER_SECONDS = int(os.getenv("LLM_SUMMARY_BUFFER_SECONDS", "120"))
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class AISummaryCache:
|
| 16 |
+
def __init__(self):
|
| 17 |
+
self._lock = threading.Lock()
|
| 18 |
+
self._buffer: List[Dict] = []
|
| 19 |
+
self._buffer_start: Optional[datetime] = None
|
| 20 |
+
self._summaries: Dict[str, Dict] = {}
|
| 21 |
+
self._last_update: Optional[datetime] = None
|
| 22 |
+
|
| 23 |
+
def buffer_items(self, items: List[Dict]):
|
| 24 |
+
if not items:
|
| 25 |
+
return
|
| 26 |
+
with self._lock:
|
| 27 |
+
for item in items:
|
| 28 |
+
key = self._item_key(item)
|
| 29 |
+
if not key or key in self._summaries:
|
| 30 |
+
continue
|
| 31 |
+
self._buffer.append(item)
|
| 32 |
+
if self._buffer and self._buffer_start is None:
|
| 33 |
+
self._buffer_start = datetime.now()
|
| 34 |
+
|
| 35 |
+
def maybe_flush(self):
|
| 36 |
+
with self._lock:
|
| 37 |
+
if not self._buffer or self._buffer_start is None:
|
| 38 |
+
return
|
| 39 |
+
if datetime.now() - self._buffer_start < timedelta(seconds=BUFFER_SECONDS):
|
| 40 |
+
return
|
| 41 |
+
items = self._buffer
|
| 42 |
+
self._buffer = []
|
| 43 |
+
self._buffer_start = None
|
| 44 |
+
|
| 45 |
+
summarizer = OpenAICompatSummarizer()
|
| 46 |
+
if not summarizer.enabled:
|
| 47 |
+
return
|
| 48 |
+
|
| 49 |
+
batches = self._batch_items(items, DEFAULT_BATCH_MAX_CHARS)
|
| 50 |
+
for batch in batches:
|
| 51 |
+
texts = [self._build_input_text(item) for item in batch]
|
| 52 |
+
texts = [t for t in texts if t]
|
| 53 |
+
if not texts:
|
| 54 |
+
continue
|
| 55 |
+
summaries = summarizer._summarize_chunk(texts, source="dashboard")
|
| 56 |
+
if not summaries:
|
| 57 |
+
continue
|
| 58 |
+
with self._lock:
|
| 59 |
+
for item, summary in zip(batch, summaries):
|
| 60 |
+
key = self._item_key(item)
|
| 61 |
+
if not key:
|
| 62 |
+
continue
|
| 63 |
+
self._summaries[key] = {
|
| 64 |
+
"id": item.get("id", key),
|
| 65 |
+
"title": item.get("title", ""),
|
| 66 |
+
"source": item.get("source", ""),
|
| 67 |
+
"summary": summary,
|
| 68 |
+
"timestamp": datetime.now(),
|
| 69 |
+
}
|
| 70 |
+
self._last_update = datetime.now()
|
| 71 |
+
|
| 72 |
+
def get_summaries(self) -> Tuple[List[Dict], Optional[datetime]]:
|
| 73 |
+
with self._lock:
|
| 74 |
+
summaries = list(self._summaries.values())
|
| 75 |
+
last_update = self._last_update
|
| 76 |
+
summaries.sort(key=lambda x: x.get("timestamp", datetime.min), reverse=True)
|
| 77 |
+
return summaries, last_update
|
| 78 |
+
|
| 79 |
+
def _item_key(self, item: Dict) -> str:
|
| 80 |
+
if item.get("id") is not None:
|
| 81 |
+
return str(item.get("id"))
|
| 82 |
+
title = str(item.get("title", "")).strip()
|
| 83 |
+
source = str(item.get("source", "")).strip()
|
| 84 |
+
if not title:
|
| 85 |
+
return ""
|
| 86 |
+
return f"{source}|{title}".lower()
|
| 87 |
+
|
| 88 |
+
def _build_input_text(self, item: Dict) -> str:
|
| 89 |
+
title = str(item.get("title", "")).strip()
|
| 90 |
+
source = str(item.get("source", "")).strip()
|
| 91 |
+
if not title:
|
| 92 |
+
return ""
|
| 93 |
+
if source:
|
| 94 |
+
return f"Source: {source}\nTitle: {title}"
|
| 95 |
+
return f"Title: {title}"
|
| 96 |
+
|
| 97 |
+
def _batch_items(self, items: List[Dict], max_chars_total: int) -> List[List[Dict]]:
|
| 98 |
+
if max_chars_total <= 0:
|
| 99 |
+
return [items]
|
| 100 |
+
batches: List[List[Dict]] = []
|
| 101 |
+
current: List[Dict] = []
|
| 102 |
+
current_chars = 0
|
| 103 |
+
for item in items:
|
| 104 |
+
text = self._build_input_text(item)
|
| 105 |
+
if not text:
|
| 106 |
+
continue
|
| 107 |
+
text_len = len(text)
|
| 108 |
+
if current and current_chars + text_len > max_chars_total:
|
| 109 |
+
batches.append(current)
|
| 110 |
+
current = []
|
| 111 |
+
current_chars = 0
|
| 112 |
+
current.append(item)
|
| 113 |
+
current_chars += text_len
|
| 114 |
+
if current:
|
| 115 |
+
batches.append(current)
|
| 116 |
+
return batches
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
ai_summary_cache = AISummaryCache()
|
app/utils/llm_summarizer.py
CHANGED
|
@@ -74,8 +74,11 @@ class OpenAICompatSummarizer:
|
|
| 74 |
def _build_input_text(self, item: Dict) -> str:
|
| 75 |
title = str(item.get("title", "")).strip()
|
| 76 |
if title:
|
|
|
|
| 77 |
if len(title) > self.max_chars_per_item:
|
| 78 |
title = title[: self.max_chars_per_item].rstrip()
|
|
|
|
|
|
|
| 79 |
return f"Title: {title}"
|
| 80 |
return ""
|
| 81 |
|
|
|
|
| 74 |
def _build_input_text(self, item: Dict) -> str:
|
| 75 |
title = str(item.get("title", "")).strip()
|
| 76 |
if title:
|
| 77 |
+
source = str(item.get("source", "")).strip()
|
| 78 |
if len(title) > self.max_chars_per_item:
|
| 79 |
title = title[: self.max_chars_per_item].rstrip()
|
| 80 |
+
if source:
|
| 81 |
+
return f"Source: {source}\nTitle: {title}"
|
| 82 |
return f"Title: {title}"
|
| 83 |
return ""
|
| 84 |
|