Dmitry Beresnev committed on
Commit
d9a7411
·
1 Parent(s): 4642f8e

add summary cache, fix summarizer, etc

Browse files
app/pages/05_Dashboard.py CHANGED
@@ -26,7 +26,7 @@ from components.news import (
26
  display_economic_calendar_widget
27
  )
28
  from utils.breaking_news_scorer import get_breaking_news_scorer
29
- from utils.llm_summarizer import OpenAICompatSummarizer
30
 
31
  # Import news scrapers
32
  try:
@@ -552,34 +552,16 @@ ai_summary_dfs = [
552
  predictions_df,
553
  ]
554
 
555
- summarizer = OpenAICompatSummarizer()
556
- if summarizer.enabled:
557
- all_items = []
558
- for df in ai_summary_dfs:
559
- if df.empty:
560
- continue
561
- if "summary_raw" not in df.columns:
562
- df["summary_raw"] = df.get("summary", "")
563
- records = df.to_dict("records")
564
- for record in records:
565
- if "summary_raw" not in record:
566
- record["summary_raw"] = record.get("summary", "")
567
- all_items.extend(records)
568
-
569
- if all_items:
570
- with st.spinner("Summarizing news with AI..."):
571
- summarizer.summarize_items(all_items, source="dashboard")
572
-
573
- ai_map = {
574
- item.get("id"): item.get("summary_ai")
575
- for item in all_items
576
- if item.get("id") is not None
577
- }
578
- for df in ai_summary_dfs:
579
- if df.empty or "id" not in df.columns:
580
- continue
581
- df["summary_ai"] = df["id"].map(ai_map)
582
- df["summary"] = df["summary_ai"].fillna(df["summary"])
583
 
584
  # Clear force refresh flag after fetching is complete
585
  if force_refresh:
@@ -891,19 +873,43 @@ for df in ai_summary_dfs:
891
  ai_summary_pct = (ai_summarized / total_items * 100) if total_items else 0.0
892
 
893
  st.markdown("---")
894
- st.markdown("## 🤖 AI Summary")
895
- st.markdown(
896
- f"""
897
- <div style="background: linear-gradient(135deg, #1E222D 0%, #131722 100%); border: 1px solid #2A2E39; border-radius: 8px; padding: 20px; margin-bottom: 12px;">
898
- <div style="color: #E0E3EB; font-size: 16px; font-weight: 600; margin-bottom: 6px;">Current AI Summarizations</div>
899
- <div style="color: #D1D4DC; font-size: 14px; line-height: 1.6;">
900
- {ai_summarized} / {total_items} items summarized
901
- <span style="color: #787B86; font-size: 12px; margin-left: 8px;">({ai_summary_pct:.1f}% coverage)</span>
 
 
 
 
 
 
902
  </div>
903
- </div>
904
- """,
905
- unsafe_allow_html=True,
906
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
907
 
908
  # Auto-refresh logic
909
  if auto_refresh:
 
26
  display_economic_calendar_widget
27
  )
28
  from utils.breaking_news_scorer import get_breaking_news_scorer
29
+ from utils.ai_summary_cache import ai_summary_cache
30
 
31
  # Import news scrapers
32
  try:
 
552
  predictions_df,
553
  ]
554
 
555
# Collect every record from the dashboard's summary-eligible frames and hand
# them to the shared AI summary cache. The cache dedupes and batches, so this
# can run on every Streamlit rerun without re-summarizing items.
all_items = []
for df in ai_summary_dfs:
    if df.empty:
        continue
    # to_dict("records") yields one plain dict per row — the shape
    # AISummaryCache.buffer_items expects.
    records = df.to_dict("records")
    all_items.extend(records)

if all_items:
    # buffer_items queues new items; maybe_flush only calls the LLM once the
    # buffering window has elapsed, so this is cheap on most reruns.
    ai_summary_cache.buffer_items(all_items)
    ai_summary_cache.maybe_flush()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
565
 
566
  # Clear force refresh flag after fetching is complete
567
  if force_refresh:
 
873
  ai_summary_pct = (ai_summarized / total_items * 100) if total_items else 0.0
874
 
875
  st.markdown("---")
876
@st.fragment(run_every=60)
def render_ai_summary_section():
    """Render the AI Summary panel; re-runs every 60s as a Streamlit fragment.

    Reads the shared in-memory summary cache and the page-level coverage
    counters (ai_summarized / total_items / ai_summary_pct) computed above.
    NOTE(review): summary/title text is interpolated into raw HTML with
    unsafe_allow_html — assumes upstream content is trusted; confirm.
    """
    summaries, last_update = ai_summary_cache.get_summaries()
    # Fall back to "N/A" until the cache has completed its first flush.
    last_update_text = last_update.strftime("%Y-%m-%d %H:%M:%S") if last_update else "N/A"
    st.markdown("## 🤖 AI Summary")
    st.markdown(
        f"""
        <div style="background: linear-gradient(135deg, #1E222D 0%, #131722 100%); border: 1px solid #2A2E39; border-radius: 8px; padding: 20px; margin-bottom: 12px;">
            <div style="color: #E0E3EB; font-size: 16px; font-weight: 600; margin-bottom: 6px;">Current AI Summarizations</div>
            <div style="color: #D1D4DC; font-size: 14px; line-height: 1.6;">
                {ai_summarized} / {total_items} items summarized
                <span style="color: #787B86; font-size: 12px; margin-left: 8px;">({ai_summary_pct:.1f}% coverage)</span>
            </div>
            <div style="color: #787B86; font-size: 12px; margin-top: 6px;">Last update: {last_update_text}</div>
        </div>
        """,
        unsafe_allow_html=True,
    )

    if summaries:
        # Cap the list at the 50 most recent entries (get_summaries returns
        # newest-first) to keep the page light.
        for item in summaries[:50]:
            source = item.get("source", "")
            summary = item.get("summary", "")
            title = item.get("title", "")
            st.markdown(
                f"""
                <div style="background: #131722; border: 1px solid #2A2E39; border-radius: 6px; padding: 10px; margin-bottom: 8px;">
                    <div style="color: #E0E3EB; font-size: 13px; font-weight: 600;">{source} — {title}</div>
                    <div style="color: #D1D4DC; font-size: 13px; margin-top: 4px;">{summary}</div>
                </div>
                """,
                unsafe_allow_html=True,
            )
    else:
        st.info("AI summaries will appear after the 2-minute buffering window completes.")


render_ai_summary_section()
913
 
914
  # Auto-refresh logic
915
  if auto_refresh:
app/utils/ai_summary_cache.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Shared in-memory AI summary cache with buffering and batching."""
2
+
3
+ import os
4
+ import threading
5
+ from datetime import datetime, timedelta
6
+ from typing import Dict, List, Optional, Tuple
7
+
8
+ from utils.llm_summarizer import OpenAICompatSummarizer
9
+
10
+ # Approx 4 chars per token -> 600 tokens ~= 2400 chars
11
+ DEFAULT_BATCH_MAX_CHARS = int(os.getenv("LLM_SUMMARY_BATCH_MAX_CHARS", "2400"))
12
+ BUFFER_SECONDS = int(os.getenv("LLM_SUMMARY_BUFFER_SECONDS", "120"))
13
+
14
+
15
+ class AISummaryCache:
16
+ def __init__(self):
17
+ self._lock = threading.Lock()
18
+ self._buffer: List[Dict] = []
19
+ self._buffer_start: Optional[datetime] = None
20
+ self._summaries: Dict[str, Dict] = {}
21
+ self._last_update: Optional[datetime] = None
22
+
23
+ def buffer_items(self, items: List[Dict]):
24
+ if not items:
25
+ return
26
+ with self._lock:
27
+ for item in items:
28
+ key = self._item_key(item)
29
+ if not key or key in self._summaries:
30
+ continue
31
+ self._buffer.append(item)
32
+ if self._buffer and self._buffer_start is None:
33
+ self._buffer_start = datetime.now()
34
+
35
+ def maybe_flush(self):
36
+ with self._lock:
37
+ if not self._buffer or self._buffer_start is None:
38
+ return
39
+ if datetime.now() - self._buffer_start < timedelta(seconds=BUFFER_SECONDS):
40
+ return
41
+ items = self._buffer
42
+ self._buffer = []
43
+ self._buffer_start = None
44
+
45
+ summarizer = OpenAICompatSummarizer()
46
+ if not summarizer.enabled:
47
+ return
48
+
49
+ batches = self._batch_items(items, DEFAULT_BATCH_MAX_CHARS)
50
+ for batch in batches:
51
+ texts = [self._build_input_text(item) for item in batch]
52
+ texts = [t for t in texts if t]
53
+ if not texts:
54
+ continue
55
+ summaries = summarizer._summarize_chunk(texts, source="dashboard")
56
+ if not summaries:
57
+ continue
58
+ with self._lock:
59
+ for item, summary in zip(batch, summaries):
60
+ key = self._item_key(item)
61
+ if not key:
62
+ continue
63
+ self._summaries[key] = {
64
+ "id": item.get("id", key),
65
+ "title": item.get("title", ""),
66
+ "source": item.get("source", ""),
67
+ "summary": summary,
68
+ "timestamp": datetime.now(),
69
+ }
70
+ self._last_update = datetime.now()
71
+
72
+ def get_summaries(self) -> Tuple[List[Dict], Optional[datetime]]:
73
+ with self._lock:
74
+ summaries = list(self._summaries.values())
75
+ last_update = self._last_update
76
+ summaries.sort(key=lambda x: x.get("timestamp", datetime.min), reverse=True)
77
+ return summaries, last_update
78
+
79
+ def _item_key(self, item: Dict) -> str:
80
+ if item.get("id") is not None:
81
+ return str(item.get("id"))
82
+ title = str(item.get("title", "")).strip()
83
+ source = str(item.get("source", "")).strip()
84
+ if not title:
85
+ return ""
86
+ return f"{source}|{title}".lower()
87
+
88
+ def _build_input_text(self, item: Dict) -> str:
89
+ title = str(item.get("title", "")).strip()
90
+ source = str(item.get("source", "")).strip()
91
+ if not title:
92
+ return ""
93
+ if source:
94
+ return f"Source: {source}\nTitle: {title}"
95
+ return f"Title: {title}"
96
+
97
+ def _batch_items(self, items: List[Dict], max_chars_total: int) -> List[List[Dict]]:
98
+ if max_chars_total <= 0:
99
+ return [items]
100
+ batches: List[List[Dict]] = []
101
+ current: List[Dict] = []
102
+ current_chars = 0
103
+ for item in items:
104
+ text = self._build_input_text(item)
105
+ if not text:
106
+ continue
107
+ text_len = len(text)
108
+ if current and current_chars + text_len > max_chars_total:
109
+ batches.append(current)
110
+ current = []
111
+ current_chars = 0
112
+ current.append(item)
113
+ current_chars += text_len
114
+ if current:
115
+ batches.append(current)
116
+ return batches
117
+
118
+
119
+ ai_summary_cache = AISummaryCache()
app/utils/llm_summarizer.py CHANGED
@@ -74,8 +74,11 @@ class OpenAICompatSummarizer:
74
  def _build_input_text(self, item: Dict) -> str:
75
  title = str(item.get("title", "")).strip()
76
  if title:
 
77
  if len(title) > self.max_chars_per_item:
78
  title = title[: self.max_chars_per_item].rstrip()
 
 
79
  return f"Title: {title}"
80
  return ""
81
 
 
74
    def _build_input_text(self, item: Dict) -> str:
        """Build the LLM input text for one news item.

        Returns "Source: <source>" and "Title: <title>" on two lines when a
        source is present, just the title line otherwise, and an empty string
        when the item has no title. Titles longer than
        self.max_chars_per_item are truncated to that budget.
        """
        title = str(item.get("title", "")).strip()
        if title:
            source = str(item.get("source", "")).strip()
            # Enforce the per-item character budget before formatting.
            if len(title) > self.max_chars_per_item:
                title = title[: self.max_chars_per_item].rstrip()
            if source:
                return f"Source: {source}\nTitle: {title}"
            return f"Title: {title}"
        return ""
84