DivYonko committed on
Commit
277d6ae
·
1 Parent(s): 7da3a1d

Add PDF report export - full dashboard data in one click

Browse files
Files changed (3) hide show
  1. app.py +19 -0
  2. ml/report_generator.py +337 -0
  3. requirements.txt +2 -1
app.py CHANGED
@@ -1091,6 +1091,25 @@ with st.sidebar:
1091
  mime="text/csv",
1092
  key=f"dl_{_rkey}",
1093
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1094
  else:
1095
  st.markdown(f'<div style="font-size:0.72rem;color:var(--text-3);">{_slabel}: no data yet</div>', unsafe_allow_html=True)
1096
  else:
 
1091
  mime="text/csv",
1092
  key=f"dl_{_rkey}",
1093
  )
1094
+ # ── PDF Report button ──────────────────────────────
1095
+ try:
1096
+ from ml.report_generator import generate_report
1097
+ _stream_title = _s.get("video_title") or _s.get("video_id") or _rkey
1098
+ _pdf_bytes = generate_report(
1099
+ all_data=_dl_rows,
1100
+ stream_title=_stream_title,
1101
+ msg_limit=st.session_state.get("msg_limit", 100),
1102
+ )
1103
+ _pdf_fname = f"livepulse_report_{_rkey}_{_ts}.pdf"
1104
+ st.download_button(
1105
+ label=f"\U0001f4cb {_slabel} PDF Report",
1106
+ data=_pdf_bytes,
1107
+ file_name=_pdf_fname,
1108
+ mime="application/pdf",
1109
+ key=f"pdf_{_rkey}",
1110
+ )
1111
+ except Exception as _pdf_err:
1112
+ st.markdown(f'<div style="font-size:0.7rem;color:var(--text-3);">PDF unavailable: {_pdf_err}</div>', unsafe_allow_html=True)
1113
  else:
1114
  st.markdown(f'<div style="font-size:0.72rem;color:var(--text-3);">{_slabel}: no data yet</div>', unsafe_allow_html=True)
1115
  else:
ml/report_generator.py ADDED
@@ -0,0 +1,337 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ml/report_generator.py
2
+ """
3
+ PDF report generator for LivePulse dashboard.
4
+ Generates a structured PDF containing both Comments and Stats views.
5
+ Uses fpdf2 — no system dependencies required.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import io
10
+ from datetime import datetime
11
+ from collections import Counter
12
+
13
+ from fpdf import FPDF
14
+
15
+
16
+ # ── Colour palette (matches dashboard theme) ──────────────────────────────────
17
+ _C_BG = (7, 7, 15) # dark background: header band, cover band and non-alternate table rows
18
+ _C_CARD = (15, 15, 30) # fill for section-title bars, stat boxes and table headers
19
+ _C_ACCENT = (124, 58, 237) # purple: brand text, section borders, action-type bars
20
+ _C_POS = (34, 197, 94) # green: "Positive" sentiment
21
+ _C_NEU = (234, 179, 8) # yellow: "Neutral" sentiment
22
+ _C_NEG = (239, 68, 68) # red: "Negative" sentiment and the "no data" notice
23
+ _C_TEXT1 = (241, 245, 249) # light text (primary)
24
+ _C_TEXT2 = (148, 163, 184) # muted text (secondary); also the fallback colour for unknown topics/sentiments
25
+ _C_WHITE = (255, 255, 255) # NOTE(review): not referenced in the visible code
26
+ _C_DIVIDER = (30, 30, 50) # empty track drawn behind each horizontal bar
27
+
28
+ TOPIC_COLORS = { # topic label -> RGB used for topic bars and the topic column text
29
+ "Appreciation": (245, 158, 11),
30
+ "Question": ( 59, 130, 246),
31
+ "Request/Feedback":(139, 92, 246),
32
+ "Promo": (236, 72, 153),
33
+ "Spam": (239, 68, 68),
34
+ "General": (107, 114, 128),
35
+ "MCQ Answer": ( 16, 185, 129),
36
+ }
37
+
38
+
39
class LivePulsePDF(FPDF):
    """A4 portrait FPDF subclass carrying the LivePulse dark-theme branding.

    Besides the page header/footer it offers small drawing helpers
    (section title bar, stat box, horizontal bar, table header/row) used by
    the report generator.  All helpers render with the built-in Helvetica
    font, which can only encode Latin-1, so every piece of text is passed
    through ``_latin1_safe`` before it reaches ``cell``.
    """

    def __init__(self):
        super().__init__(orientation="P", unit="mm", format="A4")
        self.set_auto_page_break(auto=True, margin=15)
        self.set_margins(15, 15, 15)

    # ── Internal utilities ────────────────────────────────────────────────────

    @staticmethod
    def _latin1_safe(text) -> str:
        """Return ``text`` as a string that the core fonts can encode.

        Characters outside Latin-1 (emoji, CJK, ...) are replaced by ``?``
        instead of letting fpdf2 raise while rendering.
        """
        return str(text).encode("latin-1", "replace").decode("latin-1")

    def _ensure_room(self, height: float) -> None:
        """Start a new page if ``height`` mm would not fit on this one.

        Needed by helpers that draw rectangles at the current ``y``: a page
        break triggered later by ``cell`` would otherwise leave the shapes
        and their text on different pages.
        """
        if self.will_page_break(height):
            self.add_page()

    # ── Page furniture ────────────────────────────────────────────────────────

    def header(self):
        """Paint the dark page background and the branded top line."""
        # The helpers draw light text without a fill, so the whole page (not
        # only the top band) has to be dark for that text to be readable.
        self.set_fill_color(*_C_BG)
        self.rect(0, 0, self.w, self.h, "F")
        self.set_font("Helvetica", "B", 11)
        self.set_text_color(*_C_ACCENT)
        self.set_xy(self.l_margin, 6)
        self.cell(0, 8, "LivePulse | YouTube Live Chat Analytics", align="L")
        self.set_font("Helvetica", "", 8)
        self.set_text_color(*_C_TEXT2)
        # Rewind to the left margin so the right-aligned stamp gets the full
        # line width instead of a zero-width cell at the right margin.
        self.set_xy(self.l_margin, 6)
        self.cell(0, 8, f"Generated {datetime.now().strftime('%d %b %Y %H:%M')}", align="R")
        # Leave the cursor below the 20 mm header area so that content on
        # continuation pages does not overlap the header text.
        self.set_y(24)

    def footer(self):
        """Print the centred page number 12 mm above the bottom edge."""
        self.set_y(-12)
        self.set_font("Helvetica", "", 8)
        self.set_text_color(*_C_TEXT2)
        self.cell(0, 8, f"Page {self.page_no()}", align="C")

    # ── Helpers ───────────────────────────────────────────────────────────────

    def section_title(self, title: str, pill: str = "") -> None:
        """Draw a full-width title bar with an optional right-aligned pill.

        Parameters
        ----------
        title : heading text shown on the left of the bar
        pill  : short tag shown on the right in the accent colour; omitted
                when empty
        """
        self._ensure_room(11)
        self.set_fill_color(*_C_CARD)
        self.set_draw_color(*_C_ACCENT)
        self.set_line_width(0.5)
        self.rect(15, self.get_y(), 180, 9, "FD")
        self.set_font("Helvetica", "B", 10)
        self.set_text_color(*_C_TEXT1)
        self.set_x(17)
        self.cell(140, 9, self._latin1_safe(title))
        if pill:
            self.set_font("Helvetica", "", 8)
            self.set_text_color(*_C_ACCENT)
            self.cell(0, 9, self._latin1_safe(pill), align="R")
        self.ln(11)

    def stat_box(self, label: str, value: str, color: tuple, x: float, y: float, w: float = 42, h: float = 18) -> None:
        """Draw a bordered card at ``(x, y)`` with a big value and a caption.

        Parameters
        ----------
        label : caption, rendered upper-cased below the value
        value : headline text, rendered in ``color``
        color : RGB tuple used for the border, top accent bar and value
        x, y  : top-left corner in mm
        w, h  : box size in mm
        """
        self.set_fill_color(*_C_CARD)
        self.set_draw_color(*color)
        self.set_line_width(0.4)
        self.rect(x, y, w, h, "FD")
        # top accent bar
        self.set_fill_color(*color)
        self.rect(x, y, w, 1.5, "F")
        self.set_font("Helvetica", "B", 14)
        self.set_text_color(*color)
        self.set_xy(x, y + 3)
        self.cell(w, 7, self._latin1_safe(value), align="C")
        self.set_font("Helvetica", "", 7)
        self.set_text_color(*_C_TEXT2)
        self.set_xy(x, y + 10)
        # Upper-case first: upper() can leave Latin-1 (e.g. "ÿ" -> "Ÿ").
        self.cell(w, 5, self._latin1_safe(label.upper()), align="C")

    def h_bar(self, label: str, value: int, max_val: int, color: tuple, bar_w: float = 100) -> None:
        """Draw one labelled horizontal bar and advance to the next line.

        Parameters
        ----------
        label   : text left of the bar (truncated to 35 characters)
        value   : quantity represented by the bar, printed on the right
        max_val : value that corresponds to a completely filled bar
        color   : RGB tuple for the filled part
        bar_w   : width of the full track in mm
        """
        self._ensure_room(7)
        y = self.get_y()
        # label
        self.set_font("Helvetica", "", 8)
        self.set_text_color(*_C_TEXT1)
        self.set_x(17)
        self.cell(55, 6, self._latin1_safe(label)[:35])
        # bar background
        self.set_fill_color(*_C_DIVIDER)
        self.rect(73, y + 1, bar_w, 4, "F")
        # bar fill, clamped so it can never spill outside the track
        ratio = value / max(max_val, 1)
        fill_w = min(max(ratio, 0.0), 1.0) * bar_w
        if fill_w > 0:
            self.set_fill_color(*color)
            self.rect(73, y + 1, fill_w, 4, "F")
        # value
        self.set_font("Helvetica", "B", 8)
        self.set_text_color(*_C_TEXT2)
        self.set_xy(175, y)
        self.cell(20, 6, str(value), align="R")
        self.ln(7)

    def table_header(self, cols: list[tuple[str, float]]) -> None:
        """Draw a table header row followed by an accent divider line.

        ``cols`` is a list of ``(label, width_mm)`` pairs, left to right.
        """
        self.set_fill_color(*_C_CARD)
        self.set_font("Helvetica", "B", 8)
        self.set_text_color(*_C_ACCENT)
        for label, w in cols:
            self.cell(w, 7, self._latin1_safe(label), border=0, fill=True, align="L")
        self.ln(7)
        # divider line
        self.set_draw_color(*_C_ACCENT)
        self.set_line_width(0.3)
        self.line(15, self.get_y(), 195, self.get_y())
        self.ln(1)

    def table_row(self, values: list[tuple[str, float]], alt: bool = False) -> None:
        """Draw one table row.

        ``values`` is a list of ``(text, width_mm)`` pairs; each text is
        truncated to 40 characters.  ``alt`` selects the lighter stripe
        colour used for alternating rows.
        """
        if alt:
            self.set_fill_color(20, 20, 35)
        else:
            self.set_fill_color(*_C_BG)
        self.set_font("Helvetica", "", 8)
        self.set_text_color(*_C_TEXT1)
        for val, w in values:
            self.cell(w, 6, self._latin1_safe(val)[:40], border=0, fill=True, align="L")
        self.ln(6)
143
+
144
+
145
+ # ── Public API ─────────────────────────────────────────────────────────────────
146
+
147
def _pdf_safe(value) -> str:
    """Return ``value`` as text that the built-in (Latin-1 only) fonts can encode."""
    return str(value).encode("latin-1", "replace").decode("latin-1")


def _engagement_stats(recent: list[dict]) -> tuple[int, float, float, float]:
    """Return ``(score, msgs_per_min, positive_ratio, question_density)`` for ``recent``.

    The score weights the message rate (capped at 60/min) and the positive
    ratio at 40% each and the question density at 20%.  When the timestamps
    are missing or not ISO-8601 strings, all four values are zero.
    """
    try:
        n = len(recent)
        t0 = datetime.fromisoformat(recent[0]["time"])
        t1 = datetime.fromisoformat(recent[-1]["time"])
        # Floor the window at 0.1 min so a burst of messages cannot divide by zero.
        elapsed = max((t1 - t0).total_seconds() / 60, 0.1)
        rate = round(n / elapsed, 1)
        pos_ratio = sum(1 for m in recent if m.get("sentiment") == "Positive") / max(n, 1)
        q_density = sum(1 for m in recent if m.get("topic") == "Question") / max(n, 1)
        rate_norm = min(rate / 60, 1.0)
        eng_score = round((rate_norm * 0.4 + pos_ratio * 0.4 + q_density * 0.2) * 100)
    except (IndexError, KeyError, TypeError, ValueError):
        return 0, 0.0, 0.0, 0.0
    return eng_score, rate, pos_ratio, q_density


def generate_report(
    all_data: list[dict],
    stream_title: str = "LivePulse Stream",
    msg_limit: int = 100,
) -> bytes:
    """
    Generate a PDF report from the full message history.

    Parameters
    ----------
    all_data    : list of message dicts; the keys read here are ``author``,
                  ``text``, ``sentiment``, ``topic``, ``action_type`` and
                  ``time`` (ISO-8601 string, used for the engagement score)
    stream_title: video title shown on the cover (truncated to 80 characters)
    msg_limit   : max recent messages to include in the comments table;
                  zero or a negative value yields an empty table

    Returns
    -------
    bytes: PDF file content ready for st.download_button
    """
    row_alt_fill = (20, 20, 35)  # stripe colour for every second table row

    pdf = LivePulsePDF()
    pdf.add_page()

    # ── Cover / title ─────────────────────────────────────────────────────────
    pdf.set_fill_color(*_C_BG)
    pdf.rect(0, 20, 210, 40, "F")
    pdf.set_font("Helvetica", "B", 20)
    pdf.set_text_color(*_C_TEXT1)
    pdf.set_y(28)
    pdf.cell(0, 10, "Dashboard Report", align="C", new_x="LMARGIN", new_y="NEXT")
    pdf.set_font("Helvetica", "", 11)
    pdf.set_text_color(*_C_ACCENT)
    pdf.cell(0, 8, _pdf_safe(stream_title)[:80], align="C", new_x="LMARGIN", new_y="NEXT")
    pdf.set_font("Helvetica", "", 9)
    pdf.set_text_color(*_C_TEXT2)
    pdf.cell(0, 6, f"Total messages analysed: {len(all_data)}", align="C", new_x="LMARGIN", new_y="NEXT")
    pdf.ln(8)

    if not all_data:
        pdf.set_font("Helvetica", "", 11)
        pdf.set_text_color(*_C_NEG)
        pdf.cell(0, 10, "No data available.", align="C")
        return bytes(pdf.output())

    # ── Pre-compute stats ─────────────────────────────────────────────────────
    sentiments = [m.get("sentiment", "Neutral") for m in all_data]
    c_pos = sentiments.count("Positive")
    c_neu = sentiments.count("Neutral")
    c_neg = sentiments.count("Negative")
    c_total = max(len(all_data), 1)

    topic_counts = Counter(m.get("topic", "General") for m in all_data)
    action_counts = Counter(
        a for a in (m.get("action_type", "N/A") for m in all_data)
        if a not in ("N/A", "", None)
    )

    # One pass collects both the per-author totals and the per-author
    # sentiment split, keyed by the same normalised name so that messages
    # without an author are tallied consistently under "Unknown".
    author_counts: Counter = Counter()
    author_sentiments: Counter = Counter()
    for m in all_data:
        name = m.get("author") or "Unknown"
        author_counts[name] += 1
        author_sentiments[name, m.get("sentiment")] += 1

    # Engagement is measured over the 50 most recent messages only.
    eng_score, rate, pos_ratio, q_density = _engagement_stats(all_data[-50:])

    # ── SECTION 1: Engagement Summary ─────────────────────────────────────────
    pdf.section_title("Engagement Summary", "Live")

    y_boxes = pdf.get_y()
    pdf.stat_box("Engagement", str(eng_score), _C_ACCENT, 15, y_boxes)
    pdf.stat_box("Positive", f"{c_pos} ({c_pos/c_total*100:.0f}%)", _C_POS, 59, y_boxes)
    pdf.stat_box("Neutral", f"{c_neu} ({c_neu/c_total*100:.0f}%)", _C_NEU, 103, y_boxes)
    pdf.stat_box("Negative", f"{c_neg} ({c_neg/c_total*100:.0f}%)", _C_NEG, 147, y_boxes)
    pdf.set_y(y_boxes + 22)

    y_boxes2 = pdf.get_y()
    pdf.stat_box("Total Msgs", str(c_total), _C_TEXT2, 15, y_boxes2)
    pdf.stat_box("Msgs/min", f"{rate:.1f}", _C_ACCENT, 59, y_boxes2)
    pdf.stat_box("Pos ratio", f"{pos_ratio*100:.0f}%", _C_POS, 103, y_boxes2)
    pdf.stat_box("Q density", f"{q_density*100:.0f}%", _C_NEU, 147, y_boxes2)
    pdf.set_y(y_boxes2 + 22)
    pdf.ln(4)

    # ── SECTION 2: Topic Distribution ─────────────────────────────────────────
    pdf.section_title("Topic Distribution", "All Time")
    max_topic = max(topic_counts.values(), default=1)
    for topic in ["Appreciation", "Question", "Request/Feedback", "Promo", "Spam", "General", "MCQ Answer"]:
        count = topic_counts.get(topic, 0)
        color = TOPIC_COLORS.get(topic, _C_TEXT2)
        pdf.h_bar(topic, count, max_topic, color)
    pdf.ln(4)

    # ── SECTION 3: Action Type Breakdown ──────────────────────────────────────
    if action_counts:
        pdf.section_title("Top Action Types", "Questions & Requests")
        max_action = max(action_counts.values(), default=1)
        for action, count in action_counts.most_common(15):
            pdf.h_bar(_pdf_safe(action)[:40], count, max_action, _C_ACCENT)
        pdf.ln(4)

    # ── SECTION 4: Top Contributors ───────────────────────────────────────────
    pdf.section_title("Top Contributors", "All Time")
    cols = [("Author", 60), ("Messages", 25), ("Positive%", 30), ("Neutral%", 30), ("Negative%", 30)]
    pdf.table_header(cols)

    for i, (author, count) in enumerate(author_counts.most_common(15)):
        total_a = max(count, 1)
        pos_p = round(author_sentiments[author, "Positive"] / total_a * 100)
        neu_p = round(author_sentiments[author, "Neutral"] / total_a * 100)
        neg_p = round(author_sentiments[author, "Negative"] / total_a * 100)
        pdf.table_row([
            (_pdf_safe(author)[:28], 60),
            (str(count), 25),
            (f"{pos_p}%", 30),
            (f"{neu_p}%", 30),
            (f"{neg_p}%", 30),
        ], alt=(i % 2 == 1))
    pdf.ln(4)

    # ── SECTION 5: Recent Comments ────────────────────────────────────────────
    pdf.add_page()
    # Guard the slice: all_data[-0:] would return the whole history.
    recent_msgs = all_data[-msg_limit:] if msg_limit > 0 else []
    pdf.section_title("Recent Comments", f"Last {len(recent_msgs)} messages")

    cols_c = [("Author", 40), ("Message", 90), ("Sentiment", 22), ("Topic", 28)]
    pdf.table_header(cols_c)

    sent_colors = {"Positive": _C_POS, "Negative": _C_NEG, "Neutral": _C_NEU}

    # Newest message first.
    for i, msg in enumerate(reversed(recent_msgs)):
        author = _pdf_safe(msg.get("author") or "")[:18]
        text = _pdf_safe(msg.get("text") or "")[:55]
        sent = msg.get("sentiment") or "Neutral"
        topic = msg.get("topic") or "General"

        pdf.set_fill_color(*(row_alt_fill if i % 2 == 1 else _C_BG))

        pdf.set_font("Helvetica", "", 7.5)
        pdf.set_text_color(*_C_TEXT1)
        pdf.cell(40, 5.5, author, border=0, fill=True)
        pdf.cell(90, 5.5, text, border=0, fill=True)

        # Sentiment with colour
        pdf.set_text_color(*sent_colors.get(sent, _C_TEXT2))
        pdf.cell(22, 5.5, _pdf_safe(sent), border=0, fill=True)

        # Topic with colour: look the colour up with the full label, because
        # the truncated form ("Request/Feedba") is not a TOPIC_COLORS key.
        pdf.set_text_color(*TOPIC_COLORS.get(topic, _C_TEXT2))
        pdf.cell(28, 5.5, _pdf_safe(topic)[:14], border=0, fill=True)
        pdf.ln(5.5)

    # ── SECTION 6: Questions Log ──────────────────────────────────────────────
    questions = [m for m in all_data if m.get("topic") == "Question"]
    if questions:
        pdf.add_page()
        pdf.section_title("Questions Asked", f"{len(questions)} total")
        cols_q = [("Author", 40), ("Question", 115), ("Action Type", 40)]
        pdf.table_header(cols_q)

        # At most the 100 latest questions, newest first.
        for i, msg in enumerate(reversed(questions[-100:])):
            author = _pdf_safe(msg.get("author") or "")[:18]
            text = _pdf_safe(msg.get("text") or "")[:65]
            action = _pdf_safe(msg.get("action_type") or "N/A")[:22]
            pdf.set_fill_color(*(row_alt_fill if i % 2 == 1 else _C_BG))
            pdf.set_font("Helvetica", "", 7.5)
            pdf.set_text_color(*_C_TEXT1)
            pdf.cell(40, 5.5, author, border=0, fill=True)
            pdf.cell(115, 5.5, text, border=0, fill=True)
            pdf.set_text_color(*_C_ACCENT)
            pdf.cell(40, 5.5, action, border=0, fill=True)
            pdf.ln(5.5)

    return bytes(pdf.output())
requirements.txt CHANGED
@@ -2,7 +2,8 @@
2
  emoji>=2.10.0
3
  deep-translator>=1.11.4
4
 
5
- # Live chat scraping (uses YouTube Data API v3 — no extra package needed)
 
6
 
7
  # Dashboard
8
  streamlit>=1.35.0
 
2
  emoji>=2.10.0
3
  deep-translator>=1.11.4
4
 
5
+ # PDF export
6
+ fpdf2>=2.7.9
7
 
8
  # Dashboard
9
  streamlit>=1.35.0