Luigi committed on
Commit
06b55be
·
1 Parent(s): d3dce49

re-implement tab3 (ui for transcription and summarization)

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +234 -135
src/streamlit_app.py CHANGED
@@ -79,38 +79,64 @@ with tab2:
79
  st.session_state.audio_path = temp_audio_path
80
 
81
  with tab3:
82
- st.subheader("Transcription & Summary")
83
  st.markdown("---")
84
 
85
- # Debug console
86
- debug_expander = st.expander("Debug Console", expanded=False)
87
  with debug_expander:
88
  debug_placeholder = st.empty()
89
 
90
- def debug_log(message):
91
- """Helper function for debug messages"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  with debug_expander:
93
- debug_placeholder.markdown(f"`{datetime.now().strftime('%H:%M:%S')}`: {message}")
94
- print(f"[DEBUG] {message}")
 
 
 
95
 
96
- # Initialize audio player
 
 
97
  if st.session_state.audio_path and not st.session_state.audio_base64:
98
- debug_log("Initializing audio player...")
99
- with open(st.session_state.audio_path, "rb") as f:
100
- audio_bytes = f.read()
101
- st.session_state.audio_base64 = base64.b64encode(audio_bytes).decode('utf-8')
102
- debug_log(f"Audio loaded: {len(audio_bytes)} bytes")
 
 
 
 
103
 
 
104
  if st.session_state.audio_base64:
105
- # Unified audio player with enhanced messaging
106
- debug_log("Rendering audio player...")
107
  audio_html = f"""
108
  <audio id="audioPlayer" controls ontimeupdate="updateTime(this)">
109
  <source src="data:audio/mp3;base64,{st.session_state.audio_base64}" type="audio/mp3">
110
  </audio>
111
  <script>
112
  const player = document.getElementById('audioPlayer');
113
- console.log('[AUDIO] Player initialized');
 
114
 
115
  function seekAudio(time) {{
116
  console.log('[AUDIO] Seeking to: ' + time);
@@ -119,9 +145,13 @@ with tab3:
119
  }}
120
 
121
  function updateTime(audio) {{
122
- const time = audio.currentTime;
123
- console.log('[AUDIO] Time update: ' + time.toFixed(2));
124
- window.parent.postMessage({{type: "currentTimeUpdate", time: time}}, "*");
 
 
 
 
125
  }}
126
 
127
  // Unified message handling
@@ -134,181 +164,250 @@ with tab3:
134
  </script>
135
  """
136
  st.markdown(audio_html, unsafe_allow_html=True)
 
137
 
138
- # Create persistent display containers
139
- status_placeholder = st.empty()
140
- transcript_container = st.container()
141
- summary_container = st.container()
142
 
143
- # Display existing summary if available
144
- if st.session_state.get('summary'):
145
- debug_log(f"Displaying existing summary ({len(st.session_state.summary)} chars)")
146
- with summary_container:
147
- st.markdown("### Summary")
148
- st.markdown(st.session_state.summary)
149
-
150
- # Streamlit Bridge for time updates
151
- def streamlit_bridge():
152
- debug_log("Initializing time bridge...")
153
  time_js = """
154
  <script>
155
- console.log('[TIME BRIDGE] Initialized');
156
- // Listen for time updates from anywhere
157
  window.addEventListener('message', (event) => {
158
  if (event.data.type === 'currentTimeUpdate') {
159
- console.log('[TIME BRIDGE] Received time update: ' + event.data.time);
160
  Streamlit.setComponentValue(event.data.time);
 
161
  }
162
  });
163
  </script>
164
  """
165
- return st.components.v1.html(time_js, height=0)
166
-
167
- # Get current time via bridge
168
- current_time = streamlit_bridge()
169
- if current_time is not None:
170
- debug_log(f"Time bridge component initialized")
171
- st.session_state.current_time = current_time
172
-
173
- # Updated transcript generation and rendering code
174
- def generate_transcript_html(utterances):
 
 
 
175
  if not utterances:
176
- return ""
177
 
178
- current_time = st.session_state.get("current_time", 0.0)
 
 
 
 
179
 
180
  html = """
181
  <div id="transcript-container" style="max-height:500px;overflow-y:auto;">
182
  """
183
 
184
- for utterance in utterances:
 
 
185
  if len(utterance) != 3:
 
186
  continue
187
 
188
  start, end, text = utterance
189
- if start < 0 or end < 0:
190
- html += f'<div class="utterance">{text}</div>'
191
- continue
192
-
193
  try:
194
- # Convert to floats to ensure numerical comparison
195
  start_f = float(start)
196
  end_f = float(end)
197
- current_f = float(current_time)
198
- is_current = start_f <= current_f < end_f
 
199
  except Exception as e:
 
200
  is_current = False
201
-
202
- # Format time display
 
203
  start_time = time.strftime('%M:%S', time.gmtime(start_f))
204
  end_time = time.strftime('%M:%S', time.gmtime(end_f))
205
 
206
- # Create safe text display (escape quotes)
207
- safe_text = text.replace('"', '&quot;').replace("'", "&apos;")
208
 
209
  html += f"""
210
  <div class="utterance {'current-utterance' if is_current else ''}"
211
- onclick="window.parent.postMessage({{type: 'seekToTime', time: {start_f}}}, '*')">
 
212
  <b>[{start_time}-{end_time}]</b> {safe_text}
213
  </div>
214
  """
215
 
216
  html += "</div>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
217
  return html
218
 
219
- # Simplified transcript rendering function
220
- def render_transcript():
221
- if "utterances" in st.session_state and st.session_state.utterances:
222
- with transcript_container:
223
- html = generate_transcript_html(st.session_state.utterances)
224
- st.components.v1.html(html, height=500)
225
-
226
- # Add CSS for styling
 
 
 
 
 
 
 
227
  st.markdown("""
228
  <style>
229
  .utterance {
230
- padding: 8px;
231
- margin: 4px 0;
232
- border-radius: 4px;
233
  cursor: pointer;
234
- transition: background 0.2s;
235
- line-height: 1.5;
 
 
 
 
 
 
236
  }
237
- .utterance:hover { background-color: #f0f0f0; }
238
  .current-utterance {
239
- background-color: #ffebee;
240
- border-left: 3px solid #f44336;
241
- font-weight: 500;
 
242
  }
243
  </style>
244
  """, unsafe_allow_html=True)
245
-
246
- # Transcribe button
247
- # Update the transcription button logic
248
- if st.button("🎙️ Transcribe", key="transcribe_button_tab3"):
249
  if st.session_state.audio_path:
250
- debug_log("Transcription started...")
251
- status_placeholder.text("Transcribing...")
252
  st.session_state.utterances = []
253
  st.session_state.transcript = ""
254
 
255
- # Process ASR output
256
- for current_utt, all_utts in transcribe_file(
257
- st.session_state.audio_path, vad_threshold, model_names[model_name]
258
- ):
259
- st.session_state.utterances = all_utts
260
- st.session_state.transcript = "\n".join(
261
- f"[{start:.1f}-{end:.1f}] {text}"
262
- for start, end, text in all_utts
263
  )
264
 
265
- # Update transcript display
266
- render_transcript()
267
-
268
- # Final update after transcription
269
- debug_log(f"Transcription completed: {len(st.session_state.utterances)} utterances")
270
- render_transcript()
271
- status_placeholder.empty()
272
-
273
- # Always render transcript when available
274
- render_transcript()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
275
 
276
- # Real-time transcript highlighting
277
  if "utterances" in st.session_state and st.session_state.utterances:
278
- debug_log(f"Rendering transcript with {len(st.session_state.utterances)} utterances")
279
- with transcript_container:
280
- st.markdown(
281
- generate_transcript_html(
282
- st.session_state.utterances,
283
- st.session_state.get('current_time', 0.0)
284
- ),
285
- unsafe_allow_html=True
286
  )
 
 
 
 
287
 
288
- # Summarize button
289
- if st.button("📝 Summarize", key="summarize_button_tab3"):
290
  if st.session_state.transcript:
291
- debug_log("Summarization started...")
292
- status_placeholder.text("Summarizing...")
293
  st.session_state.summary = ""
294
 
295
- # Create temporary progress display
296
- progress_placeholder = st.empty()
297
-
298
- for accumulated_summary in summarize_transcript(
299
- st.session_state.transcript, llm_model, prompt_input
300
- ):
301
- st.session_state.summary = accumulated_summary
302
- progress_placeholder.markdown(accumulated_summary)
303
- debug_log(f"Summary update: {len(accumulated_summary)} chars")
304
-
305
- # Update persistent summary display
306
- debug_log(f"Final summary: {len(st.session_state.summary)} chars")
307
- with summary_container:
308
- st.markdown("### Summary")
309
- st.markdown(st.session_state.summary)
310
-
311
- # Cleanup progress display
312
- progress_placeholder.empty()
313
- status_placeholder.empty()
314
- debug_log("Summarization completed")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  st.session_state.audio_path = temp_audio_path
80
 
81
  with tab3:
82
+ st.subheader("🎤 Transcription & Summary")
83
  st.markdown("---")
84
 
85
+ # ===== Enhanced Debug Console =====
86
+ debug_expander = st.expander("🛠️ Debug Console", expanded=False)
87
  with debug_expander:
88
  debug_placeholder = st.empty()
89
 
90
+ # Initialize debug logs with startup message
91
+ if 'debug_logs' not in st.session_state:
92
+ st.session_state.debug_logs = ["`System`: Tab3 initialized"]
93
+
94
+ def debug_log(message, category="INFO"):
95
+ """Enhanced debug logger with categories and colors"""
96
+ color_map = {
97
+ "INFO": "blue",
98
+ "WARN": "orange",
99
+ "ERROR": "red",
100
+ "AUDIO": "purple",
101
+ "STATE": "green"
102
+ }
103
+ timestamp = datetime.now().strftime('%H:%M:%S')
104
+ log_entry = f"<span style='color:{color_map.get(category, 'gray')}'>`{timestamp}` [{category}]: {message}</span>"
105
+ st.session_state.debug_logs.append(log_entry)
106
+
107
+ # Display last 15 logs with scroll
108
  with debug_expander:
109
+ debug_html = "<div style='max-height:300px; overflow-y:auto; font-family: monospace;'>" + \
110
+ "<br>".join(st.session_state.debug_logs[-15:]) + "</div>"
111
+ debug_placeholder.markdown(debug_html, unsafe_allow_html=True)
112
+
113
+ print(f"[DEBUG] {category}: {message}")
114
 
115
+ debug_log("Tab3 rendering started", "STATE")
116
+
117
+ # ===== Audio Player Initialization =====
118
  if st.session_state.audio_path and not st.session_state.audio_base64:
119
+ debug_log("Initializing audio player...", "AUDIO")
120
+ try:
121
+ with open(st.session_state.audio_path, "rb") as f:
122
+ audio_bytes = f.read()
123
+ st.session_state.audio_base64 = base64.b64encode(audio_bytes).decode('utf-8')
124
+ debug_log(f"Audio loaded: {len(audio_bytes)} bytes | MIME: audio/mp3", "AUDIO")
125
+ except Exception as e:
126
+ debug_log(f"Audio loading failed: {str(e)}", "ERROR")
127
+ st.error(f"Audio loading error: {str(e)}")
128
 
129
+ # ===== Unified Audio Player =====
130
  if st.session_state.audio_base64:
131
+ debug_log("Rendering audio player component", "AUDIO")
 
132
  audio_html = f"""
133
  <audio id="audioPlayer" controls ontimeupdate="updateTime(this)">
134
  <source src="data:audio/mp3;base64,{st.session_state.audio_base64}" type="audio/mp3">
135
  </audio>
136
  <script>
137
  const player = document.getElementById('audioPlayer');
138
+ let lastUpdate = 0;
139
+ const THROTTLE_MS = 200;
140
 
141
  function seekAudio(time) {{
142
  console.log('[AUDIO] Seeking to: ' + time);
 
145
  }}
146
 
147
  function updateTime(audio) {{
148
+ const now = Date.now();
149
+ if (now - lastUpdate > THROTTLE_MS) {{
150
+ const time = audio.currentTime;
151
+ console.log('[AUDIO] Time update: ' + time.toFixed(2));
152
+ window.parent.postMessage({{type: "currentTimeUpdate", time: time}}, "*");
153
+ lastUpdate = now;
154
+ }}
155
  }}
156
 
157
  // Unified message handling
 
164
  </script>
165
  """
166
  st.markdown(audio_html, unsafe_allow_html=True)
167
+ debug_log("Audio player rendered successfully", "AUDIO")
168
 
169
+ # ===== Time Synchronization Bridge =====
170
+ if 'current_time' not in st.session_state:
171
+ st.session_state.current_time = 0.0
172
+ debug_log("Current_time state initialized to 0.0", "STATE")
173
 
174
+ # Initialize bridge only once
175
+ if 'time_bridge_initialized' not in st.session_state:
176
+ debug_log("Initializing time synchronization bridge", "STATE")
 
 
 
 
 
 
 
177
  time_js = """
178
  <script>
179
+ // Listen for time updates from audio player
 
180
  window.addEventListener('message', (event) => {
181
  if (event.data.type === 'currentTimeUpdate') {
182
+ // Update Streamlit component value
183
  Streamlit.setComponentValue(event.data.time);
184
+ console.log('[TIME BRIDGE] Sent time to Python: ' + event.data.time);
185
  }
186
  });
187
  </script>
188
  """
189
+ st.components.v1.html(time_js, height=0)
190
+ st.session_state.time_bridge_initialized = True
191
+ debug_log("Time bridge initialized successfully", "STATE")
192
+
193
+ # Create dummy component to receive time updates
194
+ current_time = st.session_state.current_time
195
+ debug_log(f"Current playback time: {current_time:.2f} seconds", "STATE")
196
+
197
+ # ===== Enhanced Transcript Generation =====
198
+ def generate_transcript_html(utterances, current_time=0.0):
199
+ """Generate interactive transcript HTML with current time highlighting"""
200
+ debug_log(f"Generating transcript HTML for {len(utterances)} utterances", "STATE")
201
+
202
  if not utterances:
203
+ return "<div>No transcript available</div>"
204
 
205
+ try:
206
+ current_time = float(current_time)
207
+ except:
208
+ current_time = 0.0
209
+ debug_log("Invalid current_time, defaulting to 0.0", "WARN")
210
 
211
  html = """
212
  <div id="transcript-container" style="max-height:500px;overflow-y:auto;">
213
  """
214
 
215
+ current_highlighted = -1
216
+
217
+ for idx, utterance in enumerate(utterances):
218
  if len(utterance) != 3:
219
+ debug_log(f"Skipping invalid utterance: {utterance}", "WARN")
220
  continue
221
 
222
  start, end, text = utterance
 
 
 
 
223
  try:
 
224
  start_f = float(start)
225
  end_f = float(end)
226
+ is_current = start_f <= current_time < end_f
227
+ if is_current:
228
+ current_highlighted = idx
229
  except Exception as e:
230
+ debug_log(f"Utterance time conversion error: {e}", "ERROR")
231
  is_current = False
232
+ start_f = 0
233
+ end_f = 0
234
+
235
  start_time = time.strftime('%M:%S', time.gmtime(start_f))
236
  end_time = time.strftime('%M:%S', time.gmtime(end_f))
237
 
238
+ # Escape HTML special characters for security
239
+ safe_text = html.escape(text)
240
 
241
  html += f"""
242
  <div class="utterance {'current-utterance' if is_current else ''}"
243
+ onclick="window.parent.postMessage({{type: 'seekToTime', time: {start_f}}}, '*')"
244
+ data-start="{start_f}" data-end="{end_f}" data-idx="{idx}">
245
  <b>[{start_time}-{end_time}]</b> {safe_text}
246
  </div>
247
  """
248
 
249
  html += "</div>"
250
+
251
+ if current_highlighted >= 0:
252
+ html += f"""
253
+ <script>
254
+ // Auto-scroll to current utterance
255
+ try {{
256
+ const currentElement = document.querySelector('[data-idx="{current_highlighted}"]');
257
+ if (currentElement) {{
258
+ currentElement.scrollIntoView({{behavior: 'smooth', block: 'center'}});
259
+ }}
260
+ }} catch(e) {{ console.error('Scroll error:', e); }}
261
+ </script>
262
+ """
263
+
264
+ debug_log(f"Transcript HTML generated ({len(html)} chars)", "STATE")
265
  return html
266
 
267
+ # ===== UI Elements Setup =====
268
+ status_placeholder = st.empty()
269
+ transcript_placeholder = st.empty()
270
+ summary_container = st.container()
271
+ debug_log("UI placeholders initialized", "STATE")
272
+
273
+ # Display existing summary if available
274
+ if st.session_state.get('summary'):
275
+ summary_len = len(st.session_state.summary)
276
+ debug_log(f"Displaying existing summary ({summary_len} chars)", "STATE")
277
+ with summary_container:
278
+ st.markdown("### 📝 Summary")
279
+ st.markdown(st.session_state.summary)
280
+
281
+ # ===== Enhanced CSS Styling =====
282
  st.markdown("""
283
  <style>
284
  .utterance {
285
+ padding: 10px;
286
+ margin: 5px 0;
287
+ border-radius: 6px;
288
  cursor: pointer;
289
+ transition: all 0.3s;
290
+ line-height: 1.6;
291
+ border: 1px solid #e0e0e0;
292
+ }
293
+ .utterance:hover {
294
+ background-color: #f5f5f5;
295
+ transform: translateX(3px);
296
+ box-shadow: 0 2px 5px rgba(0,0,0,0.1);
297
  }
 
298
  .current-utterance {
299
+ background-color: #fff3e0;
300
+ border-left: 4px solid #ff9800;
301
+ font-weight: 600;
302
+ box-shadow: 0 3px 8px rgba(255,152,0,0.2);
303
  }
304
  </style>
305
  """, unsafe_allow_html=True)
306
+ debug_log("CSS styles applied", "STATE")
307
+
308
+ # ===== Transcription Process =====
309
+ if st.button("🎙️ Transcribe Audio", key="transcribe_button_tab3", help="Convert audio to timestamped text"):
310
  if st.session_state.audio_path:
311
+ debug_log("=== TRANSCRIPTION STARTED ===", "INFO")
312
+ status_placeholder.info("🔊 Transcribing audio...")
313
  st.session_state.utterances = []
314
  st.session_state.transcript = ""
315
 
316
+ try:
317
+ transcription_gen = transcribe_file(
318
+ st.session_state.audio_path,
319
+ vad_threshold,
320
+ model_names[model_name]
 
 
 
321
  )
322
 
323
+ last_update = 0
324
+ update_frequency = 3 # Update every 3 utterances
325
+
326
+ for idx, (current_utt, all_utts) in enumerate(transcription_gen):
327
+ st.session_state.utterances = all_utts
328
+ st.session_state.transcript = "\n".join(
329
+ f"[{start:.1f}-{end:.1f}] {text}"
330
+ for start, end, text in all_utts
331
+ )
332
+
333
+ # Update UI at defined frequency or last item
334
+ if idx % update_frequency == 0 or not current_utt:
335
+ with transcript_placeholder.container():
336
+ st.empty() # Clear previous content
337
+ html = generate_transcript_html(
338
+ st.session_state.utterances,
339
+ st.session_state.current_time
340
+ )
341
+ st.components.v1.html(html, height=500)
342
+ debug_log(f"Transcript updated: {len(all_utts)} utterances", "STATE")
343
+
344
+ if current_utt:
345
+ start, end, text = current_utt
346
+ debug_log(f"New utterance: [{start:.1f}-{end:.1f}] {text[:30]}...", "INFO")
347
+
348
+ debug_log(f"Transcription completed: {len(st.session_state.utterances)} utterances", "INFO")
349
+ status_placeholder.success("✅ Transcription completed!")
350
+
351
+ except Exception as e:
352
+ debug_log(f"Transcription failed: {str(e)}", "ERROR")
353
+ status_placeholder.error(f"Transcription error: {str(e)}")
354
+ else:
355
+ debug_log("Transcription attempted without audio file", "ERROR")
356
+ status_placeholder.warning("⚠️ No audio file available")
357
 
358
+ # ===== Render Existing Transcript =====
359
  if "utterances" in st.session_state and st.session_state.utterances:
360
+ debug_log("Rendering existing transcript", "STATE")
361
+ with transcript_placeholder.container():
362
+ html = generate_transcript_html(
363
+ st.session_state.utterances,
364
+ st.session_state.current_time
 
 
 
365
  )
366
+ st.components.v1.html(html, height=500)
367
+ else:
368
+ with transcript_placeholder.container():
369
+ st.info("No transcript available. Click 'Transcribe Audio' to generate one.")
370
 
371
+ # ===== Summarization Process =====
372
+ if st.button("📝 Generate Summary", key="summarize_button_tab3", help="Create summary from transcript"):
373
  if st.session_state.transcript:
374
+ debug_log("=== SUMMARIZATION STARTED ===", "INFO")
375
+ status_placeholder.info("🧠 Generating summary...")
376
  st.session_state.summary = ""
377
 
378
+ try:
379
+ progress_placeholder = st.empty()
380
+ summary_gen = summarize_transcript(
381
+ st.session_state.transcript,
382
+ llm_model,
383
+ prompt_input
384
+ )
385
+
386
+ start_time = time.time()
387
+ char_count = 0
388
+
389
+ for accumulated_summary in summary_gen:
390
+ st.session_state.summary = accumulated_summary
391
+ progress_placeholder.markdown(accumulated_summary)
392
+ new_chars = len(accumulated_summary) - char_count
393
+ char_count = len(accumulated_summary)
394
+ debug_log(f"Summary update: +{new_chars} chars", "INFO")
395
+
396
+ duration = time.time() - start_time
397
+ debug_log(f"Summary completed: {char_count} chars in {duration:.1f}s", "INFO")
398
+
399
+ with summary_container:
400
+ st.markdown("### 📝 Final Summary")
401
+ st.markdown(st.session_state.summary)
402
+
403
+ progress_placeholder.empty()
404
+ status_placeholder.success(f"✅ Summary generated ({char_count} chars)")
405
+
406
+ except Exception as e:
407
+ debug_log(f"Summarization failed: {str(e)}", "ERROR")
408
+ status_placeholder.error(f"Summary error: {str(e)}")
409
+ else:
410
+ debug_log("Summarization attempted without transcript", "ERROR")
411
+ status_placeholder.warning("⚠️ No transcript available")
412
+
413
+ debug_log("Tab3 rendering completed", "STATE")