Files changed (1) hide show
  1. app.py +136 -109
app.py CHANGED
@@ -310,27 +310,32 @@ class UserSession:
310
  """Reconstruct transcription text from stream events."""
311
  stream1_text = ""
312
  stream2_text = ""
313
-
314
  # Reconstruct from text_delta events
315
  for event in self.stream_events['stream_1']:
316
  if event.get('type') == 'text_delta':
317
  stream1_text += event.get('text', '')
318
-
319
- for event in self.stream_events['stream_2']:
320
- if event.get('type') == 'text_delta':
321
- stream2_text += event.get('text', '')
322
-
323
- # Stream 3
 
 
 
 
 
 
324
  stream3_final = stream2_text
325
  stream3_preview = stream1_text
326
-
327
  stream3_final, stream3_preview = self._compute_display_texts(stream3_final, stream3_preview)
328
  stream3_text = stream3_final + self.new_color_open + stream3_preview + self.new_color_close
329
-
330
  # Return as tuple for compatibility with HTML function
331
  return (stream1_text, stream2_text, stream3_text)
332
 
333
-
334
  # Load CSS from external file
335
  css_path = os.path.join(os.path.dirname(__file__), "style.css")
336
  with open(css_path, "r") as f:
@@ -373,77 +378,85 @@ def get_transcription_html(transcripts: tuple, status: str, wpm: str = "Calibrat
373
  """Generate the full transcription card HTML."""
374
  status_badge = get_status_html(status)
375
  wpm_badge = f'<div class="wpm-badge"><span style="color: #1E1E1E !important;">{wpm}</span></div>'
376
-
377
  if transcripts:
378
- # Check if partial transcript is enabled and we have 3 streams
379
- if partial_transcript_enabled and len(transcripts) >= 3 and transcripts[0] and transcripts[1] and transcripts[2]:
380
- # Split into three streams
381
- stream1_content, stream2_content, stream3_content = transcripts
382
-
383
  cursor_html = '<span class="transcript-cursor"></span>' if status == "listening" else ""
384
  content_html = f"""
385
- <div class="triple-stream-container">
386
- <div class="stream-box">
387
- <div class="stream-label">Stream 1 (Preview - 240ms)</div>
388
- <div class="transcript-text" style="color: #000000 !important;">
 
 
 
 
 
 
 
 
 
 
 
 
389
  {stream1_content}{cursor_html}
 
390
  </div>
391
- </div>
392
- <div class="stream-box">
393
- <div class="stream-label">Stream 2 (Final - 2.4s)</div>
394
- <div class="transcript-text" style="color: #000000 !important;">
395
  {stream2_content}{cursor_html}
 
396
  </div>
397
- </div>
398
- <div class="stream-box">
399
- <div class="stream-label">Stream 3 (Merged)</div>
400
- <div class="transcript-text" style="color: #000000 !important;">
401
  {stream3_content}{cursor_html}
 
402
  </div>
403
  </div>
404
- </div>
405
- """
406
- # Check if we have 3 streams (backward compatibility for when partial transcript is disabled)
407
- elif len(transcripts) >= 3 and transcripts[0] and transcripts[1] and transcripts[2]:
408
- # Show only the merged stream when partial transcript is disabled
409
- stream3_content = transcripts[2]
410
-
411
- cursor_html = '<span class="transcript-cursor"></span>' if status == "listening" else ""
412
- content_html = f"""
413
- <div class="transcript-text" style="color: #000000 !important;">
414
  {stream3_content}{cursor_html}
415
- </div>
416
- """
417
- # Check if transcript contains both streams (backward compatibility)
418
- elif transcripts[0] and transcripts[1]:
419
- # Split the transcript into two streams
420
- stream1_content, stream2_content = transcripts
421
-
422
- cursor_html = '<span class="transcript-cursor"></span>' if status == "listening" else ""
423
- content_html = f"""
424
- <div class="dual-stream-container">
425
- <div class="stream-box">
426
- <div class="stream-label">Stream 1</div>
427
- <div class="transcript-text" style="color: #000000 !important;">
428
  {stream1_content}{cursor_html}
 
429
  </div>
430
- </div>
431
- <div class="stream-box">
432
- <div class="stream-label">Stream 2</div>
433
- <div class="transcript-text" style="color: #000000 !important;">
434
  {stream2_content}{cursor_html}
 
435
  </div>
436
  </div>
437
- </div>
438
- """
439
- else:
440
- # Single stream (backward compatibility)
441
- cursor_html = '<span class="transcript-cursor"></span>' if status == "listening" else ""
442
- content_html = f"""
443
- <div class="transcript-text" style="color: #000000 !important;">
444
  {transcripts[0]}{cursor_html}
445
- </div>
446
- """
447
  elif status in ["listening", "warming", "connecting"]:
448
  content_html = """
449
  <div class="empty-state">
@@ -469,13 +482,13 @@ def get_transcription_html(transcripts: tuple, status: str, wpm: str = "Calibrat
469
  <p class="empty-text" style="color: #555555 !important;">// Click the microphone to start.</p>
470
  </div>
471
  """
472
-
473
  # Use base64 image if available
474
  if VOXTRAL_ICON_B64:
475
  icon_html = f'<img src="data:image/png;base64,{VOXTRAL_ICON_B64}" alt="Voxtral" class="voxtral-icon" />'
476
  else:
477
  icon_html = '<span style="font-size:20px;">🎙️</span>'
478
-
479
  return f"""
480
  <div class="transcription-card">
481
  <div class="card-header">
@@ -669,45 +682,49 @@ async def audio_stream_duplicator_from_queue(session):
669
 
670
  return duplicator
671
 
672
-
673
  async def mistral_transcription_handler(session):
674
- """Connect to Mistral realtime API and handle transcription with 2 parallel streams."""
675
  try:
676
  if not session.api_key:
677
  session.status_message = "error"
678
  print(f"Session {session.session_id[:8]}: No API key provided")
679
  return
680
-
681
  # Create Mistral client
682
  client = Mistral(api_key=session.api_key, server_url=MISTRAL_BASE_URL)
683
  audio_format = AudioFormat(encoding="pcm_s16le", sample_rate=SAMPLE_RATE)
684
-
685
  session.status_message = "connecting"
686
-
687
  print(f"Session {session.session_id[:8]}: Connecting to Mistral realtime API...")
688
-
689
  # Create a duplicator that can serve multiple audio streams
690
  duplicator = await audio_stream_duplicator_from_queue(session)
691
  print(f"Session {session.session_id[:8]}: Created audio stream duplicator for parallel processing")
692
-
693
- # Create separate audio streams from the duplicator
694
  audio_stream_1 = await duplicator.add_consumer()
695
- audio_stream_2 = await duplicator.add_consumer()
696
- print(f"Session {session.session_id[:8]}: Created 2 separate audio streams from duplicator")
697
-
698
- # Create tasks for both transcription streams
 
 
 
 
 
699
  async def process_stream_1():
700
  async for event_1 in client.audio.realtime.transcribe_stream(
701
  audio_stream=audio_stream_1,
702
  model=MODEL,
703
  audio_format=audio_format,
704
- target_streaming_delay_ms=240
705
  ):
706
  if not session.is_running:
707
  break
708
-
709
  current_time = time.time()
710
-
711
  if isinstance(event_1, RealtimeTranscriptionSessionCreated):
712
  event_data = {
713
  'type': 'session_created',
@@ -717,17 +734,17 @@ async def mistral_transcription_handler(session):
717
  session.stream_events['stream_1'].append(event_data)
718
  session.last_event_timestamp = current_time
719
  print(f"Session {session.session_id[:8]}: Stream 1 connected to Mistral - {current_time:.3f}")
720
-
721
  elif isinstance(event_1, TranscriptionStreamTextDelta):
722
  delta = event_1.text
723
-
724
  # Get current full text by reconstructing from events
725
  current_full_text = ""
726
  for e in session.stream_events['stream_1']:
727
  if e.get('type') == 'text_delta':
728
  current_full_text += e.get('text', '')
729
  current_full_text += delta
730
-
731
  event_data = {
732
  'type': 'text_delta',
733
  'timestamp': current_time,
@@ -737,13 +754,13 @@ async def mistral_transcription_handler(session):
737
  session.stream_events['stream_1'].append(event_data)
738
  session.last_event_timestamp = current_time
739
  print(f'1 [{current_time:.3f}]', delta, end="", flush=True)
740
-
741
  words = delta.split()
742
  for _ in words:
743
  session.word_timestamps.append(time.time())
744
-
745
  session.current_wpm = calculate_wpm(session)
746
-
747
  elif isinstance(event_1, TranscriptionStreamDone):
748
  event_data = {
749
  'type': 'stream_done',
@@ -753,7 +770,7 @@ async def mistral_transcription_handler(session):
753
  session.last_event_timestamp = current_time
754
  print(f"Session {session.session_id[:8]}: Stream 1 transcription done - {current_time:.3f}")
755
  break
756
-
757
  elif isinstance(event_1, RealtimeTranscriptionError):
758
  event_data = {
759
  'type': 'error',
@@ -764,7 +781,7 @@ async def mistral_transcription_handler(session):
764
  session.last_event_timestamp = current_time
765
  print(f"Session {session.session_id[:8]}: Stream 1 error - {event_1.error} - {current_time:.3f}")
766
  break
767
-
768
  elif isinstance(event_1, UnknownRealtimeEvent):
769
  event_data = {
770
  'type': 'unknown_event',
@@ -774,8 +791,12 @@ async def mistral_transcription_handler(session):
774
  session.stream_events['stream_1'].append(event_data)
775
  session.last_event_timestamp = current_time
776
  continue # Ignore unknown events
777
-
778
  async def process_stream_2():
 
 
 
 
779
  async for event_2 in client.audio.realtime.transcribe_stream(
780
  audio_stream=audio_stream_2,
781
  model=MODEL,
@@ -784,9 +805,9 @@ async def mistral_transcription_handler(session):
784
  ):
785
  if not session.is_running:
786
  break
787
-
788
  current_time = time.time()
789
-
790
  if isinstance(event_2, RealtimeTranscriptionSessionCreated):
791
  event_data = {
792
  'type': 'session_created',
@@ -796,17 +817,17 @@ async def mistral_transcription_handler(session):
796
  session.stream_events['stream_2'].append(event_data)
797
  session.last_event_timestamp = current_time
798
  print(f"Session {session.session_id[:8]}: Stream 2 connected to Mistral - {current_time:.3f}")
799
-
800
  elif isinstance(event_2, TranscriptionStreamTextDelta):
801
  delta = event_2.text
802
-
803
  # Get current full text by reconstructing from events
804
  current_full_text = ""
805
  for e in session.stream_events['stream_2']:
806
  if e.get('type') == 'text_delta':
807
  current_full_text += e.get('text', '')
808
  current_full_text += delta
809
-
810
  event_data = {
811
  'type': 'text_delta',
812
  'timestamp': current_time,
@@ -816,9 +837,9 @@ async def mistral_transcription_handler(session):
816
  session.stream_events['stream_2'].append(event_data)
817
  session.last_event_timestamp = current_time
818
  print(f'2 [{current_time:.3f}]', delta, end="", flush=True)
819
-
820
  session.current_wpm = calculate_wpm(session)
821
-
822
  elif isinstance(event_2, TranscriptionStreamDone):
823
  event_data = {
824
  'type': 'stream_done',
@@ -828,7 +849,7 @@ async def mistral_transcription_handler(session):
828
  session.last_event_timestamp = current_time
829
  print(f"Session {session.session_id[:8]}: Stream 2 transcription done - {current_time:.3f}")
830
  break
831
-
832
  elif isinstance(event_2, RealtimeTranscriptionError):
833
  event_data = {
834
  'type': 'error',
@@ -839,7 +860,7 @@ async def mistral_transcription_handler(session):
839
  session.last_event_timestamp = current_time
840
  print(f"Session {session.session_id[:8]}: Stream 2 error - {event_2.error} - {current_time:.3f}")
841
  break
842
-
843
  elif isinstance(event_2, UnknownRealtimeEvent):
844
  event_data = {
845
  'type': 'unknown_event',
@@ -849,19 +870,26 @@ async def mistral_transcription_handler(session):
849
  session.stream_events['stream_2'].append(event_data)
850
  session.last_event_timestamp = current_time
851
  continue # Ignore unknown events
852
-
853
- # Run both streams in parallel
854
  stream1_task = asyncio.create_task(process_stream_1())
855
- stream2_task = asyncio.create_task(process_stream_2())
856
-
857
- # Wait for both streams to complete
858
- await asyncio.gather(stream1_task, stream2_task)
859
-
 
 
 
 
 
 
 
860
  # Final transcription is already reconstructed from events
861
  # Just add stats to the display
862
  event_summary = session.get_event_summary()
863
  stats_text = f"Events: {event_summary['stats']['total_events']} (S1: {event_summary['stats']['stream_1_count']}, S2: {event_summary['stats']['stream_2_count']})"
864
-
865
  # Store the reconstructed transcription as tuple
866
  session.transcription_tuple = session.reconstruct_transcription()
867
 
@@ -874,7 +902,7 @@ async def mistral_transcription_handler(session):
874
  session.status_message = "error"
875
  finally:
876
  session.is_running = False
877
-
878
  # Only remove and log if not already handled by stop_session
879
  if not session._stopped_by_user:
880
  with _sessions_lock:
@@ -883,7 +911,6 @@ async def mistral_transcription_handler(session):
883
  if removed:
884
  print(f"Session {session.session_id[:8]} ended. Active sessions: {active_count}")
885
 
886
-
887
  def start_transcription(session):
888
  """Start Mistral transcription using the shared event loop."""
889
  session.is_running = True
 
310
  """Reconstruct transcription text from stream events."""
311
  stream1_text = ""
312
  stream2_text = ""
313
+
314
  # Reconstruct from text_delta events
315
  for event in self.stream_events['stream_1']:
316
  if event.get('type') == 'text_delta':
317
  stream1_text += event.get('text', '')
318
+
319
+ # Only reconstruct Stream 2 if partial_transcript_enabled is True
320
+ if self.partial_transcript_enabled:
321
+ for event in self.stream_events['stream_2']:
322
+ if event.get('type') == 'text_delta':
323
+ stream2_text += event.get('text', '')
324
+
325
+ # If partial_transcript_enabled is False, return Stream 1 text as both preview and merged (Stream 2 left empty)
326
+ if not self.partial_transcript_enabled:
327
+ return (stream1_text, "", stream1_text)
328
+
329
+ # Stream 3 (merged)
330
  stream3_final = stream2_text
331
  stream3_preview = stream1_text
332
+
333
  stream3_final, stream3_preview = self._compute_display_texts(stream3_final, stream3_preview)
334
  stream3_text = stream3_final + self.new_color_open + stream3_preview + self.new_color_close
335
+
336
  # Return as tuple for compatibility with HTML function
337
  return (stream1_text, stream2_text, stream3_text)
338
 
 
339
  # Load CSS from external file
340
  css_path = os.path.join(os.path.dirname(__file__), "style.css")
341
  with open(css_path, "r") as f:
 
378
  """Generate the full transcription card HTML."""
379
  status_badge = get_status_html(status)
380
  wpm_badge = f'<div class="wpm-badge"><span style="color: #1E1E1E !important;">{wpm}</span></div>'
381
+
382
  if transcripts:
383
+ # If partial_transcript_enabled is False, only show Stream 1
384
+ if not partial_transcript_enabled:
385
+ stream1_content = transcripts[0]
 
 
386
  cursor_html = '<span class="transcript-cursor"></span>' if status == "listening" else ""
387
  content_html = f"""
388
+ <div class="transcript-text" style="color: #000000 !important;">
389
+ {stream1_content}{cursor_html}
390
+ </div>
391
+ """
392
+ else:
393
+ # Show all streams if partial_transcript_enabled is True
394
+ if len(transcripts) >= 3 and transcripts[0] and transcripts[1] and transcripts[2]:
395
+ # Split into three streams
396
+ stream1_content, stream2_content, stream3_content = transcripts
397
+
398
+ cursor_html = '<span class="transcript-cursor"></span>' if status == "listening" else ""
399
+ content_html = f"""
400
+ <div class="triple-stream-container">
401
+ <div class="stream-box">
402
+ <div class="stream-label">Stream 1 (Preview - 240ms)</div>
403
+ <div class="transcript-text" style="color: #000000 !important;">
404
  {stream1_content}{cursor_html}
405
+ </div>
406
  </div>
407
+ <div class="stream-box">
408
+ <div class="stream-label">Stream 2 (Final - 2.4s)</div>
409
+ <div class="transcript-text" style="color: #000000 !important;">
 
410
  {stream2_content}{cursor_html}
411
+ </div>
412
  </div>
413
+ <div class="stream-box">
414
+ <div class="stream-label">Stream 3 (Merged)</div>
415
+ <div class="transcript-text" style="color: #000000 !important;">
 
416
  {stream3_content}{cursor_html}
417
+ </div>
418
  </div>
419
  </div>
420
+ """
421
+ elif len(transcripts) >= 3 and transcripts[0] and transcripts[1] and transcripts[2]:
422
+ # NOTE(review): this condition duplicates the `if` branch above, so this fallback is unreachable — remove or adjust
423
+ stream3_content = transcripts[2]
424
+
425
+ cursor_html = '<span class="transcript-cursor"></span>' if status == "listening" else ""
426
+ content_html = f"""
427
+ <div class="transcript-text" style="color: #000000 !important;">
 
 
428
  {stream3_content}{cursor_html}
429
+ </div>
430
+ """
431
+ elif transcripts[0] and transcripts[1]:
432
+ # Split the transcript into two streams
433
+ stream1_content, stream2_content = transcripts
434
+
435
+ cursor_html = '<span class="transcript-cursor"></span>' if status == "listening" else ""
436
+ content_html = f"""
437
+ <div class="dual-stream-container">
438
+ <div class="stream-box">
439
+ <div class="stream-label">Stream 1</div>
440
+ <div class="transcript-text" style="color: #000000 !important;">
 
441
  {stream1_content}{cursor_html}
442
+ </div>
443
  </div>
444
+ <div class="stream-box">
445
+ <div class="stream-label">Stream 2</div>
446
+ <div class="transcript-text" style="color: #000000 !important;">
 
447
  {stream2_content}{cursor_html}
448
+ </div>
449
  </div>
450
  </div>
451
+ """
452
+ else:
453
+ # Single stream (backward compatibility)
454
+ cursor_html = '<span class="transcript-cursor"></span>' if status == "listening" else ""
455
+ content_html = f"""
456
+ <div class="transcript-text" style="color: #000000 !important;">
 
457
  {transcripts[0]}{cursor_html}
458
+ </div>
459
+ """
460
  elif status in ["listening", "warming", "connecting"]:
461
  content_html = """
462
  <div class="empty-state">
 
482
  <p class="empty-text" style="color: #555555 !important;">// Click the microphone to start.</p>
483
  </div>
484
  """
485
+
486
  # Use base64 image if available
487
  if VOXTRAL_ICON_B64:
488
  icon_html = f'<img src="data:image/png;base64,{VOXTRAL_ICON_B64}" alt="Voxtral" class="voxtral-icon" />'
489
  else:
490
  icon_html = '<span style="font-size:20px;">🎙️</span>'
491
+
492
  return f"""
493
  <div class="transcription-card">
494
  <div class="card-header">
 
682
 
683
  return duplicator
684
 
 
685
  async def mistral_transcription_handler(session):
686
+ """Connect to Mistral realtime API and handle transcription with 1 or 2 parallel streams."""
687
  try:
688
  if not session.api_key:
689
  session.status_message = "error"
690
  print(f"Session {session.session_id[:8]}: No API key provided")
691
  return
692
+
693
  # Create Mistral client
694
  client = Mistral(api_key=session.api_key, server_url=MISTRAL_BASE_URL)
695
  audio_format = AudioFormat(encoding="pcm_s16le", sample_rate=SAMPLE_RATE)
696
+
697
  session.status_message = "connecting"
698
+
699
  print(f"Session {session.session_id[:8]}: Connecting to Mistral realtime API...")
700
+
701
  # Create a duplicator that can serve multiple audio streams
702
  duplicator = await audio_stream_duplicator_from_queue(session)
703
  print(f"Session {session.session_id[:8]}: Created audio stream duplicator for parallel processing")
704
+
705
+ # Always create Stream 1 (240ms delay when partial transcripts are enabled, 480ms otherwise)
706
  audio_stream_1 = await duplicator.add_consumer()
707
+ print(f"Session {session.session_id[:8]}: Created Stream 1 (240ms delay)")
708
+
709
+ # Only create Stream 2 if partial_transcript_enabled is True
710
+ audio_stream_2 = None
711
+ if session.partial_transcript_enabled:
712
+ audio_stream_2 = await duplicator.add_consumer()
713
+ print(f"Session {session.session_id[:8]}: Created Stream 2 (2400ms delay)")
714
+
715
+ # Create tasks for transcription streams
716
  async def process_stream_1():
717
  async for event_1 in client.audio.realtime.transcribe_stream(
718
  audio_stream=audio_stream_1,
719
  model=MODEL,
720
  audio_format=audio_format,
721
+ target_streaming_delay_ms=240 if session.partial_transcript_enabled else 480
722
  ):
723
  if not session.is_running:
724
  break
725
+
726
  current_time = time.time()
727
+
728
  if isinstance(event_1, RealtimeTranscriptionSessionCreated):
729
  event_data = {
730
  'type': 'session_created',
 
734
  session.stream_events['stream_1'].append(event_data)
735
  session.last_event_timestamp = current_time
736
  print(f"Session {session.session_id[:8]}: Stream 1 connected to Mistral - {current_time:.3f}")
737
+
738
  elif isinstance(event_1, TranscriptionStreamTextDelta):
739
  delta = event_1.text
740
+
741
  # Get current full text by reconstructing from events
742
  current_full_text = ""
743
  for e in session.stream_events['stream_1']:
744
  if e.get('type') == 'text_delta':
745
  current_full_text += e.get('text', '')
746
  current_full_text += delta
747
+
748
  event_data = {
749
  'type': 'text_delta',
750
  'timestamp': current_time,
 
754
  session.stream_events['stream_1'].append(event_data)
755
  session.last_event_timestamp = current_time
756
  print(f'1 [{current_time:.3f}]', delta, end="", flush=True)
757
+
758
  words = delta.split()
759
  for _ in words:
760
  session.word_timestamps.append(time.time())
761
+
762
  session.current_wpm = calculate_wpm(session)
763
+
764
  elif isinstance(event_1, TranscriptionStreamDone):
765
  event_data = {
766
  'type': 'stream_done',
 
770
  session.last_event_timestamp = current_time
771
  print(f"Session {session.session_id[:8]}: Stream 1 transcription done - {current_time:.3f}")
772
  break
773
+
774
  elif isinstance(event_1, RealtimeTranscriptionError):
775
  event_data = {
776
  'type': 'error',
 
781
  session.last_event_timestamp = current_time
782
  print(f"Session {session.session_id[:8]}: Stream 1 error - {event_1.error} - {current_time:.3f}")
783
  break
784
+
785
  elif isinstance(event_1, UnknownRealtimeEvent):
786
  event_data = {
787
  'type': 'unknown_event',
 
791
  session.stream_events['stream_1'].append(event_data)
792
  session.last_event_timestamp = current_time
793
  continue # Ignore unknown events
794
+
795
  async def process_stream_2():
796
+ # Only process Stream 2 if it exists and partial_transcript_enabled is True
797
+ if not session.partial_transcript_enabled or audio_stream_2 is None:
798
+ return
799
+
800
  async for event_2 in client.audio.realtime.transcribe_stream(
801
  audio_stream=audio_stream_2,
802
  model=MODEL,
 
805
  ):
806
  if not session.is_running:
807
  break
808
+
809
  current_time = time.time()
810
+
811
  if isinstance(event_2, RealtimeTranscriptionSessionCreated):
812
  event_data = {
813
  'type': 'session_created',
 
817
  session.stream_events['stream_2'].append(event_data)
818
  session.last_event_timestamp = current_time
819
  print(f"Session {session.session_id[:8]}: Stream 2 connected to Mistral - {current_time:.3f}")
820
+
821
  elif isinstance(event_2, TranscriptionStreamTextDelta):
822
  delta = event_2.text
823
+
824
  # Get current full text by reconstructing from events
825
  current_full_text = ""
826
  for e in session.stream_events['stream_2']:
827
  if e.get('type') == 'text_delta':
828
  current_full_text += e.get('text', '')
829
  current_full_text += delta
830
+
831
  event_data = {
832
  'type': 'text_delta',
833
  'timestamp': current_time,
 
837
  session.stream_events['stream_2'].append(event_data)
838
  session.last_event_timestamp = current_time
839
  print(f'2 [{current_time:.3f}]', delta, end="", flush=True)
840
+
841
  session.current_wpm = calculate_wpm(session)
842
+
843
  elif isinstance(event_2, TranscriptionStreamDone):
844
  event_data = {
845
  'type': 'stream_done',
 
849
  session.last_event_timestamp = current_time
850
  print(f"Session {session.session_id[:8]}: Stream 2 transcription done - {current_time:.3f}")
851
  break
852
+
853
  elif isinstance(event_2, RealtimeTranscriptionError):
854
  event_data = {
855
  'type': 'error',
 
860
  session.last_event_timestamp = current_time
861
  print(f"Session {session.session_id[:8]}: Stream 2 error - {event_2.error} - {current_time:.3f}")
862
  break
863
+
864
  elif isinstance(event_2, UnknownRealtimeEvent):
865
  event_data = {
866
  'type': 'unknown_event',
 
870
  session.stream_events['stream_2'].append(event_data)
871
  session.last_event_timestamp = current_time
872
  continue # Ignore unknown events
873
+
874
+ # Run Stream 1 always
875
  stream1_task = asyncio.create_task(process_stream_1())
876
+
877
+ # Run Stream 2 only if partial_transcript_enabled is True
878
+ stream2_task = None
879
+ if session.partial_transcript_enabled:
880
+ stream2_task = asyncio.create_task(process_stream_2())
881
+
882
+ # Wait for streams to complete
883
+ if stream2_task:
884
+ await asyncio.gather(stream1_task, stream2_task)
885
+ else:
886
+ await stream1_task
887
+
888
  # Final transcription is already reconstructed from events
889
  # Just add stats to the display
890
  event_summary = session.get_event_summary()
891
  stats_text = f"Events: {event_summary['stats']['total_events']} (S1: {event_summary['stats']['stream_1_count']}, S2: {event_summary['stats']['stream_2_count']})"
892
+
893
  # Store the reconstructed transcription as tuple
894
  session.transcription_tuple = session.reconstruct_transcription()
895
 
 
902
  session.status_message = "error"
903
  finally:
904
  session.is_running = False
905
+
906
  # Only remove and log if not already handled by stop_session
907
  if not session._stopped_by_user:
908
  with _sessions_lock:
 
911
  if removed:
912
  print(f"Session {session.session_id[:8]} ended. Active sessions: {active_count}")
913
 
 
914
  def start_transcription(session):
915
  """Start Mistral transcription using the shared event loop."""
916
  session.is_running = True