Handle no partial
#5
by
pandora-s
- opened
app.py
CHANGED
|
@@ -310,27 +310,32 @@ class UserSession:
|
|
| 310 |
"""Reconstruct transcription text from stream events."""
|
| 311 |
stream1_text = ""
|
| 312 |
stream2_text = ""
|
| 313 |
-
|
| 314 |
# Reconstruct from text_delta events
|
| 315 |
for event in self.stream_events['stream_1']:
|
| 316 |
if event.get('type') == 'text_delta':
|
| 317 |
stream1_text += event.get('text', '')
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 324 |
stream3_final = stream2_text
|
| 325 |
stream3_preview = stream1_text
|
| 326 |
-
|
| 327 |
stream3_final, stream3_preview = self._compute_display_texts(stream3_final, stream3_preview)
|
| 328 |
stream3_text = stream3_final + self.new_color_open + stream3_preview + self.new_color_close
|
| 329 |
-
|
| 330 |
# Return as tuple for compatibility with HTML function
|
| 331 |
return (stream1_text, stream2_text, stream3_text)
|
| 332 |
|
| 333 |
-
|
| 334 |
# Load CSS from external file
|
| 335 |
css_path = os.path.join(os.path.dirname(__file__), "style.css")
|
| 336 |
with open(css_path, "r") as f:
|
|
@@ -373,77 +378,85 @@ def get_transcription_html(transcripts: tuple, status: str, wpm: str = "Calibrat
|
|
| 373 |
"""Generate the full transcription card HTML."""
|
| 374 |
status_badge = get_status_html(status)
|
| 375 |
wpm_badge = f'<div class="wpm-badge"><span style="color: #1E1E1E !important;">{wpm}</span></div>'
|
| 376 |
-
|
| 377 |
if transcripts:
|
| 378 |
-
#
|
| 379 |
-
if partial_transcript_enabled
|
| 380 |
-
|
| 381 |
-
stream1_content, stream2_content, stream3_content = transcripts
|
| 382 |
-
|
| 383 |
cursor_html = '<span class="transcript-cursor"></span>' if status == "listening" else ""
|
| 384 |
content_html = f"""
|
| 385 |
-
<div class="
|
| 386 |
-
|
| 387 |
-
|
| 388 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 389 |
{stream1_content}{cursor_html}
|
|
|
|
| 390 |
</div>
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
|
| 394 |
-
<div class="transcript-text" style="color: #000000 !important;">
|
| 395 |
{stream2_content}{cursor_html}
|
|
|
|
| 396 |
</div>
|
| 397 |
-
|
| 398 |
-
|
| 399 |
-
|
| 400 |
-
<div class="transcript-text" style="color: #000000 !important;">
|
| 401 |
{stream3_content}{cursor_html}
|
|
|
|
| 402 |
</div>
|
| 403 |
</div>
|
| 404 |
-
|
| 405 |
-
|
| 406 |
-
|
| 407 |
-
|
| 408 |
-
|
| 409 |
-
|
| 410 |
-
|
| 411 |
-
|
| 412 |
-
content_html = f"""
|
| 413 |
-
<div class="transcript-text" style="color: #000000 !important;">
|
| 414 |
{stream3_content}{cursor_html}
|
| 415 |
-
|
| 416 |
-
|
| 417 |
-
|
| 418 |
-
|
| 419 |
-
|
| 420 |
-
|
| 421 |
-
|
| 422 |
-
|
| 423 |
-
|
| 424 |
-
|
| 425 |
-
|
| 426 |
-
|
| 427 |
-
<div class="transcript-text" style="color: #000000 !important;">
|
| 428 |
{stream1_content}{cursor_html}
|
|
|
|
| 429 |
</div>
|
| 430 |
-
|
| 431 |
-
|
| 432 |
-
|
| 433 |
-
<div class="transcript-text" style="color: #000000 !important;">
|
| 434 |
{stream2_content}{cursor_html}
|
|
|
|
| 435 |
</div>
|
| 436 |
</div>
|
| 437 |
-
|
| 438 |
-
|
| 439 |
-
|
| 440 |
-
|
| 441 |
-
|
| 442 |
-
|
| 443 |
-
<div class="transcript-text" style="color: #000000 !important;">
|
| 444 |
{transcripts[0]}{cursor_html}
|
| 445 |
-
|
| 446 |
-
|
| 447 |
elif status in ["listening", "warming", "connecting"]:
|
| 448 |
content_html = """
|
| 449 |
<div class="empty-state">
|
|
@@ -469,13 +482,13 @@ def get_transcription_html(transcripts: tuple, status: str, wpm: str = "Calibrat
|
|
| 469 |
<p class="empty-text" style="color: #555555 !important;">// Click the microphone to start.</p>
|
| 470 |
</div>
|
| 471 |
"""
|
| 472 |
-
|
| 473 |
# Use base64 image if available
|
| 474 |
if VOXTRAL_ICON_B64:
|
| 475 |
icon_html = f'<img src="data:image/png;base64,{VOXTRAL_ICON_B64}" alt="Voxtral" class="voxtral-icon" />'
|
| 476 |
else:
|
| 477 |
icon_html = '<span style="font-size:20px;">🎙️</span>'
|
| 478 |
-
|
| 479 |
return f"""
|
| 480 |
<div class="transcription-card">
|
| 481 |
<div class="card-header">
|
|
@@ -669,45 +682,49 @@ async def audio_stream_duplicator_from_queue(session):
|
|
| 669 |
|
| 670 |
return duplicator
|
| 671 |
|
| 672 |
-
|
| 673 |
async def mistral_transcription_handler(session):
|
| 674 |
-
"""Connect to Mistral realtime API and handle transcription with 2 parallel streams."""
|
| 675 |
try:
|
| 676 |
if not session.api_key:
|
| 677 |
session.status_message = "error"
|
| 678 |
print(f"Session {session.session_id[:8]}: No API key provided")
|
| 679 |
return
|
| 680 |
-
|
| 681 |
# Create Mistral client
|
| 682 |
client = Mistral(api_key=session.api_key, server_url=MISTRAL_BASE_URL)
|
| 683 |
audio_format = AudioFormat(encoding="pcm_s16le", sample_rate=SAMPLE_RATE)
|
| 684 |
-
|
| 685 |
session.status_message = "connecting"
|
| 686 |
-
|
| 687 |
print(f"Session {session.session_id[:8]}: Connecting to Mistral realtime API...")
|
| 688 |
-
|
| 689 |
# Create a duplicator that can serve multiple audio streams
|
| 690 |
duplicator = await audio_stream_duplicator_from_queue(session)
|
| 691 |
print(f"Session {session.session_id[:8]}: Created audio stream duplicator for parallel processing")
|
| 692 |
-
|
| 693 |
-
#
|
| 694 |
audio_stream_1 = await duplicator.add_consumer()
|
| 695 |
-
|
| 696 |
-
|
| 697 |
-
|
| 698 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 699 |
async def process_stream_1():
|
| 700 |
async for event_1 in client.audio.realtime.transcribe_stream(
|
| 701 |
audio_stream=audio_stream_1,
|
| 702 |
model=MODEL,
|
| 703 |
audio_format=audio_format,
|
| 704 |
-
target_streaming_delay_ms=240
|
| 705 |
):
|
| 706 |
if not session.is_running:
|
| 707 |
break
|
| 708 |
-
|
| 709 |
current_time = time.time()
|
| 710 |
-
|
| 711 |
if isinstance(event_1, RealtimeTranscriptionSessionCreated):
|
| 712 |
event_data = {
|
| 713 |
'type': 'session_created',
|
|
@@ -717,17 +734,17 @@ async def mistral_transcription_handler(session):
|
|
| 717 |
session.stream_events['stream_1'].append(event_data)
|
| 718 |
session.last_event_timestamp = current_time
|
| 719 |
print(f"Session {session.session_id[:8]}: Stream 1 connected to Mistral - {current_time:.3f}")
|
| 720 |
-
|
| 721 |
elif isinstance(event_1, TranscriptionStreamTextDelta):
|
| 722 |
delta = event_1.text
|
| 723 |
-
|
| 724 |
# Get current full text by reconstructing from events
|
| 725 |
current_full_text = ""
|
| 726 |
for e in session.stream_events['stream_1']:
|
| 727 |
if e.get('type') == 'text_delta':
|
| 728 |
current_full_text += e.get('text', '')
|
| 729 |
current_full_text += delta
|
| 730 |
-
|
| 731 |
event_data = {
|
| 732 |
'type': 'text_delta',
|
| 733 |
'timestamp': current_time,
|
|
@@ -737,13 +754,13 @@ async def mistral_transcription_handler(session):
|
|
| 737 |
session.stream_events['stream_1'].append(event_data)
|
| 738 |
session.last_event_timestamp = current_time
|
| 739 |
print(f'1 [{current_time:.3f}]', delta, end="", flush=True)
|
| 740 |
-
|
| 741 |
words = delta.split()
|
| 742 |
for _ in words:
|
| 743 |
session.word_timestamps.append(time.time())
|
| 744 |
-
|
| 745 |
session.current_wpm = calculate_wpm(session)
|
| 746 |
-
|
| 747 |
elif isinstance(event_1, TranscriptionStreamDone):
|
| 748 |
event_data = {
|
| 749 |
'type': 'stream_done',
|
|
@@ -753,7 +770,7 @@ async def mistral_transcription_handler(session):
|
|
| 753 |
session.last_event_timestamp = current_time
|
| 754 |
print(f"Session {session.session_id[:8]}: Stream 1 transcription done - {current_time:.3f}")
|
| 755 |
break
|
| 756 |
-
|
| 757 |
elif isinstance(event_1, RealtimeTranscriptionError):
|
| 758 |
event_data = {
|
| 759 |
'type': 'error',
|
|
@@ -764,7 +781,7 @@ async def mistral_transcription_handler(session):
|
|
| 764 |
session.last_event_timestamp = current_time
|
| 765 |
print(f"Session {session.session_id[:8]}: Stream 1 error - {event_1.error} - {current_time:.3f}")
|
| 766 |
break
|
| 767 |
-
|
| 768 |
elif isinstance(event_1, UnknownRealtimeEvent):
|
| 769 |
event_data = {
|
| 770 |
'type': 'unknown_event',
|
|
@@ -774,8 +791,12 @@ async def mistral_transcription_handler(session):
|
|
| 774 |
session.stream_events['stream_1'].append(event_data)
|
| 775 |
session.last_event_timestamp = current_time
|
| 776 |
continue # Ignore unknown events
|
| 777 |
-
|
| 778 |
async def process_stream_2():
|
|
|
|
|
|
|
|
|
|
|
|
|
| 779 |
async for event_2 in client.audio.realtime.transcribe_stream(
|
| 780 |
audio_stream=audio_stream_2,
|
| 781 |
model=MODEL,
|
|
@@ -784,9 +805,9 @@ async def mistral_transcription_handler(session):
|
|
| 784 |
):
|
| 785 |
if not session.is_running:
|
| 786 |
break
|
| 787 |
-
|
| 788 |
current_time = time.time()
|
| 789 |
-
|
| 790 |
if isinstance(event_2, RealtimeTranscriptionSessionCreated):
|
| 791 |
event_data = {
|
| 792 |
'type': 'session_created',
|
|
@@ -796,17 +817,17 @@ async def mistral_transcription_handler(session):
|
|
| 796 |
session.stream_events['stream_2'].append(event_data)
|
| 797 |
session.last_event_timestamp = current_time
|
| 798 |
print(f"Session {session.session_id[:8]}: Stream 2 connected to Mistral - {current_time:.3f}")
|
| 799 |
-
|
| 800 |
elif isinstance(event_2, TranscriptionStreamTextDelta):
|
| 801 |
delta = event_2.text
|
| 802 |
-
|
| 803 |
# Get current full text by reconstructing from events
|
| 804 |
current_full_text = ""
|
| 805 |
for e in session.stream_events['stream_2']:
|
| 806 |
if e.get('type') == 'text_delta':
|
| 807 |
current_full_text += e.get('text', '')
|
| 808 |
current_full_text += delta
|
| 809 |
-
|
| 810 |
event_data = {
|
| 811 |
'type': 'text_delta',
|
| 812 |
'timestamp': current_time,
|
|
@@ -816,9 +837,9 @@ async def mistral_transcription_handler(session):
|
|
| 816 |
session.stream_events['stream_2'].append(event_data)
|
| 817 |
session.last_event_timestamp = current_time
|
| 818 |
print(f'2 [{current_time:.3f}]', delta, end="", flush=True)
|
| 819 |
-
|
| 820 |
session.current_wpm = calculate_wpm(session)
|
| 821 |
-
|
| 822 |
elif isinstance(event_2, TranscriptionStreamDone):
|
| 823 |
event_data = {
|
| 824 |
'type': 'stream_done',
|
|
@@ -828,7 +849,7 @@ async def mistral_transcription_handler(session):
|
|
| 828 |
session.last_event_timestamp = current_time
|
| 829 |
print(f"Session {session.session_id[:8]}: Stream 2 transcription done - {current_time:.3f}")
|
| 830 |
break
|
| 831 |
-
|
| 832 |
elif isinstance(event_2, RealtimeTranscriptionError):
|
| 833 |
event_data = {
|
| 834 |
'type': 'error',
|
|
@@ -839,7 +860,7 @@ async def mistral_transcription_handler(session):
|
|
| 839 |
session.last_event_timestamp = current_time
|
| 840 |
print(f"Session {session.session_id[:8]}: Stream 2 error - {event_2.error} - {current_time:.3f}")
|
| 841 |
break
|
| 842 |
-
|
| 843 |
elif isinstance(event_2, UnknownRealtimeEvent):
|
| 844 |
event_data = {
|
| 845 |
'type': 'unknown_event',
|
|
@@ -849,19 +870,26 @@ async def mistral_transcription_handler(session):
|
|
| 849 |
session.stream_events['stream_2'].append(event_data)
|
| 850 |
session.last_event_timestamp = current_time
|
| 851 |
continue # Ignore unknown events
|
| 852 |
-
|
| 853 |
-
# Run
|
| 854 |
stream1_task = asyncio.create_task(process_stream_1())
|
| 855 |
-
|
| 856 |
-
|
| 857 |
-
|
| 858 |
-
|
| 859 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 860 |
# Final transcription is already reconstructed from events
|
| 861 |
# Just add stats to the display
|
| 862 |
event_summary = session.get_event_summary()
|
| 863 |
stats_text = f"Events: {event_summary['stats']['total_events']} (S1: {event_summary['stats']['stream_1_count']}, S2: {event_summary['stats']['stream_2_count']})"
|
| 864 |
-
|
| 865 |
# Store the reconstructed transcription as tuple
|
| 866 |
session.transcription_tuple = session.reconstruct_transcription()
|
| 867 |
|
|
@@ -874,7 +902,7 @@ async def mistral_transcription_handler(session):
|
|
| 874 |
session.status_message = "error"
|
| 875 |
finally:
|
| 876 |
session.is_running = False
|
| 877 |
-
|
| 878 |
# Only remove and log if not already handled by stop_session
|
| 879 |
if not session._stopped_by_user:
|
| 880 |
with _sessions_lock:
|
|
@@ -883,7 +911,6 @@ async def mistral_transcription_handler(session):
|
|
| 883 |
if removed:
|
| 884 |
print(f"Session {session.session_id[:8]} ended. Active sessions: {active_count}")
|
| 885 |
|
| 886 |
-
|
| 887 |
def start_transcription(session):
|
| 888 |
"""Start Mistral transcription using the shared event loop."""
|
| 889 |
session.is_running = True
|
|
|
|
| 310 |
"""Reconstruct transcription text from stream events."""
|
| 311 |
stream1_text = ""
|
| 312 |
stream2_text = ""
|
| 313 |
+
|
| 314 |
# Reconstruct from text_delta events
|
| 315 |
for event in self.stream_events['stream_1']:
|
| 316 |
if event.get('type') == 'text_delta':
|
| 317 |
stream1_text += event.get('text', '')
|
| 318 |
+
|
| 319 |
+
# Only reconstruct Stream 2 if partial_transcript_enabled is True
|
| 320 |
+
if self.partial_transcript_enabled:
|
| 321 |
+
for event in self.stream_events['stream_2']:
|
| 322 |
+
if event.get('type') == 'text_delta':
|
| 323 |
+
stream2_text += event.get('text', '')
|
| 324 |
+
|
| 325 |
+
# If partial_transcript_enabled is False, just return Stream 1 for all streams
|
| 326 |
+
if not self.partial_transcript_enabled:
|
| 327 |
+
return (stream1_text, "", stream1_text)
|
| 328 |
+
|
| 329 |
+
# Stream 3 (merged)
|
| 330 |
stream3_final = stream2_text
|
| 331 |
stream3_preview = stream1_text
|
| 332 |
+
|
| 333 |
stream3_final, stream3_preview = self._compute_display_texts(stream3_final, stream3_preview)
|
| 334 |
stream3_text = stream3_final + self.new_color_open + stream3_preview + self.new_color_close
|
| 335 |
+
|
| 336 |
# Return as tuple for compatibility with HTML function
|
| 337 |
return (stream1_text, stream2_text, stream3_text)
|
| 338 |
|
|
|
|
| 339 |
# Load CSS from external file
|
| 340 |
css_path = os.path.join(os.path.dirname(__file__), "style.css")
|
| 341 |
with open(css_path, "r") as f:
|
|
|
|
| 378 |
"""Generate the full transcription card HTML."""
|
| 379 |
status_badge = get_status_html(status)
|
| 380 |
wpm_badge = f'<div class="wpm-badge"><span style="color: #1E1E1E !important;">{wpm}</span></div>'
|
| 381 |
+
|
| 382 |
if transcripts:
|
| 383 |
+
# If partial_transcript_enabled is False, only show Stream 1
|
| 384 |
+
if not partial_transcript_enabled:
|
| 385 |
+
stream1_content = transcripts[0]
|
|
|
|
|
|
|
| 386 |
cursor_html = '<span class="transcript-cursor"></span>' if status == "listening" else ""
|
| 387 |
content_html = f"""
|
| 388 |
+
<div class="transcript-text" style="color: #000000 !important;">
|
| 389 |
+
{stream1_content}{cursor_html}
|
| 390 |
+
</div>
|
| 391 |
+
"""
|
| 392 |
+
else:
|
| 393 |
+
# Show all streams if partial_transcript_enabled is True
|
| 394 |
+
if len(transcripts) >= 3 and transcripts[0] and transcripts[1] and transcripts[2]:
|
| 395 |
+
# Split into three streams
|
| 396 |
+
stream1_content, stream2_content, stream3_content = transcripts
|
| 397 |
+
|
| 398 |
+
cursor_html = '<span class="transcript-cursor"></span>' if status == "listening" else ""
|
| 399 |
+
content_html = f"""
|
| 400 |
+
<div class="triple-stream-container">
|
| 401 |
+
<div class="stream-box">
|
| 402 |
+
<div class="stream-label">Stream 1 (Preview - 240ms)</div>
|
| 403 |
+
<div class="transcript-text" style="color: #000000 !important;">
|
| 404 |
{stream1_content}{cursor_html}
|
| 405 |
+
</div>
|
| 406 |
</div>
|
| 407 |
+
<div class="stream-box">
|
| 408 |
+
<div class="stream-label">Stream 2 (Final - 2.4s)</div>
|
| 409 |
+
<div class="transcript-text" style="color: #000000 !important;">
|
|
|
|
| 410 |
{stream2_content}{cursor_html}
|
| 411 |
+
</div>
|
| 412 |
</div>
|
| 413 |
+
<div class="stream-box">
|
| 414 |
+
<div class="stream-label">Stream 3 (Merged)</div>
|
| 415 |
+
<div class="transcript-text" style="color: #000000 !important;">
|
|
|
|
| 416 |
{stream3_content}{cursor_html}
|
| 417 |
+
</div>
|
| 418 |
</div>
|
| 419 |
</div>
|
| 420 |
+
"""
|
| 421 |
+
elif len(transcripts) >= 3 and transcripts[0] and transcripts[1] and transcripts[2]:
|
| 422 |
+
# Show only the merged stream when partial transcript is disabled
|
| 423 |
+
stream3_content = transcripts[2]
|
| 424 |
+
|
| 425 |
+
cursor_html = '<span class="transcript-cursor"></span>' if status == "listening" else ""
|
| 426 |
+
content_html = f"""
|
| 427 |
+
<div class="transcript-text" style="color: #000000 !important;">
|
|
|
|
|
|
|
| 428 |
{stream3_content}{cursor_html}
|
| 429 |
+
</div>
|
| 430 |
+
"""
|
| 431 |
+
elif transcripts[0] and transcripts[1]:
|
| 432 |
+
# Split the transcript into two streams
|
| 433 |
+
stream1_content, stream2_content = transcripts
|
| 434 |
+
|
| 435 |
+
cursor_html = '<span class="transcript-cursor"></span>' if status == "listening" else ""
|
| 436 |
+
content_html = f"""
|
| 437 |
+
<div class="dual-stream-container">
|
| 438 |
+
<div class="stream-box">
|
| 439 |
+
<div class="stream-label">Stream 1</div>
|
| 440 |
+
<div class="transcript-text" style="color: #000000 !important;">
|
|
|
|
| 441 |
{stream1_content}{cursor_html}
|
| 442 |
+
</div>
|
| 443 |
</div>
|
| 444 |
+
<div class="stream-box">
|
| 445 |
+
<div class="stream-label">Stream 2</div>
|
| 446 |
+
<div class="transcript-text" style="color: #000000 !important;">
|
|
|
|
| 447 |
{stream2_content}{cursor_html}
|
| 448 |
+
</div>
|
| 449 |
</div>
|
| 450 |
</div>
|
| 451 |
+
"""
|
| 452 |
+
else:
|
| 453 |
+
# Single stream (backward compatibility)
|
| 454 |
+
cursor_html = '<span class="transcript-cursor"></span>' if status == "listening" else ""
|
| 455 |
+
content_html = f"""
|
| 456 |
+
<div class="transcript-text" style="color: #000000 !important;">
|
|
|
|
| 457 |
{transcripts[0]}{cursor_html}
|
| 458 |
+
</div>
|
| 459 |
+
"""
|
| 460 |
elif status in ["listening", "warming", "connecting"]:
|
| 461 |
content_html = """
|
| 462 |
<div class="empty-state">
|
|
|
|
| 482 |
<p class="empty-text" style="color: #555555 !important;">// Click the microphone to start.</p>
|
| 483 |
</div>
|
| 484 |
"""
|
| 485 |
+
|
| 486 |
# Use base64 image if available
|
| 487 |
if VOXTRAL_ICON_B64:
|
| 488 |
icon_html = f'<img src="data:image/png;base64,{VOXTRAL_ICON_B64}" alt="Voxtral" class="voxtral-icon" />'
|
| 489 |
else:
|
| 490 |
icon_html = '<span style="font-size:20px;">🎙️</span>'
|
| 491 |
+
|
| 492 |
return f"""
|
| 493 |
<div class="transcription-card">
|
| 494 |
<div class="card-header">
|
|
|
|
| 682 |
|
| 683 |
return duplicator
|
| 684 |
|
|
|
|
| 685 |
async def mistral_transcription_handler(session):
|
| 686 |
+
"""Connect to Mistral realtime API and handle transcription with 1 or 2 parallel streams."""
|
| 687 |
try:
|
| 688 |
if not session.api_key:
|
| 689 |
session.status_message = "error"
|
| 690 |
print(f"Session {session.session_id[:8]}: No API key provided")
|
| 691 |
return
|
| 692 |
+
|
| 693 |
# Create Mistral client
|
| 694 |
client = Mistral(api_key=session.api_key, server_url=MISTRAL_BASE_URL)
|
| 695 |
audio_format = AudioFormat(encoding="pcm_s16le", sample_rate=SAMPLE_RATE)
|
| 696 |
+
|
| 697 |
session.status_message = "connecting"
|
| 698 |
+
|
| 699 |
print(f"Session {session.session_id[:8]}: Connecting to Mistral realtime API...")
|
| 700 |
+
|
| 701 |
# Create a duplicator that can serve multiple audio streams
|
| 702 |
duplicator = await audio_stream_duplicator_from_queue(session)
|
| 703 |
print(f"Session {session.session_id[:8]}: Created audio stream duplicator for parallel processing")
|
| 704 |
+
|
| 705 |
+
# Always create Stream 1 (fast, 240ms delay)
|
| 706 |
audio_stream_1 = await duplicator.add_consumer()
|
| 707 |
+
print(f"Session {session.session_id[:8]}: Created Stream 1 (240ms delay)")
|
| 708 |
+
|
| 709 |
+
# Only create Stream 2 if partial_transcript_enabled is True
|
| 710 |
+
audio_stream_2 = None
|
| 711 |
+
if session.partial_transcript_enabled:
|
| 712 |
+
audio_stream_2 = await duplicator.add_consumer()
|
| 713 |
+
print(f"Session {session.session_id[:8]}: Created Stream 2 (2400ms delay)")
|
| 714 |
+
|
| 715 |
+
# Create tasks for transcription streams
|
| 716 |
async def process_stream_1():
|
| 717 |
async for event_1 in client.audio.realtime.transcribe_stream(
|
| 718 |
audio_stream=audio_stream_1,
|
| 719 |
model=MODEL,
|
| 720 |
audio_format=audio_format,
|
| 721 |
+
target_streaming_delay_ms=240 if session.partial_transcript_enabled else 480
|
| 722 |
):
|
| 723 |
if not session.is_running:
|
| 724 |
break
|
| 725 |
+
|
| 726 |
current_time = time.time()
|
| 727 |
+
|
| 728 |
if isinstance(event_1, RealtimeTranscriptionSessionCreated):
|
| 729 |
event_data = {
|
| 730 |
'type': 'session_created',
|
|
|
|
| 734 |
session.stream_events['stream_1'].append(event_data)
|
| 735 |
session.last_event_timestamp = current_time
|
| 736 |
print(f"Session {session.session_id[:8]}: Stream 1 connected to Mistral - {current_time:.3f}")
|
| 737 |
+
|
| 738 |
elif isinstance(event_1, TranscriptionStreamTextDelta):
|
| 739 |
delta = event_1.text
|
| 740 |
+
|
| 741 |
# Get current full text by reconstructing from events
|
| 742 |
current_full_text = ""
|
| 743 |
for e in session.stream_events['stream_1']:
|
| 744 |
if e.get('type') == 'text_delta':
|
| 745 |
current_full_text += e.get('text', '')
|
| 746 |
current_full_text += delta
|
| 747 |
+
|
| 748 |
event_data = {
|
| 749 |
'type': 'text_delta',
|
| 750 |
'timestamp': current_time,
|
|
|
|
| 754 |
session.stream_events['stream_1'].append(event_data)
|
| 755 |
session.last_event_timestamp = current_time
|
| 756 |
print(f'1 [{current_time:.3f}]', delta, end="", flush=True)
|
| 757 |
+
|
| 758 |
words = delta.split()
|
| 759 |
for _ in words:
|
| 760 |
session.word_timestamps.append(time.time())
|
| 761 |
+
|
| 762 |
session.current_wpm = calculate_wpm(session)
|
| 763 |
+
|
| 764 |
elif isinstance(event_1, TranscriptionStreamDone):
|
| 765 |
event_data = {
|
| 766 |
'type': 'stream_done',
|
|
|
|
| 770 |
session.last_event_timestamp = current_time
|
| 771 |
print(f"Session {session.session_id[:8]}: Stream 1 transcription done - {current_time:.3f}")
|
| 772 |
break
|
| 773 |
+
|
| 774 |
elif isinstance(event_1, RealtimeTranscriptionError):
|
| 775 |
event_data = {
|
| 776 |
'type': 'error',
|
|
|
|
| 781 |
session.last_event_timestamp = current_time
|
| 782 |
print(f"Session {session.session_id[:8]}: Stream 1 error - {event_1.error} - {current_time:.3f}")
|
| 783 |
break
|
| 784 |
+
|
| 785 |
elif isinstance(event_1, UnknownRealtimeEvent):
|
| 786 |
event_data = {
|
| 787 |
'type': 'unknown_event',
|
|
|
|
| 791 |
session.stream_events['stream_1'].append(event_data)
|
| 792 |
session.last_event_timestamp = current_time
|
| 793 |
continue # Ignore unknown events
|
| 794 |
+
|
| 795 |
async def process_stream_2():
|
| 796 |
+
# Only process Stream 2 if it exists and partial_transcript_enabled is True
|
| 797 |
+
if not session.partial_transcript_enabled or audio_stream_2 is None:
|
| 798 |
+
return
|
| 799 |
+
|
| 800 |
async for event_2 in client.audio.realtime.transcribe_stream(
|
| 801 |
audio_stream=audio_stream_2,
|
| 802 |
model=MODEL,
|
|
|
|
| 805 |
):
|
| 806 |
if not session.is_running:
|
| 807 |
break
|
| 808 |
+
|
| 809 |
current_time = time.time()
|
| 810 |
+
|
| 811 |
if isinstance(event_2, RealtimeTranscriptionSessionCreated):
|
| 812 |
event_data = {
|
| 813 |
'type': 'session_created',
|
|
|
|
| 817 |
session.stream_events['stream_2'].append(event_data)
|
| 818 |
session.last_event_timestamp = current_time
|
| 819 |
print(f"Session {session.session_id[:8]}: Stream 2 connected to Mistral - {current_time:.3f}")
|
| 820 |
+
|
| 821 |
elif isinstance(event_2, TranscriptionStreamTextDelta):
|
| 822 |
delta = event_2.text
|
| 823 |
+
|
| 824 |
# Get current full text by reconstructing from events
|
| 825 |
current_full_text = ""
|
| 826 |
for e in session.stream_events['stream_2']:
|
| 827 |
if e.get('type') == 'text_delta':
|
| 828 |
current_full_text += e.get('text', '')
|
| 829 |
current_full_text += delta
|
| 830 |
+
|
| 831 |
event_data = {
|
| 832 |
'type': 'text_delta',
|
| 833 |
'timestamp': current_time,
|
|
|
|
| 837 |
session.stream_events['stream_2'].append(event_data)
|
| 838 |
session.last_event_timestamp = current_time
|
| 839 |
print(f'2 [{current_time:.3f}]', delta, end="", flush=True)
|
| 840 |
+
|
| 841 |
session.current_wpm = calculate_wpm(session)
|
| 842 |
+
|
| 843 |
elif isinstance(event_2, TranscriptionStreamDone):
|
| 844 |
event_data = {
|
| 845 |
'type': 'stream_done',
|
|
|
|
| 849 |
session.last_event_timestamp = current_time
|
| 850 |
print(f"Session {session.session_id[:8]}: Stream 2 transcription done - {current_time:.3f}")
|
| 851 |
break
|
| 852 |
+
|
| 853 |
elif isinstance(event_2, RealtimeTranscriptionError):
|
| 854 |
event_data = {
|
| 855 |
'type': 'error',
|
|
|
|
| 860 |
session.last_event_timestamp = current_time
|
| 861 |
print(f"Session {session.session_id[:8]}: Stream 2 error - {event_2.error} - {current_time:.3f}")
|
| 862 |
break
|
| 863 |
+
|
| 864 |
elif isinstance(event_2, UnknownRealtimeEvent):
|
| 865 |
event_data = {
|
| 866 |
'type': 'unknown_event',
|
|
|
|
| 870 |
session.stream_events['stream_2'].append(event_data)
|
| 871 |
session.last_event_timestamp = current_time
|
| 872 |
continue # Ignore unknown events
|
| 873 |
+
|
| 874 |
+
# Run Stream 1 always
|
| 875 |
stream1_task = asyncio.create_task(process_stream_1())
|
| 876 |
+
|
| 877 |
+
# Run Stream 2 only if partial_transcript_enabled is True
|
| 878 |
+
stream2_task = None
|
| 879 |
+
if session.partial_transcript_enabled:
|
| 880 |
+
stream2_task = asyncio.create_task(process_stream_2())
|
| 881 |
+
|
| 882 |
+
# Wait for streams to complete
|
| 883 |
+
if stream2_task:
|
| 884 |
+
await asyncio.gather(stream1_task, stream2_task)
|
| 885 |
+
else:
|
| 886 |
+
await stream1_task
|
| 887 |
+
|
| 888 |
# Final transcription is already reconstructed from events
|
| 889 |
# Just add stats to the display
|
| 890 |
event_summary = session.get_event_summary()
|
| 891 |
stats_text = f"Events: {event_summary['stats']['total_events']} (S1: {event_summary['stats']['stream_1_count']}, S2: {event_summary['stats']['stream_2_count']})"
|
| 892 |
+
|
| 893 |
# Store the reconstructed transcription as tuple
|
| 894 |
session.transcription_tuple = session.reconstruct_transcription()
|
| 895 |
|
|
|
|
| 902 |
session.status_message = "error"
|
| 903 |
finally:
|
| 904 |
session.is_running = False
|
| 905 |
+
|
| 906 |
# Only remove and log if not already handled by stop_session
|
| 907 |
if not session._stopped_by_user:
|
| 908 |
with _sessions_lock:
|
|
|
|
| 911 |
if removed:
|
| 912 |
print(f"Session {session.session_id[:8]} ended. Active sessions: {active_count}")
|
| 913 |
|
|
|
|
| 914 |
def start_transcription(session):
|
| 915 |
"""Start Mistral transcription using the shared event loop."""
|
| 916 |
session.is_running = True
|