Update src/ui/stage_audio.py
Browse files- src/ui/stage_audio.py +36 -56
src/ui/stage_audio.py
CHANGED
|
@@ -3,14 +3,8 @@ ui/stage_audio.py
|
|
| 3 |
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 4 |
VoiceVerse Pro — Stage ④: Synthetic Audio Synthesis & Playback
|
| 5 |
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
- On "Synthesize": instantiate TTSEngine and call synthesize()
|
| 9 |
-
- Mutate PipelineState with audio bytes + format
|
| 10 |
-
- Render st.audio player + download buttons (audio & script)
|
| 11 |
-
|
| 12 |
-
Knows about: TTSEngine, PipelineState, SidebarConfig
|
| 13 |
-
Does NOT know about: DocumentProcessor, RAGEngine, LLMBackbone
|
| 14 |
"""
|
| 15 |
|
| 16 |
from __future__ import annotations
|
|
@@ -20,89 +14,75 @@ import logging
|
|
| 20 |
import streamlit as st
|
| 21 |
|
| 22 |
from modules import TTSEngine, TTSConfig
|
| 23 |
-
from
|
|
|
|
| 24 |
|
| 25 |
logger = logging.getLogger(__name__)
|
| 26 |
|
| 27 |
|
| 28 |
def render(state: PipelineState, config: SidebarConfig) -> None:
|
| 29 |
-
"""
|
| 30 |
-
Render the audio synthesis stage UI and update `state` in-place on success.
|
| 31 |
-
|
| 32 |
-
Args:
|
| 33 |
-
state: Shared pipeline state (mutated on successful synthesis).
|
| 34 |
-
config: Sidebar configuration (TTS backend + speaker_id).
|
| 35 |
-
"""
|
| 36 |
st.divider()
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
"ποΈ Synthesize Audio",
|
| 41 |
-
use_container_width=True,
|
| 42 |
-
disabled=not state.has_script,
|
| 43 |
-
)
|
| 44 |
|
| 45 |
-
if
|
| 46 |
-
_handle_synthesize(state, config)
|
| 47 |
|
| 48 |
-
# ββ Playback & download βββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 49 |
if state.has_audio:
|
| 50 |
-
_render_audio_player(state)
|
| 51 |
|
| 52 |
|
| 53 |
-
|
| 54 |
-
# Internal handlers
|
| 55 |
-
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 56 |
-
|
| 57 |
-
def _handle_synthesize(state: PipelineState, config: SidebarConfig) -> None:
|
| 58 |
-
"""Validate β instantiate TTSEngine β synthesize β mutate state."""
|
| 59 |
if not state.generated_script:
|
| 60 |
st.error("Generate a script first.")
|
| 61 |
return
|
| 62 |
|
| 63 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
|
| 65 |
-
with st.status("Synthesizing
|
| 66 |
try:
|
| 67 |
st.write(f"π Loading {config.tts_backend.value} engineβ¦")
|
| 68 |
-
tts = TTSEngine(
|
| 69 |
-
backend=config.tts_backend,
|
| 70 |
-
speaker_id=config.speaker_id,
|
| 71 |
-
))
|
| 72 |
|
| 73 |
-
|
| 74 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
|
| 76 |
-
fmt =
|
| 77 |
-
"audio/wav"
|
| 78 |
-
if config.tts_backend == TTSBackend.SPEECHT5
|
| 79 |
-
else "audio/mp3"
|
| 80 |
-
)
|
| 81 |
|
| 82 |
-
# Mutate state
|
| 83 |
state.audio_bytes = audio_bytes
|
| 84 |
state.audio_format = fmt
|
| 85 |
state.stage = 4
|
| 86 |
|
| 87 |
kb = len(audio_bytes) / 1024
|
| 88 |
-
status.update(
|
| 89 |
-
|
| 90 |
-
state="complete",
|
| 91 |
-
)
|
| 92 |
except Exception as exc:
|
| 93 |
status.update(label=f"β Synthesis failed: {exc}", state="error")
|
| 94 |
logger.exception("TTS error")
|
| 95 |
|
| 96 |
|
| 97 |
-
def _render_audio_player(state: PipelineState) -> None:
|
| 98 |
-
"
|
| 99 |
-
st.markdown("#####
|
| 100 |
st.audio(state.audio_bytes, format=state.audio_format)
|
| 101 |
|
|
|
|
| 102 |
st.download_button(
|
| 103 |
label="β¬οΈ Download Audio",
|
| 104 |
data=state.audio_bytes,
|
| 105 |
-
file_name=
|
| 106 |
mime=state.audio_format,
|
| 107 |
use_container_width=True,
|
| 108 |
)
|
|
@@ -114,4 +94,4 @@ def _render_audio_player(state: PipelineState) -> None:
|
|
| 114 |
file_name="voiceverse_script.txt",
|
| 115 |
mime="text/plain",
|
| 116 |
use_container_width=True,
|
| 117 |
-
)
|
|
|
|
| 3 |
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 4 |
VoiceVerse Pro — Stage ④: Synthetic Audio Synthesis & Playback
|
| 5 |
|
| 6 |
+
Routes to single-voice (synthesize) or dual-voice (synthesize_podcast)
|
| 7 |
+
based on the selected OutputMode.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
"""
|
| 9 |
|
| 10 |
from __future__ import annotations
|
|
|
|
| 14 |
import streamlit as st
|
| 15 |
|
| 16 |
from modules import TTSEngine, TTSConfig
|
| 17 |
+
from modules.tts_engine import TTSBackend
|
| 18 |
+
from ui.state import PipelineState, SidebarConfig, OutputMode
|
| 19 |
|
| 20 |
logger = logging.getLogger(__name__)
|
| 21 |
|
| 22 |
|
| 23 |
def render(state: PipelineState, config: SidebarConfig) -> None:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
st.divider()
|
| 25 |
+
is_podcast = config.output_mode == OutputMode.PODCAST
|
| 26 |
+
heading = "ποΈ Audio Synthesis β Podcast (2 Voices)" if is_podcast else "π Audio Synthesis"
|
| 27 |
+
st.markdown(f"#### {heading}")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
+
if st.button("ποΈ Synthesize Audio", use_container_width=True, disabled=not state.has_script):
|
| 30 |
+
_handle_synthesize(state, config, is_podcast)
|
| 31 |
|
|
|
|
| 32 |
if state.has_audio:
|
| 33 |
+
_render_audio_player(state, is_podcast)
|
| 34 |
|
| 35 |
|
| 36 |
+
def _handle_synthesize(state: PipelineState, config: SidebarConfig, is_podcast: bool) -> None:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
if not state.generated_script:
|
| 38 |
st.error("Generate a script first.")
|
| 39 |
return
|
| 40 |
|
| 41 |
+
tts_config = TTSConfig(
|
| 42 |
+
backend=config.tts_backend,
|
| 43 |
+
speaker_id=config.speaker_id,
|
| 44 |
+
female_speaker_id=config.female_speaker_id,
|
| 45 |
+
male_speaker_id=config.male_speaker_id,
|
| 46 |
+
)
|
| 47 |
+
|
| 48 |
+
mode_label = "dual-voice podcast" if is_podcast else "single-voice transcript"
|
| 49 |
|
| 50 |
+
with st.status(f"Synthesizing {mode_label}β¦", expanded=True) as status:
|
| 51 |
try:
|
| 52 |
st.write(f"π Loading {config.tts_backend.value} engineβ¦")
|
| 53 |
+
tts = TTSEngine(tts_config)
|
|
|
|
|
|
|
|
|
|
| 54 |
|
| 55 |
+
if is_podcast:
|
| 56 |
+
st.write("ποΈ Synthesizing HOST (female) + GUEST (male) turnsβ¦")
|
| 57 |
+
audio_bytes = tts.synthesize_podcast(state.generated_script)
|
| 58 |
+
else:
|
| 59 |
+
st.write("π΅ Generating waveform (chunked synthesis)β¦")
|
| 60 |
+
audio_bytes = tts.synthesize(state.generated_script)
|
| 61 |
|
| 62 |
+
fmt = "audio/wav" if config.tts_backend == TTSBackend.SPEECHT5 else "audio/mp3"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
|
|
|
|
| 64 |
state.audio_bytes = audio_bytes
|
| 65 |
state.audio_format = fmt
|
| 66 |
state.stage = 4
|
| 67 |
|
| 68 |
kb = len(audio_bytes) / 1024
|
| 69 |
+
status.update(label=f"✅ Audio ready — {kb:.1f} KB", state="complete")
|
| 70 |
+
|
|
|
|
|
|
|
| 71 |
except Exception as exc:
|
| 72 |
status.update(label=f"β Synthesis failed: {exc}", state="error")
|
| 73 |
logger.exception("TTS error")
|
| 74 |
|
| 75 |
|
| 76 |
+
def _render_audio_player(state: PipelineState, is_podcast: bool) -> None:
|
| 77 |
+
label = "🎧 Podcast Playback" if is_podcast else "🎧 Playback"
|
| 78 |
+
st.markdown(f"##### {label}")
|
| 79 |
st.audio(state.audio_bytes, format=state.audio_format)
|
| 80 |
|
| 81 |
+
filename = "voiceverse_podcast.wav" if is_podcast else "voiceverse_output.wav"
|
| 82 |
st.download_button(
|
| 83 |
label="⬇️ Download Audio",
|
| 84 |
data=state.audio_bytes,
|
| 85 |
+
file_name=filename,
|
| 86 |
mime=state.audio_format,
|
| 87 |
use_container_width=True,
|
| 88 |
)
|
|
|
|
| 94 |
file_name="voiceverse_script.txt",
|
| 95 |
mime="text/plain",
|
| 96 |
use_container_width=True,
|
| 97 |
+
)
|