Spaces:

ahanbose
/

voiceAI

Sleeping

App Files Community

ahanbose committed on Feb 19

Commit

a8979df

verified ·

1 Parent(s): 49c59a3

Update src/ui/stage_audio.py

Browse files

Files changed (1) hide show

src/ui/stage_audio.py +36 -56

src/ui/stage_audio.py CHANGED Viewed

@@ -3,14 +3,8 @@ ui/stage_audio.py
 ──────────────────────────────────────────────────────────────────────────────
 VoiceVerse Pro — Stage ④: Synthetic Audio Synthesis & Playback
-Responsibilities:
-  - Render synthesis trigger button
-  - On "Synthesize": instantiate TTSEngine and call synthesize()
-  - Mutate PipelineState with audio bytes + format
-  - Render st.audio player + download buttons (audio & script)
-Knows about: TTSEngine, PipelineState, SidebarConfig
-Does NOT know about: DocumentProcessor, RAGEngine, LLMBackbone
 """
 from __future__ import annotations
@@ -20,89 +14,75 @@ import logging
 import streamlit as st
 from modules import TTSEngine, TTSConfig
-from ui.state import PipelineState, SidebarConfig
 logger = logging.getLogger(__name__)
 def render(state: PipelineState, config: SidebarConfig) -> None:
-    """
-    Render the audio synthesis stage UI and update `state` in-place on success.
-    Args:
-        state:  Shared pipeline state (mutated on successful synthesis).
-        config: Sidebar configuration (TTS backend + speaker_id).
-    """
     st.divider()
-    st.markdown("#### 🔊 Audio Synthesis")
-    synthesize_btn = st.button(
-        "🎙️ Synthesize Audio",
-        use_container_width=True,
-        disabled=not state.has_script,
-    )
-    if synthesize_btn:
-        _handle_synthesize(state, config)
-    # ── Playback & download ───────────────────────────────────────────────────
     if state.has_audio:
-        _render_audio_player(state)
-# ──────────────────────────────────────────────────────────────────────────────
-# Internal handlers
-# ──────────────────────────────────────────────────────────────────────────────
-def _handle_synthesize(state: PipelineState, config: SidebarConfig) -> None:
-    """Validate → instantiate TTSEngine → synthesize → mutate state."""
     if not state.generated_script:
         st.error("Generate a script first.")
         return
-    from modules.tts_engine import TTSBackend
-    with st.status("Synthesizing audio…", expanded=True) as status:
         try:
             st.write(f"🔉 Loading {config.tts_backend.value} engine…")
-            tts = TTSEngine(TTSConfig(
-                backend=config.tts_backend,
-                speaker_id=config.speaker_id,
-            ))
-            st.write("🎵 Generating waveform (chunked synthesis)…")
-            audio_bytes = tts.synthesize(state.generated_script)
-            fmt = (
-                "audio/wav"
-                if config.tts_backend == TTSBackend.SPEECHT5
-                else "audio/mp3"
-            )
-            # Mutate state
             state.audio_bytes = audio_bytes
             state.audio_format = fmt
             state.stage = 4
             kb = len(audio_bytes) / 1024
-            status.update(
-                label=f"✅ Audio ready — {kb:.1f} KB",
-                state="complete",
-            )
         except Exception as exc:
             status.update(label=f"❌ Synthesis failed: {exc}", state="error")
             logger.exception("TTS error")
-def _render_audio_player(state: PipelineState) -> None:
-    """Render the audio player and download buttons."""
-    st.markdown("##### 🎧 Playback")
     st.audio(state.audio_bytes, format=state.audio_format)
     st.download_button(
         label="⬇️ Download Audio",
         data=state.audio_bytes,
-        file_name="voiceverse_output.wav",
         mime=state.audio_format,
         use_container_width=True,
     )
@@ -114,4 +94,4 @@ def _render_audio_player(state: PipelineState) -> None:
             file_name="voiceverse_script.txt",
             mime="text/plain",
             use_container_width=True,
-        )

 ──────────────────────────────────────────────────────────────────────────────
 VoiceVerse Pro — Stage ④: Synthetic Audio Synthesis & Playback
+Routes to single-voice (synthesize) or dual-voice (synthesize_podcast)
+based on the selected OutputMode.
 """
 from __future__ import annotations
 import streamlit as st
 from modules import TTSEngine, TTSConfig
+from modules.tts_engine import TTSBackend
+from ui.state import PipelineState, SidebarConfig, OutputMode
 logger = logging.getLogger(__name__)
 def render(state: PipelineState, config: SidebarConfig) -> None:
     st.divider()
+    is_podcast = config.output_mode == OutputMode.PODCAST
+    heading = "🎙️ Audio Synthesis — Podcast (2 Voices)" if is_podcast else "🔊 Audio Synthesis"
+    st.markdown(f"#### {heading}")
+    if st.button("🎙️ Synthesize Audio", use_container_width=True, disabled=not state.has_script):
+        _handle_synthesize(state, config, is_podcast)
     if state.has_audio:
+        _render_audio_player(state, is_podcast)
+def _handle_synthesize(state: PipelineState, config: SidebarConfig, is_podcast: bool) -> None:
     if not state.generated_script:
         st.error("Generate a script first.")
         return
+    tts_config = TTSConfig(
+        backend=config.tts_backend,
+        speaker_id=config.speaker_id,
+        female_speaker_id=config.female_speaker_id,
+        male_speaker_id=config.male_speaker_id,
+    )
+    mode_label = "dual-voice podcast" if is_podcast else "single-voice transcript"
+    with st.status(f"Synthesizing {mode_label}…", expanded=True) as status:
         try:
             st.write(f"🔉 Loading {config.tts_backend.value} engine…")
+            tts = TTSEngine(tts_config)
+            if is_podcast:
+                st.write("🎙️ Synthesizing HOST (female) + GUEST (male) turns…")
+                audio_bytes = tts.synthesize_podcast(state.generated_script)
+            else:
+                st.write("🎵 Generating waveform (chunked synthesis)…")
+                audio_bytes = tts.synthesize(state.generated_script)
+            fmt = "audio/wav" if config.tts_backend == TTSBackend.SPEECHT5 else "audio/mp3"
             state.audio_bytes = audio_bytes
             state.audio_format = fmt
             state.stage = 4
             kb = len(audio_bytes) / 1024
+            status.update(label=f"✅ Audio ready — {kb:.1f} KB", state="complete")
         except Exception as exc:
             status.update(label=f"❌ Synthesis failed: {exc}", state="error")
             logger.exception("TTS error")
+def _render_audio_player(state: PipelineState, is_podcast: bool) -> None:
+    label = "🎧 Podcast Playback" if is_podcast else "🎧 Playback"
+    st.markdown(f"##### {label}")
     st.audio(state.audio_bytes, format=state.audio_format)
+    filename = "voiceverse_podcast.wav" if is_podcast else "voiceverse_output.wav"
     st.download_button(
         label="⬇️ Download Audio",
         data=state.audio_bytes,
+        file_name=filename,
         mime=state.audio_format,
         use_container_width=True,
     )
             file_name="voiceverse_script.txt",
             mime="text/plain",
             use_container_width=True,
+        )