ahanbose committed on
Commit
a8979df
·
verified ·
1 Parent(s): 49c59a3

Update src/ui/stage_audio.py

Browse files
Files changed (1) hide show
  1. src/ui/stage_audio.py +36 -56
src/ui/stage_audio.py CHANGED
@@ -3,14 +3,8 @@ ui/stage_audio.py
3
  ──────────────────────────────────────────────────────────────────────────────
4
  VoiceVerse Pro — Stage ④: Synthetic Audio Synthesis & Playback
5
 
6
- Responsibilities:
7
- - Render synthesis trigger button
8
- - On "Synthesize": instantiate TTSEngine and call synthesize()
9
- - Mutate PipelineState with audio bytes + format
10
- - Render st.audio player + download buttons (audio & script)
11
-
12
- Knows about: TTSEngine, PipelineState, SidebarConfig
13
- Does NOT know about: DocumentProcessor, RAGEngine, LLMBackbone
14
  """
15
 
16
  from __future__ import annotations
@@ -20,89 +14,75 @@ import logging
20
  import streamlit as st
21
 
22
  from modules import TTSEngine, TTSConfig
23
- from ui.state import PipelineState, SidebarConfig
 
24
 
25
  logger = logging.getLogger(__name__)
26
 
27
 
28
  def render(state: PipelineState, config: SidebarConfig) -> None:
29
- """
30
- Render the audio synthesis stage UI and update `state` in-place on success.
31
-
32
- Args:
33
- state: Shared pipeline state (mutated on successful synthesis).
34
- config: Sidebar configuration (TTS backend + speaker_id).
35
- """
36
  st.divider()
37
- st.markdown("#### πŸ”Š Audio Synthesis")
38
-
39
- synthesize_btn = st.button(
40
- "πŸŽ™οΈ Synthesize Audio",
41
- use_container_width=True,
42
- disabled=not state.has_script,
43
- )
44
 
45
- if synthesize_btn:
46
- _handle_synthesize(state, config)
47
 
48
- # ── Playback & download ───────────────────────────────────────────────────
49
  if state.has_audio:
50
- _render_audio_player(state)
51
 
52
 
53
- # ──────────────────────────────────────────────────────────────────────────────
54
- # Internal handlers
55
- # ──────────────────────────────────────────────────────────────────────────────
56
-
57
- def _handle_synthesize(state: PipelineState, config: SidebarConfig) -> None:
58
- """Validate → instantiate TTSEngine → synthesize → mutate state."""
59
  if not state.generated_script:
60
  st.error("Generate a script first.")
61
  return
62
 
63
- from modules.tts_engine import TTSBackend
 
 
 
 
 
 
 
64
 
65
- with st.status("Synthesizing audio…", expanded=True) as status:
66
  try:
67
  st.write(f"πŸ”‰ Loading {config.tts_backend.value} engine…")
68
- tts = TTSEngine(TTSConfig(
69
- backend=config.tts_backend,
70
- speaker_id=config.speaker_id,
71
- ))
72
 
73
- st.write("🎡 Generating waveform (chunked synthesis)…")
74
- audio_bytes = tts.synthesize(state.generated_script)
 
 
 
 
75
 
76
- fmt = (
77
- "audio/wav"
78
- if config.tts_backend == TTSBackend.SPEECHT5
79
- else "audio/mp3"
80
- )
81
 
82
- # Mutate state
83
  state.audio_bytes = audio_bytes
84
  state.audio_format = fmt
85
  state.stage = 4
86
 
87
  kb = len(audio_bytes) / 1024
88
- status.update(
89
- label=f"βœ… Audio ready β€” {kb:.1f} KB",
90
- state="complete",
91
- )
92
  except Exception as exc:
93
  status.update(label=f"❌ Synthesis failed: {exc}", state="error")
94
  logger.exception("TTS error")
95
 
96
 
97
- def _render_audio_player(state: PipelineState) -> None:
98
- """Render the audio player and download buttons."""
99
- st.markdown("##### 🎧 Playback")
100
  st.audio(state.audio_bytes, format=state.audio_format)
101
 
 
102
  st.download_button(
103
  label="⬇️ Download Audio",
104
  data=state.audio_bytes,
105
- file_name="voiceverse_output.wav",
106
  mime=state.audio_format,
107
  use_container_width=True,
108
  )
@@ -114,4 +94,4 @@ def _render_audio_player(state: PipelineState) -> None:
114
  file_name="voiceverse_script.txt",
115
  mime="text/plain",
116
  use_container_width=True,
117
- )
 
3
  ──────────────────────────────────────────────────────────────────────────────
4
  VoiceVerse Pro — Stage ④: Synthetic Audio Synthesis & Playback
5
 
6
+ Routes to single-voice (synthesize) or dual-voice (synthesize_podcast)
7
+ based on the selected OutputMode.
 
 
 
 
 
 
8
  """
9
 
10
  from __future__ import annotations
 
14
  import streamlit as st
15
 
16
  from modules import TTSEngine, TTSConfig
17
+ from modules.tts_engine import TTSBackend
18
+ from ui.state import PipelineState, SidebarConfig, OutputMode
19
 
20
  logger = logging.getLogger(__name__)
21
 
22
 
23
  def render(state: PipelineState, config: SidebarConfig) -> None:
 
 
 
 
 
 
 
24
  st.divider()
25
+ is_podcast = config.output_mode == OutputMode.PODCAST
26
+ heading = "πŸŽ™οΈ Audio Synthesis β€” Podcast (2 Voices)" if is_podcast else "πŸ”Š Audio Synthesis"
27
+ st.markdown(f"#### {heading}")
 
 
 
 
28
 
29
+ if st.button("πŸŽ™οΈ Synthesize Audio", use_container_width=True, disabled=not state.has_script):
30
+ _handle_synthesize(state, config, is_podcast)
31
 
 
32
  if state.has_audio:
33
+ _render_audio_player(state, is_podcast)
34
 
35
 
36
def _handle_synthesize(state: PipelineState, config: SidebarConfig, is_podcast: bool) -> None:
    """Synthesize audio for the generated script and store it on ``state``.

    Builds a ``TTSConfig`` from the sidebar settings, instantiates a
    ``TTSEngine``, and calls ``synthesize_podcast`` (podcast mode) or
    ``synthesize`` (single-voice mode) on the generated script. On success
    ``state.audio_bytes``, ``state.audio_format`` and ``state.stage`` are
    updated; on failure the status widget shows the error, the exception is
    logged, and ``state`` is left untouched.

    Args:
        state: Shared pipeline state; mutated only after a successful,
            non-empty synthesis.
        config: Sidebar configuration supplying the TTS backend and the
            speaker ids.
        is_podcast: When true, use the dual-voice podcast synthesis path.
    """
    if not state.generated_script:
        st.error("Generate a script first.")
        return

    tts_config = TTSConfig(
        backend=config.tts_backend,
        speaker_id=config.speaker_id,
        female_speaker_id=config.female_speaker_id,
        male_speaker_id=config.male_speaker_id,
    )

    mode_label = "dual-voice podcast" if is_podcast else "single-voice transcript"

    with st.status(f"Synthesizing {mode_label}…", expanded=True) as status:
        try:
            st.write(f"🔉 Loading {config.tts_backend.value} engine…")
            tts = TTSEngine(tts_config)

            if is_podcast:
                st.write("🎙️ Synthesizing HOST (female) + GUEST (male) turns…")
                audio_bytes = tts.synthesize_podcast(state.generated_script)
            else:
                st.write("🎵 Generating waveform (chunked synthesis)…")
                audio_bytes = tts.synthesize(state.generated_script)

            # Validate BEFORE touching state: previously a None/empty result
            # was written to state (and stage bumped to 4) before the size
            # computation could fail, leaving state inconsistent with the
            # error shown to the user.
            if audio_bytes is None or len(audio_bytes) == 0:
                raise ValueError("TTS engine returned no audio data.")

            # The SpeechT5 backend is labelled WAV; every other backend MP3.
            fmt = "audio/wav" if config.tts_backend == TTSBackend.SPEECHT5 else "audio/mp3"

            state.audio_bytes = audio_bytes
            state.audio_format = fmt
            state.stage = 4

            kb = len(audio_bytes) / 1024
            status.update(label=f"✅ Audio ready — {kb:.1f} KB", state="complete")

        except Exception as exc:
            # UI boundary: surface the failure in the status widget and keep
            # the full traceback in the log instead of crashing the page.
            status.update(label=f"❌ Synthesis failed: {exc}", state="error")
            logger.exception("TTS error")
74
 
75
 
76
+ def _render_audio_player(state: PipelineState, is_podcast: bool) -> None:
77
+ label = "🎧 Podcast Playback" if is_podcast else "🎧 Playback"
78
+ st.markdown(f"##### {label}")
79
  st.audio(state.audio_bytes, format=state.audio_format)
80
 
81
+ filename = "voiceverse_podcast.wav" if is_podcast else "voiceverse_output.wav"
82
  st.download_button(
83
  label="⬇️ Download Audio",
84
  data=state.audio_bytes,
85
+ file_name=filename,
86
  mime=state.audio_format,
87
  use_container_width=True,
88
  )
 
94
  file_name="voiceverse_script.txt",
95
  mime="text/plain",
96
  use_container_width=True,
97
+ )