Spaces:

ahanbose
/

voiceAI

Sleeping

App Files Community

ahanbose committed on Feb 19

Commit

49c59a3

verified ·

1 Parent(s): f0251df

Update src/ui/sidebar.py

Browse files

Files changed (1) hide show

src/ui/sidebar.py +48 -34

src/ui/sidebar.py CHANGED Viewed

@@ -2,9 +2,6 @@
 ui/sidebar.py
 ──────────────────────────────────────────────────────────────────────────────
 VoiceVerse Pro — Configuration Sidebar
-Renders all user-tunable controls and returns a typed SidebarConfig.
-Has zero knowledge of pipeline state — it only produces config.
 """
 from __future__ import annotations
@@ -13,20 +10,12 @@ import os
 import streamlit as st
 from modules.tts_engine import TTSBackend
-from ui.state import SidebarConfig
 def render(current_stage: int) -> SidebarConfig:
-    """
-    Render the sidebar and return the current SidebarConfig.
-    Args:
-        current_stage: Used to show pipeline progress summary.
-    Returns:
-        SidebarConfig populated from widget values.
-    """
     with st.sidebar:
         st.markdown("### ⚙️ Configuration")
         st.divider()
@@ -39,19 +28,37 @@ def render(current_stage: int) -> SidebarConfig:
             type="password",
             label_visibility="collapsed",
             placeholder="hf_…",
-            help="Required for Mistral inference. huggingface.co/settings/tokens",
         )
         if hf_token:
             os.environ["HUGGINGFACEHUB_API_TOKEN"] = hf_token
         st.divider()
         # ── RAG ───────────────────────────────────────────────────────────────
         st.markdown("**🔍 RAG Parameters**")
-        top_k = st.slider(
-            "Top-K Chunks", 1, 8, 4,
-            help="Number of document chunks retrieved per query.",
-        )
         chunk_size = st.slider("Chunk Size", 400, 2000, 1000, step=100)
         chunk_overlap = st.slider("Chunk Overlap", 0, 300, 100, step=50)
@@ -59,14 +66,7 @@ def render(current_stage: int) -> SidebarConfig:
         # ── LLM ───────────────────────────────────────────────────────────────
         st.markdown("**🧠 LLM Settings**")
-        from modules.llm_backbone import SUPPORTED_MODELS, DEFAULT_MODEL
-        model_id = st.selectbox(
-            "Model",
-            options=SUPPORTED_MODELS,
-            index=0,
-            help="All models routed via HF Inference API (provider=auto).",
-        )
         temperature = st.slider("Temperature", 0.1, 1.2, 0.65, step=0.05)
         max_tokens = st.slider("Max New Tokens", 256, 2048, 1024, step=128)
         target_words = st.slider("Target Script Words", 100, 800, 400, step=50)
@@ -84,19 +84,30 @@ def render(current_stage: int) -> SidebarConfig:
         tts_backend = TTSBackend(tts_label)
         speaker_id = 7306
         if tts_backend == TTSBackend.SPEECHT5:
-            speaker_id = st.slider(
-                "Speaker ID (xvector)", 0, 7500, 7306, step=100,
-                help="Speaker embedding index from CMU Arctic dataset.",
-            )
         st.divider()
-        # ── Pipeline status summary ───────────────────────────────────────────
         _stage_icons = ["⭕", "🔵", "🟡", "🟠", "🟢"]
-        st.markdown(
-            f"**Pipeline:** {_stage_icons[current_stage]} Stage {current_stage}/4"
-        )
         st.markdown(
             "<small style='color:#555'>VoiceVerse Pro · 2026 Stable</small>",
             unsafe_allow_html=True,
@@ -104,6 +115,7 @@ def render(current_stage: int) -> SidebarConfig:
     return SidebarConfig(
         hf_token=hf_token,
         top_k=top_k,
         chunk_size=chunk_size,
         chunk_overlap=chunk_overlap,
@@ -113,4 +125,6 @@ def render(current_stage: int) -> SidebarConfig:
         target_words=target_words,
         tts_backend=tts_backend,
         speaker_id=speaker_id,
     )

 ui/sidebar.py
 ──────────────────────────────────────────────────────────────────────────────
 VoiceVerse Pro — Configuration Sidebar
 """
 from __future__ import annotations
 import streamlit as st
+from modules.llm_backbone import SUPPORTED_MODELS
 from modules.tts_engine import TTSBackend
+from ui.state import SidebarConfig, OutputMode
 def render(current_stage: int) -> SidebarConfig:
     with st.sidebar:
         st.markdown("### ⚙️ Configuration")
         st.divider()
             type="password",
             label_visibility="collapsed",
             placeholder="hf_…",
         )
         if hf_token:
             os.environ["HUGGINGFACEHUB_API_TOKEN"] = hf_token
         st.divider()
+        # ── Output Mode ───────────────────────────────────────────────────────
+        st.markdown("**🎭 Output Mode**")
+        # Read current value from session state (set by main-area toggle)
+        current_mode = st.session_state.get("output_mode", "Audio Transcript")
+        mode_options = [m.value for m in OutputMode]
+        mode_index = mode_options.index(current_mode) if current_mode in mode_options else 0
+        mode_label = st.radio(
+            "Output Mode",
+            options=mode_options,
+            index=mode_index,
+            label_visibility="collapsed",
+            help=(
+                "Also controllable via the main toggle above the pipeline. "
+                "Both controls are in sync."
+            ),
+        )
+        # Write back to session state so main-area toggle reflects sidebar change
+        st.session_state["output_mode"] = mode_label
+        output_mode = OutputMode(mode_label)
+        st.divider()
         # ── RAG ───────────────────────────────────────────────────────────────
         st.markdown("**🔍 RAG Parameters**")
+        top_k = st.slider("Top-K Chunks", 1, 8, 4)
         chunk_size = st.slider("Chunk Size", 400, 2000, 1000, step=100)
         chunk_overlap = st.slider("Chunk Overlap", 0, 300, 100, step=50)
         # ── LLM ───────────────────────────────────────────────────────────────
         st.markdown("**🧠 LLM Settings**")
+        model_id = st.selectbox("Model", options=SUPPORTED_MODELS, index=0)
         temperature = st.slider("Temperature", 0.1, 1.2, 0.65, step=0.05)
         max_tokens = st.slider("Max New Tokens", 256, 2048, 1024, step=128)
         target_words = st.slider("Target Script Words", 100, 800, 400, step=50)
         tts_backend = TTSBackend(tts_label)
         speaker_id = 7306
+        female_speaker_id = 1580
+        male_speaker_id = 7306
         if tts_backend == TTSBackend.SPEECHT5:
+            if output_mode == OutputMode.TRANSCRIPT:
+                speaker_id = st.slider(
+                    "Speaker ID (xvector)", 0, 7500, 7306, step=100,
+                    help="CMU Arctic speaker index for the narrator voice.",
+                )
+            else:
+                st.markdown("*Podcast voices (CMU Arctic xvectors):*")
+                female_speaker_id = st.slider(
+                    "🎙️ HOST — Female Voice ID", 0, 7500, 1580, step=100,
+                    help="Speaker embedding for the female host.",
+                )
+                male_speaker_id = st.slider(
+                    "🎙️ GUEST — Male Voice ID", 0, 7500, 7306, step=100,
+                    help="Speaker embedding for the male guest.",
+                )
         st.divider()
         _stage_icons = ["⭕", "🔵", "🟡", "🟠", "🟢"]
+        st.markdown(f"**Pipeline:** {_stage_icons[current_stage]} Stage {current_stage}/4")
         st.markdown(
             "<small style='color:#555'>VoiceVerse Pro · 2026 Stable</small>",
             unsafe_allow_html=True,
     return SidebarConfig(
         hf_token=hf_token,
+        output_mode=output_mode,
         top_k=top_k,
         chunk_size=chunk_size,
         chunk_overlap=chunk_overlap,
         target_words=target_words,
         tts_backend=tts_backend,
         speaker_id=speaker_id,
+        female_speaker_id=female_speaker_id,
+        male_speaker_id=male_speaker_id,
     )