Update src/ui/sidebar.py
Browse files- src/ui/sidebar.py +48 -34
src/ui/sidebar.py
CHANGED
|
@@ -2,9 +2,6 @@
|
|
| 2 |
ui/sidebar.py
|
| 3 |
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 4 |
VoiceVerse Pro — Configuration Sidebar
|
| 5 |
-
|
| 6 |
-
Renders all user-tunable controls and returns a typed SidebarConfig.
|
| 7 |
-
Has zero knowledge of pipeline state — it only produces config.
|
| 8 |
"""
|
| 9 |
|
| 10 |
from __future__ import annotations
|
|
@@ -13,20 +10,12 @@ import os
|
|
| 13 |
|
| 14 |
import streamlit as st
|
| 15 |
|
|
|
|
| 16 |
from modules.tts_engine import TTSBackend
|
| 17 |
-
from ui.state import SidebarConfig
|
| 18 |
|
| 19 |
|
| 20 |
def render(current_stage: int) -> SidebarConfig:
|
| 21 |
-
"""
|
| 22 |
-
Render the sidebar and return the current SidebarConfig.
|
| 23 |
-
|
| 24 |
-
Args:
|
| 25 |
-
current_stage: Used to show pipeline progress summary.
|
| 26 |
-
|
| 27 |
-
Returns:
|
| 28 |
-
SidebarConfig populated from widget values.
|
| 29 |
-
"""
|
| 30 |
with st.sidebar:
|
| 31 |
st.markdown("### βοΈ Configuration")
|
| 32 |
st.divider()
|
|
@@ -39,19 +28,37 @@ def render(current_stage: int) -> SidebarConfig:
|
|
| 39 |
type="password",
|
| 40 |
label_visibility="collapsed",
|
| 41 |
placeholder="hf_β¦",
|
| 42 |
-
help="Required for Mistral inference. huggingface.co/settings/tokens",
|
| 43 |
)
|
| 44 |
if hf_token:
|
| 45 |
os.environ["HUGGINGFACEHUB_API_TOKEN"] = hf_token
|
| 46 |
|
| 47 |
st.divider()
|
| 48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
# ββ RAG βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 50 |
st.markdown("**π RAG Parameters**")
|
| 51 |
-
top_k = st.slider(
|
| 52 |
-
"Top-K Chunks", 1, 8, 4,
|
| 53 |
-
help="Number of document chunks retrieved per query.",
|
| 54 |
-
)
|
| 55 |
chunk_size = st.slider("Chunk Size", 400, 2000, 1000, step=100)
|
| 56 |
chunk_overlap = st.slider("Chunk Overlap", 0, 300, 100, step=50)
|
| 57 |
|
|
@@ -59,14 +66,7 @@ def render(current_stage: int) -> SidebarConfig:
|
|
| 59 |
|
| 60 |
# ββ LLM βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 61 |
st.markdown("**π§ LLM Settings**")
|
| 62 |
-
|
| 63 |
-
from modules.llm_backbone import SUPPORTED_MODELS, DEFAULT_MODEL
|
| 64 |
-
model_id = st.selectbox(
|
| 65 |
-
"Model",
|
| 66 |
-
options=SUPPORTED_MODELS,
|
| 67 |
-
index=0,
|
| 68 |
-
help="All models routed via HF Inference API (provider=auto).",
|
| 69 |
-
)
|
| 70 |
temperature = st.slider("Temperature", 0.1, 1.2, 0.65, step=0.05)
|
| 71 |
max_tokens = st.slider("Max New Tokens", 256, 2048, 1024, step=128)
|
| 72 |
target_words = st.slider("Target Script Words", 100, 800, 400, step=50)
|
|
@@ -84,19 +84,30 @@ def render(current_stage: int) -> SidebarConfig:
|
|
| 84 |
tts_backend = TTSBackend(tts_label)
|
| 85 |
|
| 86 |
speaker_id = 7306
|
|
|
|
|
|
|
|
|
|
| 87 |
if tts_backend == TTSBackend.SPEECHT5:
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
|
| 93 |
st.divider()
|
| 94 |
|
| 95 |
-
# ββ Pipeline status summary βββββββββββββββββββββββββββββββββββββββββββ
|
| 96 |
_stage_icons = ["β", "π΅", "π‘", "π ", "π’"]
|
| 97 |
-
st.markdown(
|
| 98 |
-
f"**Pipeline:** {_stage_icons[current_stage]} Stage {current_stage}/4"
|
| 99 |
-
)
|
| 100 |
st.markdown(
|
| 101 |
"<small style='color:#555'>VoiceVerse Pro Β· 2026 Stable</small>",
|
| 102 |
unsafe_allow_html=True,
|
|
@@ -104,6 +115,7 @@ def render(current_stage: int) -> SidebarConfig:
|
|
| 104 |
|
| 105 |
return SidebarConfig(
|
| 106 |
hf_token=hf_token,
|
|
|
|
| 107 |
top_k=top_k,
|
| 108 |
chunk_size=chunk_size,
|
| 109 |
chunk_overlap=chunk_overlap,
|
|
@@ -113,4 +125,6 @@ def render(current_stage: int) -> SidebarConfig:
|
|
| 113 |
target_words=target_words,
|
| 114 |
tts_backend=tts_backend,
|
| 115 |
speaker_id=speaker_id,
|
|
|
|
|
|
|
| 116 |
)
|
|
|
|
| 2 |
ui/sidebar.py
|
| 3 |
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 4 |
VoiceVerse Pro — Configuration Sidebar
|
|
|
|
|
|
|
|
|
|
| 5 |
"""
|
| 6 |
|
| 7 |
from __future__ import annotations
|
|
|
|
| 10 |
|
| 11 |
import streamlit as st
|
| 12 |
|
| 13 |
+
from modules.llm_backbone import SUPPORTED_MODELS
|
| 14 |
from modules.tts_engine import TTSBackend
|
| 15 |
+
from ui.state import SidebarConfig, OutputMode
|
| 16 |
|
| 17 |
|
| 18 |
def render(current_stage: int) -> SidebarConfig:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
with st.sidebar:
|
| 20 |
st.markdown("### βοΈ Configuration")
|
| 21 |
st.divider()
|
|
|
|
| 28 |
type="password",
|
| 29 |
label_visibility="collapsed",
|
| 30 |
placeholder="hf_β¦",
|
|
|
|
| 31 |
)
|
| 32 |
if hf_token:
|
| 33 |
os.environ["HUGGINGFACEHUB_API_TOKEN"] = hf_token
|
| 34 |
|
| 35 |
st.divider()
|
| 36 |
|
| 37 |
+
# ββ Output Mode βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 38 |
+
st.markdown("**π Output Mode**")
|
| 39 |
+
# Read current value from session state (set by main-area toggle)
|
| 40 |
+
current_mode = st.session_state.get("output_mode", "Audio Transcript")
|
| 41 |
+
mode_options = [m.value for m in OutputMode]
|
| 42 |
+
mode_index = mode_options.index(current_mode) if current_mode in mode_options else 0
|
| 43 |
+
mode_label = st.radio(
|
| 44 |
+
"Output Mode",
|
| 45 |
+
options=mode_options,
|
| 46 |
+
index=mode_index,
|
| 47 |
+
label_visibility="collapsed",
|
| 48 |
+
help=(
|
| 49 |
+
"Also controllable via the main toggle above the pipeline. "
|
| 50 |
+
"Both controls are in sync."
|
| 51 |
+
),
|
| 52 |
+
)
|
| 53 |
+
# Write back to session state so main-area toggle reflects sidebar change
|
| 54 |
+
st.session_state["output_mode"] = mode_label
|
| 55 |
+
output_mode = OutputMode(mode_label)
|
| 56 |
+
|
| 57 |
+
st.divider()
|
| 58 |
+
|
| 59 |
# ββ RAG βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 60 |
st.markdown("**π RAG Parameters**")
|
| 61 |
+
top_k = st.slider("Top-K Chunks", 1, 8, 4)
|
|
|
|
|
|
|
|
|
|
| 62 |
chunk_size = st.slider("Chunk Size", 400, 2000, 1000, step=100)
|
| 63 |
chunk_overlap = st.slider("Chunk Overlap", 0, 300, 100, step=50)
|
| 64 |
|
|
|
|
| 66 |
|
| 67 |
# ββ LLM βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 68 |
st.markdown("**π§ LLM Settings**")
|
| 69 |
+
model_id = st.selectbox("Model", options=SUPPORTED_MODELS, index=0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
temperature = st.slider("Temperature", 0.1, 1.2, 0.65, step=0.05)
|
| 71 |
max_tokens = st.slider("Max New Tokens", 256, 2048, 1024, step=128)
|
| 72 |
target_words = st.slider("Target Script Words", 100, 800, 400, step=50)
|
|
|
|
| 84 |
tts_backend = TTSBackend(tts_label)
|
| 85 |
|
| 86 |
speaker_id = 7306
|
| 87 |
+
female_speaker_id = 1580
|
| 88 |
+
male_speaker_id = 7306
|
| 89 |
+
|
| 90 |
if tts_backend == TTSBackend.SPEECHT5:
|
| 91 |
+
if output_mode == OutputMode.TRANSCRIPT:
|
| 92 |
+
speaker_id = st.slider(
|
| 93 |
+
"Speaker ID (xvector)", 0, 7500, 7306, step=100,
|
| 94 |
+
help="CMU Arctic speaker index for the narrator voice.",
|
| 95 |
+
)
|
| 96 |
+
else:
|
| 97 |
+
st.markdown("*Podcast voices (CMU Arctic xvectors):*")
|
| 98 |
+
female_speaker_id = st.slider(
|
| 99 |
+
"ποΈ HOST β Female Voice ID", 0, 7500, 1580, step=100,
|
| 100 |
+
help="Speaker embedding for the female host.",
|
| 101 |
+
)
|
| 102 |
+
male_speaker_id = st.slider(
|
| 103 |
+
"ποΈ GUEST β Male Voice ID", 0, 7500, 7306, step=100,
|
| 104 |
+
help="Speaker embedding for the male guest.",
|
| 105 |
+
)
|
| 106 |
|
| 107 |
st.divider()
|
| 108 |
|
|
|
|
| 109 |
_stage_icons = ["β", "π΅", "π‘", "π ", "π’"]
|
| 110 |
+
st.markdown(f"**Pipeline:** {_stage_icons[current_stage]} Stage {current_stage}/4")
|
|
|
|
|
|
|
| 111 |
st.markdown(
|
| 112 |
"<small style='color:#555'>VoiceVerse Pro Β· 2026 Stable</small>",
|
| 113 |
unsafe_allow_html=True,
|
|
|
|
| 115 |
|
| 116 |
return SidebarConfig(
|
| 117 |
hf_token=hf_token,
|
| 118 |
+
output_mode=output_mode,
|
| 119 |
top_k=top_k,
|
| 120 |
chunk_size=chunk_size,
|
| 121 |
chunk_overlap=chunk_overlap,
|
|
|
|
| 125 |
target_words=target_words,
|
| 126 |
tts_backend=tts_backend,
|
| 127 |
speaker_id=speaker_id,
|
| 128 |
+
female_speaker_id=female_speaker_id,
|
| 129 |
+
male_speaker_id=male_speaker_id,
|
| 130 |
)
|