File size: 5,559 Bytes
4d03adc 49c59a3 4d03adc 49c59a3 4d03adc 49c59a3 4d03adc 49c59a3 4d03adc 49c59a3 4d03adc 49c59a3 4d03adc 49c59a3 4d03adc 49c59a3 4d03adc 49c59a3 4d03adc 49c59a3 4d03adc | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 | """
ui/sidebar.py
──────────────────────────────────────────────────────────────────────────────
VoiceVerse Pro — Configuration Sidebar
"""
from __future__ import annotations
import os
import streamlit as st
from modules.llm_backbone import SUPPORTED_MODELS
from modules.tts_engine import TTSBackend
from ui.state import SidebarConfig, OutputMode
def render(current_stage: int) -> SidebarConfig:
    """Draw the configuration sidebar and return the chosen settings.

    Renders every sidebar widget (auth token, output mode, RAG parameters,
    LLM settings, TTS backend and speaker selection) plus a pipeline status
    line, then bundles the current widget values into a ``SidebarConfig``.

    Side effects:
        * A non-empty token is exported to the ``HUGGINGFACEHUB_API_TOKEN``
          environment variable.
        * ``st.session_state["output_mode"]`` is overwritten with the value
          selected in the sidebar radio.

    Args:
        current_stage: Pipeline stage shown in the status line. Values
            outside the range covered by the stage icons (0-4) are clamped
            instead of raising ``IndexError`` or wrapping to a wrong icon.

    Returns:
        A ``SidebarConfig`` populated from the widget values.
    """
    with st.sidebar:
        st.markdown("### βοΈ Configuration")
        st.divider()

        # -- Auth ------------------------------------------------------------
        st.markdown("**π Hugging Face API Token**")
        hf_token = st.text_input(
            "HF Token",
            value=os.getenv("HUGGINGFACEHUB_API_TOKEN", ""),
            type="password",
            label_visibility="collapsed",
            placeholder="hf_β¦",
        )
        if hf_token:
            # Export the token through the process environment; an empty
            # field leaves any existing value untouched.
            os.environ["HUGGINGFACEHUB_API_TOKEN"] = hf_token
        st.divider()

        # -- Output Mode -----------------------------------------------------
        st.markdown("**π Output Mode**")
        # Read the current value from session state (per the original note,
        # it is also set by a main-area toggle).
        current_mode = st.session_state.get("output_mode", "Audio Transcript")
        mode_options = [m.value for m in OutputMode]
        # Fall back to the first option when the stored value is not a known
        # mode label.
        mode_index = (
            mode_options.index(current_mode)
            if current_mode in mode_options
            else 0
        )
        mode_label = st.radio(
            "Output Mode",
            options=mode_options,
            index=mode_index,
            label_visibility="collapsed",
            help=(
                "Also controllable via the main toggle above the pipeline. "
                "Both controls are in sync."
            ),
        )
        # Write back so other readers of session state see the sidebar choice.
        st.session_state["output_mode"] = mode_label
        output_mode = OutputMode(mode_label)
        st.divider()

        # -- RAG -------------------------------------------------------------
        st.markdown("**π RAG Parameters**")
        top_k = st.slider("Top-K Chunks", 1, 8, 4)
        chunk_size = st.slider("Chunk Size", 400, 2000, 1000, step=100)
        chunk_overlap = st.slider("Chunk Overlap", 0, 300, 100, step=50)
        st.divider()

        # -- LLM -------------------------------------------------------------
        st.markdown("**π§ LLM Settings**")
        model_id = st.selectbox("Model", options=SUPPORTED_MODELS, index=0)
        temperature = st.slider("Temperature", 0.1, 1.2, 0.65, step=0.05)
        max_tokens = st.slider("Max New Tokens", 256, 2048, 1024, step=128)
        target_words = st.slider("Target Script Words", 100, 800, 400, step=50)
        st.divider()

        # -- TTS -------------------------------------------------------------
        st.markdown("**π TTS Backend**")
        tts_label = st.selectbox(
            "TTS Engine",
            options=[b.value for b in TTSBackend],
            index=0,
            label_visibility="collapsed",
        )
        tts_backend = TTSBackend(tts_label)

        # Defaults used whenever the matching slider is not rendered (other
        # backend, or the other output mode).
        speaker_id = 7306
        female_speaker_id = 1580
        male_speaker_id = 7306
        if tts_backend == TTSBackend.SPEECHT5:
            if output_mode == OutputMode.TRANSCRIPT:
                speaker_id = st.slider(
                    "Speaker ID (xvector)", 0, 7500, 7306, step=100,
                    help="CMU Arctic speaker index for the narrator voice.",
                )
            else:
                st.markdown("*Podcast voices (CMU Arctic xvectors):*")
                female_speaker_id = st.slider(
                    "ποΈ HOST β Female Voice ID", 0, 7500, 1580, step=100,
                    help="Speaker embedding for the female host.",
                )
                male_speaker_id = st.slider(
                    "ποΈ GUEST β Male Voice ID", 0, 7500, 7306, step=100,
                    help="Speaker embedding for the male guest.",
                )
        st.divider()

        _stage_icons = ["β", "π΅", "π‘", "π ", "π’"]
        # Clamp so an unexpected stage cannot abort the sidebar render with
        # IndexError, and a negative stage cannot wrap around to the last icon.
        stage = min(max(current_stage, 0), len(_stage_icons) - 1)
        st.markdown(f"**Pipeline:** {_stage_icons[stage]} Stage {stage}/4")
        st.markdown(
            "<small style='color:#555'>VoiceVerse Pro Β· 2026 Stable</small>",
            unsafe_allow_html=True,
        )

    return SidebarConfig(
        hf_token=hf_token,
        output_mode=output_mode,
        top_k=top_k,
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        model_id=model_id,
        temperature=temperature,
        max_tokens=max_tokens,
        target_words=target_words,
        tts_backend=tts_backend,
        speaker_id=speaker_id,
        female_speaker_id=female_speaker_id,
        male_speaker_id=male_speaker_id,
    )