| """ |
| ui/sidebar.py |
| ────────────────────────────────────────────────────────────────────────────── |
| VoiceVerse Pro — Configuration Sidebar |
| """ |
|
|
| from __future__ import annotations |
|
|
| import os |
|
|
| import streamlit as st |
|
|
| from modules.llm_backbone import SUPPORTED_MODELS |
| from modules.tts_engine import TTSBackend |
| from ui.state import SidebarConfig, OutputMode |
|
|
|
|
# Environment variable the Hugging Face token is read from and exported to.
_HF_TOKEN_ENV_VAR = "HUGGINGFACEHUB_API_TOKEN"

# Session-state key holding the selected output-mode label.
_OUTPUT_MODE_STATE_KEY = "output_mode"

# Speaker-ID slider bounds and defaults. The same defaults are returned when
# the corresponding slider is not rendered.
_SPEAKER_ID_MIN = 0
_SPEAKER_ID_MAX = 7500
_SPEAKER_ID_STEP = 100
_DEFAULT_NARRATOR_SPEAKER_ID = 7306
_DEFAULT_FEMALE_SPEAKER_ID = 1580
_DEFAULT_MALE_SPEAKER_ID = 7306

# One icon per pipeline stage, indexed by stage number (0..4).
# NOTE(review): the non-ASCII characters in this file's string literals look
# mis-encoded; they are kept verbatim here — confirm against the original file.
_STAGE_ICONS = ("β", "π΅", "π‘", "π ", "π’")


def _render_token_input() -> str:
    """Render the Hugging Face token field and export a non-empty token.

    Returns:
        The entered token with surrounding whitespace removed (may be empty).
    """
    st.markdown("**π Hugging Face API Token**")
    raw_token = st.text_input(
        "HF Token",
        value=os.getenv(_HF_TOKEN_ENV_VAR, ""),
        type="password",
        label_visibility="collapsed",
        placeholder="hf_β¦",
    )
    # Pasted tokens often carry stray whitespace; strip it before use.
    hf_token = raw_token.strip()
    if hf_token:
        # An empty field leaves any previously exported token untouched.
        os.environ[_HF_TOKEN_ENV_VAR] = hf_token
    return hf_token


def _render_output_mode() -> OutputMode:
    """Render the output-mode radio and mirror the choice in session state.

    Returns:
        The ``OutputMode`` member matching the selected label.
    """
    st.markdown("**π Output Mode**")

    stored_label = st.session_state.get(_OUTPUT_MODE_STATE_KEY, "Audio Transcript")
    labels = [mode.value for mode in OutputMode]
    # Fall back to the first option when the stored label is not a known mode.
    selected_index = labels.index(stored_label) if stored_label in labels else 0
    chosen_label = st.radio(
        "Output Mode",
        options=labels,
        index=selected_index,
        label_visibility="collapsed",
        help=(
            "Also controllable via the main toggle above the pipeline. "
            "Both controls are in sync."
        ),
    )

    # Write the choice back so anything else reading this key sees it.
    st.session_state[_OUTPUT_MODE_STATE_KEY] = chosen_label
    return OutputMode(chosen_label)


def _render_rag_parameters() -> tuple[int, int, int]:
    """Render the RAG sliders and return ``(top_k, chunk_size, chunk_overlap)``."""
    st.markdown("**π RAG Parameters**")
    top_k = st.slider("Top-K Chunks", 1, 8, 4)
    chunk_size = st.slider("Chunk Size", 400, 2000, 1000, step=100)
    chunk_overlap = st.slider("Chunk Overlap", 0, 300, 100, step=50)
    return top_k, chunk_size, chunk_overlap


def _render_llm_settings() -> tuple[str, float, int, int]:
    """Render the LLM controls.

    Returns:
        ``(model_id, temperature, max_tokens, target_words)``.
    """
    st.markdown("**π§ LLM Settings**")
    model_id = st.selectbox("Model", options=SUPPORTED_MODELS, index=0)
    temperature = st.slider("Temperature", 0.1, 1.2, 0.65, step=0.05)
    max_tokens = st.slider("Max New Tokens", 256, 2048, 1024, step=128)
    target_words = st.slider("Target Script Words", 100, 800, 400, step=50)
    return model_id, temperature, max_tokens, target_words


def _speaker_slider(label: str, default: int, help_text: str) -> int:
    """Render one speaker-ID slider using the shared bounds and step."""
    return st.slider(
        label,
        _SPEAKER_ID_MIN,
        _SPEAKER_ID_MAX,
        default,
        step=_SPEAKER_ID_STEP,
        help=help_text,
    )


def _render_tts_settings(output_mode: OutputMode) -> tuple[TTSBackend, int, int, int]:
    """Render the TTS backend selector and any applicable speaker sliders.

    Speaker sliders are shown only for the SpeechT5 backend: a single narrator
    slider in transcript mode, otherwise separate host/guest sliders. Sliders
    that are not shown keep their default IDs.

    Args:
        output_mode: The currently selected output mode.

    Returns:
        ``(tts_backend, speaker_id, female_speaker_id, male_speaker_id)``.
    """
    st.markdown("**π TTS Backend**")
    backend_label = st.selectbox(
        "TTS Engine",
        options=[backend.value for backend in TTSBackend],
        index=0,
        label_visibility="collapsed",
    )
    tts_backend = TTSBackend(backend_label)

    speaker_id = _DEFAULT_NARRATOR_SPEAKER_ID
    female_speaker_id = _DEFAULT_FEMALE_SPEAKER_ID
    male_speaker_id = _DEFAULT_MALE_SPEAKER_ID

    if tts_backend != TTSBackend.SPEECHT5:
        return tts_backend, speaker_id, female_speaker_id, male_speaker_id

    if output_mode == OutputMode.TRANSCRIPT:
        speaker_id = _speaker_slider(
            "Speaker ID (xvector)",
            _DEFAULT_NARRATOR_SPEAKER_ID,
            "CMU Arctic speaker index for the narrator voice.",
        )
    else:
        st.markdown("*Podcast voices (CMU Arctic xvectors):*")
        female_speaker_id = _speaker_slider(
            "ποΈ HOST β Female Voice ID",
            _DEFAULT_FEMALE_SPEAKER_ID,
            "Speaker embedding for the female host.",
        )
        male_speaker_id = _speaker_slider(
            "ποΈ GUEST β Male Voice ID",
            _DEFAULT_MALE_SPEAKER_ID,
            "Speaker embedding for the male guest.",
        )

    return tts_backend, speaker_id, female_speaker_id, male_speaker_id


def _render_footer(current_stage: int) -> None:
    """Render the pipeline-stage indicator and the version footer.

    Args:
        current_stage: Pipeline stage index; values outside the icon range are
            clamped so an unexpected stage cannot raise ``IndexError`` or wrap
            around to the wrong icon.
    """
    last_stage = len(_STAGE_ICONS) - 1
    stage = min(max(current_stage, 0), last_stage)
    st.markdown(f"**Pipeline:** {_STAGE_ICONS[stage]} Stage {stage}/{last_stage}")
    st.markdown(
        "<small style='color:#555'>VoiceVerse Pro Β· 2026 Stable</small>",
        unsafe_allow_html=True,
    )


def render(current_stage: int) -> SidebarConfig:
    """Draw the configuration sidebar and collect the user's settings.

    Sections are rendered top to bottom in this order: token, output mode,
    RAG parameters, LLM settings, TTS backend, pipeline status footer.

    Args:
        current_stage: Pipeline stage index shown in the footer. Expected to
            be in ``0..4``; out-of-range values are clamped for display.

    Returns:
        A ``SidebarConfig`` populated with the current widget values.
    """
    with st.sidebar:
        st.markdown("### βοΈ Configuration")
        st.divider()

        hf_token = _render_token_input()
        st.divider()

        output_mode = _render_output_mode()
        st.divider()

        top_k, chunk_size, chunk_overlap = _render_rag_parameters()
        st.divider()

        model_id, temperature, max_tokens, target_words = _render_llm_settings()
        st.divider()

        tts_backend, speaker_id, female_speaker_id, male_speaker_id = (
            _render_tts_settings(output_mode)
        )
        st.divider()

        _render_footer(current_stage)

    return SidebarConfig(
        hf_token=hf_token,
        output_mode=output_mode,
        top_k=top_k,
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        model_id=model_id,
        temperature=temperature,
        max_tokens=max_tokens,
        target_words=target_words,
        tts_backend=tts_backend,
        speaker_id=speaker_id,
        female_speaker_id=female_speaker_id,
        male_speaker_id=male_speaker_id,
    )