""" ui/sidebar.py ────────────────────────────────────────────────────────────────────────────── VoiceVerse Pro — Configuration Sidebar """ from __future__ import annotations import os import streamlit as st from modules.llm_backbone import SUPPORTED_MODELS from modules.tts_engine import TTSBackend from ui.state import SidebarConfig, OutputMode def render(current_stage: int) -> SidebarConfig: with st.sidebar: st.markdown("### ⚙️ Configuration") st.divider() # ── Auth ────────────────────────────────────────────────────────────── st.markdown("**🔑 Hugging Face API Token**") hf_token = st.text_input( "HF Token", value=os.getenv("HUGGINGFACEHUB_API_TOKEN", ""), type="password", label_visibility="collapsed", placeholder="hf_…", ) if hf_token: os.environ["HUGGINGFACEHUB_API_TOKEN"] = hf_token st.divider() # ── Output Mode ─────────────────────────────────────────────────────── st.markdown("**🎭 Output Mode**") # Read current value from session state (set by main-area toggle) current_mode = st.session_state.get("output_mode", "Audio Transcript") mode_options = [m.value for m in OutputMode] mode_index = mode_options.index(current_mode) if current_mode in mode_options else 0 mode_label = st.radio( "Output Mode", options=mode_options, index=mode_index, label_visibility="collapsed", help=( "Also controllable via the main toggle above the pipeline. " "Both controls are in sync." ), ) # Write back to session state so main-area toggle reflects sidebar change st.session_state["output_mode"] = mode_label output_mode = OutputMode(mode_label) st.divider() # ── RAG ─────────────────────────────────────────────────────────────── st.markdown("**🔍 RAG Parameters**") top_k = st.slider("Top-K Chunks", 1, 8, 4) chunk_size = st.slider("Chunk Size", 400, 2000, 1000, step=100) chunk_overlap = st.slider("Chunk Overlap", 0, 300, 100, step=50) st.divider() # ── LLM ─────────────────────────────────────────────────────────────── st.markdown("**🧠 LLM Settings**") model_id = st.selectbox("Model", options=SUPPORTED_MODELS, index=0) temperature = st.slider("Temperature", 0.1, 1.2, 0.65, step=0.05) max_tokens = st.slider("Max New Tokens", 256, 2048, 1024, step=128) target_words = st.slider("Target Script Words", 100, 800, 400, step=50) st.divider() # ── TTS ─────────────────────────────────────────────────────────────── st.markdown("**🔊 TTS Backend**") tts_label = st.selectbox( "TTS Engine", options=[b.value for b in TTSBackend], index=0, label_visibility="collapsed", ) tts_backend = TTSBackend(tts_label) speaker_id = 7306 female_speaker_id = 1580 male_speaker_id = 7306 if tts_backend == TTSBackend.SPEECHT5: if output_mode == OutputMode.TRANSCRIPT: speaker_id = st.slider( "Speaker ID (xvector)", 0, 7500, 7306, step=100, help="CMU Arctic speaker index for the narrator voice.", ) else: st.markdown("*Podcast voices (CMU Arctic xvectors):*") female_speaker_id = st.slider( "🎙️ HOST — Female Voice ID", 0, 7500, 1580, step=100, help="Speaker embedding for the female host.", ) male_speaker_id = st.slider( "🎙️ GUEST — Male Voice ID", 0, 7500, 7306, step=100, help="Speaker embedding for the male guest.", ) st.divider() _stage_icons = ["⭕", "🔵", "🟡", "🟠", "🟢"] st.markdown(f"**Pipeline:** {_stage_icons[current_stage]} Stage {current_stage}/4") st.markdown( "VoiceVerse Pro · 2026 Stable", unsafe_allow_html=True, ) return SidebarConfig( hf_token=hf_token, output_mode=output_mode, top_k=top_k, chunk_size=chunk_size, chunk_overlap=chunk_overlap, model_id=model_id, temperature=temperature, max_tokens=max_tokens, target_words=target_words, tts_backend=tts_backend, speaker_id=speaker_id, female_speaker_id=female_speaker_id, male_speaker_id=male_speaker_id, )