Spaces:

ahanbose
/

voiceAI

Sleeping

File size: 5,559 Bytes

"""
ui/sidebar.py
──────────────────────────────────────────────────────────────────────────────
VoiceVerse Pro — Configuration Sidebar
"""

from __future__ import annotations

import os

import streamlit as st

from modules.llm_backbone import SUPPORTED_MODELS
from modules.tts_engine import TTSBackend
from ui.state import SidebarConfig, OutputMode


def render(current_stage: int) -> SidebarConfig:
    """Render the configuration sidebar and return the user's selections.

    Widgets are drawn in this order: Hugging Face token input, output-mode
    radio, RAG sliders, LLM settings, TTS backend picker (plus speaker
    sliders when the SpeechT5 backend is selected) and a pipeline-progress
    footer.

    Side effects:
        * A non-empty token is written to the ``HUGGINGFACEHUB_API_TOKEN``
          environment variable.
        * The chosen output mode is written to
          ``st.session_state["output_mode"]``.

    Args:
        current_stage: Index of the current pipeline stage, used to pick the
            progress icon. Values outside ``0..4`` are clamped to the nearest
            valid stage rather than raising ``IndexError`` (or, for negative
            values, wrapping around to the wrong icon).

    Returns:
        A ``SidebarConfig`` built from the widget values. Speaker IDs whose
        slider is not shown keep their defaults (7306 narrator / 1580 female
        host / 7306 male guest).
    """
    with st.sidebar:
        st.markdown("### ⚙️ Configuration")
        st.divider()

        # ── Auth ──────────────────────────────────────────────────────────────
        st.markdown("**🔑 Hugging Face API Token**")
        # Strip whitespace: tokens pasted from a browser or terminal often
        # carry a trailing space/newline that would otherwise be stored and
        # returned verbatim.
        hf_token = st.text_input(
            "HF Token",
            value=os.getenv("HUGGINGFACEHUB_API_TOKEN", ""),
            type="password",
            label_visibility="collapsed",
            placeholder="hf_…",
        ).strip()
        if hf_token:
            # NOTE(review): os.environ is process-wide and the input above is
            # prefilled from it, so in a multi-session deployment a token
            # entered here may become the default shown to other sessions —
            # confirm this is intended before changing/keeping it.
            os.environ["HUGGINGFACEHUB_API_TOKEN"] = hf_token

        st.divider()

        # ── Output Mode ───────────────────────────────────────────────────────
        st.markdown("**🎭 Output Mode**")
        # Read current value from session state (set by main-area toggle)
        current_mode = st.session_state.get("output_mode", "Audio Transcript")
        mode_options = [m.value for m in OutputMode]
        # Fall back to the first option if session state holds an unknown label.
        mode_index = mode_options.index(current_mode) if current_mode in mode_options else 0
        mode_label = st.radio(
            "Output Mode",
            options=mode_options,
            index=mode_index,
            label_visibility="collapsed",
            help=(
                "Also controllable via the main toggle above the pipeline. "
                "Both controls are in sync."
            ),
        )
        # Write back to session state so main-area toggle reflects sidebar change
        st.session_state["output_mode"] = mode_label
        output_mode = OutputMode(mode_label)

        st.divider()

        # ── RAG ───────────────────────────────────────────────────────────────
        st.markdown("**🔍 RAG Parameters**")
        top_k = st.slider("Top-K Chunks", 1, 8, 4)
        chunk_size = st.slider("Chunk Size", 400, 2000, 1000, step=100)
        chunk_overlap = st.slider("Chunk Overlap", 0, 300, 100, step=50)

        st.divider()

        # ── LLM ───────────────────────────────────────────────────────────────
        st.markdown("**🧠 LLM Settings**")
        model_id = st.selectbox("Model", options=SUPPORTED_MODELS, index=0)
        temperature = st.slider("Temperature", 0.1, 1.2, 0.65, step=0.05)
        max_tokens = st.slider("Max New Tokens", 256, 2048, 1024, step=128)
        target_words = st.slider("Target Script Words", 100, 800, 400, step=50)

        st.divider()

        # ── TTS ───────────────────────────────────────────────────────────────
        st.markdown("**🔊 TTS Backend**")
        tts_label = st.selectbox(
            "TTS Engine",
            options=[b.value for b in TTSBackend],
            index=0,
            label_visibility="collapsed",
        )
        tts_backend = TTSBackend(tts_label)

        # Defaults returned whenever the matching slider is not rendered.
        speaker_id = 7306
        female_speaker_id = 1580
        male_speaker_id = 7306

        # Speaker sliders are only offered for the SpeechT5 backend: a single
        # narrator voice in transcript mode, otherwise a host/guest pair.
        # NOTE(review): the defaults (7306, 1580) are not multiples of the
        # slider step (100); once moved, the slider may not be able to return
        # to them — confirm whether step=1 was intended.
        if tts_backend == TTSBackend.SPEECHT5:
            if output_mode == OutputMode.TRANSCRIPT:
                speaker_id = st.slider(
                    "Speaker ID (xvector)", 0, 7500, 7306, step=100,
                    help="CMU Arctic speaker index for the narrator voice.",
                )
            else:
                st.markdown("*Podcast voices (CMU Arctic xvectors):*")
                female_speaker_id = st.slider(
                    "🎙️ HOST — Female Voice ID", 0, 7500, 1580, step=100,
                    help="Speaker embedding for the female host.",
                )
                male_speaker_id = st.slider(
                    "🎙️ GUEST — Male Voice ID", 0, 7500, 7306, step=100,
                    help="Speaker embedding for the male guest.",
                )

        st.divider()

        # ── Pipeline progress ─────────────────────────────────────────────────
        _stage_icons = ["⭕", "🔵", "🟡", "🟠", "🟢"]
        _last_stage = len(_stage_icons) - 1
        # Clamp so an unexpected stage value cannot raise IndexError or wrap
        # around via negative indexing.
        stage = min(max(current_stage, 0), _last_stage)
        st.markdown(f"**Pipeline:** {_stage_icons[stage]} Stage {stage}/{_last_stage}")
        st.markdown(
            "<small style='color:#555'>VoiceVerse Pro · 2026 Stable</small>",
            unsafe_allow_html=True,
        )

    return SidebarConfig(
        hf_token=hf_token,
        output_mode=output_mode,
        top_k=top_k,
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        model_id=model_id,
        temperature=temperature,
        max_tokens=max_tokens,
        target_words=target_words,
        tts_backend=tts_backend,
        speaker_id=speaker_id,
        female_speaker_id=female_speaker_id,
        male_speaker_id=male_speaker_id,
    )