""" ui/state.py ────────────────────────────────────────────────────────────────────────────── VoiceVerse Pro — Centralised Session State & Config Contracts """ from __future__ import annotations from dataclasses import dataclass, field from enum import Enum from typing import List, Optional from modules import RetrievedContext, IngestedFile from modules.tts_engine import TTSBackend # ────────────────────────────────────────────────────────────────────────────── # Output mode # ────────────────────────────────────────────────────────────────────────────── class OutputMode(str, Enum): TRANSCRIPT = "Audio Transcript" # single narrator voice PODCAST = "Podcast (2 Speakers)" # HOST (female) + GUEST (male) dialogue # ────────────────────────────────────────────────────────────────────────────── # Sidebar configuration # ────────────────────────────────────────────────────────────────────────────── @dataclass class SidebarConfig: # Auth hf_token: str = "" # Mode output_mode: OutputMode = OutputMode.TRANSCRIPT # RAG top_k: int = 4 chunk_size: int = 1000 chunk_overlap: int = 100 # LLM model_id: str = "meta-llama/Llama-3.1-8B-Instruct" temperature: float = 0.65 max_tokens: int = 1024 target_words: int = 400 # TTS tts_backend: TTSBackend = TTSBackend.SPEECHT5 # Single-speaker (transcript mode) speaker_id: int = 7306 # Dual-speaker (podcast mode) — CMU Arctic xvectors female_speaker_id: int = 1580 # SLT-style female male_speaker_id: int = 7306 # BDL-style male # ────────────────────────────────────────────────────────────────────────────── # Pipeline state # ────────────────────────────────────────────────────────────────────────────── @dataclass class PipelineState: """ Single source of truth for pipeline progress. stage: 0 → idle 1 → documents indexed 2 → context retrieved 3 → script generated 4 → audio synthesised """ stage: int = 0 ingested_files: List[IngestedFile] = field(default_factory=list) total_chunks: int = 0 retrieved_context: Optional[RetrievedContext] = None generated_script: Optional[str] = None audio_bytes: Optional[bytes] = None audio_format: str = "audio/wav" def reset_from(self, stage: int) -> None: if stage <= 1: self.ingested_files = []; self.total_chunks = 0; self.stage = 0 if stage <= 2: self.retrieved_context = None if self.stage >= 2: self.stage = 1 if stage <= 3: self.generated_script = None if self.stage >= 3: self.stage = 2 if stage <= 4: self.audio_bytes = None; self.audio_format = "audio/wav" if self.stage >= 4: self.stage = 3 @property def has_index(self) -> bool: return self.stage >= 1 @property def has_context(self) -> bool: return self.stage >= 2 and self.retrieved_context is not None @property def has_script(self) -> bool: return self.stage >= 3 and self.generated_script is not None @property def has_audio(self) -> bool: return self.stage >= 4 and self.audio_bytes is not None # ────────────────────────────────────────────────────────────────────────────── # Session bootstrap # ────────────────────────────────────────────────────────────────────────────── def get_pipeline_state() -> PipelineState: import streamlit as st if "pipeline_state" not in st.session_state: st.session_state["pipeline_state"] = PipelineState() if "rag_engine" not in st.session_state: st.session_state["rag_engine"] = None return st.session_state["pipeline_state"]