"""
ui/state.py
──────────────────────────────────────────────────────────────────────────────
VoiceVerse Pro — Centralised Session State & Config Contracts
"""
from __future__ import annotations
from dataclasses import dataclass, field
from enum import Enum
from typing import List, Optional
from modules import RetrievedContext, IngestedFile
from modules.tts_engine import TTSBackend
# ──────────────────────────────────────────────────────────────────────────────
# Output mode
# ──────────────────────────────────────────────────────────────────────────────
class OutputMode(str, Enum):
    """How the generated script is rendered: a single narrator or a two-voice podcast."""
    TRANSCRIPT = "Audio Transcript"   # single narrator voice
    PODCAST = "Podcast (2 Speakers)"  # HOST (female) + GUEST (male) dialogue
# ──────────────────────────────────────────────────────────────────────────────
# Sidebar configuration
# ──────────────────────────────────────────────────────────────────────────────
@dataclass
class SidebarConfig:
    """
    Immutable snapshot of every user-tunable knob collected from the sidebar.

    Grouped by concern: auth, output mode, RAG retrieval, LLM generation,
    and TTS voice selection.
    """
    # Auth
    hf_token: str = ""  # Hugging Face API token for gated model access
    # Mode
    output_mode: OutputMode = OutputMode.TRANSCRIPT
    # RAG
    top_k: int = 4            # number of chunks retrieved per query
    chunk_size: int = 1000    # characters per chunk at ingestion time
    chunk_overlap: int = 100  # characters shared between adjacent chunks
    # LLM
    model_id: str = "meta-llama/Llama-3.1-8B-Instruct"
    temperature: float = 0.65
    max_tokens: int = 1024
    target_words: int = 400   # desired script length passed to the prompt
    # TTS
    tts_backend: TTSBackend = TTSBackend.SPEECHT5
    # Single-speaker (transcript mode)
    speaker_id: int = 7306
    # Dual-speaker (podcast mode) — CMU Arctic xvectors
    female_speaker_id: int = 1580  # SLT-style female
    male_speaker_id: int = 7306    # BDL-style male
# ──────────────────────────────────────────────────────────────────────────────
# Pipeline state
# ──────────────────────────────────────────────────────────────────────────────
@dataclass
class PipelineState:
    """
    Single source of truth for pipeline progress.

    stage values:
        0 — idle
        1 — documents indexed
        2 — context retrieved
        3 — script generated
        4 — audio synthesised
    """
    stage: int = 0
    ingested_files: List[IngestedFile] = field(default_factory=list)
    total_chunks: int = 0
    retrieved_context: Optional[RetrievedContext] = None
    generated_script: Optional[str] = None
    audio_bytes: Optional[bytes] = None
    audio_format: str = "audio/wav"

    def reset_from(self, stage: int) -> None:
        """
        Invalidate every artefact produced at *stage* or later, rolling
        ``self.stage`` back so downstream UI panels re-render as pending.
        Lower stages cascade: resetting from 1 also clears 2, 3, and 4.
        """
        if stage <= 1:
            # Back to a completely idle pipeline.
            self.ingested_files = []
            self.total_chunks = 0
            self.stage = 0
        if stage <= 2:
            self.retrieved_context = None
            self.stage = min(self.stage, 1)
        if stage <= 3:
            self.generated_script = None
            self.stage = min(self.stage, 2)
        if stage <= 4:
            self.audio_bytes = None
            self.audio_format = "audio/wav"
            self.stage = min(self.stage, 3)

    @property
    def has_index(self) -> bool:
        """True once documents have been ingested and indexed."""
        return self.stage >= 1

    @property
    def has_context(self) -> bool:
        """True once retrieval produced a non-empty context object."""
        return self.stage >= 2 and self.retrieved_context is not None

    @property
    def has_script(self) -> bool:
        """True once a script has been generated."""
        return self.stage >= 3 and self.generated_script is not None

    @property
    def has_audio(self) -> bool:
        """True once audio bytes have been synthesised."""
        return self.stage >= 4 and self.audio_bytes is not None
# ──────────────────────────────────────────────────────────────────────────────
# Session bootstrap
# ──────────────────────────────────────────────────────────────────────────────
def get_pipeline_state() -> PipelineState:
    """
    Return the per-session PipelineState, creating it on first access.

    Also reserves the ``rag_engine`` session slot (initialised to None) so
    later code can test for it without a KeyError.
    """
    import streamlit as st  # local import keeps this module importable without Streamlit

    session = st.session_state
    if "pipeline_state" not in session:
        session["pipeline_state"] = PipelineState()
    if "rag_engine" not in session:
        session["rag_engine"] = None
    return session["pipeline_state"]