File size: 5,128 Bytes
4d03adc
 
 
 
 
 
 
 
 
5e3415b
4d03adc
 
 
 
 
 
 
5e3415b
 
 
 
 
 
 
 
 
 
4d03adc
 
 
 
 
 
 
5e3415b
 
 
4d03adc
 
 
 
 
 
 
 
 
 
 
 
 
5e3415b
4d03adc
5e3415b
 
 
4d03adc
 
 
5e3415b
4d03adc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5e3415b
4d03adc
 
5e3415b
4d03adc
 
5e3415b
4d03adc
5e3415b
 
4d03adc
 
5e3415b
4d03adc
5e3415b
4d03adc
5e3415b
4d03adc
5e3415b
4d03adc
 
 
5e3415b
4d03adc
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
"""
ui/state.py
──────────────────────────────────────────────────────────────────────────────
VoiceVerse Pro — Centralised Session State & Config Contracts
"""

from __future__ import annotations

from dataclasses import dataclass, field
from enum import Enum
from typing import List, Optional

from modules import RetrievedContext, IngestedFile
from modules.tts_engine import TTSBackend


# ──────────────────────────────────────────────────────────────────────────────
# Output mode
# ──────────────────────────────────────────────────────────────────────────────

class OutputMode(str, Enum):
    """
    Output modes a run can produce.

    The enum mixes in ``str``, so every member is also a string equal to its
    value (e.g. ``OutputMode.TRANSCRIPT == "Audio Transcript"``).
    """
    # One narrator voice reads the whole script.
    TRANSCRIPT = "Audio Transcript"     # single narrator voice
    # Two-voice dialogue between a HOST and a GUEST.
    PODCAST    = "Podcast (2 Speakers)" # HOST (female) + GUEST (male) dialogue


# ──────────────────────────────────────────────────────────────────────────────
# Sidebar configuration
# ──────────────────────────────────────────────────────────────────────────────

@dataclass
class SidebarConfig:
    """
    Configuration values with defaults, grouped by concern: auth, output
    mode, RAG, LLM and TTS.

    NOTE(review): presumably populated from the UI sidebar widgets — confirm
    against the caller; this class only declares the fields and their
    defaults.
    """
    # Auth
    # Empty string by default, i.e. no token supplied.
    hf_token: str = ""

    # Mode
    output_mode: OutputMode = OutputMode.TRANSCRIPT

    # RAG
    top_k: int = 4
    # NOTE(review): unit of chunk_size / chunk_overlap (characters vs tokens)
    # is not visible here — confirm against the chunking code.
    chunk_size: int = 1000
    chunk_overlap: int = 100

    # LLM
    model_id: str = "meta-llama/Llama-3.1-8B-Instruct"
    temperature: float = 0.65
    max_tokens: int = 1024
    target_words: int = 400

    # TTS
    tts_backend: TTSBackend = TTSBackend.SPEECHT5
    # Single-speaker (transcript mode)
    # Same default id as male_speaker_id below.
    speaker_id: int = 7306
    # Dual-speaker (podcast mode) — CMU Arctic xvectors
    female_speaker_id: int = 1580   # SLT-style female
    male_speaker_id: int = 7306     # BDL-style male


# ──────────────────────────────────────────────────────────────────────────────
# Pipeline state
# ──────────────────────────────────────────────────────────────────────────────

@dataclass
class PipelineState:
    """
    Progress marker plus the artefacts produced so far by the pipeline.

    ``stage`` records the furthest step that has completed:
        0 → idle
        1 → documents indexed
        2 → context retrieved
        3 → script generated
        4 → audio synthesised
    """
    stage: int = 0

    ingested_files: List[IngestedFile] = field(default_factory=list)
    total_chunks: int = 0
    retrieved_context: Optional[RetrievedContext] = None
    generated_script: Optional[str] = None
    audio_bytes: Optional[bytes] = None
    audio_format: str = "audio/wav"

    def reset_from(self, stage: int) -> None:
        """
        Discard the results of step ``stage`` and of every later step.

        ``self.stage`` is lowered so that it never claims a step whose
        result has just been cleared. A value of 1 or less wipes everything
        and returns to idle; a value above 4 changes nothing.
        """
        if stage <= 1:
            # Dropping the index invalidates the whole pipeline.
            self.ingested_files = []
            self.total_chunks = 0
            self.stage = 0

        # Steps 2-4 in pipeline order: the attributes each one owns and the
        # blank value every attribute goes back to.
        cascade = (
            (2, {"retrieved_context": None}),
            (3, {"generated_script": None}),
            (4, {"audio_bytes": None, "audio_format": "audio/wav"}),
        )
        for level, blanks in cascade:
            if stage <= level:
                for attr_name, blank in blanks.items():
                    setattr(self, attr_name, blank)
                # Fall back to the last step that is still valid.
                if self.stage >= level:
                    self.stage = level - 1

    @property
    def has_index(self) -> bool:
        """True once at least step 1 (documents indexed) has been reached."""
        return self.stage >= 1

    @property
    def has_context(self) -> bool:
        """True when step 2 has been reached and a context is stored."""
        if self.stage < 2:
            return False
        return self.retrieved_context is not None

    @property
    def has_script(self) -> bool:
        """True when step 3 has been reached and a script is stored."""
        if self.stage < 3:
            return False
        return self.generated_script is not None

    @property
    def has_audio(self) -> bool:
        """True when step 4 has been reached and audio bytes are stored."""
        if self.stage < 4:
            return False
        return self.audio_bytes is not None


# ──────────────────────────────────────────────────────────────────────────────
# Session bootstrap
# ──────────────────────────────────────────────────────────────────────────────

def get_pipeline_state() -> PipelineState:
    """
    Return the ``PipelineState`` kept in Streamlit's session state.

    On first use a new ``PipelineState`` is stored under ``"pipeline_state"``.
    The ``"rag_engine"`` key is also created (as ``None``) when it is missing,
    so later code can read it without a membership check.
    """
    # Imported here rather than at module level, as in the original code.
    import streamlit as st

    session = st.session_state
    if "pipeline_state" not in session:
        session["pipeline_state"] = PipelineState()
    if "rag_engine" not in session:
        session["rag_engine"] = None
    return session["pipeline_state"]