File size: 5,231 Bytes
9ea9ec8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4979fb4
9ea9ec8
 
 
 
 
 
 
 
 
 
 
 
 
10a3914
4979fb4
9ea9ec8
 
 
 
 
 
4979fb4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
"""
Chatterbox Turbo TTS — Centralized Configuration
═══════════════════════════════════════════════════
Optimised for HF Space free tier (2 vCPU).
Adjust MODEL_DTYPE to switch quantization (q8/q4/q4f16/fp16/fp32).
Most settings are overridable via environment variables prefixed CB_;
the rest (sample rate, model token constants, CORS origins, ...) are fixed here.
"""
import os

_HERE = os.path.dirname(os.path.abspath(__file__))


def _get_bool(name: str, default: bool) -> bool:
    """Interpret environment variable *name* as a boolean flag.

    Args:
        name: Environment variable to look up.
        default: Value returned when the variable is not set at all.

    Returns:
        ``default`` if the variable is unset. Otherwise ``True`` only when
        the value, after trimming whitespace and lower-casing, is one of
        ``1``, ``true``, ``yes`` or ``on``; any other text (including an
        empty string) yields ``False``.
    """
    value = os.environ.get(name)
    if value is not None:
        normalized = value.strip().lower()
        return normalized in {"1", "true", "yes", "on"}
    return default


class Config:
    """Namespace of application settings exposed as class attributes.

    Every attribute is computed once, when this class body executes (i.e.
    at import time). Attributes built from ``os.getenv`` / ``_get_bool``
    use the ``CB_``-prefixed environment variable when it is set and the
    literal default otherwise; the remaining attributes are fixed constants.

    Numeric settings are passed through ``int()`` / ``float()``, so a
    non-numeric value in the corresponding environment variable raises
    ``ValueError`` while this class is being defined.
    """

    # ── Model ────────────────────────────────────────────────────
    # Model repository identifier (env: CB_MODEL_ID).
    MODEL_ID: str = os.getenv("CB_MODEL_ID", "ResembleAI/chatterbox-turbo-ONNX")

    # Weight precision / quantization variant (env: CB_MODEL_DTYPE):
    #   fp32  → highest quality, ~1.4 GB, slowest
    #   fp16  → good quality,    ~0.7 GB
    #   q8    → ★ recommended,   ~0.35 GB, best balance
    #   q4    → smallest,        ~0.17 GB, fastest, slight loss
    #   q4f16 → q4 weights + fp16 activations
    # NOTE(review): the default below is "q4", not the "q8" marked as
    # recommended above — confirm which one is intended.
    MODEL_DTYPE: str = os.getenv("CB_MODEL_DTYPE", "q4")

    # Directory for model files; defaults to "models" next to this module
    # (env: CB_MODELS_DIR).
    MODELS_DIR: str = os.getenv("CB_MODELS_DIR", os.path.join(_HERE, "models"))

    # ── ONNX Runtime CPU tuning (optimised for 2 vCPU) ───────────
    #
    # KEY RULE: intra_op threads MUST match physical cores.
    #   → 4 threads on 2 cores = oversubscription = SLOWER.
    #   → 2 threads on 2 cores = each op uses both cores perfectly.
    #
    # MAX_WORKERS = 1 ensures ONE inference gets both cores.
    #   → 2 workers would split 2 cores = both requests slow.
    #
    CPU_THREADS: int = int(os.getenv("CB_CPU_THREADS", "2"))
    MAX_WORKERS: int = int(os.getenv("CB_MAX_WORKERS", "1"))

    # ── Generation defaults ──────────────────────────────────────
    # SAMPLE_RATE is a fixed constant (no environment override); the other
    # three can be overridden via their CB_* variables.
    SAMPLE_RATE: int = 24000
    MAX_NEW_TOKENS: int = int(os.getenv("CB_MAX_NEW_TOKENS", "768"))
    REPETITION_PENALTY: float = float(os.getenv("CB_REPETITION_PENALTY", "1.2"))
    MAX_TEXT_LENGTH: int = int(os.getenv("CB_MAX_TEXT_LENGTH", "50000"))

    # ── Model constants (official card — do not change) ──────────
    START_SPEECH_TOKEN: int = 6561
    STOP_SPEECH_TOKEN: int = 6562
    SILENCE_TOKEN: int = 4299
    NUM_KV_HEADS: int = 16
    HEAD_DIM: int = 64

    # ── Paralinguistic tags (Turbo native) ───────────────────────
    PARALINGUISTIC_TAGS: tuple = (
        "laugh", "chuckle", "cough", "sigh", "gasp",
        "shush", "groan", "sniff", "clear throat",
    )

    # ── Voice / reference audio ──────────────────────────────────
    # NOTE: Official ResembleAI/chatterbox-turbo-ONNX has no bundled voice.
    # The default_voice.wav is a plain audio sample from community repo
    # (not a model — just a reference WAV, safe to use from any source).
    DEFAULT_VOICE_REPO: str = "onnx-community/chatterbox-ONNX"
    DEFAULT_VOICE_FILE: str = "default_voice.wav"
    MAX_VOICE_UPLOAD_BYTES: int = 10 * 1024 * 1024   # 10 MB
    MIN_REF_DURATION_SEC: float = 1.5
    MAX_REF_DURATION_SEC: float = 30.0
    VOICE_CACHE_SIZE: int = int(os.getenv("CB_VOICE_CACHE_SIZE", "20"))
    # NOTE: the environment variable is CB_VOICE_CACHE_TTL (no _SEC suffix).
    VOICE_CACHE_TTL_SEC: int = int(os.getenv("CB_VOICE_CACHE_TTL", "3600"))  # 1 hour

    # ── Streaming ────────────────────────────────────────────────
    # Smaller chunks = faster TTFB (first audio arrives sooner)
    # ~200 chars ≈ 1–2 sentences ≈ fastest first-chunk on 2 vCPU
    # NOTE(review): the default below is 100, not the ~200 mentioned
    # above — confirm which figure is current.
    MAX_CHUNK_CHARS: int = int(os.getenv("CB_MAX_CHUNK_CHARS", "100"))
    # Additive parallel mode (odd/even split across primary/helper).
    ENABLE_PARALLEL_MODE: bool = _get_bool("CB_ENABLE_PARALLEL_MODE", True)
    # Base URL of the helper instance; surrounding whitespace is stripped.
    # NOTE(review): parallel mode defaults to on and this defaults to a
    # specific hosted URL — confirm that is wanted for every deployment.
    HELPER_BASE_URL: str = os.getenv("CB_HELPER_BASE_URL", "https://shadowhunter222-chab2.hf.space").strip()
    HELPER_TIMEOUT_SEC: float = float(os.getenv("CB_HELPER_TIMEOUT_SEC", "45"))
    HELPER_RETRY_ONCE: bool = _get_bool("CB_HELPER_RETRY_ONCE", True)
    # Optional shared secret for internal chunk endpoints.
    # Defaults to the empty string when CB_INTERNAL_SHARED_SECRET is unset.
    INTERNAL_SHARED_SECRET: str = os.getenv("CB_INTERNAL_SHARED_SECRET", "").strip()

    # ── Server ───────────────────────────────────────────────────
    HOST: str = os.getenv("CB_HOST", "0.0.0.0")
    PORT: int = int(os.getenv("CB_PORT", "7860"))

    # Fixed list of allowed origins (no environment override).
    # NOTE(review): "www.toolboxesai.com" is the only entry without a
    # scheme. If this list is compared against the HTTP Origin header
    # (which carries scheme://host[:port]), that entry would never match —
    # confirm how the list is consumed.
    ALLOWED_ORIGINS: list = [
        "https://www.toolboxesai.com",
        "www.toolboxesai.com",
        "https://toolboxesai.com",
        "http://localhost:8788",  "http://127.0.0.1:8788",
        "http://localhost:5502",  "http://127.0.0.1:5502",
        "http://localhost:5501",  "http://127.0.0.1:5501",
        "http://localhost:5500",  "http://127.0.0.1:5500",
        "http://localhost:5173",  "http://127.0.0.1:5173",
        "http://localhost:7860",  "http://127.0.0.1:7860",
    ]