Text-to-Speech
Safetensors
English
voxtream
zero-shot
streaming
File size: 418 Bytes
b3e384a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
{
    "phone_former": "phone_former",
    "temp_former": "temp_former",
    "dep_former": "dep_former_csm",
    "phone_vocab_size": 125,
    "audio_vocab_size": 2050,
    "audio_pad_size": 0,
    "embedding_dim": 1024,
    "spk_embedding_dim": 192,
    "num_codebooks": 16,
    "num_phone_states": 6,
    "amortization_divisor": 16,
    "max_look_ahead": 5,
    "audio_window_size": 625,
    "phone_window_size": 625
}