{
"architecture": "chatterbox_turbo",
"dec_cond_len_seconds": 10,
"enc_cond_len_seconds": 15,
"gpt2": {
"activation_function": "gelu_new",
"n_ctx": 8196,
"n_embd": 1024,
"hidden_size": 1024,
"n_head": 16,
"n_layer": 24,
"n_positions": 8196,
"vocab_size": 50276,
"layer_norm_epsilon": 1e-05,
"attn_pdrop": 0.1,
"embd_pdrop": 0.1,
"resid_pdrop": 0.1
},
"model_type": "chatterbox_turbo",
"quantization": {
"group_size": 64,
"bits": 4,
"mode": "affine"
},
"quantization_config": {
"group_size": 64,
"bits": 4,
"mode": "affine"
},
"s3gen": {
"output_sample_rate": 24000,
"input_sample_rate": 16000,
"silence_token": 4299,
"speech_vocab_size": 6561,
"meanflow": true,
"token_embedding_dim": 512,
"encoder_attention_heads": 8,
"encoder_linear_units": 2048,
"encoder_num_blocks": 6,
"encoder_dropout_rate": 0.1,
"decoder_in_channels": 320,
"decoder_out_channels": 80,
"decoder_channels": [
256
],
"decoder_attention_head_dim": 64,
"decoder_n_blocks": 4,
"decoder_num_mid_blocks": 12,
"decoder_num_heads": 8,
"cfm_sigma_min": 1e-06,
"cfm_t_scheduler": "cosine",
"cfm_inference_cfg_rate": 0.7
},
"sample_rate": 24000,
"t3": {
"start_text_token": 255,
"stop_text_token": 0,
"text_tokens_dict_size": 50276,
"max_text_tokens": 2048,
"start_speech_token": 6561,
"stop_speech_token": 6562,
"speech_tokens_dict_size": 6563,
"max_speech_tokens": 4096,
"llama_config_name": "GPT2_medium",
"input_pos_emb": null,
"speech_cond_prompt_len": 375,
"encoder_type": "voice_encoder",
"speaker_embed_size": 256,
"use_perceiver_resampler": false,
"emotion_adv": false
},
"voice_encoder": {
"num_mels": 40,
"sample_rate": 16000,
"speaker_embed_size": 256,
"ve_hidden_size": 256,
"flatten_lstm_params": false,
"n_fft": 400,
"hop_size": 160,
"win_size": 400,
"fmax": 8000,
"fmin": 0,
"preemphasis": 0.0,
"mel_power": 2.0,
"mel_type": "amp",
"normalized_mels": false,
"ve_partial_frames": 160,
"ve_final_relu": true,
"stft_magnitude_min": 0.0001
}
}