{ "architecture": "chatterbox_turbo", "dec_cond_len_seconds": 10, "enc_cond_len_seconds": 15, "gpt2": { "activation_function": "gelu_new", "n_ctx": 8196, "n_embd": 1024, "hidden_size": 1024, "n_head": 16, "n_layer": 24, "n_positions": 8196, "vocab_size": 50276, "layer_norm_epsilon": 1e-05, "attn_pdrop": 0.1, "embd_pdrop": 0.1, "resid_pdrop": 0.1 }, "model_type": "chatterbox_turbo", "quantization": { "group_size": 64, "bits": 4, "mode": "affine" }, "quantization_config": { "group_size": 64, "bits": 4, "mode": "affine" }, "s3gen": { "output_sample_rate": 24000, "input_sample_rate": 16000, "silence_token": 4299, "speech_vocab_size": 6561, "meanflow": true, "token_embedding_dim": 512, "encoder_attention_heads": 8, "encoder_linear_units": 2048, "encoder_num_blocks": 6, "encoder_dropout_rate": 0.1, "decoder_in_channels": 320, "decoder_out_channels": 80, "decoder_channels": [ 256 ], "decoder_attention_head_dim": 64, "decoder_n_blocks": 4, "decoder_num_mid_blocks": 12, "decoder_num_heads": 8, "cfm_sigma_min": 1e-06, "cfm_t_scheduler": "cosine", "cfm_inference_cfg_rate": 0.7 }, "sample_rate": 24000, "t3": { "start_text_token": 255, "stop_text_token": 0, "text_tokens_dict_size": 50276, "max_text_tokens": 2048, "start_speech_token": 6561, "stop_speech_token": 6562, "speech_tokens_dict_size": 6563, "max_speech_tokens": 4096, "llama_config_name": "GPT2_medium", "input_pos_emb": null, "speech_cond_prompt_len": 375, "encoder_type": "voice_encoder", "speaker_embed_size": 256, "use_perceiver_resampler": false, "emotion_adv": false }, "voice_encoder": { "num_mels": 40, "sample_rate": 16000, "speaker_embed_size": 256, "ve_hidden_size": 256, "flatten_lstm_params": false, "n_fft": 400, "hop_size": 160, "win_size": 400, "fmax": 8000, "fmin": 0, "preemphasis": 0.0, "mel_power": 2.0, "mel_type": "amp", "normalized_mels": false, "ve_partial_frames": 160, "ve_final_relu": true, "stft_magnitude_min": 0.0001 } }