{
  "flow_lm": {
    "dtype": "float32",
    "flow": {
      "dim": 512,
      "depth": 6
    },
    "transformer": {
      "hidden_scale": 4,
      "max_period": 10000,
      "d_model": 1024,
      "num_heads": 16,
      "num_layers": 6
    },
    "lookup_table": {
      "dim": 1024,
      "n_bins": 4000,
      "tokenizer": "sentencepiece",
      "tokenizer_path": "hf://kyutai/pocket-tts-without-voice-cloning/tokenizer.model@d4fdd22ae8c8e1cb3634e150ebeff1dab2d16df3"
    },
    "weights_path": null
  },
  "mimi": {
    "dtype": "float32",
    "sample_rate": 24000,
    "channels": 1,
    "frame_rate": 12.5,
    "seanet": {
      "dimension": 512,
      "channels": 1,
      "n_filters": 64,
      "n_residual_layers": 1,
      "ratios": [
        6,
        5,
        4
      ],
      "kernel_size": 7,
      "residual_kernel_size": 3,
      "last_kernel_size": 3,
      "dilation_base": 2,
      "pad_mode": "constant",
      "compress": 2
    },
    "transformer": {
      "d_model": 512,
      "input_dimension": 512,
      "output_dimensions": [
        512
      ],
      "num_heads": 8,
      "num_layers": 2,
      "layer_scale": 0.01,
      "context": 250,
      "dim_feedforward": 2048,
      "max_period": 10000.0
    },
    "quantizer": {
      "dimension": 32,
      "output_dimension": 512
    },
    "weights_path": null
  },
  "model_path": null,
  "model_type": "pocket_tts",
  "quantization": {
    "group_size": 64,
    "bits": 8,
    "mode": "affine"
  },
  "quantization_config": {
    "group_size": 64,
    "bits": 8,
    "mode": "affine"
  },
  "weights_path": "hf://kyutai/pocket-tts/tts_b6369a24.safetensors@427e3d61b276ed69fdd03de0d185fa8a8d97fc5b",
  "weights_path_without_voice_cloning": "hf://kyutai/pocket-tts-without-voice-cloning/tts_b6369a24.safetensors@d4fdd22ae8c8e1cb3634e150ebeff1dab2d16df3"
}