File size: 2,203 Bytes
{
"flow_lm": {
"dtype": "float32",
"flow": {
"dim": 512,
"depth": 6
},
"transformer": {
"hidden_scale": 4,
"max_period": 10000,
"d_model": 1024,
"num_heads": 16,
"num_layers": 6
},
"lookup_table": {
"dim": 1024,
"n_bins": 4000,
"tokenizer": "sentencepiece",
"tokenizer_path": "hf://kyutai/pocket-tts-without-voice-cloning/tokenizer.model@d4fdd22ae8c8e1cb3634e150ebeff1dab2d16df3"
},
"weights_path": null
},
"mimi": {
"dtype": "float32",
"sample_rate": 24000,
"channels": 1,
"frame_rate": 12.5,
"seanet": {
"dimension": 512,
"channels": 1,
"n_filters": 64,
"n_residual_layers": 1,
"ratios": [
6,
5,
4
],
"kernel_size": 7,
"residual_kernel_size": 3,
"last_kernel_size": 3,
"dilation_base": 2,
"pad_mode": "constant",
"compress": 2
},
"transformer": {
"d_model": 512,
"input_dimension": 512,
"output_dimensions": [
512
],
"num_heads": 8,
"num_layers": 2,
"layer_scale": 0.01,
"context": 250,
"dim_feedforward": 2048,
"max_period": 10000.0
},
"quantizer": {
"dimension": 32,
"output_dimension": 512
},
"weights_path": null
},
"model_path": null,
"model_type": "pocket_tts",
"quantization": {
"group_size": 64,
"bits": 8,
"mode": "affine"
},
"quantization_config": {
"group_size": 64,
"bits": 8,
"mode": "affine"
},
"weights_path": "hf://kyutai/pocket-tts/tts_b6369a24.safetensors@427e3d61b276ed69fdd03de0d185fa8a8d97fc5b",
"weights_path_without_voice_cloning": "hf://kyutai/pocket-tts-without-voice-cloning/tts_b6369a24.safetensors@d4fdd22ae8c8e1cb3634e150ebeff1dab2d16df3"
}