File size: 2,203 Bytes
c90e9b5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ee0d088
c90e9b5
 
 
 
ee0d088
c90e9b5
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
{
    "flow_lm": {
        "dtype": "float32",
        "flow": {
            "dim": 512,
            "depth": 6
        },
        "transformer": {
            "hidden_scale": 4,
            "max_period": 10000,
            "d_model": 1024,
            "num_heads": 16,
            "num_layers": 6
        },
        "lookup_table": {
            "dim": 1024,
            "n_bins": 4000,
            "tokenizer": "sentencepiece",
            "tokenizer_path": "hf://kyutai/pocket-tts-without-voice-cloning/tokenizer.model@d4fdd22ae8c8e1cb3634e150ebeff1dab2d16df3"
        },
        "weights_path": null
    },
    "mimi": {
        "dtype": "float32",
        "sample_rate": 24000,
        "channels": 1,
        "frame_rate": 12.5,
        "seanet": {
            "dimension": 512,
            "channels": 1,
            "n_filters": 64,
            "n_residual_layers": 1,
            "ratios": [
                6,
                5,
                4
            ],
            "kernel_size": 7,
            "residual_kernel_size": 3,
            "last_kernel_size": 3,
            "dilation_base": 2,
            "pad_mode": "constant",
            "compress": 2
        },
        "transformer": {
            "d_model": 512,
            "input_dimension": 512,
            "output_dimensions": [
                512
            ],
            "num_heads": 8,
            "num_layers": 2,
            "layer_scale": 0.01,
            "context": 250,
            "dim_feedforward": 2048,
            "max_period": 10000.0
        },
        "quantizer": {
            "dimension": 32,
            "output_dimension": 512
        },
        "weights_path": null
    },
    "model_path": null,
    "model_type": "pocket_tts",
    "quantization": {
        "group_size": 64,
        "bits": 8,
        "mode": "affine"
    },
    "quantization_config": {
        "group_size": 64,
        "bits": 8,
        "mode": "affine"
    },
    "weights_path": "hf://kyutai/pocket-tts/tts_b6369a24.safetensors@427e3d61b276ed69fdd03de0d185fa8a8d97fc5b",
    "weights_path_without_voice_cloning": "hf://kyutai/pocket-tts-without-voice-cloning/tts_b6369a24.safetensors@d4fdd22ae8c8e1cb3634e150ebeff1dab2d16df3"
}