{ "bos_emb": { "shape": [ 32 ], "dtype": "float32", "bytes": 128 }, "emb_mean": { "shape": [ 32 ], "dtype": "float32", "bytes": 128 }, "emb_std": { "shape": [ 32 ], "dtype": "float32", "bytes": 128 }, "quantizer_weight": { "shape": [ 512, 32, 1 ], "dtype": "float32", "bytes": 65536 }, "text_embed_table": { "shape": [ 4001, 1024 ], "dtype": "float32", "bytes": 16388096 }, "mimi_init_state": { "upsample_partial": { "shape": [ 1, 512, 16 ], "dtype": "float32", "bytes": 32768 }, "attn0_cache": { "shape": [ 2, 1, 8, 256, 64 ], "dtype": "float32", "bytes": 1048576 }, "attn0_offset": { "shape": [ 1 ], "dtype": "float32", "bytes": 4 }, "attn0_end_offset": { "shape": [ 1 ], "dtype": "float32", "bytes": 4 }, "attn1_cache": { "shape": [ 2, 1, 8, 256, 64 ], "dtype": "float32", "bytes": 1048576 }, "attn1_offset": { "shape": [ 1 ], "dtype": "float32", "bytes": 4 }, "attn1_end_offset": { "shape": [ 1 ], "dtype": "float32", "bytes": 4 }, "conv0_prev": { "shape": [ 1, 512, 6 ], "dtype": "float32", "bytes": 12288 }, "conv0_first": { "shape": [ 1 ], "dtype": "float32", "bytes": 4 }, "convtr0_partial": { "shape": [ 1, 256, 6 ], "dtype": "float32", "bytes": 6144 }, "res0_conv0_prev": { "shape": [ 1, 256, 2 ], "dtype": "float32", "bytes": 2048 }, "res0_conv0_first": { "shape": [ 1 ], "dtype": "float32", "bytes": 4 }, "res0_conv1_prev": { "shape": [ 1, 128, 0 ], "dtype": "float32", "bytes": 0 }, "res0_conv1_first": { "shape": [ 1 ], "dtype": "float32", "bytes": 4 }, "convtr1_partial": { "shape": [ 1, 128, 5 ], "dtype": "float32", "bytes": 2560 }, "res1_conv0_prev": { "shape": [ 1, 128, 2 ], "dtype": "float32", "bytes": 1024 }, "res1_conv0_first": { "shape": [ 1 ], "dtype": "float32", "bytes": 4 }, "res1_conv1_prev": { "shape": [ 1, 64, 0 ], "dtype": "float32", "bytes": 0 }, "res1_conv1_first": { "shape": [ 1 ], "dtype": "float32", "bytes": 4 }, "convtr2_partial": { "shape": [ 1, 64, 4 ], "dtype": "float32", "bytes": 1024 }, "res2_conv0_prev": { "shape": [ 1, 64, 2 ], "dtype": "float32", "bytes": 512 }, "res2_conv0_first": { "shape": [ 1 ], "dtype": "float32", "bytes": 4 }, "res2_conv1_prev": { "shape": [ 1, 32, 0 ], "dtype": "float32", "bytes": 0 }, "res2_conv1_first": { "shape": [ 1 ], "dtype": "float32", "bytes": 4 }, "conv_final_prev": { "shape": [ 1, 64, 2 ], "dtype": "float32", "bytes": 512 }, "conv_final_first": { "shape": [ 1 ], "dtype": "float32", "bytes": 4 } }, "alba_voice_cache": { "cache0": { "shape": [ 2, 1, 200, 16, 64 ], "dtype": "float32", "bytes": 1638400 }, "position0": { "shape": [ 1 ], "dtype": "float32", "bytes": 4 }, "cache1": { "shape": [ 2, 1, 200, 16, 64 ], "dtype": "float32", "bytes": 1638400 }, "position1": { "shape": [ 1 ], "dtype": "float32", "bytes": 4 }, "cache2": { "shape": [ 2, 1, 200, 16, 64 ], "dtype": "float32", "bytes": 1638400 }, "position2": { "shape": [ 1 ], "dtype": "float32", "bytes": 4 }, "cache3": { "shape": [ 2, 1, 200, 16, 64 ], "dtype": "float32", "bytes": 1638400 }, "position3": { "shape": [ 1 ], "dtype": "float32", "bytes": 4 }, "cache4": { "shape": [ 2, 1, 200, 16, 64 ], "dtype": "float32", "bytes": 1638400 }, "position4": { "shape": [ 1 ], "dtype": "float32", "bytes": 4 }, "cache5": { "shape": [ 2, 1, 200, 16, 64 ], "dtype": "float32", "bytes": 1638400 }, "position5": { "shape": [ 1 ], "dtype": "float32", "bytes": 4 } }, "alba_audio_prompt": { "shape": [ 1, 125, 1024 ], "dtype": "float32", "bytes": 512000 }, "tokenizer": { "format": "sentencepiece_protobuf", "bytes": 59339 } }