sam-audio-base / config.json
prince-canuma's picture
Upload folder using huggingface_hub
d20305b verified
raw
history blame contribute delete
754 Bytes
{
"model_type": "sam_audio",
"model_size": "base",
"in_channels": 768,
"audio_codec": {
"encoder_dim": 64,
"encoder_rates": [
2,
8,
10,
12
],
"latent_dim": 1024,
"decoder_dim": 1536,
"decoder_rates": [
12,
10,
8,
2
],
"n_codebooks": 16,
"codebook_size": 1024,
"codebook_dim": 128,
"sample_rate": 48000
},
"text_encoder": {
"name": "t5-base",
"max_length": 512,
"dim": 768
},
"transformer": {
"dim": 2048,
"n_heads": 16,
"n_layers": 16,
"dropout": 0.1,
"qk_norm": true,
"fc_bias": false,
"ffn_exp": 4,
"context_dim": 2048,
"out_channels": 256
},
"num_anchors": 3,
"anchor_embedding_dim": 128
}