mlx-community
/

sam-audio-base

speech generation

voice isolation

Model card Files Files and versions

sam-audio-base / config.json

prince-canuma's picture

Upload folder using huggingface_hub

d20305b verified 21 days ago

history blame contribute delete

754 Bytes

	{
	"model_type": "sam_audio",
	"model_size": "base",
	"in_channels": 768,
	"audio_codec": {
	"encoder_dim": 64,
	"encoder_rates": [
	2,
	8,
	10,
	12
	],
	"latent_dim": 1024,
	"decoder_dim": 1536,
	"decoder_rates": [
	12,
	10,
	8,
	2
	],
	"n_codebooks": 16,
	"codebook_size": 1024,
	"codebook_dim": 128,
	"sample_rate": 48000
	},
	"text_encoder": {
	"name": "t5-base",
	"max_length": 512,
	"dim": 768
	},
	"transformer": {
	"dim": 2048,
	"n_heads": 16,
	"n_layers": 16,
	"dropout": 0.1,
	"qk_norm": true,
	"fc_bias": false,
	"ffn_exp": 4,
	"context_dim": 2048,
	"out_channels": 256
	},
	"num_anchors": 3,
	"anchor_embedding_dim": 128
	}