mlx-community
/

sam-audio-base

speech generation

voice isolation

Model card Files and versions

prince-canuma committed on 21 days ago

Commit

d20305b

·

verified ·

1 Parent(s): bd6a1d7

Upload folder using huggingface_hub

Files changed (2) hide show

config.json +44 -0
model.safetensors +3 -0

config.json ADDED Viewed

	@@ -0,0 +1,44 @@

+{
+  "model_type": "sam_audio",
+  "model_size": "base",
+  "in_channels": 768,
+  "audio_codec": {
+    "encoder_dim": 64,
+    "encoder_rates": [
+      2,
+      8,
+      10,
+      12
+    ],
+    "latent_dim": 1024,
+    "decoder_dim": 1536,
+    "decoder_rates": [
+      12,
+      10,
+      8,
+      2
+    ],
+    "n_codebooks": 16,
+    "codebook_size": 1024,
+    "codebook_dim": 128,
+    "sample_rate": 48000
+  },
+  "text_encoder": {
+    "name": "t5-base",
+    "max_length": 512,
+    "dim": 768
+  },
+  "transformer": {
+    "dim": 2048,
+    "n_heads": 16,
+    "n_layers": 16,
+    "dropout": 0.1,
+    "qk_norm": true,
+    "fc_bias": false,
+    "ffn_exp": 4,
+    "context_dim": 2048,
+    "out_channels": 256
+  },
+  "num_anchors": 3,
+  "anchor_embedding_dim": 128
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d7fa64c16b4e72f4486b05fa7993edc2a40fd13891089495b3e5e0be36b3232b
+size 5032052232