Upload 8 files

Browse files

Files changed (9) hide show

.gitattributes +6 -0
base_model_config.json +108 -0
jerry_encoded_bs128_HARD_epoch=19-step=60 +3 -0
jerry_encoded_bs128_HARD_epoch=6-step=20 +3 -0
jerry_encoded_bs16_HARDER_epoch=4-step=150 +3 -0
jerry_encoded_bs32_HARDER_epoch=6-step=100 +3 -0
jerry_encoded_bs64_epoch=14-step=100 +3 -0
jerry_encoded_epoch=33-step=100 +3 -0
jerry_un-encoded_epoch=32-step=2000.ckpt +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+jerry_encoded_bs128_HARD_epoch=19-step=60 filter=lfs diff=lfs merge=lfs -text
+jerry_encoded_bs128_HARD_epoch=6-step=20 filter=lfs diff=lfs merge=lfs -text
+jerry_encoded_bs16_HARDER_epoch=4-step=150 filter=lfs diff=lfs merge=lfs -text
+jerry_encoded_bs32_HARDER_epoch=6-step=100 filter=lfs diff=lfs merge=lfs -text
+jerry_encoded_bs64_epoch=14-step=100 filter=lfs diff=lfs merge=lfs -text
+jerry_encoded_epoch=33-step=100 filter=lfs diff=lfs merge=lfs -text

base_model_config.json ADDED Viewed

	@@ -0,0 +1,108 @@

+{
+  "model_type": "diffusion_cond",
+  "sample_size": 524288,
+  "sample_rate": 44100,
+  "audio_channels": 2,
+  "model": {
+    "pretransform": {
+      "type": "autoencoder",
+      "iterate_batch": false,
+      "model_half": true,
+      "config": {
+        "encoder": {
+          "type": "oobleck",
+          "requires_grad": false,
+          "config": {
+            "in_channels": 2,
+            "channels": 128,
+            "c_mults": [1, 2, 4, 8, 16],
+            "strides": [2, 4, 4, 8, 8],
+            "latent_dim": 128,
+            "use_snake": true
+          }
+        },
+        "decoder": {
+          "type": "oobleck",
+          "config": {
+            "out_channels": 2,
+            "channels": 128,
+            "c_mults": [1, 2, 4, 8, 16],
+            "strides": [2, 4, 4, 8, 8],
+            "latent_dim": 64,
+            "use_snake": true,
+            "final_tanh": false
+          }
+        },
+        "bottleneck": { "type": "vae" },
+        "latent_dim": 64,
+        "downsampling_ratio": 2048,
+        "io_channels": 2
+      }
+    },
+    "conditioning": {
+      "configs": [
+        { "id": "prompt", "type": "t5", "config": { "t5_model_name": "t5-base", "max_length": 64 } },
+        { "id": "seconds_total", "type": "number", "config": { "min_val": 0, "max_val": 256 } }
+      ],
+      "cond_dim": 768
+    },
+    "diffusion": {
+      "cross_attention_cond_ids": ["prompt", "seconds_total"],
+      "global_cond_ids": ["seconds_total"],
+      "diffusion_objective": "rectified_flow",
+      "distribution_shift_options": { "min_length": 256, "max_length": 4096 },
+      "type": "dit",
+      "config": {
+        "io_channels": 64,
+        "embed_dim": 1024,
+        "depth": 16,
+        "num_heads": 8,
+        "cond_token_dim": 768,
+        "global_cond_dim": 768,
+        "transformer_type": "continuous_transformer",
+        "attn_kwargs": { "qk_norm": "ln" }
+      }
+    },
+    "io_channels": 64
+  },
+  "training": {
+    "use_ema": true,
+    "log_loss_info": false,
+    "pre_encoded": false,
+    "timestep_sampler": "trunc_logit_normal",
+    "optimizer_configs": {
+      "diffusion": {
+        "optimizer": {
+          "type": "AdamW",
+          "config": {
+            "lr": 5e-5,
+            "betas": [0.9, 0.95],
+            "eps": 1e-8,
+            "weight_decay": 0.01,
+            "foreach": true
+          }
+        },
+        "scheduler": {
+          "type": "InverseLR",
+          "config": { "inv_gamma": 500000, "power": 0.5, "warmup": 0.999 }
+        }
+      }
+    },
+    "demo": {
+      "demo_every": 2000,
+      "demo_steps": 50,
+      "num_demos": 8,
+      "demo_cond": [
+        {"prompt": "Amen break 174 BPM", "seconds_total": 6},
+        {"prompt": "People talking in a crowded cafe", "seconds_total": 10},
+        {"prompt": "chillhop 91 bpm", "seconds_total": 6},
+        {"prompt": "trap 120bpm", "seconds_total": 12},
+        {"prompt": "A dog barking next to a waterfall", "seconds_total": 6},
+        {"prompt": "Glitchy bass design, I used Serum for this", "seconds_total": 4},
+        {"prompt": "chillhop 132 bpm", "seconds_total": 12},
+        {"prompt": "Birds singing in the forest", "seconds_total": 10}
+      ],
+      "demo_cfg_scales": [1, 4, 7]
+    }
+  }
+}

jerry_encoded_bs128_HARD_epoch=19-step=60 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f24f224bba10e77e0dbc9589e33675d18ce63fef9d282d099fd396de21f6ca85
+size 5769867602

jerry_encoded_bs128_HARD_epoch=6-step=20 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3ac49bf8935c0d8c64cade8c154704d78ab2368f5642d2baf6aa8eef219ea5a2
+size 5769867602

jerry_encoded_bs16_HARDER_epoch=4-step=150 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b3f114ff0f518a23a3dcdee67c6901b8ad342920993e0d68b3888f59701aa6dc
+size 5769867602

jerry_encoded_bs32_HARDER_epoch=6-step=100 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e738227f15765d4bb6fb70b34b9eb3434d0a131d1de87ede0b065e229c66da1f
+size 5769867602

jerry_encoded_bs64_epoch=14-step=100 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b0e8689afe38c1df786348cc9f5c0b553286c8710839f6537e0d8a33026ff07a
+size 5769867538

jerry_encoded_epoch=33-step=100 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ef79774cf86ec8b30aa7036d7f8214985da32fafe570c8922dee720c16f9dd83
+size 5769867538

jerry_un-encoded_epoch=32-step=2000.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d4248b6b2004ee918b7e0a40a6cc7958e2e74b50e263dabaffe868ddc1dc67dc
+size 5769867538