flavioschneider committed on
Commit
3d43b81
·
1 Parent(s): 6665260

Upload DMAE1d

Browse files
Files changed (3) hide show
  1. config.json +6 -1
  2. model.py +56 -0
  3. pytorch_model.bin +3 -0
config.json CHANGED
@@ -1,7 +1,12 @@
1
  {
 
 
 
2
  "auto_map": {
3
- "AutoConfig": "config.DMAE1dConfig"
 
4
  },
5
  "model_type": "archinetai/dmae1d-ATC32-v3",
 
6
  "transformers_version": "4.24.0"
7
  }
 
1
  {
2
+ "architectures": [
3
+ "DMAE1d"
4
+ ],
5
  "auto_map": {
6
+ "AutoConfig": "config.DMAE1dConfig",
7
+ "AutoModel": "model.DMAE1d"
8
  },
9
  "model_type": "archinetai/dmae1d-ATC32-v3",
10
+ "torch_dtype": "float32",
11
  "transformers_version": "4.24.0"
12
  }
model.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from torch import Tensor
3
+ from transformers import PreTrainedModel
4
+ from audio_encoders_pytorch import MelE1d, TanhBottleneck
5
+ from audio_diffusion_pytorch import DiffusionAE, UNetV0, LTPlugin, VDiffusion, VSampler
6
+ from .config import DMAE1dConfig
7
+
8
+
9
class DMAE1d(PreTrainedModel):
    """Hugging Face wrapper around a 1-D diffusion autoencoder (DMAE).

    Thin ``PreTrainedModel`` adapter: all real work is delegated to a
    ``DiffusionAE`` instance built with a fixed architecture. The wrapper
    exists so the model can be loaded via ``AutoModel`` with
    ``DMAE1dConfig`` as its configuration class.
    """

    config_class = DMAE1dConfig

    def __init__(self, config: DMAE1dConfig):
        super().__init__(config)

        # UNet backbone wrapped in a learned-transform plugin
        # (window_length=64, stride=32 — presumably an overlapping framing
        # of the waveform; see audio_diffusion_pytorch's LTPlugin).
        backbone_t = LTPlugin(
            UNetV0,
            num_filters=128,
            window_length=64,
            stride=32,
        )

        # Encoder producing a 32-channel latent from 2-channel (stereo)
        # input; MelE1d suggests a mel-spectrogram front end at 48 kHz —
        # confirm against audio_encoders_pytorch.
        latent_encoder = MelE1d(
            in_channels=2,
            channels=512,
            multipliers=[1, 1],
            factors=[2],
            num_blocks=[12],
            out_channels=32,
            mel_channels=80,
            mel_sample_rate=48000,
            mel_normalize_log=True,
            bottleneck=TanhBottleneck(),
        )

        # v-objective diffusion autoencoder; the latent is injected at
        # UNet depth 4.
        self.model = DiffusionAE(
            net_t=backbone_t,
            dim=1,
            in_channels=2,
            channels=[256, 512, 512, 512, 512],
            factors=[1, 2, 2, 2, 2],
            items=[1, 2, 2, 2, 4],
            inject_depth=4,
            encoder=latent_encoder,
            diffusion_t=VDiffusion,
            sampler_t=VSampler,
        )

    def forward(self, *args, **kwargs):
        """Delegate directly to the wrapped DiffusionAE call."""
        return self.model(*args, **kwargs)

    def encode(self, *args, **kwargs):
        """Encode input audio into the latent space (pass-through)."""
        return self.model.encode(*args, **kwargs)

    @torch.no_grad()
    def decode(self, *args, **kwargs):
        """Decode latents back to audio; gradients disabled for sampling."""
        return self.model.decode(*args, **kwargs)
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:302c66edf614e12da3004192f8517f0a9883bcd36e937ab471b3123ae66fdca6
3
+ size 345503551