oriyonay committed on
Commit
d5f183c
·
verified ·
1 Parent(s): 0da3090

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. config.json +1 -0
  2. model.safetensors +2 -2
  3. myna.py +56 -38
config.json CHANGED
@@ -4,6 +4,7 @@
4
  "Myna"
5
  ],
6
  "auto_map": {
 
7
  "AutoModel": "myna.Myna"
8
  },
9
  "model_type": "myna"
 
4
  "Myna"
5
  ],
6
  "auto_map": {
7
+ "AutoConfig": "myna.MynaConfig",
8
  "AutoModel": "myna.Myna"
9
  },
10
  "model_type": "myna"
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b109662c85f0f79093c223dee42dc8a4f2f4cfe3bb755125397e1647f32f1d6a
3
- size 85516688
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5dde8aa186da0fc0bfdd2e5a7c1ea3e6c6af6cd4683089b65c4fed6af8335374
3
+ size 85516720
myna.py CHANGED
@@ -14,6 +14,7 @@ import torchaudio.transforms as T
14
 
15
  # for uploading to huggingface hub
16
  from huggingface_hub import HfApi, PyTorchModelHubMixin
 
17
  import shutil
18
 
19
 
@@ -47,20 +48,6 @@ def load_model(model: nn.Module, checkpoint_path: str, device: str = 'cpu', igno
47
  print(f'==> Loaded model from {checkpoint_path}, ignoring layers: {", ".join(ignore_layers)}')
48
 
49
 
50
- def get_arch(arch: str):
51
- if arch.lower() in ['vit-s-16', 'vit-s-32']:
52
- # dim 384, depth 12, MLP 1536, 6 heads, 22M parameters
53
- return {'dim': 384, 'depth': 12, 'mlp_dim': 1536, 'heads': 6}
54
- if arch.lower() == 'vit-b-16':
55
- # dim 768, depth 12, MLP 3072, 12 heads, 87M parameters
56
- return {'dim': 768, 'depth': 12, 'mlp_dim': 3072, 'heads': 12}
57
- if arch.lower() == 'vit-l-16':
58
- # dim 1024, depth 24, MLP 4096, 16 heads, 303M parameters
59
- return {'dim': 1024, 'depth': 24, 'mlp_dim': 4096, 'heads': 16}
60
-
61
- raise ValueError(f'Architecture {arch} not implemented')
62
-
63
-
64
  class FeedForward(nn.Module):
65
  def __init__(self, dim, hidden_dim):
66
  super().__init__()
@@ -152,44 +139,74 @@ class MynaPreprocessor:
152
  return cls(**config)
153
 
154
 
155
- class Myna(nn.Module, PyTorchModelHubMixin):
 
156
  def __init__(
157
- self, *, spec_size=(128, 4096), patch_size=16, dim=384, depth=12,
158
  heads=6, mlp_dim=1536, dim_head = 64, arch=None, additional_patch_size = None,
159
- hybrid_mode: bool = False
160
  ):
161
- super().__init__()
 
 
 
 
 
 
 
 
 
 
162
 
163
  # load architecture if provided
164
  if arch:
165
- arch = get_arch(arch)
166
- dim = arch['dim']
167
- depth = arch['depth']
168
- heads = arch['heads']
169
- mlp_dim = arch['mlp_dim']
 
 
 
 
 
 
 
 
 
 
 
 
 
170
 
171
- self.hybrid_mode = hybrid_mode
172
- spec_height, spec_width = pair(spec_size)
173
- patch_height, patch_width = pair(patch_size)
 
 
 
 
 
 
174
 
175
  assert spec_height % patch_height == 0 and spec_width % patch_width == 0, 'Image dimensions must be divisible by the patch size.'
176
 
177
- self.additional_patch_size = additional_patch_size
178
- if additional_patch_size:
179
- patch_height_b, patch_width_b = pair(additional_patch_size)
180
  patch_dim_b = patch_height_b * patch_width_b
181
 
182
  self.to_patch_embedding_b, self.pos_embedding_b = self._make_embeddings(
183
- patch_height_b, patch_width_b, patch_dim_b, dim, spec_height, spec_width
184
  )
185
 
186
  patch_dim = patch_height * patch_width
187
 
188
  self.to_patch_embedding, self.pos_embedding = self._make_embeddings(
189
- patch_height, patch_width, patch_dim, dim, spec_height, spec_width
190
  )
191
 
192
- self.transformer = Transformer(dim, depth, heads, dim_head, mlp_dim)
193
 
194
  self.pool = 'mean'
195
  self.to_latent = nn.Identity()
@@ -261,6 +278,7 @@ def save_model_and_push(model, repo_name, save_dir='myna-temp', to_hub=False):
261
  '_name_or_path': repo_name,
262
  'architectures': ['Myna'],
263
  'auto_map': {
 
264
  'AutoModel': 'myna.Myna'
265
  },
266
  'model_type': 'myna'
@@ -279,12 +297,12 @@ def save_model_and_push(model, repo_name, save_dir='myna-temp', to_hub=False):
279
 
280
 
281
  if __name__ == '__main__':
282
- config = {
283
- 'arch': 'vit-s-16',
284
- 'additional_patch_size': None,
285
- 'hybrid_mode': False
286
- }
287
- model = Myna(**config)
288
  load_model(model, 'checkpoints/myna-base.pth', verbose=True)
289
  print(f'Model contains {model.n_params:,} parameters')
290
 
 
14
 
15
  # for uploading to huggingface hub
16
  from huggingface_hub import HfApi, PyTorchModelHubMixin
17
+ from transformers import PretrainedConfig, PreTrainedModel
18
  import shutil
19
 
20
 
 
48
  print(f'==> Loaded model from {checkpoint_path}, ignoring layers: {", ".join(ignore_layers)}')
49
 
50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  class FeedForward(nn.Module):
52
  def __init__(self, dim, hidden_dim):
53
  super().__init__()
 
139
  return cls(**config)
140
 
141
 
142
+ class MynaConfig(PretrainedConfig):
143
+ model_type = 'myna'
144
  def __init__(
145
+ self, spec_size=(128, 4096), patch_size=16, dim=384, depth=12,
146
  heads=6, mlp_dim=1536, dim_head = 64, arch=None, additional_patch_size = None,
147
+ hybrid_mode: bool = False, **kwargs
148
  ):
149
+ super().__init__(**kwargs)
150
+ self.spec_size = spec_size
151
+ self.patch_size = patch_size
152
+ self.dim = dim
153
+ self.depth = depth
154
+ self.heads = heads
155
+ self.mlp_dim = mlp_dim
156
+ self.dim_head = dim_head
157
+ self.arch = arch
158
+ self.additional_patch_size = additional_patch_size
159
+ self.hybrid_mode = hybrid_mode
160
 
161
  # load architecture if provided
162
  if arch:
163
+ arch = self._get_arch(arch)
164
+ self.dim = arch['dim']
165
+ self.depth = arch['depth']
166
+ self.heads = arch['heads']
167
+ self.mlp_dim = arch['mlp_dim']
168
+
169
+ def _get_arch(self, arch: str):
170
+ if arch.lower() in ['vit-s-16', 'vit-s-32']:
171
+ # dim 384, depth 12, MLP 1536, 6 heads, 22M parameters
172
+ return {'dim': 384, 'depth': 12, 'mlp_dim': 1536, 'heads': 6}
173
+ if arch.lower() == 'vit-b-16':
174
+ # dim 768, depth 12, MLP 3072, 12 heads, 87M parameters
175
+ return {'dim': 768, 'depth': 12, 'mlp_dim': 3072, 'heads': 12}
176
+ if arch.lower() == 'vit-l-16':
177
+ # dim 1024, depth 24, MLP 4096, 16 heads, 303M parameters
178
+ return {'dim': 1024, 'depth': 24, 'mlp_dim': 4096, 'heads': 16}
179
+
180
+ raise ValueError(f'Architecture {arch} not implemented')
181
 
182
+
183
+ class Myna(PreTrainedModel, PyTorchModelHubMixin):
184
+ config_class = MynaConfig
185
+ def __init__(self, config: MynaConfig):
186
+ super().__init__(config)
187
+
188
+ self.hybrid_mode = config.hybrid_mode
189
+ spec_height, spec_width = pair(config.spec_size)
190
+ patch_height, patch_width = pair(config.patch_size)
191
 
192
  assert spec_height % patch_height == 0 and spec_width % patch_width == 0, 'Image dimensions must be divisible by the patch size.'
193
 
194
+ self.additional_patch_size = config.additional_patch_size
195
+ if config.additional_patch_size:
196
+ patch_height_b, patch_width_b = pair(config.additional_patch_size)
197
  patch_dim_b = patch_height_b * patch_width_b
198
 
199
  self.to_patch_embedding_b, self.pos_embedding_b = self._make_embeddings(
200
+ patch_height_b, patch_width_b, patch_dim_b, config.dim, spec_height, spec_width
201
  )
202
 
203
  patch_dim = patch_height * patch_width
204
 
205
  self.to_patch_embedding, self.pos_embedding = self._make_embeddings(
206
+ patch_height, patch_width, patch_dim, config.dim, spec_height, spec_width
207
  )
208
 
209
+ self.transformer = Transformer(config.dim, config.depth, config.heads, config.dim_head, config.mlp_dim)
210
 
211
  self.pool = 'mean'
212
  self.to_latent = nn.Identity()
 
278
  '_name_or_path': repo_name,
279
  'architectures': ['Myna'],
280
  'auto_map': {
281
+ 'AutoConfig': 'myna.MynaConfig',
282
  'AutoModel': 'myna.Myna'
283
  },
284
  'model_type': 'myna'
 
297
 
298
 
299
  if __name__ == '__main__':
300
+ config = MynaConfig(
301
+ arch='vit-s-16',
302
+ additional_patch_size=None,
303
+ hybrid_mode=False
304
+ )
305
+ model = Myna(config)
306
  load_model(model, 'checkpoints/myna-base.pth', verbose=True)
307
  print(f'Model contains {model.n_params:,} parameters')
308