oriyonay
/

myna-base

         "Myna"
     ],
     "auto_map": {
+        "AutoConfig": "myna.MynaConfig",
         "AutoModel": "myna.Myna"
     },
     "model_type": "myna"

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b109662c85f0f79093c223dee42dc8a4f2f4cfe3bb755125397e1647f32f1d6a
-size 85516688

 version https://git-lfs.github.com/spec/v1
+oid sha256:5dde8aa186da0fc0bfdd2e5a7c1ea3e6c6af6cd4683089b65c4fed6af8335374
+size 85516720

myna.py CHANGED Viewed

@@ -14,6 +14,7 @@ import torchaudio.transforms as T
 # for uploading to huggingface hub
 from huggingface_hub import HfApi, PyTorchModelHubMixin
 import shutil
@@ -47,20 +48,6 @@ def load_model(model: nn.Module, checkpoint_path: str, device: str = 'cpu', igno
         print(f'==> Loaded model from {checkpoint_path}, ignoring layers: {", ".join(ignore_layers)}')
-def get_arch(arch: str):
-    if arch.lower() in ['vit-s-16', 'vit-s-32']:
-        # dim 384, depth 12, MLP 1536, 6 heads, 22M parameters
-        return {'dim': 384, 'depth': 12, 'mlp_dim': 1536, 'heads': 6}
-    if arch.lower() == 'vit-b-16':
-        # dim 768, depth 12, MLP 3072, 12 heads, 87M parameters
-        return {'dim': 768, 'depth': 12, 'mlp_dim': 3072, 'heads': 12}
-    if arch.lower() == 'vit-l-16':
-        # dim 1024, depth 24, MLP 4096, 16 heads, 303M parameters
-        return {'dim': 1024, 'depth': 24, 'mlp_dim': 4096, 'heads': 16}
-    raise ValueError(f'Architecture {arch} not implemented')
 class FeedForward(nn.Module):
     def __init__(self, dim, hidden_dim):
         super().__init__()
@@ -152,44 +139,74 @@ class MynaPreprocessor:
         return cls(**config)
-class Myna(nn.Module, PyTorchModelHubMixin):
     def __init__(
-        self, *, spec_size=(128, 4096), patch_size=16, dim=384, depth=12,
         heads=6, mlp_dim=1536, dim_head = 64, arch=None, additional_patch_size = None,
-        hybrid_mode: bool = False
     ):
-        super().__init__()
         # load architecture if provided
         if arch:
-            arch = get_arch(arch)
-            dim = arch['dim']
-            depth = arch['depth']
-            heads = arch['heads']
-            mlp_dim = arch['mlp_dim']
-        self.hybrid_mode = hybrid_mode
-        spec_height, spec_width = pair(spec_size)
-        patch_height, patch_width = pair(patch_size)
         assert spec_height % patch_height == 0 and spec_width % patch_width == 0, 'Image dimensions must be divisible by the patch size.'
-        self.additional_patch_size = additional_patch_size
-        if additional_patch_size:
-            patch_height_b, patch_width_b = pair(additional_patch_size)
             patch_dim_b = patch_height_b * patch_width_b
             self.to_patch_embedding_b, self.pos_embedding_b = self._make_embeddings(
-                patch_height_b, patch_width_b, patch_dim_b, dim, spec_height, spec_width
             )
         patch_dim = patch_height * patch_width
         self.to_patch_embedding, self.pos_embedding = self._make_embeddings(
-            patch_height, patch_width, patch_dim, dim, spec_height, spec_width
         )
-        self.transformer = Transformer(dim, depth, heads, dim_head, mlp_dim)
         self.pool = 'mean'
         self.to_latent = nn.Identity()
@@ -261,6 +278,7 @@ def save_model_and_push(model, repo_name, save_dir='myna-temp', to_hub=False):
         '_name_or_path': repo_name,
         'architectures': ['Myna'],
         'auto_map': {
             'AutoModel': 'myna.Myna'
         },
         'model_type': 'myna'
@@ -279,12 +297,12 @@ def save_model_and_push(model, repo_name, save_dir='myna-temp', to_hub=False):
 if __name__ == '__main__':
-    config = {
-        'arch': 'vit-s-16',
-        'additional_patch_size': None,
-        'hybrid_mode': False
-    }
-    model = Myna(**config)
     load_model(model, 'checkpoints/myna-base.pth', verbose=True)
     print(f'Model contains {model.n_params:,} parameters')

 # for uploading to huggingface hub
 from huggingface_hub import HfApi, PyTorchModelHubMixin
+from transformers import PretrainedConfig, PreTrainedModel
 import shutil
         print(f'==> Loaded model from {checkpoint_path}, ignoring layers: {", ".join(ignore_layers)}')
 class FeedForward(nn.Module):
     def __init__(self, dim, hidden_dim):
         super().__init__()
         return cls(**config)
+class MynaConfig(PretrainedConfig):
+    model_type = 'myna'
     def __init__(
+        self, spec_size=(128, 4096), patch_size=16, dim=384, depth=12,
         heads=6, mlp_dim=1536, dim_head = 64, arch=None, additional_patch_size = None,
+        hybrid_mode: bool = False, **kwargs
     ):
+        super().__init__(**kwargs)
+        self.spec_size = spec_size
+        self.patch_size = patch_size
+        self.dim = dim
+        self.depth = depth
+        self.heads = heads
+        self.mlp_dim = mlp_dim
+        self.dim_head = dim_head
+        self.arch = arch
+        self.additional_patch_size = additional_patch_size
+        self.hybrid_mode = hybrid_mode
         # load architecture if provided
         if arch:
+            arch = self._get_arch(arch)
+            self.dim = arch['dim']
+            self.depth = arch['depth']
+            self.heads = arch['heads']
+            self.mlp_dim = arch['mlp_dim']
+    def _get_arch(self, arch: str):
+        if arch.lower() in ['vit-s-16', 'vit-s-32']:
+            # dim 384, depth 12, MLP 1536, 6 heads, 22M parameters
+            return {'dim': 384, 'depth': 12, 'mlp_dim': 1536, 'heads': 6}
+        if arch.lower() == 'vit-b-16':
+            # dim 768, depth 12, MLP 3072, 12 heads, 87M parameters
+            return {'dim': 768, 'depth': 12, 'mlp_dim': 3072, 'heads': 12}
+        if arch.lower() == 'vit-l-16':
+            # dim 1024, depth 24, MLP 4096, 16 heads, 303M parameters
+            return {'dim': 1024, 'depth': 24, 'mlp_dim': 4096, 'heads': 16}
+        raise ValueError(f'Architecture {arch} not implemented')
+class Myna(PreTrainedModel, PyTorchModelHubMixin):
+    config_class = MynaConfig
+    def __init__(self, config: MynaConfig):
+        super().__init__(config)
+        self.hybrid_mode = config.hybrid_mode
+        spec_height, spec_width = pair(config.spec_size)
+        patch_height, patch_width = pair(config.patch_size)
         assert spec_height % patch_height == 0 and spec_width % patch_width == 0, 'Image dimensions must be divisible by the patch size.'
+        self.additional_patch_size = config.additional_patch_size
+        if config.additional_patch_size:
+            patch_height_b, patch_width_b = pair(config.additional_patch_size)
             patch_dim_b = patch_height_b * patch_width_b
             self.to_patch_embedding_b, self.pos_embedding_b = self._make_embeddings(
+                patch_height_b, patch_width_b, patch_dim_b, config.dim, spec_height, spec_width
             )
         patch_dim = patch_height * patch_width
         self.to_patch_embedding, self.pos_embedding = self._make_embeddings(
+            patch_height, patch_width, patch_dim, config.dim, spec_height, spec_width
         )
+        self.transformer = Transformer(config.dim, config.depth, config.heads, config.dim_head, config.mlp_dim)
         self.pool = 'mean'
         self.to_latent = nn.Identity()
         '_name_or_path': repo_name,
         'architectures': ['Myna'],
         'auto_map': {
+            'AutoConfig': 'myna.MynaConfig',
             'AutoModel': 'myna.Myna'
         },
         'model_type': 'myna'
 if __name__ == '__main__':
+    config = MynaConfig(
+        arch='vit-s-16',
+        additional_patch_size=None,
+        hybrid_mode=False
+    )
+    model = Myna(config)
     load_model(model, 'checkpoints/myna-base.pth', verbose=True)
     print(f'Model contains {model.n_params:,} parameters')