Jonathan Schmok committed
Commit 4b70876 · 1 Parent(s): f3aad89

Add config, wrapper, weights for transformers loading

README.md CHANGED
@@ -2,4 +2,5 @@
  license: apache-2.0
  base_model:
  - arcinstitute/evo2_7b_base
- ---
+ ---
+ Lightweight exon/intron classifier built on Evo-2 embeddings.
__pycache__/configuration_exon_classifier.cpython-311.pyc ADDED
Binary file (1.1 kB).
 
__pycache__/wrapper_exon_classifier.cpython-311.pyc ADDED
Binary file (2.66 kB).
 
config.json ADDED
@@ -0,0 +1,15 @@
+ {
+   "architectures": [
+     "Evo2ExonModel"
+   ],
+   "auto_map": {
+     "AutoConfig": "configuration_exon_classifier.Evo2ExonConfig",
+     "AutoModel": "wrapper_exon_classifier.Evo2ExonModel"
+   },
+   "embedding_dim": 8192,
+   "hidden_dim": 1024,
+   "model_type": "evo2_exon_classifier",
+   "num_hidden_layers": 1,
+   "torch_dtype": "float32",
+   "transformers_version": "4.36.2"
+ }
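
Because auto_map points at the custom config and wrapper classes, the checkpoint can be loaded through the stock Auto classes with trust_remote_code=True. A minimal loading sketch; the repo id below is a placeholder, since the actual Hub path isn't shown in this commit:

from transformers import AutoConfig, AutoModel

repo_id = "your-namespace/evo2-exon-classifier"  # placeholder, not the real repo id
config = AutoConfig.from_pretrained(repo_id, trust_remote_code=True)
model = AutoModel.from_pretrained(repo_id, trust_remote_code=True)
print(config.embedding_dim, config.hidden_dim)   # 8192 1024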
configuration_exon_classifier.py ADDED
@@ -0,0 +1,14 @@
+ from transformers import PretrainedConfig
+
+ class Evo2ExonConfig(PretrainedConfig):
+     model_type = "evo2_exon_classifier"
+
+     def __init__(self,
+                  embedding_dim: int = 8192,   # match the width of the input embeddings
+                  hidden_dim: int = 1024,      # width of the hidden layers
+                  num_hidden_layers: int = 1,  # depth ≥ 1
+                  **kwargs):
+         super().__init__(**kwargs)
+         self.embedding_dim = embedding_dim
+         self.hidden_dim = hidden_dim
+         self.num_hidden_layers = num_hidden_layers
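
As a quick check, instantiating the config with its defaults and saving it reproduces the fields in config.json above (the output directory name is just an example):

from configuration_exon_classifier import Evo2ExonConfig

config = Evo2ExonConfig()                    # embedding_dim=8192, hidden_dim=1024, num_hidden_layers=1
config.save_pretrained("./exon-classifier")  # writes a config.json with model_type "evo2_exon_classifier"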
evo2_7b_gen-blocks_26-proteinCoding.pth → model.safetensors RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:463ffbcf24b8e439b6c82a90cdf50f346ea26e36c474ab21d1431352fc14d03c
- size 33564896
+ oid sha256:0394fdbf4f533a280a41357d795df29a53a9d26444cfa2ab1bb670b8161ae191
+ size 33563004
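
The new size is consistent with the MLP this config describes: (8192 × 1024 + 1024) + (1024 × 1 + 1) = 8,390,657 float32 parameters ≈ 33,562,628 bytes, plus a small safetensors header. A sanity-check sketch, assuming a local copy of the file:

from safetensors import safe_open

with safe_open("model.safetensors", framework="pt") as f:
    for name in f.keys():
        print(name, f.get_tensor(name).shape)

# Expected tensors along the lines of:
#   fc_layers.0.weight (1024, 8192), fc_layers.0.bias (1024,)
#   fc_layers.2.weight (1, 1024),    fc_layers.2.bias (1,)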
wrapper_exon_classifier.py ADDED
@@ -0,0 +1,37 @@
+ import torch.nn as nn
+ from transformers import PreTrainedModel
+ from .configuration_exon_classifier import Evo2ExonConfig
+
+ class Evo2ExonModel(PreTrainedModel):
+     config_class = Evo2ExonConfig
+     base_model_prefix = "evo2_exon_classifier"
+
+     def __init__(self, config: Evo2ExonConfig):
+         super().__init__(config)
+
+         # Build (Linear + ReLU) * num_hidden_layers, then a final Linear(hidden_dim, 1)
+         layers = [nn.Linear(config.embedding_dim, config.hidden_dim), nn.ReLU()]
+         for _ in range(config.num_hidden_layers - 1):
+             layers += [nn.Linear(config.hidden_dim, config.hidden_dim), nn.ReLU()]
+         layers += [nn.Linear(config.hidden_dim, 1)]
+
+         self.fc_layers = nn.Sequential(*layers)
+         self.sigmoid = nn.Sigmoid()  # converts logits to per-token probabilities
+
+     def forward(self, inputs_embeds, labels=None, **kwargs):
+         """
+         inputs_embeds : (batch, seq_len, embedding_dim)
+         labels        : (batch, seq_len), optional, 0/1 floats or ints
+         """
+         bsz, seq_len, _ = inputs_embeds.shape
+
+         # Flatten tokens, run the FC layers, reshape back to (batch, seq_len)
+         logits = self.fc_layers(inputs_embeds.view(-1, inputs_embeds.size(-1)))
+         logits = logits.view(bsz, seq_len)
+         probs = self.sigmoid(logits)
+
+         if labels is not None:
+             loss = nn.BCELoss()(probs, labels.float())
+             return {"loss": loss, "logits": probs}
+
+         return {"logits": probs}
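
A forward-pass smoke test under stated assumptions: the model is loaded from a local checkout of this repo, and random tensors stand in for the Evo-2 embeddings that would normally be extracted upstream:

import torch
from transformers import AutoModel

model = AutoModel.from_pretrained(".", trust_remote_code=True).eval()  # "." = local repo checkout

dummy = torch.randn(2, 16, 8192)        # (batch, seq_len, embedding_dim)
labels = torch.randint(0, 2, (2, 16))   # per-token exon/intron targets
with torch.no_grad():
    out = model(inputs_embeds=dummy, labels=labels)
print(out["loss"].item())               # BCE loss against the dummy labels
print(out["logits"].shape)              # torch.Size([2, 16]); note these are sigmoid probabilities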