Sync latest aetheris source code
Browse files
- aetheris/__pycache__/__init__.cpython-310.pyc +0 -0
- aetheris/__pycache__/__init__.cpython-313.pyc +0 -0
- aetheris/__pycache__/config.cpython-310.pyc +0 -0
- aetheris/__pycache__/config.cpython-313.pyc +0 -0
- aetheris/__pycache__/data.cpython-310.pyc +0 -0
- aetheris/__pycache__/inference.cpython-310.pyc +0 -0
- aetheris/__pycache__/model.cpython-310.pyc +0 -0
- aetheris/__pycache__/model.cpython-313.pyc +0 -0
- aetheris/__pycache__/utils.cpython-310.pyc +0 -0
- aetheris/api/__pycache__/schemas.cpython-310.pyc +0 -0
- aetheris/api/__pycache__/server.cpython-310.pyc +0 -0
- aetheris/cli/__pycache__/__init__.cpython-310.pyc +0 -0
- aetheris/cli/__pycache__/main.cpython-310.pyc +0 -0
- aetheris/model.py +12 -7
- aetheris/modules/__pycache__/__init__.cpython-310.pyc +0 -0
- aetheris/modules/__pycache__/__init__.cpython-313.pyc +0 -0
- aetheris/modules/__pycache__/expert.cpython-310.pyc +0 -0
- aetheris/modules/__pycache__/expert.cpython-313.pyc +0 -0
- aetheris/modules/__pycache__/moe.cpython-310.pyc +0 -0
- aetheris/modules/__pycache__/moe.cpython-313.pyc +0 -0
- aetheris/modules/__pycache__/ssm.cpython-310.pyc +0 -0
- aetheris/modules/__pycache__/ssm.cpython-313.pyc +0 -0
- aetheris/trainer/__pycache__/__init__.cpython-310.pyc +0 -0
- aetheris/trainer/__pycache__/trainer.cpython-310.pyc +0 -0
aetheris/__pycache__/__init__.cpython-310.pyc
ADDED
|
Binary file (239 Bytes). View file
|
|
|
aetheris/__pycache__/__init__.cpython-313.pyc
ADDED
|
Binary file (251 Bytes). View file
|
|
|
aetheris/__pycache__/config.cpython-310.pyc
ADDED
|
Binary file (2.15 kB). View file
|
|
|
aetheris/__pycache__/config.cpython-313.pyc
ADDED
|
Binary file (3.17 kB). View file
|
|
|
aetheris/__pycache__/data.cpython-310.pyc
ADDED
|
Binary file (5.52 kB). View file
|
|
|
aetheris/__pycache__/inference.cpython-310.pyc
ADDED
|
Binary file (3.64 kB). View file
|
|
|
aetheris/__pycache__/model.cpython-310.pyc
ADDED
|
Binary file (3.43 kB). View file
|
|
|
aetheris/__pycache__/model.cpython-313.pyc
ADDED
|
Binary file (7.05 kB). View file
|
|
|
aetheris/__pycache__/utils.cpython-310.pyc
ADDED
|
Binary file (1.76 kB). View file
|
|
|
aetheris/api/__pycache__/schemas.cpython-310.pyc
ADDED
|
Binary file (4.63 kB). View file
|
|
|
aetheris/api/__pycache__/server.cpython-310.pyc
ADDED
|
Binary file (4.64 kB). View file
|
|
|
aetheris/cli/__pycache__/__init__.cpython-310.pyc
ADDED
|
Binary file (154 Bytes). View file
|
|
|
aetheris/cli/__pycache__/main.cpython-310.pyc
ADDED
|
Binary file (10.7 kB). View file
|
|
|
aetheris/model.py
CHANGED
|
@@ -30,17 +30,22 @@ class HybridMambaMoE(nn.Module):
|
|
| 30 |
nn.init.normal_(self.embedding.weight, mean=0.0, std=0.02)
|
| 31 |
|
| 32 |
def resize_token_embeddings(self, new_vocab_size: int):
|
| 33 |
-
"""Resize embedding and lm_head
|
| 34 |
old_vocab_size = self.embedding.num_embeddings
|
| 35 |
if new_vocab_size == old_vocab_size:
|
| 36 |
return
|
| 37 |
-
old_weight = self.embedding.weight.data
|
| 38 |
-
mean_embed = old_weight.mean(dim=0)
|
| 39 |
self.embedding = nn.Embedding(new_vocab_size, self.config.d_model)
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
self.lm_head = nn.Linear(self.config.d_model, new_vocab_size, bias=False)
|
| 45 |
self.lm_head.weight = self.embedding.weight # Re-tie weights
|
| 46 |
self.config.vocab_size = new_vocab_size
|
|
|
|
| 30 |
nn.init.normal_(self.embedding.weight, mean=0.0, std=0.02)
|
| 31 |
|
| 32 |
def resize_token_embeddings(self, new_vocab_size: int):
    """Resize the token embedding and the tied ``lm_head``.

    Handles both growing and shrinking:

    * Growing: the existing rows are copied over and every newly added row
      is initialised to the mean of the old embedding rows.
    * Shrinking: the first ``new_vocab_size`` rows are kept (the caller
      should copy the correct rows afterward if a different subset is
      wanted).

    The replacement modules are created on the same device and with the
    same dtype as the current embedding weight, so resizing a model that
    has already been moved or cast does not reset it to the defaults.
    ``self.config.vocab_size`` is updated to ``new_vocab_size``. If the size
    is unchanged the method returns without touching anything.

    Args:
        new_vocab_size: Number of rows in the resized embedding table.
    """
    old_vocab_size = self.embedding.num_embeddings
    if new_vocab_size == old_vocab_size:
        return

    # No clone needed: the new module owns fresh storage, and the old tensor
    # stays alive through this local reference until the copy is done.
    old_weight = self.embedding.weight.data

    # Build the new table where the old one lives; the defaults would be
    # CPU / default dtype regardless of where the model currently is.
    placement = {"device": old_weight.device, "dtype": old_weight.dtype}
    new_embedding = nn.Embedding(
        new_vocab_size, self.config.d_model, **placement
    )

    # Rows shared by both tables are carried over unchanged.
    kept_rows = min(old_vocab_size, new_vocab_size)
    new_embedding.weight.data[:kept_rows] = old_weight[:kept_rows]
    if new_vocab_size > old_vocab_size:
        # Mean initialisation for the added rows; the (d_model,) vector
        # broadcasts across all of them.
        new_embedding.weight.data[old_vocab_size:] = old_weight.mean(dim=0)

    self.embedding = new_embedding
    self.lm_head = nn.Linear(
        self.config.d_model, new_vocab_size, bias=False, **placement
    )
    self.lm_head.weight = self.embedding.weight  # Re-tie weights
    self.config.vocab_size = new_vocab_size
|
aetheris/modules/__pycache__/__init__.cpython-310.pyc
ADDED
|
Binary file (305 Bytes). View file
|
|
|
aetheris/modules/__pycache__/__init__.cpython-313.pyc
ADDED
|
Binary file (318 Bytes). View file
|
|
|
aetheris/modules/__pycache__/expert.cpython-310.pyc
ADDED
|
Binary file (1.34 kB). View file
|
|
|
aetheris/modules/__pycache__/expert.cpython-313.pyc
ADDED
|
Binary file (2.54 kB). View file
|
|
|
aetheris/modules/__pycache__/moe.cpython-310.pyc
ADDED
|
Binary file (2.48 kB). View file
|
|
|
aetheris/modules/__pycache__/moe.cpython-313.pyc
ADDED
|
Binary file (4.73 kB). View file
|
|
|
aetheris/modules/__pycache__/ssm.cpython-310.pyc
ADDED
|
Binary file (3.64 kB). View file
|
|
|
aetheris/modules/__pycache__/ssm.cpython-313.pyc
ADDED
|
Binary file (8.68 kB). View file
|
|
|
aetheris/trainer/__pycache__/__init__.cpython-310.pyc
ADDED
|
Binary file (197 Bytes). View file
|
|
|
aetheris/trainer/__pycache__/trainer.cpython-310.pyc
ADDED
|
Binary file (4.11 kB). View file
|
|
|