rcgalbo committed on
Commit
26bc130
·
verified ·
1 Parent(s): 7d6e04b

Sync latest aetheris source code

Browse files
aetheris/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (239 Bytes). View file
 
aetheris/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (251 Bytes). View file
 
aetheris/__pycache__/config.cpython-310.pyc ADDED
Binary file (2.15 kB). View file
 
aetheris/__pycache__/config.cpython-313.pyc ADDED
Binary file (3.17 kB). View file
 
aetheris/__pycache__/data.cpython-310.pyc ADDED
Binary file (5.52 kB). View file
 
aetheris/__pycache__/inference.cpython-310.pyc ADDED
Binary file (3.64 kB). View file
 
aetheris/__pycache__/model.cpython-310.pyc ADDED
Binary file (3.43 kB). View file
 
aetheris/__pycache__/model.cpython-313.pyc ADDED
Binary file (7.05 kB). View file
 
aetheris/__pycache__/utils.cpython-310.pyc ADDED
Binary file (1.76 kB). View file
 
aetheris/api/__pycache__/schemas.cpython-310.pyc ADDED
Binary file (4.63 kB). View file
 
aetheris/api/__pycache__/server.cpython-310.pyc ADDED
Binary file (4.64 kB). View file
 
aetheris/cli/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (154 Bytes). View file
 
aetheris/cli/__pycache__/main.cpython-310.pyc ADDED
Binary file (10.7 kB). View file
 
aetheris/model.py CHANGED
@@ -30,17 +30,22 @@ class HybridMambaMoE(nn.Module):
30
  nn.init.normal_(self.embedding.weight, mean=0.0, std=0.02)
31
 
32
  def resize_token_embeddings(self, new_vocab_size: int):
33
- """Resize embedding and lm_head for new tokens. New embeddings initialized from mean of existing."""
34
  old_vocab_size = self.embedding.num_embeddings
35
  if new_vocab_size == old_vocab_size:
36
  return
37
- old_weight = self.embedding.weight.data
38
- mean_embed = old_weight.mean(dim=0)
39
  self.embedding = nn.Embedding(new_vocab_size, self.config.d_model)
40
- self.embedding.weight.data[:old_vocab_size] = old_weight
41
- self.embedding.weight.data[old_vocab_size:] = mean_embed.unsqueeze(0).expand(
42
- new_vocab_size - old_vocab_size, -1
43
- )
 
 
 
 
 
 
44
  self.lm_head = nn.Linear(self.config.d_model, new_vocab_size, bias=False)
45
  self.lm_head.weight = self.embedding.weight # Re-tie weights
46
  self.config.vocab_size = new_vocab_size
 
30
  nn.init.normal_(self.embedding.weight, mean=0.0, std=0.02)
31
 
32
  def resize_token_embeddings(self, new_vocab_size: int):
33
+ """Resize embedding and lm_head. Handles both growing and shrinking."""
34
  old_vocab_size = self.embedding.num_embeddings
35
  if new_vocab_size == old_vocab_size:
36
  return
37
+ old_weight = self.embedding.weight.data.clone()
 
38
  self.embedding = nn.Embedding(new_vocab_size, self.config.d_model)
39
+ if new_vocab_size > old_vocab_size:
40
+ # Growing: copy old weights, init new with mean
41
+ mean_embed = old_weight.mean(dim=0)
42
+ self.embedding.weight.data[:old_vocab_size] = old_weight
43
+ self.embedding.weight.data[old_vocab_size:] = mean_embed.unsqueeze(0).expand(
44
+ new_vocab_size - old_vocab_size, -1
45
+ )
46
+ else:
47
+ # Shrinking: truncate (caller should copy correct rows afterward)
48
+ self.embedding.weight.data[:] = old_weight[:new_vocab_size]
49
  self.lm_head = nn.Linear(self.config.d_model, new_vocab_size, bias=False)
50
  self.lm_head.weight = self.embedding.weight # Re-tie weights
51
  self.config.vocab_size = new_vocab_size
aetheris/modules/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (305 Bytes). View file
 
aetheris/modules/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (318 Bytes). View file
 
aetheris/modules/__pycache__/expert.cpython-310.pyc ADDED
Binary file (1.34 kB). View file
 
aetheris/modules/__pycache__/expert.cpython-313.pyc ADDED
Binary file (2.54 kB). View file
 
aetheris/modules/__pycache__/moe.cpython-310.pyc ADDED
Binary file (2.48 kB). View file
 
aetheris/modules/__pycache__/moe.cpython-313.pyc ADDED
Binary file (4.73 kB). View file
 
aetheris/modules/__pycache__/ssm.cpython-310.pyc ADDED
Binary file (3.64 kB). View file
 
aetheris/modules/__pycache__/ssm.cpython-313.pyc ADDED
Binary file (8.68 kB). View file
 
aetheris/trainer/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (197 Bytes). View file
 
aetheris/trainer/__pycache__/trainer.cpython-310.pyc ADDED
Binary file (4.11 kB). View file