Update modelling_RW.py
#13
by
SpiridonSunRotator
- opened
- modelling_RW.py +3 -3
modelling_RW.py
CHANGED
|
@@ -76,8 +76,8 @@ class RotaryEmbedding(torch.nn.Module):
|
|
| 76 |
freqs = torch.einsum("i,j->ij", t, self.inv_freq)
|
| 77 |
emb = torch.cat((freqs, freqs), dim=-1).to(device)
|
| 78 |
|
| 79 |
-
if dtype in [torch.float16, torch.bfloat16]:
|
| 80 |
-
emb = emb.float()
|
| 81 |
|
| 82 |
self.cos_cached = emb.cos()[None, :, :]
|
| 83 |
self.sin_cached = emb.sin()[None, :, :]
|
|
@@ -89,7 +89,7 @@ class RotaryEmbedding(torch.nn.Module):
|
|
| 89 |
|
| 90 |
def forward(self, q, k):
|
| 91 |
batch, seq_len, head_dim = q.shape
|
| 92 |
-
cos, sin = self.cos_sin(seq_len, q.device)
|
| 93 |
return (q * cos) + (rotate_half(q) * sin), (k * cos) + (rotate_half(k) * sin)
|
| 94 |
|
| 95 |
|
|
|
|
| 76 |
freqs = torch.einsum("i,j->ij", t, self.inv_freq)
|
| 77 |
emb = torch.cat((freqs, freqs), dim=-1).to(device)
|
| 78 |
|
| 79 |
+
if dtype != emb.dtype:
|
| 80 |
+
emb = emb.to(dtype)
|
| 81 |
|
| 82 |
self.cos_cached = emb.cos()[None, :, :]
|
| 83 |
self.sin_cached = emb.sin()[None, :, :]
|
|
|
|
| 89 |
|
| 90 |
def forward(self, q, k):
|
| 91 |
batch, seq_len, head_dim = q.shape
|
| 92 |
+
cos, sin = self.cos_sin(seq_len, q.device, q.dtype)
|
| 93 |
return (q * cos) + (rotate_half(q) * sin), (k * cos) + (rotate_half(k) * sin)
|
| 94 |
|
| 95 |
|