Commit
·
44d2569
1
Parent(s):
009ff4d
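fix: set dtype=torch.float32 when creating the ALiBi slopes tensor and drop the redundant .to(torch.float32) cast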
Browse files
- __pycache__/attn.cpython-312.pyc +0 -0
- attn.py +1 -2
__pycache__/attn.cpython-312.pyc
CHANGED
|
Binary files a/__pycache__/attn.cpython-312.pyc and b/__pycache__/attn.cpython-312.pyc differ
|
|
|
attn.py
CHANGED
|
@@ -37,7 +37,6 @@ def _get_alibi_slopes(self, n_heads: int, interpolation_factor: float = 0.25):
|
|
| 37 |
slopes = slopes_power_of_two + extra_slopes_trunc
|
| 38 |
slopes = torch.tensor(slopes, device=self.device, dtype=torch.float32)
|
| 39 |
slopes = slopes * interpolation_factor # https://arxiv.org/pdf/2310.13017
|
| 40 |
-
slopes = slopes.to(torch.float32)
|
| 41 |
return slopes
|
| 42 |
|
| 43 |
|
|
@@ -127,7 +126,7 @@ class Attention(nn.Module):
|
|
| 127 |
extra_slopes = self._generate_slopes(2 * n)
|
| 128 |
extra_slopes_trunc = extra_slopes[0::2][: num_heads - n]
|
| 129 |
slopes = slopes_power_of_two + extra_slopes_trunc
|
| 130 |
-
slopes = torch.tensor(slopes, device=torch.device("cuda"))
|
| 131 |
slopes = slopes * interpolation_factor # https://arxiv.org/pdf/2310.13017
|
| 132 |
return slopes
|
| 133 |
|
|
|
|
| 37 |
slopes = slopes_power_of_two + extra_slopes_trunc
|
| 38 |
slopes = torch.tensor(slopes, device=self.device, dtype=torch.float32)
|
| 39 |
slopes = slopes * interpolation_factor # https://arxiv.org/pdf/2310.13017
|
|
|
|
| 40 |
return slopes
|
| 41 |
|
| 42 |
|
|
|
|
| 126 |
extra_slopes = self._generate_slopes(2 * n)
|
| 127 |
extra_slopes_trunc = extra_slopes[0::2][: num_heads - n]
|
| 128 |
slopes = slopes_power_of_two + extra_slopes_trunc
|
| 129 |
+
slopes = torch.tensor(slopes, device=torch.device("cuda"), dtype=torch.float32)
|
| 130 |
slopes = slopes * interpolation_factor # https://arxiv.org/pdf/2310.13017
|
| 131 |
return slopes
|
| 132 |
|