yagizdevre committed on
Commit
44d2569
·
1 Parent(s): 009ff4d
Files changed (2) hide show
  1. __pycache__/attn.cpython-312.pyc +0 -0
  2. attn.py +1 -2
__pycache__/attn.cpython-312.pyc CHANGED
Binary files a/__pycache__/attn.cpython-312.pyc and b/__pycache__/attn.cpython-312.pyc differ
 
attn.py CHANGED
@@ -37,7 +37,6 @@ def _get_alibi_slopes(self, n_heads: int, interpolation_factor: float = 0.25):
37
  slopes = slopes_power_of_two + extra_slopes_trunc
38
  slopes = torch.tensor(slopes, device=self.device, dtype=torch.float32)
39
  slopes = slopes * interpolation_factor # https://arxiv.org/pdf/2310.13017
40
- slopes = slopes.to(torch.float32)
41
  return slopes
42
 
43
 
@@ -127,7 +126,7 @@ class Attention(nn.Module):
127
  extra_slopes = self._generate_slopes(2 * n)
128
  extra_slopes_trunc = extra_slopes[0::2][: num_heads - n]
129
  slopes = slopes_power_of_two + extra_slopes_trunc
130
- slopes = torch.tensor(slopes, device=torch.device("cuda"))
131
  slopes = slopes * interpolation_factor # https://arxiv.org/pdf/2310.13017
132
  return slopes
133
 
 
37
  slopes = slopes_power_of_two + extra_slopes_trunc
38
  slopes = torch.tensor(slopes, device=self.device, dtype=torch.float32)
39
  slopes = slopes * interpolation_factor # https://arxiv.org/pdf/2310.13017
 
40
  return slopes
41
 
42
 
 
126
  extra_slopes = self._generate_slopes(2 * n)
127
  extra_slopes_trunc = extra_slopes[0::2][: num_heads - n]
128
  slopes = slopes_power_of_two + extra_slopes_trunc
129
+ slopes = torch.tensor(slopes, device=torch.device("cuda"), dtype=torch.float32)
130
  slopes = slopes * interpolation_factor # https://arxiv.org/pdf/2310.13017
131
  return slopes
132