macto
/

aicas-test

@@ -180,7 +180,6 @@ class FlashSelfAttention(torch.nn.Module):
         return rearrange(output, '(b s) ... -> b s ...', b=batch)
     def forward(self, q, k, v, attention_mask=None):
-        q, k, v = q.to(torch.bfloat16), k.to(torch.bfloat16), v.to(to(torch.bfloat16))
         assert all((i.dtype in [torch.float16, torch.bfloat16] for i in (q, k, v)))
         assert all((i.is_cuda for i in (q, k, v)))
         batch_size, seqlen_q = q.shape[0], q.shape[1]

         return rearrange(output, '(b s) ... -> b s ...', b=batch)
     def forward(self, q, k, v, attention_mask=None):
         assert all((i.dtype in [torch.float16, torch.bfloat16] for i in (q, k, v)))
         assert all((i.is_cuda for i in (q, k, v)))
         batch_size, seqlen_q = q.shape[0], q.shape[1]