nathanrchn
/

phi

@@ -382,7 +382,6 @@ class SelfAttention(nn.Module):
         attention = self.drop(attention)
         output = torch.einsum("bhts,bshd->bthd", attention, v)
-        print(output[0][0][0])
         return output

         attention = self.drop(attention)
         output = torch.einsum("bhts,bshd->bthd", attention, v)
         return output