Commit 8191fb5
Parent(s): 47450e4
Upload LUAR
model.py CHANGED
@@ -16,11 +16,15 @@ class SelfAttention(nn.Module):
         super(SelfAttention, self).__init__()
 
     def forward(self, k, q, v):
-        d_k = q.size(-1)
-        scores = torch.matmul(k, q.transpose(-2, -1)) / math.sqrt(d_k)
-        p_attn = F.softmax(scores, dim=-1)
+        if hasattr(F, "scaled_dot_product_attention") and torch.cuda.is_available():
+            with torch.backends.cuda.sdp_kernel(enable_flash=True, enable_mem_efficient=True):
+                return F.scaled_dot_product_attention(k, q, v)
+        else:
+            d_k = q.size(-1)
+            scores = torch.matmul(k, q.transpose(-2, -1)) / math.sqrt(d_k)
+            p_attn = F.softmax(scores, dim=-1)
 
-        return torch.matmul(p_attn, v)
+            return torch.matmul(p_attn, v)
 
 class LUAR(PreTrainedModel):
     """Defines the LUAR model.

@@ -85,4 +89,4 @@ class LUAR(PreTrainedModel):
         """
         output = self.get_episode_embeddings(input_ids, attention_mask, output_attentions)
 
-        return output
+        return output
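For reference, a minimal self-contained sketch of the updated attention path: the forward body is taken from the hunk above, while the imports, the no-argument constructor, and the tensor shapes below are assumptions, not taken from the rest of model.py.

import math

import torch
import torch.nn.functional as F
from torch import nn

class SelfAttention(nn.Module):
    def __init__(self):
        super(SelfAttention, self).__init__()

    def forward(self, k, q, v):
        # Fused path: dispatch to the flash / memory-efficient kernels when
        # the op exists (PyTorch >= 2.0) and a GPU is present.
        if hasattr(F, "scaled_dot_product_attention") and torch.cuda.is_available():
            with torch.backends.cuda.sdp_kernel(enable_flash=True, enable_mem_efficient=True):
                return F.scaled_dot_product_attention(k, q, v)
        else:
            # Fallback: the original manual implementation. Note that k sits
            # on the query side of the matmul, matching the (k, q, v) order
            # passed to scaled_dot_product_attention above.
            d_k = q.size(-1)
            scores = torch.matmul(k, q.transpose(-2, -1)) / math.sqrt(d_k)
            p_attn = F.softmax(scores, dim=-1)

            return torch.matmul(p_attn, v)

# Illustrative shapes only: (batch, heads, seq_len, head_dim).
attn = SelfAttention()
k, q, v = (torch.randn(2, 8, 16, 64) for _ in range(3))
out = attn(k, q, v)
print(out.shape)  # torch.Size([2, 8, 16, 64])

Both branches return a tensor shaped like v. One caveat: torch.backends.cuda.sdp_kernel is a CUDA-specific context manager that newer PyTorch releases deprecate in favor of torch.nn.attention.sdpa_kernel, so the hasattr guard alone does not future-proof the fast path.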