Update src/pipeline.py
Browse files — src/pipeline.py: +2 additions, −1 deletion
src/pipeline.py
CHANGED
|
@@ -923,7 +923,7 @@ def xattn1(query, key, value, attn_mask=None, dropout_p=0.0,
|
|
| 923 |
def xattn1(query, key, value, attn_mask=None, dropout_p=0.0,
|
| 924 |
is_causal=False, scale=None, enable_gqa=False) -> torch.Tensor:
|
| 925 |
device = query.device
|
| 926 |
-
query, key, value
|
| 927 |
with torch.autocast(device_type="cpu", dtype=torch.bfloat16):
|
| 928 |
# Dynamically handle dimensions
|
| 929 |
if query.ndim == 2:
|
|
@@ -959,6 +959,7 @@ def xattn1(query, key, value, attn_mask=None, dropout_p=0.0,
|
|
| 959 |
)
|
| 960 |
|
| 961 |
if attn_mask is not None:
|
|
|
|
| 962 |
if attn_mask.ndim == 2:
|
| 963 |
attn_mask = attn_mask.unsqueeze(0).unsqueeze(0)
|
| 964 |
elif attn_mask.ndim == 3:
|
|
|
|
| 923 |
def xattn1(query, key, value, attn_mask=None, dropout_p=0.0,
|
| 924 |
is_causal=False, scale=None, enable_gqa=False) -> torch.Tensor:
|
| 925 |
device = query.device
|
| 926 |
+
query, key, value = query.cpu(), key.cpu(), value.cpu()
|
| 927 |
with torch.autocast(device_type="cpu", dtype=torch.bfloat16):
|
| 928 |
# Dynamically handle dimensions
|
| 929 |
if query.ndim == 2:
|
|
|
|
| 959 |
)
|
| 960 |
|
| 961 |
if attn_mask is not None:
|
| 962 |
+
attn_mask = attn_mask.cpu()
|
| 963 |
if attn_mask.ndim == 2:
|
| 964 |
attn_mask = attn_mask.unsqueeze(0).unsqueeze(0)
|
| 965 |
elif attn_mask.ndim == 3:
|