Wi11Chan
/

custom-myvit4segnew

Feature Extraction

Model card Files Files and versions

Wi11Chan committed on Jul 24, 2023

Commit

76a1ce2

·

1 Parent(s): eeac6f1

Upload ViTForSemanticSegmentation

Files changed (2) hide show

modeling_vit.py +12 -4
pytorch_model.bin +1 -1

modeling_vit.py CHANGED Viewed

@@ -206,7 +206,8 @@ class ViTSelfAttention(nn.Module):
     def transpose_for_scores(self, x: torch.Tensor) -> torch.Tensor:
         new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size)
         x = x.view(new_x_shape)
-        return x.permute(0, 2, 1, 3)
     def forward(
         self, hidden_states, head_mask: Optional[torch.Tensor] = None, output_attentions: bool = False
@@ -245,10 +246,17 @@ class ViTSelfAttention(nn.Module):
         #         query_layer, key_layer, value_layer, attn_bias=xops.LowerTriangularMask(), p=self.dropout_prob
         #     )
-        from flash_attn import flash_attn_qkvpacked_func, flash_attn_func
-        context_layer = flash_attn_func(query_layer, key_layer, value_layer, dropout_p=self.dropout_prob)
-        context_layer = context_layer.permute(0, 2, 1, 3).contiguous()
         new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,)
         context_layer = context_layer.view(new_context_layer_shape)
         outputs = (context_layer,)

     def transpose_for_scores(self, x: torch.Tensor) -> torch.Tensor:
         new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size)
         x = x.view(new_x_shape)
+        # return x.permute(0, 2, 1, 3)
+        return x
     def forward(
         self, hidden_states, head_mask: Optional[torch.Tensor] = None, output_attentions: bool = False
         #         query_layer, key_layer, value_layer, attn_bias=xops.LowerTriangularMask(), p=self.dropout_prob
         #     )
+        # from flash_attn import flash_attn_qkvpacked_func, flash_attn_func
+        from flash_attn.flash_attn_interface import flash_attn_unpadded_func
+        myseq = torch.tensor([0, query_layer.shape[1]], dtype=torch.int32, device=query_layer.device)
+        # myseq = torch.arange(0, (batch_size + 1) * seqlen, step=seqlen, dtype=torch.int32,
+        #                                 device=qkv.device)
+        context_layer = flash_attn_unpadded_func(query_layer.squeeze(), key_layer.squeeze(), value_layer.squeeze(),
+                                                 cu_seqlens_q=myseq, cu_seqlens_k=myseq, max_seqlen_q=query_layer.shape[1], max_seqlen_k=query_layer.shape[1],
+                                                 dropout_p=self.dropout_prob)
+        # context_layer = context_layer.permute(0, 2, 1, 3).contiguous()
+        context_layer = context_layer.unsqueeze(0).contiguous()
         new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,)
         context_layer = context_layer.view(new_context_layer_shape)
         outputs = (context_layer,)

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4b20e2f83b85cbd7bc4f8ee1a756bd503550dea7b76a5b0a16584263f69bbf3b
 size 345082557

 version https://git-lfs.github.com/spec/v1
+oid sha256:312d9a0cddd14deb1a040a7006433acdf9a934e8ff4b9f84b85b14e0e34f610b
 size 345082557