natmin322
/

Continual

natmin322 committed on Mar 24

Commit

454979d

1 Parent(s): 5e23c54

v10a

Files changed (2) hide show

improve_gainlora/src/t5_gainlora_inflora.py CHANGED Viewed

@@ -681,8 +681,7 @@ class T5Attention(nn.Module):
                 position_bias = torch.zeros(
                     (1, self.n_heads, real_seq_length, key_length), device=scores.device, dtype=scores.dtype
                 )
-                if self.gradient_checkpointing and self.training:
-                    position_bias.requires_grad = True
             else:
                 position_bias = self.compute_bias(real_seq_length, key_length, device=scores.device)

                 position_bias = torch.zeros(
                     (1, self.n_heads, real_seq_length, key_length), device=scores.device, dtype=scores.dtype
                 )
             else:
                 position_bias = self.compute_bias(real_seq_length, key_length, device=scores.device)

improve_gainlora/src/t5_specroute.py CHANGED Viewed

@@ -663,10 +663,8 @@ class T5Stack(T5PreTrainedModel):
             if self.gradient_checkpointing and self.training:
                 def create_custom_forward(module):
                     def custom_forward(*inputs):
-                        return tuple(module(*inputs, use_cache, output_attentions,
-                                           key_attention_weights=key_attention_weights))
                     return custom_forward
                 # Use _gradient_checkpointing_func (set by new-format
                 # gradient_checkpointing_enable) if available, else fallback
                 gc_fn = getattr(self, '_gradient_checkpointing_func', None)

             if self.gradient_checkpointing and self.training:
                 def create_custom_forward(module):
                     def custom_forward(*inputs):
+                        return tuple(module(*inputs, use_cache, output_attentions))
                     return custom_forward
                 # Use _gradient_checkpointing_func (set by new-format
                 # gradient_checkpointing_enable) if available, else fallback
                 gc_fn = getattr(self, '_gradient_checkpointing_func', None)