ycchen
/

yc-test1

@@ -784,6 +784,9 @@ class QWenModel(QWenPreTrainedModel):
         self.post_init()
     def get_input_embeddings(self):
         return self.wte
@@ -926,8 +929,12 @@ class QWenModel(QWenPreTrainedModel):
             if output_hidden_states:
                 all_hidden_states = all_hidden_states + (hidden_states,)
-            if self.gradient_checkpointing and self.training:
                 def create_custom_forward(module):
                     def custom_forward(*inputs):
                         # None for past_key_value

         self.post_init()
+        # BUG: the skipped layer ids are hard-coded
+        self.skip_checkpointing_layer_ids = list(range(30))
     def get_input_embeddings(self):
         return self.wte
             if output_hidden_states:
                 all_hidden_states = all_hidden_states + (hidden_states,)
+            # BUG: this does not work
+            forward_checkpointing = (self.gradient_checkpointing and self.training)
+            if self.skip_checkpointing_layer_ids is not None and i in self.skip_checkpointing_layer_ids:
+                forward_checkpointing = False
+            if forward_checkpointing:
                 def create_custom_forward(module):
                     def custom_forward(*inputs):
                         # None for past_key_value