trainer

Paused

mayank1365 committed on Apr 26

Commit

fe123ff

1 Parent(s): 0d40379

Fix CUDA device-side assert by adjusting max_prompt_length and disabling use_cache

Files changed (1) hide show

app.py CHANGED Viewed

@@ -209,6 +209,7 @@ class SuspectXModel:
             use_gradient_checkpointing="unsloth",
             random_state=42,
         )
         print(f"✅ Model loaded: {self.config.MODEL_NAME}")
         print(f"   Trainable params: {sum(p.numel() for p in self.peft_model.parameters() if p.requires_grad):,}")
@@ -534,8 +535,8 @@ class SuspectXTrainer:
             per_device_train_batch_size=self.config.BATCH_SIZE,
             gradient_accumulation_steps=self.config.GRADIENT_ACCUM,
             max_steps=self.config.NUM_EPISODES,
-            max_prompt_length=512,  # Increased to fit conversation history
-            max_completion_length=256,
             learning_rate=1e-5,  # Slightly higher for faster learning
             logging_steps=1,     # Log every step to see weight updates
             save_steps=10,

             use_gradient_checkpointing="unsloth",
             random_state=42,
         )
+        self.peft_model.config.use_cache = False  # Critical for training stability
         print(f"✅ Model loaded: {self.config.MODEL_NAME}")
         print(f"   Trainable params: {sum(p.numel() for p in self.peft_model.parameters() if p.requires_grad):,}")
             per_device_train_batch_size=self.config.BATCH_SIZE,
             gradient_accumulation_steps=self.config.GRADIENT_ACCUM,
             max_steps=self.config.NUM_EPISODES,
+            max_prompt_length=self.config.MAX_SEQ_LENGTH // 4,
+            max_completion_length=self.config.MAX_SEQ_LENGTH // 8,
             learning_rate=1e-5,  # Slightly higher for faster learning
             logging_steps=1,     # Log every step to see weight updates
             save_steps=10,