mayank1365 committed on
Commit
fe123ff
·
1 Parent(s): 0d40379

Fix CUDA device-side assert by adjusting max_prompt_length and disabling use_cache

Browse files
Files changed (1) hide show
  1. app.py +3 -2
app.py CHANGED
@@ -209,6 +209,7 @@ class SuspectXModel:
209
  use_gradient_checkpointing="unsloth",
210
  random_state=42,
211
  )
 
212
 
213
  print(f"✅ Model loaded: {self.config.MODEL_NAME}")
214
  print(f" Trainable params: {sum(p.numel() for p in self.peft_model.parameters() if p.requires_grad):,}")
@@ -534,8 +535,8 @@ class SuspectXTrainer:
534
  per_device_train_batch_size=self.config.BATCH_SIZE,
535
  gradient_accumulation_steps=self.config.GRADIENT_ACCUM,
536
  max_steps=self.config.NUM_EPISODES,
537
- max_prompt_length=512, # Increased to fit conversation history
538
- max_completion_length=256,
539
  learning_rate=1e-5, # Slightly higher for faster learning
540
  logging_steps=1, # Log every step to see weight updates
541
  save_steps=10,
 
209
  use_gradient_checkpointing="unsloth",
210
  random_state=42,
211
  )
212
+ self.peft_model.config.use_cache = False # Critical for training stability
213
 
214
  print(f"✅ Model loaded: {self.config.MODEL_NAME}")
215
  print(f" Trainable params: {sum(p.numel() for p in self.peft_model.parameters() if p.requires_grad):,}")
 
535
  per_device_train_batch_size=self.config.BATCH_SIZE,
536
  gradient_accumulation_steps=self.config.GRADIENT_ACCUM,
537
  max_steps=self.config.NUM_EPISODES,
538
+ max_prompt_length=self.config.MAX_SEQ_LENGTH // 4,
539
+ max_completion_length=self.config.MAX_SEQ_LENGTH // 8,
540
  learning_rate=1e-5, # Slightly higher for faster learning
541
  logging_steps=1, # Log every step to see weight updates
542
  save_steps=10,