Spaces:
Paused
Paused
mayank1365 committed on
Commit ·
fe123ff
1
Parent(s): 0d40379
Fix CUDA device-side assert by adjusting max_prompt_length and disabling use_cache
Browse files
app.py
CHANGED
|
@@ -209,6 +209,7 @@ class SuspectXModel:
|
|
| 209 |
use_gradient_checkpointing="unsloth",
|
| 210 |
random_state=42,
|
| 211 |
)
|
|
|
|
| 212 |
|
| 213 |
print(f"✅ Model loaded: {self.config.MODEL_NAME}")
|
| 214 |
print(f" Trainable params: {sum(p.numel() for p in self.peft_model.parameters() if p.requires_grad):,}")
|
|
@@ -534,8 +535,8 @@ class SuspectXTrainer:
|
|
| 534 |
per_device_train_batch_size=self.config.BATCH_SIZE,
|
| 535 |
gradient_accumulation_steps=self.config.GRADIENT_ACCUM,
|
| 536 |
max_steps=self.config.NUM_EPISODES,
|
| 537 |
-
max_prompt_length=
|
| 538 |
-
max_completion_length=
|
| 539 |
learning_rate=1e-5, # Slightly higher for faster learning
|
| 540 |
logging_steps=1, # Log every step to see weight updates
|
| 541 |
save_steps=10,
|
|
|
|
| 209 |
use_gradient_checkpointing="unsloth",
|
| 210 |
random_state=42,
|
| 211 |
)
|
| 212 |
+
self.peft_model.config.use_cache = False # Critical for training stability
|
| 213 |
|
| 214 |
print(f"✅ Model loaded: {self.config.MODEL_NAME}")
|
| 215 |
print(f" Trainable params: {sum(p.numel() for p in self.peft_model.parameters() if p.requires_grad):,}")
|
|
|
|
| 535 |
per_device_train_batch_size=self.config.BATCH_SIZE,
|
| 536 |
gradient_accumulation_steps=self.config.GRADIENT_ACCUM,
|
| 537 |
max_steps=self.config.NUM_EPISODES,
|
| 538 |
+
max_prompt_length=self.config.MAX_SEQ_LENGTH // 4,
|
| 539 |
+
max_completion_length=self.config.MAX_SEQ_LENGTH // 8,
|
| 540 |
learning_rate=1e-5, # Slightly higher for faster learning
|
| 541 |
logging_steps=1, # Log every step to see weight updates
|
| 542 |
save_steps=10,
|