Tonic committed on
Commit
4e59f6d
·
1 Parent(s): 8b56686

increases batchsize and gradient accumulation steps in memory optimized

Browse files
config/train_gpt_oss_openhermes_fr_memory_optimized.py CHANGED
@@ -42,8 +42,8 @@ config = GPTOSSEnhancedCustomConfig(
42
  # ============================================================================
43
  # Batch configuration following memory optimization principles
44
  num_train_epochs=1.0, # Single epoch to reduce memory pressure
45
- batch_size=2, # Reduced from 6 for memory efficiency
46
- gradient_accumulation_steps=16, # Increased to maintain effective batch size 32
47
 
48
  # Learning rate optimized for single epoch + memory constraints
49
  learning_rate=2e-4, # Standard GPT-OSS learning rate
 
42
  # ============================================================================
43
  # Batch configuration following memory optimization principles
44
  num_train_epochs=1.0, # Single epoch to reduce memory pressure
45
+ batch_size=8, # Increased from 2 to 8 for higher throughput
46
+ gradient_accumulation_steps=8, # Reduced from 16; effective batch size is now 8 x 8 = 64
47
 
48
  # Learning rate optimized for single epoch + memory constraints
49
  learning_rate=2e-4, # Standard GPT-OSS learning rate