luohoa97 committed on
Commit
2abfd60
·
verified ·
1 Parent(s): 2032bce

Deploy BitNet-Transformer Trainer

Browse files
Files changed (1) hide show
  1. scripts/train_ai_model.py +1 -1
scripts/train_ai_model.py CHANGED
@@ -28,7 +28,7 @@ logger = logging.getLogger(__name__)
28
 
29
  # Hyperparameters
30
  EPOCHS = 100
31
- BATCH_SIZE = 1024 # Significant increase for T4/A100 utilization
32
  LR = 0.0003
33
  HIDDEN_DIM = 512
34
  LAYERS = 8
 
28
 
29
  # Hyperparameters
30
  EPOCHS = 100
31
+ BATCH_SIZE = 8192 # Aggressive batch size to saturate T4 GPU (16GB)
32
  LR = 0.0003
33
  HIDDEN_DIM = 512
34
  LAYERS = 8