luxopes
/

NanoGPT-X_Base

Model card Files Files and versions

luxopes committed on Jan 11

Commit

776b5b5

·

verified ·

1 Parent(s): 711e74d

Update train.py

Files changed (1) hide show

train.py +3 -1

train.py CHANGED Viewed

@@ -282,7 +282,7 @@ def train(
     grad_accum=8,
     epochs=1,
     lr=1e-5,
-    warmup_steps=0,
 ):
     accelerator = Accelerator(
         mixed_precision="bf16" if torch.cuda.is_bf16_supported() else "fp16",
@@ -491,6 +491,7 @@ if __name__ == "__main__":
     model = Transformer(args)
     RESUME_FROM = "checkpoints/step_200000.pt"
     if os.path.exists(RESUME_FROM):
@@ -505,6 +506,7 @@ if __name__ == "__main__":
             # Old format: checkpoint is directly the model state_dict
             model.load_state_dict(checkpoint)
             print(f"[Resume] Loaded model (old format)")
     train(
         model,

     grad_accum=8,
     epochs=1,
     lr=1e-5,
+    warmup_steps=500,
 ):
     accelerator = Accelerator(
         mixed_precision="bf16" if torch.cuda.is_bf16_supported() else "fp16",
     model = Transformer(args)
+    '''
     RESUME_FROM = "checkpoints/step_200000.pt"
     if os.path.exists(RESUME_FROM):
             # Old format: checkpoint is directly the model state_dict
             model.load_state_dict(checkpoint)
             print(f"[Resume] Loaded model (old format)")
+    '''
     train(
         model,