tangledgroup
/

tangled-alpha-0.5-core

Text Generation

Model card Files and versions

mtasic85 committed on Mar 2, 2025

Commit

8500ccf

·

1 Parent(s): 869eee4

pretrain core model

Files changed (1) hide show

scripts/pretrain_core_model.yaml +2 -2

scripts/pretrain_core_model.yaml CHANGED Viewed

@@ -85,7 +85,7 @@ train:
   max_norm: 1.0
   #   (type: float, default: 4e-05)
-  min_lr: 1e-05
 # Evaluation-related arguments. See ``litgpt.args.EvalArgs`` for details
 eval:
@@ -114,7 +114,7 @@ optimizer:
   # class_path: bitsandbytes.optim.PagedAdamW8bit
   init_args:
     # (type: float, default: 0.001)
-    lr: 1e-4
     # (type: float, default: 0.01)
     weight_decay: 0.01
     # (type: tuple, default: (0.9,0.999))

   max_norm: 1.0
   #   (type: float, default: 4e-05)
+  min_lr: 3e-5
 # Evaluation-related arguments. See ``litgpt.args.EvalArgs`` for details
 eval:
   # class_path: bitsandbytes.optim.PagedAdamW8bit
   init_args:
     # (type: float, default: 0.001)
+    lr: 3e-4
     # (type: float, default: 0.01)
     weight_decay: 0.01
     # (type: tuple, default: (0.9,0.999))