pretrain core model
Browse files
scripts/pretrain_core_model.yaml
CHANGED
|
@@ -85,7 +85,7 @@ train:
|
|
| 85 |
max_norm: 1.0
|
| 86 |
|
| 87 |
# (type: float, default: 4e-05)
|
| 88 |
-
min_lr:
|
| 89 |
|
| 90 |
# Evaluation-related arguments. See ``litgpt.args.EvalArgs`` for details
|
| 91 |
eval:
|
|
@@ -114,7 +114,7 @@ optimizer:
|
|
| 114 |
# class_path: bitsandbytes.optim.PagedAdamW8bit
|
| 115 |
init_args:
|
| 116 |
# (type: float, default: 0.001)
|
| 117 |
-
lr:
|
| 118 |
# (type: float, default: 0.01)
|
| 119 |
weight_decay: 0.01
|
| 120 |
# (type: tuple, default: (0.9,0.999))
|
|
|
|
| 85 |
max_norm: 1.0
|
| 86 |
|
| 87 |
# (type: float, default: 4e-05)
|
| 88 |
+
min_lr: 3e-5
|
| 89 |
|
| 90 |
# Evaluation-related arguments. See ``litgpt.args.EvalArgs`` for details
|
| 91 |
eval:
|
|
|
|
| 114 |
# class_path: bitsandbytes.optim.PagedAdamW8bit
|
| 115 |
init_args:
|
| 116 |
# (type: float, default: 0.001)
|
| 117 |
+
lr: 3e-4
|
| 118 |
# (type: float, default: 0.01)
|
| 119 |
weight_decay: 0.01
|
| 120 |
# (type: tuple, default: (0.9,0.999))
|