Training in progress, step 120, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 84962944
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eefc043abf545b1a9056c0abf733392ba05d898ea8ec803b6799e50eeaa1f44c
|
| 3 |
size 84962944
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 43387339
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:68c8d28949f0d88f53c349673f9eb37b7c9d7b5324f0fea87d89f07ce4a3d0bf
|
| 3 |
size 43387339
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14709
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3da3a8ea9a349e34715a0e680e246db9bce3ac95cfe61981f857abc88096d4e9
|
| 3 |
size 14709
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8098b00a7c8df434ccbc8255df3dab9ab568965934b2c15b0908bddeb0e9b559
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"best_global_step":
|
| 3 |
-
"best_metric": 1.
|
| 4 |
-
"best_model_checkpoint": "./outputs/ctrl_abort_real_right/checkpoint-
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 5,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -954,6 +954,92 @@
|
|
| 954 |
"eval_samples_per_second": 13.609,
|
| 955 |
"eval_steps_per_second": 6.805,
|
| 956 |
"step": 110
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 957 |
}
|
| 958 |
],
|
| 959 |
"logging_steps": 1,
|
|
@@ -977,12 +1063,12 @@
|
|
| 977 |
"should_evaluate": false,
|
| 978 |
"should_log": false,
|
| 979 |
"should_save": true,
|
| 980 |
-
"should_training_stop":
|
| 981 |
},
|
| 982 |
"attributes": {}
|
| 983 |
}
|
| 984 |
},
|
| 985 |
-
"total_flos": 1.
|
| 986 |
"train_batch_size": 2,
|
| 987 |
"trial_name": null,
|
| 988 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_global_step": 120,
|
| 3 |
+
"best_metric": 1.2101209163665771,
|
| 4 |
+
"best_model_checkpoint": "./outputs/ctrl_abort_real_right/checkpoint-120",
|
| 5 |
+
"epoch": 0.2570969469737547,
|
| 6 |
"eval_steps": 5,
|
| 7 |
+
"global_step": 120,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 954 |
"eval_samples_per_second": 13.609,
|
| 955 |
"eval_steps_per_second": 6.805,
|
| 956 |
"step": 110
|
| 957 |
+
},
|
| 958 |
+
{
|
| 959 |
+
"epoch": 0.2378146759507231,
|
| 960 |
+
"grad_norm": 0.696169912815094,
|
| 961 |
+
"learning_rate": 2.0253513192751374e-07,
|
| 962 |
+
"loss": 1.1758,
|
| 963 |
+
"step": 111
|
| 964 |
+
},
|
| 965 |
+
{
|
| 966 |
+
"epoch": 0.2399571505088377,
|
| 967 |
+
"grad_norm": 0.6544473767280579,
|
| 968 |
+
"learning_rate": 1.6426572649021477e-07,
|
| 969 |
+
"loss": 1.1242,
|
| 970 |
+
"step": 112
|
| 971 |
+
},
|
| 972 |
+
{
|
| 973 |
+
"epoch": 0.24209962506695232,
|
| 974 |
+
"grad_norm": 0.8569499254226685,
|
| 975 |
+
"learning_rate": 1.2994041528833267e-07,
|
| 976 |
+
"loss": 1.2727,
|
| 977 |
+
"step": 113
|
| 978 |
+
},
|
| 979 |
+
{
|
| 980 |
+
"epoch": 0.24424209962506696,
|
| 981 |
+
"grad_norm": 0.85068279504776,
|
| 982 |
+
"learning_rate": 9.958719453803278e-08,
|
| 983 |
+
"loss": 1.2417,
|
| 984 |
+
"step": 114
|
| 985 |
+
},
|
| 986 |
+
{
|
| 987 |
+
"epoch": 0.24638457418318158,
|
| 988 |
+
"grad_norm": 0.8679143786430359,
|
| 989 |
+
"learning_rate": 7.32308207615351e-08,
|
| 990 |
+
"loss": 1.2296,
|
| 991 |
+
"step": 115
|
| 992 |
+
},
|
| 993 |
+
{
|
| 994 |
+
"epoch": 0.24638457418318158,
|
| 995 |
+
"eval_loss": 1.2105430364608765,
|
| 996 |
+
"eval_runtime": 1.1269,
|
| 997 |
+
"eval_samples_per_second": 14.198,
|
| 998 |
+
"eval_steps_per_second": 7.099,
|
| 999 |
+
"step": 115
|
| 1000 |
+
},
|
| 1001 |
+
{
|
| 1002 |
+
"epoch": 0.2485270487412962,
|
| 1003 |
+
"grad_norm": 0.674493670463562,
|
| 1004 |
+
"learning_rate": 5.089279059533658e-08,
|
| 1005 |
+
"loss": 1.1677,
|
| 1006 |
+
"step": 116
|
| 1007 |
+
},
|
| 1008 |
+
{
|
| 1009 |
+
"epoch": 0.25066952329941083,
|
| 1010 |
+
"grad_norm": 0.6304970979690552,
|
| 1011 |
+
"learning_rate": 3.25913232572489e-08,
|
| 1012 |
+
"loss": 1.1482,
|
| 1013 |
+
"step": 117
|
| 1014 |
+
},
|
| 1015 |
+
{
|
| 1016 |
+
"epoch": 0.25281199785752545,
|
| 1017 |
+
"grad_norm": 0.6986701488494873,
|
| 1018 |
+
"learning_rate": 1.834134568654333e-08,
|
| 1019 |
+
"loss": 1.1476,
|
| 1020 |
+
"step": 118
|
| 1021 |
+
},
|
| 1022 |
+
{
|
| 1023 |
+
"epoch": 0.25495447241564007,
|
| 1024 |
+
"grad_norm": 0.6568459868431091,
|
| 1025 |
+
"learning_rate": 8.15448036932176e-09,
|
| 1026 |
+
"loss": 1.109,
|
| 1027 |
+
"step": 119
|
| 1028 |
+
},
|
| 1029 |
+
{
|
| 1030 |
+
"epoch": 0.2570969469737547,
|
| 1031 |
+
"grad_norm": 0.8193196654319763,
|
| 1032 |
+
"learning_rate": 2.0390358590538507e-09,
|
| 1033 |
+
"loss": 1.1736,
|
| 1034 |
+
"step": 120
|
| 1035 |
+
},
|
| 1036 |
+
{
|
| 1037 |
+
"epoch": 0.2570969469737547,
|
| 1038 |
+
"eval_loss": 1.2101209163665771,
|
| 1039 |
+
"eval_runtime": 1.4407,
|
| 1040 |
+
"eval_samples_per_second": 11.106,
|
| 1041 |
+
"eval_steps_per_second": 5.553,
|
| 1042 |
+
"step": 120
|
| 1043 |
}
|
| 1044 |
],
|
| 1045 |
"logging_steps": 1,
|
|
|
|
| 1063 |
"should_evaluate": false,
|
| 1064 |
"should_log": false,
|
| 1065 |
"should_save": true,
|
| 1066 |
+
"should_training_stop": true
|
| 1067 |
},
|
| 1068 |
"attributes": {}
|
| 1069 |
}
|
| 1070 |
},
|
| 1071 |
+
"total_flos": 1.1935339253376e+16,
|
| 1072 |
"train_batch_size": 2,
|
| 1073 |
"trial_name": null,
|
| 1074 |
"trial_params": null
|