Training in progress, epoch 1, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step9700/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step9700/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step9700/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step9700/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step9700/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +160 -4
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1037269336
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:386a6380325bc3dff1a7a5f881832a0696cbe9be2672febd8c95a996479adb3e
|
| 3 |
size 1037269336
|
last-checkpoint/global_step9700/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7d96dfe1d3b0bce855880e2d23009bef0264fec55853ad94d2e36720de87856c
|
| 3 |
+
size 781993445
|
last-checkpoint/global_step9700/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dd77cdbaa722f4b8912db62f39f33a77ebcb2c4b56e744f47b25c61d4f150680
|
| 3 |
+
size 781993509
|
last-checkpoint/global_step9700/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:06050d6663481f1a7ff845243d9881b454d43f213a7fb01187ac4f95e030533e
|
| 3 |
+
size 781993509
|
last-checkpoint/global_step9700/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:61080b35f30c80f43393735f9295771ec864832f9102797e3e593e019c3378d5
|
| 3 |
+
size 781993509
|
last-checkpoint/global_step9700/mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7499f1c06e56dcadbbf5d0fd1a13a4f469aefe348f76c2b2bff829e5697961f1
|
| 3 |
+
size 2610290277
|
last-checkpoint/latest
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
global_step9700
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b56969535e9a8e88cd3829c988a0a37451d46c9a48a232e2bf2ff895e958e53f
|
| 3 |
size 15429
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ac5d72eb18852fad4db4fcc6f4250d07f49de688916884e0bd15cf332644e3c4
|
| 3 |
size 15429
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3e093e8dca30af25bb4868596fab940bd5b96385b2a5252906d4fb7506ec6e3c
|
| 3 |
size 15429
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a02a2ce27f65153b8be850fa84fb66458319a4fbe52b6b4116118eb9d4b7ccda
|
| 3 |
size 15429
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1401
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d5c6f3cc57d69dd40ef86ebd5faf9e78cc6a0d89512a7f5fd9a4c13cda1f059a
|
| 3 |
size 1401
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
-
"best_metric": 1.
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 1.
|
| 6 |
"eval_steps": 50,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -14984,6 +14984,162 @@
|
|
| 14984 |
"eval_samples_per_second": 174.882,
|
| 14985 |
"eval_steps_per_second": 10.967,
|
| 14986 |
"step": 9600
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14987 |
}
|
| 14988 |
],
|
| 14989 |
"logging_steps": 5,
|
|
@@ -15012,7 +15168,7 @@
|
|
| 15012 |
"attributes": {}
|
| 15013 |
}
|
| 15014 |
},
|
| 15015 |
-
"total_flos": 2.
|
| 15016 |
"train_batch_size": 4,
|
| 15017 |
"trial_name": null,
|
| 15018 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
+
"best_metric": 1.521620512008667,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 1.4100886756796047,
|
| 6 |
"eval_steps": 50,
|
| 7 |
+
"global_step": 9700,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 14984 |
"eval_samples_per_second": 174.882,
|
| 14985 |
"eval_steps_per_second": 10.967,
|
| 14986 |
"step": 9600
|
| 14987 |
+
},
|
| 14988 |
+
{
|
| 14989 |
+
"epoch": 1.3962785288559383,
|
| 14990 |
+
"grad_norm": 2.4075584411621094,
|
| 14991 |
+
"learning_rate": 3.9504059213659793e-07,
|
| 14992 |
+
"loss": 1.5897,
|
| 14993 |
+
"step": 9605
|
| 14994 |
+
},
|
| 14995 |
+
{
|
| 14996 |
+
"epoch": 1.3970053786887628,
|
| 14997 |
+
"grad_norm": 2.440012216567993,
|
| 14998 |
+
"learning_rate": 3.8514051886811723e-07,
|
| 14999 |
+
"loss": 1.6766,
|
| 15000 |
+
"step": 9610
|
| 15001 |
+
},
|
| 15002 |
+
{
|
| 15003 |
+
"epoch": 1.3977322285215874,
|
| 15004 |
+
"grad_norm": 2.658358335494995,
|
| 15005 |
+
"learning_rate": 3.7536559858959155e-07,
|
| 15006 |
+
"loss": 1.6694,
|
| 15007 |
+
"step": 9615
|
| 15008 |
+
},
|
| 15009 |
+
{
|
| 15010 |
+
"epoch": 1.398459078354412,
|
| 15011 |
+
"grad_norm": 2.324554443359375,
|
| 15012 |
+
"learning_rate": 3.657158559093597e-07,
|
| 15013 |
+
"loss": 1.7643,
|
| 15014 |
+
"step": 9620
|
| 15015 |
+
},
|
| 15016 |
+
{
|
| 15017 |
+
"epoch": 1.3991859281872365,
|
| 15018 |
+
"grad_norm": 2.6129276752471924,
|
| 15019 |
+
"learning_rate": 3.56191315120649e-07,
|
| 15020 |
+
"loss": 1.5292,
|
| 15021 |
+
"step": 9625
|
| 15022 |
+
},
|
| 15023 |
+
{
|
| 15024 |
+
"epoch": 1.399912778020061,
|
| 15025 |
+
"grad_norm": 2.4178617000579834,
|
| 15026 |
+
"learning_rate": 3.467920002014695e-07,
|
| 15027 |
+
"loss": 1.556,
|
| 15028 |
+
"step": 9630
|
| 15029 |
+
},
|
| 15030 |
+
{
|
| 15031 |
+
"epoch": 1.4006396278528856,
|
| 15032 |
+
"grad_norm": 2.558295726776123,
|
| 15033 |
+
"learning_rate": 3.375179348145972e-07,
|
| 15034 |
+
"loss": 1.5579,
|
| 15035 |
+
"step": 9635
|
| 15036 |
+
},
|
| 15037 |
+
{
|
| 15038 |
+
"epoch": 1.4013664776857102,
|
| 15039 |
+
"grad_norm": 2.540734052658081,
|
| 15040 |
+
"learning_rate": 3.283691423074685e-07,
|
| 15041 |
+
"loss": 1.6343,
|
| 15042 |
+
"step": 9640
|
| 15043 |
+
},
|
| 15044 |
+
{
|
| 15045 |
+
"epoch": 1.4020933275185348,
|
| 15046 |
+
"grad_norm": 2.0778424739837646,
|
| 15047 |
+
"learning_rate": 3.193456457121636e-07,
|
| 15048 |
+
"loss": 1.5255,
|
| 15049 |
+
"step": 9645
|
| 15050 |
+
},
|
| 15051 |
+
{
|
| 15052 |
+
"epoch": 1.402820177351359,
|
| 15053 |
+
"grad_norm": 2.8635857105255127,
|
| 15054 |
+
"learning_rate": 3.1044746774532277e-07,
|
| 15055 |
+
"loss": 1.604,
|
| 15056 |
+
"step": 9650
|
| 15057 |
+
},
|
| 15058 |
+
{
|
| 15059 |
+
"epoch": 1.402820177351359,
|
| 15060 |
+
"eval_loss": 1.5223361253738403,
|
| 15061 |
+
"eval_runtime": 20.5763,
|
| 15062 |
+
"eval_samples_per_second": 160.428,
|
| 15063 |
+
"eval_steps_per_second": 10.06,
|
| 15064 |
+
"step": 9650
|
| 15065 |
+
},
|
| 15066 |
+
{
|
| 15067 |
+
"epoch": 1.4035470271841837,
|
| 15068 |
+
"grad_norm": 2.443467617034912,
|
| 15069 |
+
"learning_rate": 3.0167463080810214e-07,
|
| 15070 |
+
"loss": 1.6844,
|
| 15071 |
+
"step": 9655
|
| 15072 |
+
},
|
| 15073 |
+
{
|
| 15074 |
+
"epoch": 1.4042738770170082,
|
| 15075 |
+
"grad_norm": 2.570190906524658,
|
| 15076 |
+
"learning_rate": 2.9302715698610123e-07,
|
| 15077 |
+
"loss": 1.6661,
|
| 15078 |
+
"step": 9660
|
| 15079 |
+
},
|
| 15080 |
+
{
|
| 15081 |
+
"epoch": 1.4050007268498328,
|
| 15082 |
+
"grad_norm": 2.4715726375579834,
|
| 15083 |
+
"learning_rate": 2.845050680493296e-07,
|
| 15084 |
+
"loss": 1.579,
|
| 15085 |
+
"step": 9665
|
| 15086 |
+
},
|
| 15087 |
+
{
|
| 15088 |
+
"epoch": 1.4057275766826574,
|
| 15089 |
+
"grad_norm": 2.529876947402954,
|
| 15090 |
+
"learning_rate": 2.761083854521403e-07,
|
| 15091 |
+
"loss": 1.7274,
|
| 15092 |
+
"step": 9670
|
| 15093 |
+
},
|
| 15094 |
+
{
|
| 15095 |
+
"epoch": 1.406454426515482,
|
| 15096 |
+
"grad_norm": 2.4188828468322754,
|
| 15097 |
+
"learning_rate": 2.678371303331627e-07,
|
| 15098 |
+
"loss": 1.5238,
|
| 15099 |
+
"step": 9675
|
| 15100 |
+
},
|
| 15101 |
+
{
|
| 15102 |
+
"epoch": 1.4071812763483065,
|
| 15103 |
+
"grad_norm": 2.511361598968506,
|
| 15104 |
+
"learning_rate": 2.5969132351527523e-07,
|
| 15105 |
+
"loss": 1.5761,
|
| 15106 |
+
"step": 9680
|
| 15107 |
+
},
|
| 15108 |
+
{
|
| 15109 |
+
"epoch": 1.407908126181131,
|
| 15110 |
+
"grad_norm": 2.82676362991333,
|
| 15111 |
+
"learning_rate": 2.5167098550553806e-07,
|
| 15112 |
+
"loss": 1.6957,
|
| 15113 |
+
"step": 9685
|
| 15114 |
+
},
|
| 15115 |
+
{
|
| 15116 |
+
"epoch": 1.4086349760139556,
|
| 15117 |
+
"grad_norm": 2.6926026344299316,
|
| 15118 |
+
"learning_rate": 2.437761364951492e-07,
|
| 15119 |
+
"loss": 1.6426,
|
| 15120 |
+
"step": 9690
|
| 15121 |
+
},
|
| 15122 |
+
{
|
| 15123 |
+
"epoch": 1.40936182584678,
|
| 15124 |
+
"grad_norm": 2.8157596588134766,
|
| 15125 |
+
"learning_rate": 2.36006796359366e-07,
|
| 15126 |
+
"loss": 1.6126,
|
| 15127 |
+
"step": 9695
|
| 15128 |
+
},
|
| 15129 |
+
{
|
| 15130 |
+
"epoch": 1.4100886756796047,
|
| 15131 |
+
"grad_norm": 2.3840818405151367,
|
| 15132 |
+
"learning_rate": 2.2836298465750569e-07,
|
| 15133 |
+
"loss": 1.551,
|
| 15134 |
+
"step": 9700
|
| 15135 |
+
},
|
| 15136 |
+
{
|
| 15137 |
+
"epoch": 1.4100886756796047,
|
| 15138 |
+
"eval_loss": 1.521620512008667,
|
| 15139 |
+
"eval_runtime": 19.0149,
|
| 15140 |
+
"eval_samples_per_second": 173.601,
|
| 15141 |
+
"eval_steps_per_second": 10.886,
|
| 15142 |
+
"step": 9700
|
| 15143 |
}
|
| 15144 |
],
|
| 15145 |
"logging_steps": 5,
|
|
|
|
| 15168 |
"attributes": {}
|
| 15169 |
}
|
| 15170 |
},
|
| 15171 |
+
"total_flos": 2.5325929674917806e+18,
|
| 15172 |
"train_batch_size": 4,
|
| 15173 |
"trial_name": null,
|
| 15174 |
"trial_params": null
|