Training in progress, step 5400, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 527048968
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d4eaaf03b78bd99375228f4e3780fd0588fc02582773008769bf7177550d4b48
|
| 3 |
size 527048968
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1054135994
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:87baf48f45b21dd7d9a1576417255bf546e8558ecd18cd75468fb9ffa32e54f8
|
| 3 |
size 1054135994
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cde3dd12f91204388f748ef22c42d0af6362a11af96ae2767080c430a3556fd7
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0d8a06f6e764a4c806b3b6aa6930ec3c05d14769ecbf5db87f5122a0c04e591e
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.7166205048561096,
|
| 3 |
"best_model_checkpoint": "./output/checkpoint-450",
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 150,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -5082,6 +5082,151 @@
|
|
| 5082 |
"EMA_steps_per_second": 25.783,
|
| 5083 |
"epoch": 228.2608695652174,
|
| 5084 |
"step": 5250
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5085 |
}
|
| 5086 |
],
|
| 5087 |
"logging_steps": 10,
|
|
@@ -5101,7 +5246,7 @@
|
|
| 5101 |
"attributes": {}
|
| 5102 |
}
|
| 5103 |
},
|
| 5104 |
-
"total_flos": 1.
|
| 5105 |
"train_batch_size": 4,
|
| 5106 |
"trial_name": null,
|
| 5107 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.7166205048561096,
|
| 3 |
"best_model_checkpoint": "./output/checkpoint-450",
|
| 4 |
+
"epoch": 234.7826086956522,
|
| 5 |
"eval_steps": 150,
|
| 6 |
+
"global_step": 5400,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 5082 |
"EMA_steps_per_second": 25.783,
|
| 5083 |
"epoch": 228.2608695652174,
|
| 5084 |
"step": 5250
|
| 5085 |
+
},
|
| 5086 |
+
{
|
| 5087 |
+
"epoch": 228.69565217391303,
|
| 5088 |
+
"grad_norm": 3.2444028854370117,
|
| 5089 |
+
"learning_rate": 1.085552546442246e-07,
|
| 5090 |
+
"loss": 0.2128,
|
| 5091 |
+
"step": 5260
|
| 5092 |
+
},
|
| 5093 |
+
{
|
| 5094 |
+
"epoch": 229.1304347826087,
|
| 5095 |
+
"grad_norm": 2.6189353466033936,
|
| 5096 |
+
"learning_rate": 2.171105092884492e-07,
|
| 5097 |
+
"loss": 0.2259,
|
| 5098 |
+
"step": 5270
|
| 5099 |
+
},
|
| 5100 |
+
{
|
| 5101 |
+
"epoch": 229.56521739130434,
|
| 5102 |
+
"grad_norm": 2.1571950912475586,
|
| 5103 |
+
"learning_rate": 3.2566576393267376e-07,
|
| 5104 |
+
"loss": 0.2638,
|
| 5105 |
+
"step": 5280
|
| 5106 |
+
},
|
| 5107 |
+
{
|
| 5108 |
+
"epoch": 230.0,
|
| 5109 |
+
"grad_norm": 2.62457013130188,
|
| 5110 |
+
"learning_rate": 4.342210185768984e-07,
|
| 5111 |
+
"loss": 0.2062,
|
| 5112 |
+
"step": 5290
|
| 5113 |
+
},
|
| 5114 |
+
{
|
| 5115 |
+
"epoch": 230.43478260869566,
|
| 5116 |
+
"grad_norm": 1.7154628038406372,
|
| 5117 |
+
"learning_rate": 5.42776273221123e-07,
|
| 5118 |
+
"loss": 0.2114,
|
| 5119 |
+
"step": 5300
|
| 5120 |
+
},
|
| 5121 |
+
{
|
| 5122 |
+
"epoch": 230.8695652173913,
|
| 5123 |
+
"grad_norm": 2.018242835998535,
|
| 5124 |
+
"learning_rate": 6.513315278653475e-07,
|
| 5125 |
+
"loss": 0.24,
|
| 5126 |
+
"step": 5310
|
| 5127 |
+
},
|
| 5128 |
+
{
|
| 5129 |
+
"epoch": 231.30434782608697,
|
| 5130 |
+
"grad_norm": 2.380286693572998,
|
| 5131 |
+
"learning_rate": 7.598867825095721e-07,
|
| 5132 |
+
"loss": 0.214,
|
| 5133 |
+
"step": 5320
|
| 5134 |
+
},
|
| 5135 |
+
{
|
| 5136 |
+
"epoch": 231.7391304347826,
|
| 5137 |
+
"grad_norm": 1.5191930532455444,
|
| 5138 |
+
"learning_rate": 8.684420371537968e-07,
|
| 5139 |
+
"loss": 0.2008,
|
| 5140 |
+
"step": 5330
|
| 5141 |
+
},
|
| 5142 |
+
{
|
| 5143 |
+
"epoch": 232.17391304347825,
|
| 5144 |
+
"grad_norm": 2.8159291744232178,
|
| 5145 |
+
"learning_rate": 9.769972917980214e-07,
|
| 5146 |
+
"loss": 0.2521,
|
| 5147 |
+
"step": 5340
|
| 5148 |
+
},
|
| 5149 |
+
{
|
| 5150 |
+
"epoch": 232.6086956521739,
|
| 5151 |
+
"grad_norm": 2.023869752883911,
|
| 5152 |
+
"learning_rate": 1.085552546442246e-06,
|
| 5153 |
+
"loss": 0.2095,
|
| 5154 |
+
"step": 5350
|
| 5155 |
+
},
|
| 5156 |
+
{
|
| 5157 |
+
"epoch": 233.04347826086956,
|
| 5158 |
+
"grad_norm": 1.9053815603256226,
|
| 5159 |
+
"learning_rate": 1.0855524388726596e-06,
|
| 5160 |
+
"loss": 0.2192,
|
| 5161 |
+
"step": 5360
|
| 5162 |
+
},
|
| 5163 |
+
{
|
| 5164 |
+
"epoch": 233.47826086956522,
|
| 5165 |
+
"grad_norm": 2.135075092315674,
|
| 5166 |
+
"learning_rate": 1.0855521161639428e-06,
|
| 5167 |
+
"loss": 0.2301,
|
| 5168 |
+
"step": 5370
|
| 5169 |
+
},
|
| 5170 |
+
{
|
| 5171 |
+
"epoch": 233.91304347826087,
|
| 5172 |
+
"grad_norm": 2.302385091781616,
|
| 5173 |
+
"learning_rate": 1.0855515783162238e-06,
|
| 5174 |
+
"loss": 0.2402,
|
| 5175 |
+
"step": 5380
|
| 5176 |
+
},
|
| 5177 |
+
{
|
| 5178 |
+
"epoch": 234.34782608695653,
|
| 5179 |
+
"grad_norm": 2.3211662769317627,
|
| 5180 |
+
"learning_rate": 1.0855508253297159e-06,
|
| 5181 |
+
"loss": 0.2156,
|
| 5182 |
+
"step": 5390
|
| 5183 |
+
},
|
| 5184 |
+
{
|
| 5185 |
+
"epoch": 234.7826086956522,
|
| 5186 |
+
"grad_norm": 2.5585744380950928,
|
| 5187 |
+
"learning_rate": 1.0855498572047172e-06,
|
| 5188 |
+
"loss": 0.2164,
|
| 5189 |
+
"step": 5400
|
| 5190 |
+
},
|
| 5191 |
+
{
|
| 5192 |
+
"epoch": 234.7826086956522,
|
| 5193 |
+
"eval_loss": 0.9845991134643555,
|
| 5194 |
+
"eval_runtime": 0.3916,
|
| 5195 |
+
"eval_samples_per_second": 25.539,
|
| 5196 |
+
"eval_steps_per_second": 25.539,
|
| 5197 |
+
"step": 5400
|
| 5198 |
+
},
|
| 5199 |
+
{
|
| 5200 |
+
"Start_State_loss": 0.8609819412231445,
|
| 5201 |
+
"Start_State_runtime": 0.3948,
|
| 5202 |
+
"Start_State_samples_per_second": 25.328,
|
| 5203 |
+
"Start_State_steps_per_second": 25.328,
|
| 5204 |
+
"epoch": 234.7826086956522,
|
| 5205 |
+
"step": 5400
|
| 5206 |
+
},
|
| 5207 |
+
{
|
| 5208 |
+
"Raw_Model_loss": 0.9845991134643555,
|
| 5209 |
+
"Raw_Model_runtime": 0.3961,
|
| 5210 |
+
"Raw_Model_samples_per_second": 25.244,
|
| 5211 |
+
"Raw_Model_steps_per_second": 25.244,
|
| 5212 |
+
"epoch": 234.7826086956522,
|
| 5213 |
+
"step": 5400
|
| 5214 |
+
},
|
| 5215 |
+
{
|
| 5216 |
+
"SWA_loss": 0.8302789926528931,
|
| 5217 |
+
"SWA_runtime": 0.3853,
|
| 5218 |
+
"SWA_samples_per_second": 25.953,
|
| 5219 |
+
"SWA_steps_per_second": 25.953,
|
| 5220 |
+
"epoch": 234.7826086956522,
|
| 5221 |
+
"step": 5400
|
| 5222 |
+
},
|
| 5223 |
+
{
|
| 5224 |
+
"EMA_loss": 0.8594452142715454,
|
| 5225 |
+
"EMA_runtime": 0.3912,
|
| 5226 |
+
"EMA_samples_per_second": 25.563,
|
| 5227 |
+
"EMA_steps_per_second": 25.563,
|
| 5228 |
+
"epoch": 234.7826086956522,
|
| 5229 |
+
"step": 5400
|
| 5230 |
}
|
| 5231 |
],
|
| 5232 |
"logging_steps": 10,
|
|
|
|
| 5246 |
"attributes": {}
|
| 5247 |
}
|
| 5248 |
},
|
| 5249 |
+
"total_flos": 1.3884011525792563e+17,
|
| 5250 |
"train_batch_size": 4,
|
| 5251 |
"trial_name": null,
|
| 5252 |
"trial_params": null
|