Erland committed on
Commit
ba5b3f5
·
verified ·
1 Parent(s): 96ebed7

Training in progress, step 40, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:56a595ab8d0501db585f02e5044fa57e61fcac8cca3667878ad850a07e524b4e
3
  size 167832240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:158b59b088d9e4102d6980a95e49bd76d284fd8aa4cee95bfab2f2ef44851982
3
  size 167832240
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f21feea2e2f7292f02c7822951aa623f6cf71bda055968e00277d00429c41ed
3
  size 86889042
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3df03bfe471d96aa40d3a3614213ee1ae1aa37dd508ffcaa6fb8dee23587ceb9
3
  size 86889042
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f2f0c5f75e18f8cb763d8ea5434e79bf35d98af96676e76ac16c35aac4009f48
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ef0281527db1a7092e344d658b16faf0f0d248b31b0fd2f143c85afbb18c88f
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.0024,
5
  "eval_steps": 500,
6
- "global_step": 30,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -217,6 +217,76 @@
217
  "learning_rate": 0.00017435897435897436,
218
  "loss": 0.7906,
219
  "step": 30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
220
  }
221
  ],
222
  "logging_steps": 1,
@@ -236,7 +306,7 @@
236
  "attributes": {}
237
  }
238
  },
239
- "total_flos": 8053805909950464.0,
240
  "train_batch_size": 2,
241
  "trial_name": null,
242
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.0032,
5
  "eval_steps": 500,
6
+ "global_step": 40,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
217
  "learning_rate": 0.00017435897435897436,
218
  "loss": 0.7906,
219
  "step": 30
220
+ },
221
+ {
222
+ "epoch": 0.00248,
223
+ "grad_norm": 0.6179051995277405,
224
+ "learning_rate": 0.00017333333333333334,
225
+ "loss": 0.4689,
226
+ "step": 31
227
+ },
228
+ {
229
+ "epoch": 0.00256,
230
+ "grad_norm": 0.42703530192375183,
231
+ "learning_rate": 0.00017230769230769234,
232
+ "loss": 0.4424,
233
+ "step": 32
234
+ },
235
+ {
236
+ "epoch": 0.00264,
237
+ "grad_norm": 0.5435040593147278,
238
+ "learning_rate": 0.00017128205128205128,
239
+ "loss": 0.3853,
240
+ "step": 33
241
+ },
242
+ {
243
+ "epoch": 0.00272,
244
+ "grad_norm": 0.45631420612335205,
245
+ "learning_rate": 0.00017025641025641026,
246
+ "loss": 0.4006,
247
+ "step": 34
248
+ },
249
+ {
250
+ "epoch": 0.0028,
251
+ "grad_norm": 0.48986274003982544,
252
+ "learning_rate": 0.00016923076923076923,
253
+ "loss": 0.557,
254
+ "step": 35
255
+ },
256
+ {
257
+ "epoch": 0.00288,
258
+ "grad_norm": 0.4985139071941376,
259
+ "learning_rate": 0.00016820512820512823,
260
+ "loss": 0.7894,
261
+ "step": 36
262
+ },
263
+ {
264
+ "epoch": 0.00296,
265
+ "grad_norm": 0.5833737850189209,
266
+ "learning_rate": 0.0001671794871794872,
267
+ "loss": 0.6526,
268
+ "step": 37
269
+ },
270
+ {
271
+ "epoch": 0.00304,
272
+ "grad_norm": 0.3877828121185303,
273
+ "learning_rate": 0.00016615384615384617,
274
+ "loss": 0.5351,
275
+ "step": 38
276
+ },
277
+ {
278
+ "epoch": 0.00312,
279
+ "grad_norm": 0.48045578598976135,
280
+ "learning_rate": 0.00016512820512820512,
281
+ "loss": 0.5696,
282
+ "step": 39
283
+ },
284
+ {
285
+ "epoch": 0.0032,
286
+ "grad_norm": 0.47378939390182495,
287
+ "learning_rate": 0.0001641025641025641,
288
+ "loss": 0.7905,
289
+ "step": 40
290
  }
291
  ],
292
  "logging_steps": 1,
 
306
  "attributes": {}
307
  }
308
  },
309
+ "total_flos": 1.0616812747358208e+16,
310
  "train_batch_size": 2,
311
  "trial_name": null,
312
  "trial_params": null