FredericFan commited on
Commit
c9e788e
·
verified ·
1 Parent(s): a55527f

Training in progress, step 2000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c2de3e8a0e43fd8dd9c15b000eeb4f2021cdd5eccb54819b3b0728d1b7267178
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ef30dc9a100b240120af62823ca4707e4a35c361f060e8d6c15efa77b5e60f1
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8b82ac85f6bf6ebbbd22442579d6ae1b2c1bf03a31bf32cacfc36ffd617eda63
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12c3c123305a27e77f9affa5e1e0fa48210446cf570c456de0079a60956cc284
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2fae9953ba8f56f4659e95c4daffe7b5a7e29fdb6b98008ab375ed9889239ac9
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29621a933fe39840d93ef11565a36bb6ba3b5a377ed5e55e63eb1777b2373cd7
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e08cecb96b0575af87573c227caf7ff2501936ebe822db040ebe990bb471ce73
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b929b010a2bf1a7268bbc9d5744f2ae71afa768419f9ef267d54626a2e8ef40d
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.09038107097148895,
3
- "best_model_checkpoint": "./fine-tuned/checkpoint-1500",
4
- "epoch": 0.12,
5
  "eval_steps": 500,
6
- "global_step": 1500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -241,6 +241,84 @@
241
  "eval_samples_per_second": 22.759,
242
  "eval_steps_per_second": 5.69,
243
  "step": 1500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
244
  }
245
  ],
246
  "logging_steps": 50,
@@ -260,7 +338,7 @@
260
  "attributes": {}
261
  }
262
  },
263
- "total_flos": 3653747343360000.0,
264
  "train_batch_size": 4,
265
  "trial_name": null,
266
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.08910445868968964,
3
+ "best_model_checkpoint": "./fine-tuned/checkpoint-2000",
4
+ "epoch": 0.16,
5
  "eval_steps": 500,
6
+ "global_step": 2000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
241
  "eval_samples_per_second": 22.759,
242
  "eval_steps_per_second": 5.69,
243
  "step": 1500
244
+ },
245
+ {
246
+ "epoch": 0.124,
247
+ "grad_norm": 0.19083499908447266,
248
+ "learning_rate": 2.81412e-05,
249
+ "loss": 0.0736,
250
+ "step": 1550
251
+ },
252
+ {
253
+ "epoch": 0.128,
254
+ "grad_norm": 0.19123569130897522,
255
+ "learning_rate": 2.80812e-05,
256
+ "loss": 0.0766,
257
+ "step": 1600
258
+ },
259
+ {
260
+ "epoch": 0.132,
261
+ "grad_norm": 0.24691827595233917,
262
+ "learning_rate": 2.80212e-05,
263
+ "loss": 0.0798,
264
+ "step": 1650
265
+ },
266
+ {
267
+ "epoch": 0.136,
268
+ "grad_norm": 0.17910048365592957,
269
+ "learning_rate": 2.79612e-05,
270
+ "loss": 0.0687,
271
+ "step": 1700
272
+ },
273
+ {
274
+ "epoch": 0.14,
275
+ "grad_norm": 0.1740667223930359,
276
+ "learning_rate": 2.79012e-05,
277
+ "loss": 0.0758,
278
+ "step": 1750
279
+ },
280
+ {
281
+ "epoch": 0.144,
282
+ "grad_norm": 0.15178219974040985,
283
+ "learning_rate": 2.78412e-05,
284
+ "loss": 0.0732,
285
+ "step": 1800
286
+ },
287
+ {
288
+ "epoch": 0.148,
289
+ "grad_norm": 0.1904926896095276,
290
+ "learning_rate": 2.77812e-05,
291
+ "loss": 0.0734,
292
+ "step": 1850
293
+ },
294
+ {
295
+ "epoch": 0.152,
296
+ "grad_norm": 0.2795208990573883,
297
+ "learning_rate": 2.77212e-05,
298
+ "loss": 0.076,
299
+ "step": 1900
300
+ },
301
+ {
302
+ "epoch": 0.156,
303
+ "grad_norm": 0.18160228431224823,
304
+ "learning_rate": 2.76612e-05,
305
+ "loss": 0.0716,
306
+ "step": 1950
307
+ },
308
+ {
309
+ "epoch": 0.16,
310
+ "grad_norm": 0.15877611935138702,
311
+ "learning_rate": 2.76012e-05,
312
+ "loss": 0.0773,
313
+ "step": 2000
314
+ },
315
+ {
316
+ "epoch": 0.16,
317
+ "eval_loss": 0.08910445868968964,
318
+ "eval_runtime": 87.8635,
319
+ "eval_samples_per_second": 22.763,
320
+ "eval_steps_per_second": 5.691,
321
+ "step": 2000
322
  }
323
  ],
324
  "logging_steps": 50,
 
338
  "attributes": {}
339
  }
340
  },
341
+ "total_flos": 4871663124480000.0,
342
  "train_batch_size": 4,
343
  "trial_name": null,
344
  "trial_params": null