FredericFan commited on
Commit
70921f2
·
verified ·
1 Parent(s): e35c19c

Training in progress, step 15000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:054ab94a66b126df267b052c3963349825d38029b9947a5eeef3e088fc94d5e3
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8b87db48356f50aba644c7f62751e63e8a04ec2268f84e4eba37dd50c1a0411
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c700b2458cc3a7d705b174e5b082b75ff9b46e4556e47eb6bc98ed85f7b5b362
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85a117e1ecba729ee34a78722b6baa410d7ae44754ab32c14568e8bd85298764
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2c9886b9061bb2e70af0da0a78b4bba065bbf4e416078705ff5fff6c95adfc84
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5cb981e2bdbf592329578c66e6b81722ec222bc355b0ddd3bdc9f7689a385e7a
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e6ce624ffb18558fd63335de21c66bfccbf585f56176b1bb9297748553d5fb95
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0054105d8fb5cdfddd8876e0968830f9a7aa658759cebb2ad97ea276facb582b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.0824647843837738,
3
- "best_model_checkpoint": "./fine-tuned/checkpoint-12500",
4
- "epoch": 1.16,
5
  "eval_steps": 500,
6
- "global_step": 14500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2269,6 +2269,84 @@
2269
  "eval_samples_per_second": 22.714,
2270
  "eval_steps_per_second": 5.678,
2271
  "step": 14500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2272
  }
2273
  ],
2274
  "logging_steps": 50,
@@ -2288,7 +2366,7 @@
2288
  "attributes": {}
2289
  }
2290
  },
2291
- "total_flos": 3.531955765248e+16,
2292
  "train_batch_size": 4,
2293
  "trial_name": null,
2294
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.08243728429079056,
3
+ "best_model_checkpoint": "./fine-tuned/checkpoint-15000",
4
+ "epoch": 1.2,
5
  "eval_steps": 500,
6
+ "global_step": 15000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2269
  "eval_samples_per_second": 22.714,
2270
  "eval_steps_per_second": 5.678,
2271
  "step": 14500
2272
+ },
2273
+ {
2274
+ "epoch": 1.164,
2275
+ "grad_norm": 0.06833196431398392,
2276
+ "learning_rate": 1.25436e-05,
2277
+ "loss": 0.0586,
2278
+ "step": 14550
2279
+ },
2280
+ {
2281
+ "epoch": 1.168,
2282
+ "grad_norm": 0.16051365435123444,
2283
+ "learning_rate": 1.24836e-05,
2284
+ "loss": 0.056,
2285
+ "step": 14600
2286
+ },
2287
+ {
2288
+ "epoch": 1.172,
2289
+ "grad_norm": 0.18909616768360138,
2290
+ "learning_rate": 1.24236e-05,
2291
+ "loss": 0.057,
2292
+ "step": 14650
2293
+ },
2294
+ {
2295
+ "epoch": 1.176,
2296
+ "grad_norm": 0.17333486676216125,
2297
+ "learning_rate": 1.23636e-05,
2298
+ "loss": 0.0562,
2299
+ "step": 14700
2300
+ },
2301
+ {
2302
+ "epoch": 1.18,
2303
+ "grad_norm": 0.0701974630355835,
2304
+ "learning_rate": 1.23036e-05,
2305
+ "loss": 0.0545,
2306
+ "step": 14750
2307
+ },
2308
+ {
2309
+ "epoch": 1.184,
2310
+ "grad_norm": 0.1582074612379074,
2311
+ "learning_rate": 1.2243599999999999e-05,
2312
+ "loss": 0.06,
2313
+ "step": 14800
2314
+ },
2315
+ {
2316
+ "epoch": 1.188,
2317
+ "grad_norm": 0.13948781788349152,
2318
+ "learning_rate": 1.21836e-05,
2319
+ "loss": 0.0529,
2320
+ "step": 14850
2321
+ },
2322
+ {
2323
+ "epoch": 1.192,
2324
+ "grad_norm": 0.0946699008345604,
2325
+ "learning_rate": 1.21236e-05,
2326
+ "loss": 0.0668,
2327
+ "step": 14900
2328
+ },
2329
+ {
2330
+ "epoch": 1.196,
2331
+ "grad_norm": 0.1089014783501625,
2332
+ "learning_rate": 1.20636e-05,
2333
+ "loss": 0.0562,
2334
+ "step": 14950
2335
+ },
2336
+ {
2337
+ "epoch": 1.2,
2338
+ "grad_norm": 0.09682592004537582,
2339
+ "learning_rate": 1.20036e-05,
2340
+ "loss": 0.0548,
2341
+ "step": 15000
2342
+ },
2343
+ {
2344
+ "epoch": 1.2,
2345
+ "eval_loss": 0.08243728429079056,
2346
+ "eval_runtime": 88.0302,
2347
+ "eval_samples_per_second": 22.719,
2348
+ "eval_steps_per_second": 5.68,
2349
+ "step": 15000
2350
  }
2351
  ],
2352
  "logging_steps": 50,
 
2366
  "attributes": {}
2367
  }
2368
  },
2369
+ "total_flos": 3.65374734336e+16,
2370
  "train_batch_size": 4,
2371
  "trial_name": null,
2372
  "trial_params": null