madhuHuggingface commited on
Commit
156f269
·
verified ·
1 Parent(s): 079569d

Training in progress, step 600, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d27e9476dc918efebb61651f7e9f759934ffdffe549c6fc2165cc9ced32d93a6
3
  size 60785144
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ac7cc399e9e803e832d3e4a887b8e17a2bca991693fd5fedaf23f9a68a33002
3
  size 60785144
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c6ce41753fb994b8f796034d662e460bea6de0413caeaa2d0ec5be27d28aba58
3
  size 31149205
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78f004ecba8cf08197c7c0bc5a876982d8a5c63197217f74df63c2ea81b3e5c3
3
  size 31149205
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c800b778fa7e115e4c34de8529902de8b61c9a1b4bab3eb8295d06dafff030e
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:181c5f0270cf39930062ddfa3767a2481d0c360f120b11f8e25dbf533a1cdaba
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d35a14be938754e2a3aa3ebe18bdde4e86b890e4e0ff3c1f2a56a75942036606
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:863116f078b55fcd26c21f209dcf85d6cb8d8e08cee3e74f49dae023ed260e47
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 2.0,
6
  "eval_steps": 500,
7
- "global_step": 500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -358,6 +358,76 @@
358
  "learning_rate": 5.288404923261361e-05,
359
  "loss": 0.0199,
360
  "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
361
  }
362
  ],
363
  "logging_steps": 10,
@@ -377,7 +447,7 @@
377
  "attributes": {}
378
  }
379
  },
380
- "total_flos": 1597906326839808.0,
381
  "train_batch_size": 2,
382
  "trial_name": null,
383
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 2.4,
6
  "eval_steps": 500,
7
+ "global_step": 600,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
358
  "learning_rate": 5.288404923261361e-05,
359
  "loss": 0.0199,
360
  "step": 500
361
+ },
362
+ {
363
+ "epoch": 2.04,
364
+ "grad_norm": 0.19935500621795654,
365
+ "learning_rate": 4.913290561478957e-05,
366
+ "loss": 0.0126,
367
+ "step": 510
368
+ },
369
+ {
370
+ "epoch": 2.08,
371
+ "grad_norm": 0.19961194694042206,
372
+ "learning_rate": 4.547595614593488e-05,
373
+ "loss": 0.0149,
374
+ "step": 520
375
+ },
376
+ {
377
+ "epoch": 2.12,
378
+ "grad_norm": 0.24504683911800385,
379
+ "learning_rate": 4.1919972654619935e-05,
380
+ "loss": 0.0187,
381
+ "step": 530
382
+ },
383
+ {
384
+ "epoch": 2.16,
385
+ "grad_norm": 0.1561822146177292,
386
+ "learning_rate": 3.8471540003667214e-05,
387
+ "loss": 0.0188,
388
+ "step": 540
389
+ },
390
+ {
391
+ "epoch": 2.2,
392
+ "grad_norm": 0.35855334997177124,
393
+ "learning_rate": 3.513704389650188e-05,
394
+ "loss": 0.0143,
395
+ "step": 550
396
+ },
397
+ {
398
+ "epoch": 2.24,
399
+ "grad_norm": 0.17385642230510712,
400
+ "learning_rate": 3.192265905229836e-05,
401
+ "loss": 0.0085,
402
+ "step": 560
403
+ },
404
+ {
405
+ "epoch": 2.2800000000000002,
406
+ "grad_norm": 0.15791386365890503,
407
+ "learning_rate": 2.883433777182255e-05,
408
+ "loss": 0.0113,
409
+ "step": 570
410
+ },
411
+ {
412
+ "epoch": 2.32,
413
+ "grad_norm": 0.053924959152936935,
414
+ "learning_rate": 2.5877798915140416e-05,
415
+ "loss": 0.0154,
416
+ "step": 580
417
+ },
418
+ {
419
+ "epoch": 2.36,
420
+ "grad_norm": 0.21498742699623108,
421
+ "learning_rate": 2.3058517311606233e-05,
422
+ "loss": 0.0119,
423
+ "step": 590
424
+ },
425
+ {
426
+ "epoch": 2.4,
427
+ "grad_norm": 0.1106470450758934,
428
+ "learning_rate": 2.038171362173843e-05,
429
+ "loss": 0.0118,
430
+ "step": 600
431
  }
432
  ],
433
  "logging_steps": 10,
 
447
  "attributes": {}
448
  }
449
  },
450
+ "total_flos": 1918926157158912.0,
451
  "train_batch_size": 2,
452
  "trial_name": null,
453
  "trial_params": null