madhuHuggingface committed on
Commit
8a468dd
·
verified ·
1 Parent(s): 5403dd4

Training in progress, step 700, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3ac7cc399e9e803e832d3e4a887b8e17a2bca991693fd5fedaf23f9a68a33002
3
  size 60785144
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:303343475309841b85eed76df986b9e6645cec918e116af2f899f51b3ecf6251
3
  size 60785144
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:78f004ecba8cf08197c7c0bc5a876982d8a5c63197217f74df63c2ea81b3e5c3
3
  size 31149205
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fde37edca5adb4005644c406622925ee8e2714e074424bc24af3f6441bbc502
3
  size 31149205
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:863116f078b55fcd26c21f209dcf85d6cb8d8e08cee3e74f49dae023ed260e47
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff4532c1ad6082d83324dc653af69e29d03fa02637d181855b2e21b79b948367
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 2.4,
6
  "eval_steps": 500,
7
- "global_step": 600,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -428,6 +428,76 @@
428
  "learning_rate": 2.038171362173843e-05,
429
  "loss": 0.0118,
430
  "step": 600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
431
  }
432
  ],
433
  "logging_steps": 10,
@@ -447,7 +517,7 @@
447
  "attributes": {}
448
  }
449
  },
450
- "total_flos": 1918926157158912.0,
451
  "train_batch_size": 2,
452
  "trial_name": null,
453
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 2.8,
6
  "eval_steps": 500,
7
+ "global_step": 700,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
428
  "learning_rate": 2.038171362173843e-05,
429
  "loss": 0.0118,
430
  "step": 600
431
+ },
432
+ {
433
+ "epoch": 2.44,
434
+ "grad_norm": 0.3860418498516083,
435
+ "learning_rate": 1.7852344669758593e-05,
436
+ "loss": 0.0108,
437
+ "step": 610
438
+ },
439
+ {
440
+ "epoch": 2.48,
441
+ "grad_norm": 0.0032947103027254343,
442
+ "learning_rate": 1.547509426469368e-05,
443
+ "loss": 0.0132,
444
+ "step": 620
445
+ },
446
+ {
447
+ "epoch": 2.52,
448
+ "grad_norm": 0.1931905448436737,
449
+ "learning_rate": 1.325436452704033e-05,
450
+ "loss": 0.0165,
451
+ "step": 630
452
+ },
453
+ {
454
+ "epoch": 2.56,
455
+ "grad_norm": 0.12002695351839066,
456
+ "learning_rate": 1.119426773705068e-05,
457
+ "loss": 0.0086,
458
+ "step": 640
459
+ },
460
+ {
461
+ "epoch": 2.6,
462
+ "grad_norm": 0.004515103995800018,
463
+ "learning_rate": 9.298618719736418e-06,
464
+ "loss": 0.0042,
465
+ "step": 650
466
+ },
467
+ {
468
+ "epoch": 2.64,
469
+ "grad_norm": 0.015484682284295559,
470
+ "learning_rate": 7.570927780690673e-06,
471
+ "loss": 0.0114,
472
+ "step": 660
473
+ },
474
+ {
475
+ "epoch": 2.68,
476
+ "grad_norm": 0.038616035133600235,
477
+ "learning_rate": 6.0143942058104695e-06,
478
+ "loss": 0.0053,
479
+ "step": 670
480
+ },
481
+ {
482
+ "epoch": 2.7199999999999998,
483
+ "grad_norm": 0.19781313836574554,
484
+ "learning_rate": 4.631900336955441e-06,
485
+ "loss": 0.0093,
486
+ "step": 680
487
+ },
488
+ {
489
+ "epoch": 2.76,
490
+ "grad_norm": 0.23140157759189606,
491
+ "learning_rate": 3.426006234514523e-06,
492
+ "loss": 0.009,
493
+ "step": 690
494
+ },
495
+ {
496
+ "epoch": 2.8,
497
+ "grad_norm": 0.21433651447296143,
498
+ "learning_rate": 2.39894493676317e-06,
499
+ "loss": 0.0117,
500
+ "step": 700
501
  }
502
  ],
503
  "logging_steps": 10,
 
517
  "attributes": {}
518
  }
519
  },
520
+ "total_flos": 2236049469769728.0,
521
  "train_batch_size": 2,
522
  "trial_name": null,
523
  "trial_params": null