Fanucci commited on
Commit
260a6ae
·
verified ·
1 Parent(s): 1ce3295

Training in progress, step 600, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5effaa531506fd6c094304a0d4fbf50e4689c0c91e34799a678ff3cc9aec82c0
3
  size 4995335576
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad832a709ceb64a88d98817dcb75ad708bfacf1a4f40055fae54bcea8244ed97
3
  size 4995335576
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c05b3e597eb203363767c34935766da9cb2fb043cc0009221d70ea49f93ee318
3
  size 1857639032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6af8410d351904bb32739cb663c37b7698ec1f70ff79e91a4a8d02f23e66cc06
3
  size 1857639032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:043188c965bea5abf083a00c8e41c7840c397481f18153d4c25fcaffb631a510
3
  size 13706103974
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5332c2221b81d3206827e2f3bda349a3e32e62b283b472331914261c4f8bf7d
3
  size 13706103974
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b0b8a6e3878f9402461193b241d8f1ee8546c6ad03f43e5b9dab8f4fc8c4d065
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad578a43d11126d2531a957d968233fceac13bb258b4a43a2e2c3ad855ce9084
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e3372cb7b685e225287edc953534a919d443ed17283851ec6045c9673db22615
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a471a6ec9e3c58f40e439496906fe4d5cf4f1a004883d461acf006d0c949a038
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 1,
3
  "best_metric": 1.4945952892303467,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.034353529044347283,
6
  "eval_steps": 50,
7
- "global_step": 550,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -489,6 +489,49 @@
489
  "eval_samples_per_second": 15.692,
490
  "eval_steps_per_second": 15.692,
491
  "step": 550
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
492
  }
493
  ],
494
  "logging_steps": 10,
@@ -503,7 +546,7 @@
503
  "early_stopping_threshold": 0.0
504
  },
505
  "attributes": {
506
- "early_stopping_patience_counter": 11
507
  }
508
  },
509
  "TrainerControl": {
@@ -517,7 +560,7 @@
517
  "attributes": {}
518
  }
519
  },
520
- "total_flos": 2.246541901824e+16,
521
  "train_batch_size": 1,
522
  "trial_name": null,
523
  "trial_params": null
 
2
  "best_global_step": 1,
3
  "best_metric": 1.4945952892303467,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.037476577139287946,
6
  "eval_steps": 50,
7
+ "global_step": 600,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
489
  "eval_samples_per_second": 15.692,
490
  "eval_steps_per_second": 15.692,
491
  "step": 550
492
+ },
493
+ {
494
+ "epoch": 0.03497813866333541,
495
+ "grad_norm": 1072.0,
496
+ "learning_rate": 0.020736945184184407,
497
+ "loss": 9.5244,
498
+ "step": 560
499
+ },
500
+ {
501
+ "epoch": 0.03560274828232355,
502
+ "grad_norm": 2352.0,
503
+ "learning_rate": 0.019957510427206296,
504
+ "loss": 8.832,
505
+ "step": 570
506
+ },
507
+ {
508
+ "epoch": 0.03622735790131168,
509
+ "grad_norm": 540.0,
510
+ "learning_rate": 0.01918315302979444,
511
+ "loss": 10.0747,
512
+ "step": 580
513
+ },
514
+ {
515
+ "epoch": 0.03685196752029981,
516
+ "grad_norm": 600.0,
517
+ "learning_rate": 0.018414652704208584,
518
+ "loss": 8.9236,
519
+ "step": 590
520
+ },
521
+ {
522
+ "epoch": 0.037476577139287946,
523
+ "grad_norm": 644.0,
524
+ "learning_rate": 0.017652783265133608,
525
+ "loss": 8.524,
526
+ "step": 600
527
+ },
528
+ {
529
+ "epoch": 0.037476577139287946,
530
+ "eval_loss": 8.281222343444824,
531
+ "eval_runtime": 52.4549,
532
+ "eval_samples_per_second": 16.071,
533
+ "eval_steps_per_second": 16.071,
534
+ "step": 600
535
  }
536
  ],
537
  "logging_steps": 10,
 
546
  "early_stopping_threshold": 0.0
547
  },
548
  "attributes": {
549
+ "early_stopping_patience_counter": 12
550
  }
551
  },
552
  "TrainerControl": {
 
560
  "attributes": {}
561
  }
562
  },
563
+ "total_flos": 2.450772983808e+16,
564
  "train_batch_size": 1,
565
  "trial_name": null,
566
  "trial_params": null