mgh6 commited on
Commit
c72916d
·
verified ·
1 Parent(s): f3bd469

Training in progress, epoch 6, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9aa70a72406dc314a7a6607fa65812257c8bbb207a37efc9a6e94d5f334bd124
3
  size 2682482800
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5c3b0654a55323c3fcee30e714ee2d676246a3bfc98cc5f49a21c0197bf7658
3
  size 2682482800
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:32017c7cd933df8911a1ed535f5c24cba349c05ff6dc9a24103d3b36c925e465
3
  size 5365108834
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebc195ac9c4196031197946a0392714b3efeebd00756085187f31f1fca6860db
3
  size 5365108834
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6d33f3dbdb9b3f7dde1b012b4c45dfa6f4e834ae52f1442515a3bb9195da78f3
3
  size 15006
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d98d2e99b55542cf6b1c45f3a424a53b2fc65122f42198a9dbcf07ba8693c50
3
  size 15006
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b7442667663c027a23a41cfa011998a53a2269770ba7bdaf3adc5d3d98600b2d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13dc37c47f5922c1d1c30342794206ae261a0947af887e4fc89421ae169d8074
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 5.999310740953475,
5
  "eval_steps": 50,
6
- "global_step": 1632,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -487,6 +487,96 @@
487
  "eval_samples_per_second": 41.606,
488
  "eval_steps_per_second": 20.803,
489
  "step": 1600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
490
  }
491
  ],
492
  "logging_steps": 50,
@@ -506,7 +596,7 @@
506
  "attributes": {}
507
  }
508
  },
509
- "total_flos": 4.210910899071877e+17,
510
  "train_batch_size": 2,
511
  "trial_name": null,
512
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 6.999195864445721,
5
  "eval_steps": 50,
6
+ "global_step": 1904,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
487
  "eval_samples_per_second": 41.606,
488
  "eval_steps_per_second": 20.803,
489
  "step": 1600
490
+ },
491
+ {
492
+ "epoch": 6.0654796094198735,
493
+ "grad_norm": 47.59389877319336,
494
+ "learning_rate": 3.933823529411765e-05,
495
+ "loss": 0.8925,
496
+ "step": 1650
497
+ },
498
+ {
499
+ "epoch": 6.0654796094198735,
500
+ "eval_loss": 1.2326780557632446,
501
+ "eval_runtime": 116.7248,
502
+ "eval_samples_per_second": 41.345,
503
+ "eval_steps_per_second": 20.673,
504
+ "step": 1650
505
+ },
506
+ {
507
+ "epoch": 6.249282021826536,
508
+ "grad_norm": 45.18083190917969,
509
+ "learning_rate": 3.7500000000000003e-05,
510
+ "loss": 0.8771,
511
+ "step": 1700
512
+ },
513
+ {
514
+ "epoch": 6.249282021826536,
515
+ "eval_loss": 1.2302526235580444,
516
+ "eval_runtime": 115.8769,
517
+ "eval_samples_per_second": 41.648,
518
+ "eval_steps_per_second": 20.824,
519
+ "step": 1700
520
+ },
521
+ {
522
+ "epoch": 6.4330844342332,
523
+ "grad_norm": 40.455318450927734,
524
+ "learning_rate": 3.566176470588235e-05,
525
+ "loss": 0.8743,
526
+ "step": 1750
527
+ },
528
+ {
529
+ "epoch": 6.4330844342332,
530
+ "eval_loss": 1.2299398183822632,
531
+ "eval_runtime": 115.9106,
532
+ "eval_samples_per_second": 41.636,
533
+ "eval_steps_per_second": 20.818,
534
+ "step": 1750
535
+ },
536
+ {
537
+ "epoch": 6.616886846639862,
538
+ "grad_norm": 61.713111877441406,
539
+ "learning_rate": 3.382352941176471e-05,
540
+ "loss": 0.8735,
541
+ "step": 1800
542
+ },
543
+ {
544
+ "epoch": 6.616886846639862,
545
+ "eval_loss": 1.2240906953811646,
546
+ "eval_runtime": 116.0411,
547
+ "eval_samples_per_second": 41.589,
548
+ "eval_steps_per_second": 20.794,
549
+ "step": 1800
550
+ },
551
+ {
552
+ "epoch": 6.800689259046525,
553
+ "grad_norm": 69.22649383544922,
554
+ "learning_rate": 3.198529411764706e-05,
555
+ "loss": 0.8648,
556
+ "step": 1850
557
+ },
558
+ {
559
+ "epoch": 6.800689259046525,
560
+ "eval_loss": 1.2253305912017822,
561
+ "eval_runtime": 115.8996,
562
+ "eval_samples_per_second": 41.639,
563
+ "eval_steps_per_second": 20.82,
564
+ "step": 1850
565
+ },
566
+ {
567
+ "epoch": 6.9844916714531875,
568
+ "grad_norm": 65.4384994506836,
569
+ "learning_rate": 3.0147058823529413e-05,
570
+ "loss": 0.8649,
571
+ "step": 1900
572
+ },
573
+ {
574
+ "epoch": 6.9844916714531875,
575
+ "eval_loss": 1.2292358875274658,
576
+ "eval_runtime": 116.0285,
577
+ "eval_samples_per_second": 41.593,
578
+ "eval_steps_per_second": 20.797,
579
+ "step": 1900
580
  }
581
  ],
582
  "logging_steps": 50,
 
596
  "attributes": {}
597
  }
598
  },
599
+ "total_flos": 4.912264077125878e+17,
600
  "train_batch_size": 2,
601
  "trial_name": null,
602
  "trial_params": null