aaravriyer193 commited on
Commit
b0b60ca
·
verified ·
1 Parent(s): 8c81380

Training in progress, step 337, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5ac2bcbbdf28f59e9963abe1a59b0c0659f6c4be33bfe161a5ed3f7b76d9e94d
3
  size 236389544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93d73df98fff4a6a3ad97281f90e353cb111656a20d495d6939da7e25d1982b1
3
  size 236389544
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bbbea3f8084ad3123aa70919061dfa938e07874f198c10eec228fe7a53a9c79c
3
  size 240875493
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:990731e03e5cbb36e6dff976bf6f6985fab555d8421e9deb16696b1bbedf82ee
3
  size 240875493
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9237fb57361c151ad1e37bec4bb4c44c5b2ef0e256e986c35085ab8e53aa5926
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b5f15ebf8e577db037305eb705a1d1879ee91fe3964c8447f5e62bd02ac498d
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:40f7b8536bd5b6bc0a13650e01d6ffbcfec0028c433b2309611b4ba8d5238928
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4f415dd60d9062452c191c523239bdb24a31ba70d99049aa2c8e6d4eb18f951
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.9040892193308551,
6
  "eval_steps": 500,
7
- "global_step": 304,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -608,6 +608,76 @@
608
  "mean_token_accuracy": 0.8706730246543884,
609
  "num_tokens": 9716187.0,
610
  "step": 300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
611
  }
612
  ],
613
  "logging_steps": 5,
@@ -622,12 +692,12 @@
622
  "should_evaluate": false,
623
  "should_log": false,
624
  "should_save": true,
625
- "should_training_stop": false
626
  },
627
  "attributes": {}
628
  }
629
  },
630
- "total_flos": 1.7777614771395625e+18,
631
  "train_batch_size": 4,
632
  "trial_name": null,
633
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 1.0,
6
  "eval_steps": 500,
7
+ "global_step": 337,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
608
  "mean_token_accuracy": 0.8706730246543884,
609
  "num_tokens": 9716187.0,
610
  "step": 300
611
+ },
612
+ {
613
+ "entropy": 0.4898031514137983,
614
+ "epoch": 0.9070631970260223,
615
+ "grad_norm": 0.0205078125,
616
+ "learning_rate": 6.453662433477136e-06,
617
+ "loss": 0.4925398826599121,
618
+ "mean_token_accuracy": 0.8753444463014602,
619
+ "num_tokens": 9879023.0,
620
+ "step": 305
621
+ },
622
+ {
623
+ "entropy": 0.45501707717776296,
624
+ "epoch": 0.9219330855018587,
625
+ "grad_norm": 0.0203857421875,
626
+ "learning_rate": 4.660360794506946e-06,
627
+ "loss": 0.4548198699951172,
628
+ "mean_token_accuracy": 0.8821182236075401,
629
+ "num_tokens": 10041800.0,
630
+ "step": 310
631
+ },
632
+ {
633
+ "entropy": 0.5240208253264427,
634
+ "epoch": 0.9368029739776952,
635
+ "grad_norm": 0.0233154296875,
636
+ "learning_rate": 3.1525821236119577e-06,
637
+ "loss": 0.5236988067626953,
638
+ "mean_token_accuracy": 0.8641670763492584,
639
+ "num_tokens": 10204694.0,
640
+ "step": 315
641
+ },
642
+ {
643
+ "entropy": 0.538949977979064,
644
+ "epoch": 0.9516728624535316,
645
+ "grad_norm": 0.0220947265625,
646
+ "learning_rate": 1.934841913455032e-06,
647
+ "loss": 0.5439452648162841,
648
+ "mean_token_accuracy": 0.8550667986273766,
649
+ "num_tokens": 10366660.0,
650
+ "step": 320
651
+ },
652
+ {
653
+ "entropy": 0.5102543152868748,
654
+ "epoch": 0.966542750929368,
655
+ "grad_norm": 0.018310546875,
656
+ "learning_rate": 1.010787050074835e-06,
657
+ "loss": 0.5104735374450684,
658
+ "mean_token_accuracy": 0.8640209168195725,
659
+ "num_tokens": 10529304.0,
660
+ "step": 325
661
+ },
662
+ {
663
+ "entropy": 0.4799253273755312,
664
+ "epoch": 0.9814126394052045,
665
+ "grad_norm": 0.019287109375,
666
+ "learning_rate": 3.831848911984959e-07,
667
+ "loss": 0.47628107070922854,
668
+ "mean_token_accuracy": 0.8758200943470001,
669
+ "num_tokens": 10692105.0,
670
+ "step": 330
671
+ },
672
+ {
673
+ "entropy": 0.45510734170675277,
674
+ "epoch": 0.9962825278810409,
675
+ "grad_norm": 0.0213623046875,
676
+ "learning_rate": 5.391497856399585e-08,
677
+ "loss": 0.4581630229949951,
678
+ "mean_token_accuracy": 0.8793978497385979,
679
+ "num_tokens": 10854094.0,
680
+ "step": 335
681
  }
682
  ],
683
  "logging_steps": 5,
 
692
  "should_evaluate": false,
693
  "should_log": false,
694
  "should_save": true,
695
+ "should_training_stop": true
696
  },
697
  "attributes": {}
698
  }
699
  },
700
+ "total_flos": 1.965993164065407e+18,
701
  "train_batch_size": 4,
702
  "trial_name": null,
703
  "trial_params": null