error577 commited on
Commit
2d212c9
·
verified ·
1 Parent(s): 7f3aa4f

Training in progress, step 1250, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9e2d54a3801d46b95727c773606930e9bf969e1d8cccfe3cf60831391b78af8c
3
  size 1279323952
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d76b1ba4f1499da4a8ed5a8300e6dc366438ac57bf8279ee969fd0b2ca4728b
3
  size 1279323952
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:45b613831ea7dfc39f9271816e0b4214759cb3f9e2291ebc3ef260f6bcfe14ae
3
  size 2558803194
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb84ed3f14cc528de2ab5492455762837c66a3b6250725b8ebe7d7239de5d41b
3
  size 2558803194
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1cfcf09015728d40d62e893b725e0f8ac8452a70bd3fbf45bd8318f119b8ceb8
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2e7a6cc60a56e9e41bc4d01760bda59a0858d99962b8232c0c24c34810dcfef
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f350703ce652c5593eab3ae70fe8c7f19b12b89d7e78f06c4b41397240922d98
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0aaab3ece7f21f6134c7946ccfd6c2682813ad47096098a1e3a156edc7ad945a
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 1.9303643703460693,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-1200",
4
- "epoch": 0.6295907660020986,
5
  "eval_steps": 50,
6
- "global_step": 1200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -8607,6 +8607,364 @@
8607
  "eval_samples_per_second": 5.662,
8608
  "eval_steps_per_second": 1.891,
8609
  "step": 1200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8610
  }
8611
  ],
8612
  "logging_steps": 1,
@@ -8621,7 +8979,7 @@
8621
  "early_stopping_threshold": 0.0
8622
  },
8623
  "attributes": {
8624
- "early_stopping_patience_counter": 0
8625
  }
8626
  },
8627
  "TrainerControl": {
@@ -8635,7 +8993,7 @@
8635
  "attributes": {}
8636
  }
8637
  },
8638
- "total_flos": 3.201641398193357e+17,
8639
  "train_batch_size": 3,
8640
  "trial_name": null,
8641
  "trial_params": null
 
1
  {
2
  "best_metric": 1.9303643703460693,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-1200",
4
+ "epoch": 0.6558237145855194,
5
  "eval_steps": 50,
6
+ "global_step": 1250,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
8607
  "eval_samples_per_second": 5.662,
8608
  "eval_steps_per_second": 1.891,
8609
  "step": 1200
8610
+ },
8611
+ {
8612
+ "epoch": 0.630115424973767,
8613
+ "grad_norm": 0.6785674095153809,
8614
+ "learning_rate": 0.0001792738673354654,
8615
+ "loss": 8.3901,
8616
+ "step": 1201
8617
+ },
8618
+ {
8619
+ "epoch": 0.6306400839454355,
8620
+ "grad_norm": 0.6648474931716919,
8621
+ "learning_rate": 0.00017924030602439216,
8622
+ "loss": 7.8052,
8623
+ "step": 1202
8624
+ },
8625
+ {
8626
+ "epoch": 0.6311647429171039,
8627
+ "grad_norm": 0.6322881579399109,
8628
+ "learning_rate": 0.00017920672070963228,
8629
+ "loss": 8.0898,
8630
+ "step": 1203
8631
+ },
8632
+ {
8633
+ "epoch": 0.6316894018887723,
8634
+ "grad_norm": 0.6528663039207458,
8635
+ "learning_rate": 0.0001791731114013594,
8636
+ "loss": 8.0178,
8637
+ "step": 1204
8638
+ },
8639
+ {
8640
+ "epoch": 0.6322140608604407,
8641
+ "grad_norm": 0.7760594487190247,
8642
+ "learning_rate": 0.00017913947810975462,
8643
+ "loss": 7.8516,
8644
+ "step": 1205
8645
+ },
8646
+ {
8647
+ "epoch": 0.6327387198321092,
8648
+ "grad_norm": 0.7572141289710999,
8649
+ "learning_rate": 0.0001791058208450062,
8650
+ "loss": 7.5367,
8651
+ "step": 1206
8652
+ },
8653
+ {
8654
+ "epoch": 0.6332633788037776,
8655
+ "grad_norm": 0.952572762966156,
8656
+ "learning_rate": 0.00017907213961730972,
8657
+ "loss": 8.8389,
8658
+ "step": 1207
8659
+ },
8660
+ {
8661
+ "epoch": 0.633788037775446,
8662
+ "grad_norm": 0.8288649320602417,
8663
+ "learning_rate": 0.00017903843443686795,
8664
+ "loss": 8.2295,
8665
+ "step": 1208
8666
+ },
8667
+ {
8668
+ "epoch": 0.6343126967471143,
8669
+ "grad_norm": 0.9002505540847778,
8670
+ "learning_rate": 0.00017900470531389095,
8671
+ "loss": 8.2813,
8672
+ "step": 1209
8673
+ },
8674
+ {
8675
+ "epoch": 0.6348373557187827,
8676
+ "grad_norm": 0.8269932866096497,
8677
+ "learning_rate": 0.00017897095225859606,
8678
+ "loss": 8.9073,
8679
+ "step": 1210
8680
+ },
8681
+ {
8682
+ "epoch": 0.6353620146904512,
8683
+ "grad_norm": 0.8590414524078369,
8684
+ "learning_rate": 0.00017893717528120784,
8685
+ "loss": 7.6909,
8686
+ "step": 1211
8687
+ },
8688
+ {
8689
+ "epoch": 0.6358866736621196,
8690
+ "grad_norm": 1.1503770351409912,
8691
+ "learning_rate": 0.00017890337439195811,
8692
+ "loss": 8.3473,
8693
+ "step": 1212
8694
+ },
8695
+ {
8696
+ "epoch": 0.636411332633788,
8697
+ "grad_norm": 0.7925499081611633,
8698
+ "learning_rate": 0.00017886954960108587,
8699
+ "loss": 7.5837,
8700
+ "step": 1213
8701
+ },
8702
+ {
8703
+ "epoch": 0.6369359916054564,
8704
+ "grad_norm": 1.0551660060882568,
8705
+ "learning_rate": 0.0001788357009188375,
8706
+ "loss": 8.6293,
8707
+ "step": 1214
8708
+ },
8709
+ {
8710
+ "epoch": 0.6374606505771249,
8711
+ "grad_norm": 1.1928114891052246,
8712
+ "learning_rate": 0.0001788018283554665,
8713
+ "loss": 8.7884,
8714
+ "step": 1215
8715
+ },
8716
+ {
8717
+ "epoch": 0.6379853095487933,
8718
+ "grad_norm": 0.8815275430679321,
8719
+ "learning_rate": 0.00017876793192123365,
8720
+ "loss": 7.4191,
8721
+ "step": 1216
8722
+ },
8723
+ {
8724
+ "epoch": 0.6385099685204617,
8725
+ "grad_norm": 1.006833553314209,
8726
+ "learning_rate": 0.0001787340116264069,
8727
+ "loss": 7.6687,
8728
+ "step": 1217
8729
+ },
8730
+ {
8731
+ "epoch": 0.6390346274921301,
8732
+ "grad_norm": 0.9964754581451416,
8733
+ "learning_rate": 0.00017870006748126153,
8734
+ "loss": 7.8293,
8735
+ "step": 1218
8736
+ },
8737
+ {
8738
+ "epoch": 0.6395592864637986,
8739
+ "grad_norm": 1.082804799079895,
8740
+ "learning_rate": 0.00017866609949607995,
8741
+ "loss": 8.009,
8742
+ "step": 1219
8743
+ },
8744
+ {
8745
+ "epoch": 0.640083945435467,
8746
+ "grad_norm": 1.3665459156036377,
8747
+ "learning_rate": 0.0001786321076811519,
8748
+ "loss": 8.2669,
8749
+ "step": 1220
8750
+ },
8751
+ {
8752
+ "epoch": 0.6406086044071354,
8753
+ "grad_norm": 1.0634828805923462,
8754
+ "learning_rate": 0.0001785980920467742,
8755
+ "loss": 8.3186,
8756
+ "step": 1221
8757
+ },
8758
+ {
8759
+ "epoch": 0.6411332633788038,
8760
+ "grad_norm": 1.1793396472930908,
8761
+ "learning_rate": 0.000178564052603251,
8762
+ "loss": 8.2618,
8763
+ "step": 1222
8764
+ },
8765
+ {
8766
+ "epoch": 0.6416579223504721,
8767
+ "grad_norm": 1.1798900365829468,
8768
+ "learning_rate": 0.0001785299893608936,
8769
+ "loss": 8.0329,
8770
+ "step": 1223
8771
+ },
8772
+ {
8773
+ "epoch": 0.6421825813221406,
8774
+ "grad_norm": 1.1078875064849854,
8775
+ "learning_rate": 0.00017849590233002052,
8776
+ "loss": 7.5323,
8777
+ "step": 1224
8778
+ },
8779
+ {
8780
+ "epoch": 0.642707240293809,
8781
+ "grad_norm": 1.2947778701782227,
8782
+ "learning_rate": 0.0001784617915209575,
8783
+ "loss": 8.3191,
8784
+ "step": 1225
8785
+ },
8786
+ {
8787
+ "epoch": 0.6432318992654774,
8788
+ "grad_norm": 1.0576255321502686,
8789
+ "learning_rate": 0.00017842765694403752,
8790
+ "loss": 8.1193,
8791
+ "step": 1226
8792
+ },
8793
+ {
8794
+ "epoch": 0.6437565582371458,
8795
+ "grad_norm": 1.1417510509490967,
8796
+ "learning_rate": 0.00017839349860960068,
8797
+ "loss": 8.0367,
8798
+ "step": 1227
8799
+ },
8800
+ {
8801
+ "epoch": 0.6442812172088143,
8802
+ "grad_norm": 1.2275340557098389,
8803
+ "learning_rate": 0.0001783593165279943,
8804
+ "loss": 8.2202,
8805
+ "step": 1228
8806
+ },
8807
+ {
8808
+ "epoch": 0.6448058761804827,
8809
+ "grad_norm": 1.2881832122802734,
8810
+ "learning_rate": 0.00017832511070957295,
8811
+ "loss": 8.2294,
8812
+ "step": 1229
8813
+ },
8814
+ {
8815
+ "epoch": 0.6453305351521511,
8816
+ "grad_norm": 1.375793695449829,
8817
+ "learning_rate": 0.00017829088116469834,
8818
+ "loss": 7.1402,
8819
+ "step": 1230
8820
+ },
8821
+ {
8822
+ "epoch": 0.6458551941238195,
8823
+ "grad_norm": 1.2434371709823608,
8824
+ "learning_rate": 0.00017825662790373935,
8825
+ "loss": 8.8262,
8826
+ "step": 1231
8827
+ },
8828
+ {
8829
+ "epoch": 0.646379853095488,
8830
+ "grad_norm": 1.3222373723983765,
8831
+ "learning_rate": 0.00017822235093707206,
8832
+ "loss": 7.7723,
8833
+ "step": 1232
8834
+ },
8835
+ {
8836
+ "epoch": 0.6469045120671564,
8837
+ "grad_norm": 1.4988670349121094,
8838
+ "learning_rate": 0.0001781880502750798,
8839
+ "loss": 7.4396,
8840
+ "step": 1233
8841
+ },
8842
+ {
8843
+ "epoch": 0.6474291710388248,
8844
+ "grad_norm": 1.1858347654342651,
8845
+ "learning_rate": 0.00017815372592815297,
8846
+ "loss": 8.38,
8847
+ "step": 1234
8848
+ },
8849
+ {
8850
+ "epoch": 0.6479538300104932,
8851
+ "grad_norm": 1.3001600503921509,
8852
+ "learning_rate": 0.0001781193779066892,
8853
+ "loss": 7.4005,
8854
+ "step": 1235
8855
+ },
8856
+ {
8857
+ "epoch": 0.6484784889821616,
8858
+ "grad_norm": 1.302140712738037,
8859
+ "learning_rate": 0.0001780850062210933,
8860
+ "loss": 7.0935,
8861
+ "step": 1236
8862
+ },
8863
+ {
8864
+ "epoch": 0.64900314795383,
8865
+ "grad_norm": 1.4957777261734009,
8866
+ "learning_rate": 0.0001780506108817772,
8867
+ "loss": 8.0204,
8868
+ "step": 1237
8869
+ },
8870
+ {
8871
+ "epoch": 0.6495278069254984,
8872
+ "grad_norm": 1.4060813188552856,
8873
+ "learning_rate": 0.00017801619189916008,
8874
+ "loss": 7.5112,
8875
+ "step": 1238
8876
+ },
8877
+ {
8878
+ "epoch": 0.6500524658971668,
8879
+ "grad_norm": 1.3509002923965454,
8880
+ "learning_rate": 0.0001779817492836682,
8881
+ "loss": 7.5706,
8882
+ "step": 1239
8883
+ },
8884
+ {
8885
+ "epoch": 0.6505771248688352,
8886
+ "grad_norm": 1.6165887117385864,
8887
+ "learning_rate": 0.000177947283045735,
8888
+ "loss": 7.5412,
8889
+ "step": 1240
8890
+ },
8891
+ {
8892
+ "epoch": 0.6511017838405037,
8893
+ "grad_norm": 1.7650121450424194,
8894
+ "learning_rate": 0.00017791279319580113,
8895
+ "loss": 7.8366,
8896
+ "step": 1241
8897
+ },
8898
+ {
8899
+ "epoch": 0.6516264428121721,
8900
+ "grad_norm": 1.5572558641433716,
8901
+ "learning_rate": 0.00017787827974431427,
8902
+ "loss": 6.7198,
8903
+ "step": 1242
8904
+ },
8905
+ {
8906
+ "epoch": 0.6521511017838405,
8907
+ "grad_norm": 1.6787631511688232,
8908
+ "learning_rate": 0.00017784374270172942,
8909
+ "loss": 8.5223,
8910
+ "step": 1243
8911
+ },
8912
+ {
8913
+ "epoch": 0.6526757607555089,
8914
+ "grad_norm": 1.975051999092102,
8915
+ "learning_rate": 0.00017780918207850857,
8916
+ "loss": 8.3379,
8917
+ "step": 1244
8918
+ },
8919
+ {
8920
+ "epoch": 0.6532004197271774,
8921
+ "grad_norm": 1.9721925258636475,
8922
+ "learning_rate": 0.00017777459788512095,
8923
+ "loss": 8.3842,
8924
+ "step": 1245
8925
+ },
8926
+ {
8927
+ "epoch": 0.6537250786988458,
8928
+ "grad_norm": 2.151292324066162,
8929
+ "learning_rate": 0.00017773999013204284,
8930
+ "loss": 6.9369,
8931
+ "step": 1246
8932
+ },
8933
+ {
8934
+ "epoch": 0.6542497376705142,
8935
+ "grad_norm": 1.9202648401260376,
8936
+ "learning_rate": 0.00017770535882975783,
8937
+ "loss": 7.0776,
8938
+ "step": 1247
8939
+ },
8940
+ {
8941
+ "epoch": 0.6547743966421826,
8942
+ "grad_norm": 2.297982931137085,
8943
+ "learning_rate": 0.0001776707039887564,
8944
+ "loss": 7.2104,
8945
+ "step": 1248
8946
+ },
8947
+ {
8948
+ "epoch": 0.655299055613851,
8949
+ "grad_norm": 2.4272403717041016,
8950
+ "learning_rate": 0.00017763602561953636,
8951
+ "loss": 7.0545,
8952
+ "step": 1249
8953
+ },
8954
+ {
8955
+ "epoch": 0.6558237145855194,
8956
+ "grad_norm": 5.506582736968994,
8957
+ "learning_rate": 0.00017760132373260254,
8958
+ "loss": 7.7152,
8959
+ "step": 1250
8960
+ },
8961
+ {
8962
+ "epoch": 0.6558237145855194,
8963
+ "eval_loss": 1.9324959516525269,
8964
+ "eval_runtime": 82.4867,
8965
+ "eval_samples_per_second": 5.662,
8966
+ "eval_steps_per_second": 1.891,
8967
+ "step": 1250
8968
  }
8969
  ],
8970
  "logging_steps": 1,
 
8979
  "early_stopping_threshold": 0.0
8980
  },
8981
  "attributes": {
8982
+ "early_stopping_patience_counter": 1
8983
  }
8984
  },
8985
  "TrainerControl": {
 
8993
  "attributes": {}
8994
  }
8995
  },
8996
+ "total_flos": 3.336984962782986e+17,
8997
  "train_batch_size": 3,
8998
  "trial_name": null,
8999
  "trial_params": null