error577 committed on
Commit
6272826
·
verified ·
1 Parent(s): 24e867b

Training in progress, step 9200, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d9b5d664aa6ebbaadcc20c76e1cc625239e90c19d6c2942efd64c81c21119518
3
  size 100690288
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9cbca2f45d6b52d4f7b6ebb7322fe228c587105c317fa5a3486bcddea3632ee
3
  size 100690288
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e614f71ec99257e2c5001fbb713d139188b185558f5ca1ba1b27966467efa588
3
  size 51345082
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50254272993d00750d20519785aed6da515890c737ef88754fedb4a1125894be
3
  size 51345082
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:61062bd98b5506d7ad36e1f5b2bc2732781c5fe4228f62da914c162e474d444e
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02d52ff618a83e9102020a0283499806564b6579fb7c6addaa728c8a804eedd0
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ffa49f78e8a130eb7bcf8fc251143bab34bf14a54d0d14a5f4a0a18dba60dbec
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57b75f147ee39bab59d6712296fd45b943162e0434e26215e766d88ba6e6e775
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 2.2019271850585938,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-8400",
4
- "epoch": 0.1418987631157815,
5
  "eval_steps": 200,
6
- "global_step": 9000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -6675,6 +6675,154 @@
6675
  "eval_samples_per_second": 16.176,
6676
  "eval_steps_per_second": 8.096,
6677
  "step": 9000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6678
  }
6679
  ],
6680
  "logging_steps": 10,
@@ -6689,7 +6837,7 @@
6689
  "early_stopping_threshold": 0.0
6690
  },
6691
  "attributes": {
6692
- "early_stopping_patience_counter": 3
6693
  }
6694
  },
6695
  "TrainerControl": {
@@ -6698,12 +6846,12 @@
6698
  "should_evaluate": false,
6699
  "should_log": false,
6700
  "should_save": true,
6701
- "should_training_stop": false
6702
  },
6703
  "attributes": {}
6704
  }
6705
  },
6706
- "total_flos": 9.868014841233408e+16,
6707
  "train_batch_size": 2,
6708
  "trial_name": null,
6709
  "trial_params": null
 
1
  {
2
  "best_metric": 2.2019271850585938,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-8400",
4
+ "epoch": 0.14505206896279887,
5
  "eval_steps": 200,
6
+ "global_step": 9200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
6675
  "eval_samples_per_second": 16.176,
6676
  "eval_steps_per_second": 8.096,
6677
  "step": 9000
6678
+ },
6679
+ {
6680
+ "epoch": 0.14205642840813237,
6681
+ "grad_norm": 5.197229862213135,
6682
+ "learning_rate": 0.0001988978579208574,
6683
+ "loss": 9.1033,
6684
+ "step": 9010
6685
+ },
6686
+ {
6687
+ "epoch": 0.14221409370048324,
6688
+ "grad_norm": 4.565831661224365,
6689
+ "learning_rate": 0.00019889541187699707,
6690
+ "loss": 8.8077,
6691
+ "step": 9020
6692
+ },
6693
+ {
6694
+ "epoch": 0.1423717589928341,
6695
+ "grad_norm": 4.8584513664245605,
6696
+ "learning_rate": 0.00019889296313689968,
6697
+ "loss": 9.0339,
6698
+ "step": 9030
6699
+ },
6700
+ {
6701
+ "epoch": 0.14252942428518497,
6702
+ "grad_norm": 4.712079048156738,
6703
+ "learning_rate": 0.00019889051170063194,
6704
+ "loss": 8.8577,
6705
+ "step": 9040
6706
+ },
6707
+ {
6708
+ "epoch": 0.14268708957753584,
6709
+ "grad_norm": 9.281351089477539,
6710
+ "learning_rate": 0.00019888805756826074,
6711
+ "loss": 8.3711,
6712
+ "step": 9050
6713
+ },
6714
+ {
6715
+ "epoch": 0.1428447548698867,
6716
+ "grad_norm": 4.656757354736328,
6717
+ "learning_rate": 0.00019888560073985295,
6718
+ "loss": 9.3672,
6719
+ "step": 9060
6720
+ },
6721
+ {
6722
+ "epoch": 0.14300242016223758,
6723
+ "grad_norm": 5.0217108726501465,
6724
+ "learning_rate": 0.00019888314121547558,
6725
+ "loss": 9.3907,
6726
+ "step": 9070
6727
+ },
6728
+ {
6729
+ "epoch": 0.14316008545458844,
6730
+ "grad_norm": 4.683655738830566,
6731
+ "learning_rate": 0.00019888067899519567,
6732
+ "loss": 8.6511,
6733
+ "step": 9080
6734
+ },
6735
+ {
6736
+ "epoch": 0.1433177507469393,
6737
+ "grad_norm": 4.991017818450928,
6738
+ "learning_rate": 0.00019887821407908033,
6739
+ "loss": 8.6748,
6740
+ "step": 9090
6741
+ },
6742
+ {
6743
+ "epoch": 0.1434754160392902,
6744
+ "grad_norm": 13.076788902282715,
6745
+ "learning_rate": 0.0001988757464671968,
6746
+ "loss": 8.3435,
6747
+ "step": 9100
6748
+ },
6749
+ {
6750
+ "epoch": 0.14363308133164107,
6751
+ "grad_norm": 4.446813583374023,
6752
+ "learning_rate": 0.00019887327615961234,
6753
+ "loss": 9.5288,
6754
+ "step": 9110
6755
+ },
6756
+ {
6757
+ "epoch": 0.14379074662399194,
6758
+ "grad_norm": 4.752020835876465,
6759
+ "learning_rate": 0.00019887080315639428,
6760
+ "loss": 9.3455,
6761
+ "step": 9120
6762
+ },
6763
+ {
6764
+ "epoch": 0.1439484119163428,
6765
+ "grad_norm": 5.036296367645264,
6766
+ "learning_rate": 0.0001988683274576101,
6767
+ "loss": 8.9484,
6768
+ "step": 9130
6769
+ },
6770
+ {
6771
+ "epoch": 0.14410607720869367,
6772
+ "grad_norm": 4.9117841720581055,
6773
+ "learning_rate": 0.0001988658490633272,
6774
+ "loss": 8.4929,
6775
+ "step": 9140
6776
+ },
6777
+ {
6778
+ "epoch": 0.14426374250104454,
6779
+ "grad_norm": 13.374736785888672,
6780
+ "learning_rate": 0.00019886336797361325,
6781
+ "loss": 8.4415,
6782
+ "step": 9150
6783
+ },
6784
+ {
6785
+ "epoch": 0.1444214077933954,
6786
+ "grad_norm": 4.678138256072998,
6787
+ "learning_rate": 0.00019886088418853581,
6788
+ "loss": 9.2699,
6789
+ "step": 9160
6790
+ },
6791
+ {
6792
+ "epoch": 0.14457907308574627,
6793
+ "grad_norm": 6.370858669281006,
6794
+ "learning_rate": 0.00019885839770816268,
6795
+ "loss": 8.5141,
6796
+ "step": 9170
6797
+ },
6798
+ {
6799
+ "epoch": 0.14473673837809714,
6800
+ "grad_norm": 5.328512191772461,
6801
+ "learning_rate": 0.0001988559085325616,
6802
+ "loss": 8.5896,
6803
+ "step": 9180
6804
+ },
6805
+ {
6806
+ "epoch": 0.144894403670448,
6807
+ "grad_norm": 5.4744391441345215,
6808
+ "learning_rate": 0.0001988534166618004,
6809
+ "loss": 8.6385,
6810
+ "step": 9190
6811
+ },
6812
+ {
6813
+ "epoch": 0.14505206896279887,
6814
+ "grad_norm": 99.89271545410156,
6815
+ "learning_rate": 0.00019885092209594708,
6816
+ "loss": 8.5823,
6817
+ "step": 9200
6818
+ },
6819
+ {
6820
+ "epoch": 0.14505206896279887,
6821
+ "eval_loss": 2.203756093978882,
6822
+ "eval_runtime": 62.7646,
6823
+ "eval_samples_per_second": 16.203,
6824
+ "eval_steps_per_second": 8.11,
6825
+ "step": 9200
6826
  }
6827
  ],
6828
  "logging_steps": 10,
 
6837
  "early_stopping_threshold": 0.0
6838
  },
6839
  "attributes": {
6840
+ "early_stopping_patience_counter": 4
6841
  }
6842
  },
6843
  "TrainerControl": {
 
6846
  "should_evaluate": false,
6847
  "should_log": false,
6848
  "should_save": true,
6849
+ "should_training_stop": true
6850
  },
6851
  "attributes": {}
6852
  }
6853
  },
6854
+ "total_flos": 1.0089740855200973e+17,
6855
  "train_batch_size": 2,
6856
  "trial_name": null,
6857
  "trial_params": null