moos124 committed on
Commit
ab8c860
·
verified ·
1 Parent(s): 93afe8a

Training in progress, step 800, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0e2494ffdf63bf22327553c4bb55e852175014ff33dff9daedab169a57bb6f7a
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd2cda9c9230d2d0799a98d4cdf1dadae0585a355490e9c0c3a1041b4b0d60cc
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6def35174fba19fddbd1fbc01159935c523dc04fd26db6a5deaf3b862edc82b4
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88805d5d925cc6ea67c7a661afd5da8cfb504bbdcccb54925a7884fd28adab48
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fdc561fcb0454540f2c70b4e670e9c553e4361d10c10a8586dcf2c98e6374da7
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4dfadc0aadf8692780568210f6a28c96f4ccf2481b5f4cc7a3521dab721743a4
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2ae3588707c67138d3df6e5970c2e06dbec86d28723537e7e2192c4a3a9f3b48
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf66822e6e71148b00e80d5a766aa265c0ba2c6ee19c08772affaaad1c0f1cde
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.16853333333333334,
6
  "eval_steps": 500,
7
- "global_step": 790,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -808,6 +808,16 @@
808
  "mean_token_accuracy": 0.7721783280372619,
809
  "num_tokens": 3671375.0,
810
  "step": 790
 
 
 
 
 
 
 
 
 
 
811
  }
812
  ],
813
  "logging_steps": 10,
@@ -827,7 +837,7 @@
827
  "attributes": {}
828
  }
829
  },
830
- "total_flos": 1.7370304770330624e+16,
831
  "train_batch_size": 4,
832
  "trial_name": null,
833
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.17066666666666666,
6
  "eval_steps": 500,
7
+ "global_step": 800,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
808
  "mean_token_accuracy": 0.7721783280372619,
809
  "num_tokens": 3671375.0,
810
  "step": 790
811
+ },
812
+ {
813
+ "entropy": 0.9649161577224732,
814
+ "epoch": 0.17066666666666666,
815
+ "grad_norm": 0.23826555907726288,
816
+ "learning_rate": 9.92560018204174e-05,
817
+ "loss": 1.0278871536254883,
818
+ "mean_token_accuracy": 0.7589392751455307,
819
+ "num_tokens": 3717893.0,
820
+ "step": 800
821
  }
822
  ],
823
  "logging_steps": 10,
 
837
  "attributes": {}
838
  }
839
  },
840
+ "total_flos": 1.7602666792169472e+16,
841
  "train_batch_size": 4,
842
  "trial_name": null,
843
  "trial_params": null