blackbeard334 commited on
Commit
31ddecf
·
verified ·
1 Parent(s): 272b3af

Training in progress, step 9890, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fec2e4937aab544bc4a7a289c34393ecd56103032bc0d0d26b02e58276b1dbbc
3
  size 1657
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a56ec06dc34cee7dd39d3feb8b68551051c585ab18d67cdd7e55a5bf68feca81
3
  size 1657
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6ec370349279813f830889071811ab5efe4b9e7cff1f539189d3d3eccba3be68
3
  size 14709
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c05818830d152bb21342a2f47225b1ba3da4f5c057a0251e0399bfcc2b8fd27a
3
  size 14709
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:75fb6279d7a90f2eb40ea21d57c0a3b3b9e3c8c93e25ada48d38c03d7fd5446d
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00adfad39dc29a8ba99da9222898e93d32ae9aefabae1129b73b2933a1864c2c
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 76.0,
6
  "eval_steps": 500,
7
- "global_step": 9880,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -17792,6 +17792,24 @@
17792
  "mean_token_accuracy": 0.6536471992731094,
17793
  "num_tokens": 36031980.0,
17794
  "step": 9880
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17795
  }
17796
  ],
17797
  "logging_steps": 5,
@@ -17811,7 +17829,7 @@
17811
  "attributes": {}
17812
  }
17813
  },
17814
- "total_flos": 1.8271429276203418e+18,
17815
  "train_batch_size": 4,
17816
  "trial_name": null,
17817
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 76.07692307692308,
6
  "eval_steps": 500,
7
+ "global_step": 9890,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
17792
  "mean_token_accuracy": 0.6536471992731094,
17793
  "num_tokens": 36031980.0,
17794
  "step": 9880
17795
+ },
17796
+ {
17797
+ "epoch": 76.03846153846153,
17798
+ "grad_norm": 0.0,
17799
+ "learning_rate": 5.8e-07,
17800
+ "loss": 1.7253,
17801
+ "mean_token_accuracy": 0.6511723041534424,
17802
+ "num_tokens": 36050841.0,
17803
+ "step": 9885
17804
+ },
17805
+ {
17806
+ "epoch": 76.07692307692308,
17807
+ "grad_norm": 0.0,
17808
+ "learning_rate": 5.550000000000001e-07,
17809
+ "loss": 1.7605,
17810
+ "mean_token_accuracy": 0.6479182183742523,
17811
+ "num_tokens": 36069596.0,
17812
+ "step": 9890
17813
  }
17814
  ],
17815
  "logging_steps": 5,
 
17829
  "attributes": {}
17830
  }
17831
  },
17832
+ "total_flos": 1.829078127854715e+18,
17833
  "train_batch_size": 4,
17834
  "trial_name": null,
17835
  "trial_params": null