moos124 committed on
Commit
69ede13
·
verified ·
1 Parent(s): 6bc8259

Training in progress, step 1200, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b200680d038211860ba584c2f28a63092bedbb52e5d90f6546bd5a10d09e06a
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ace55a11230d780c73ad3972a954e4d11272264f349fe01c8e407e0c23c8989f
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:88dd34cdf02f6c0060ce2effba9c422499547a777556c6fa4324624714c519ba
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:872d4d72444b5de1cc08ad181c9790f46118072a5396e08c0f8471ef620fc0ab
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6099819e8294a8fc5af7ae38f658d01ef30baab7a41ad4ede6b3e0c5a669d1f0
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a1632b2e2a3ea7078bbcddab1cf01b7157f2874045a58a01a68284bf195ec13
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ebc341169ddfbf9f200256314bab44f65cd4b1a53f8b3614ed0e563731aee7ac
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d44244312cb5c2d760059d356bc53d068bf179be10b7607667de831e91a7da74
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.2538666666666667,
6
  "eval_steps": 500,
7
- "global_step": 1190,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1208,6 +1208,16 @@
1208
  "mean_token_accuracy": 0.7758729934692383,
1209
  "num_tokens": 5540743.0,
1210
  "step": 1190
 
 
 
 
 
 
 
 
 
 
1211
  }
1212
  ],
1213
  "logging_steps": 10,
@@ -1227,7 +1237,7 @@
1227
  "attributes": {}
1228
  }
1229
  },
1230
- "total_flos": 2.6230647882479616e+16,
1231
  "train_batch_size": 4,
1232
  "trial_name": null,
1233
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.256,
6
  "eval_steps": 500,
7
+ "global_step": 1200,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1208
  "mean_token_accuracy": 0.7758729934692383,
1209
  "num_tokens": 5540743.0,
1210
  "step": 1190
1211
+ },
1212
+ {
1213
+ "entropy": 1.0495109014213084,
1214
+ "epoch": 0.256,
1215
+ "grad_norm": 0.2335851490497589,
1216
+ "learning_rate": 9.759860958418926e-05,
1217
+ "loss": 1.1212799072265625,
1218
+ "mean_token_accuracy": 0.7419089064002037,
1219
+ "num_tokens": 5592981.0,
1220
+ "step": 1200
1221
  }
1222
  ],
1223
  "logging_steps": 10,
 
1237
  "attributes": {}
1238
  }
1239
  },
1240
+ "total_flos": 2.647411252652237e+16,
1241
  "train_batch_size": 4,
1242
  "trial_name": null,
1243
  "trial_params": null