moos124 committed on
Commit
0116450
·
verified ·
1 Parent(s): 136be4e

Training in progress, step 200, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a57fa937ff04bdb0d277f6c926d885d7554dc6ea5ebd9ae35e7036ef13db5bb6
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4ba12f80e7a1e8c3db8751d68ce71ce45bbb9fc6eebd41558932eeee1a5aac1
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6ffdc5b361590cff028df75ea2c8649214b0bc354fec669fe97155a74a342cb7
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55e4bdc20ede56819cc883156569352c532501845302c896e10b5ef18073ea21
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2cdf1de3f2f16aa745528237922430f0dcc6c568ae39f40d63fc3ac92475d5cb
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69e8372199887ba63b55f610a5f7b4e9a465bf868e404afffb166fb3929a0bea
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d7aba7a3fce4d4f19edab460710dd14fcd7492540cd4a3fd9914056f5f980e21
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8f062c4e31beac64afbc4a04b84a06acf88322cd1acaed3b0d3f908102fb154
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.04053333333333333,
6
  "eval_steps": 500,
7
- "global_step": 190,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -208,6 +208,16 @@
208
  "mean_token_accuracy": 0.7532300829887391,
209
  "num_tokens": 884497.0,
210
  "step": 190
 
 
 
 
 
 
 
 
 
 
211
  }
212
  ],
213
  "logging_steps": 10,
@@ -227,7 +237,7 @@
227
  "attributes": {}
228
  }
229
  },
230
- "total_flos": 4231622065492992.0,
231
  "train_batch_size": 4,
232
  "trial_name": null,
233
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.042666666666666665,
6
  "eval_steps": 500,
7
+ "global_step": 200,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
208
  "mean_token_accuracy": 0.7532300829887391,
209
  "num_tokens": 884497.0,
210
  "step": 190
211
+ },
212
+ {
213
+ "entropy": 1.1033389106392861,
214
+ "epoch": 0.042666666666666665,
215
+ "grad_norm": 0.27512305974960327,
216
+ "learning_rate": 6.633333333333334e-05,
217
+ "loss": 1.2109394073486328,
218
+ "mean_token_accuracy": 0.7378732696175575,
219
+ "num_tokens": 931398.0,
220
+ "step": 200
221
  }
222
  ],
223
  "logging_steps": 10,
 
237
  "attributes": {}
238
  }
239
  },
240
+ "total_flos": 4465038475966464.0,
241
  "train_batch_size": 4,
242
  "trial_name": null,
243
  "trial_params": null