moos124 committed on
Commit
6c4a355
·
verified ·
1 Parent(s): cc166c7

Training in progress, step 210, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f4ba12f80e7a1e8c3db8751d68ce71ce45bbb9fc6eebd41558932eeee1a5aac1
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7ecb0d2cf6d26d4ca4ae47efc508a6b702838b8414c8ed0c5cc392c84e549b6
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:55e4bdc20ede56819cc883156569352c532501845302c896e10b5ef18073ea21
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75bf45c2f566b9436b3ae5bc03816248d0ba8ce87423ac6de53b1bdd08728242
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:69e8372199887ba63b55f610a5f7b4e9a465bf868e404afffb166fb3929a0bea
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1796ebef6b03c675662f135c77afa81e7f302def65a3802f4671e29d01ec2b5e
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a8f062c4e31beac64afbc4a04b84a06acf88322cd1acaed3b0d3f908102fb154
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea96f589aecf8c6843b29f66085f6fac32186559b39c4722727fe0078978a03e
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.042666666666666665,
6
  "eval_steps": 500,
7
- "global_step": 200,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -218,6 +218,16 @@
218
  "mean_token_accuracy": 0.7378732696175575,
219
  "num_tokens": 931398.0,
220
  "step": 200
 
 
 
 
 
 
 
 
 
 
221
  }
222
  ],
223
  "logging_steps": 10,
@@ -237,7 +247,7 @@
237
  "attributes": {}
238
  }
239
  },
240
- "total_flos": 4465038475966464.0,
241
  "train_batch_size": 4,
242
  "trial_name": null,
243
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.0448,
6
  "eval_steps": 500,
7
+ "global_step": 210,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
218
  "mean_token_accuracy": 0.7378732696175575,
219
  "num_tokens": 931398.0,
220
  "step": 200
221
+ },
222
+ {
223
+ "entropy": 1.108424139022827,
224
+ "epoch": 0.0448,
225
+ "grad_norm": 0.23238568007946014,
226
+ "learning_rate": 6.966666666666668e-05,
227
+ "loss": 1.1784509658813476,
228
+ "mean_token_accuracy": 0.7302148967981339,
229
+ "num_tokens": 983649.0,
230
+ "step": 210
231
  }
232
  ],
233
  "logging_steps": 10,
 
247
  "attributes": {}
248
  }
249
  },
250
+ "total_flos": 4708403989453824.0,
251
  "train_batch_size": 4,
252
  "trial_name": null,
253
  "trial_params": null