moos124 committed on
Commit
001eac4
·
verified ·
1 Parent(s): 31af915

Training in progress, step 220, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c7ecb0d2cf6d26d4ca4ae47efc508a6b702838b8414c8ed0c5cc392c84e549b6
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c639324ba0ecf5dcfa915db64acfef7123e45c70797c3826c38e9680bb24e0b
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:75bf45c2f566b9436b3ae5bc03816248d0ba8ce87423ac6de53b1bdd08728242
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff2ed7971917eacc99dd176090dcfd31b94ae872087744276374cb5eb00ea11e
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1796ebef6b03c675662f135c77afa81e7f302def65a3802f4671e29d01ec2b5e
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0a028ff291af398ddc3bed5e6a2643d2397c865af7ea7cfe8519879f952954e
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ea96f589aecf8c6843b29f66085f6fac32186559b39c4722727fe0078978a03e
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddf5ecaae5347236dfda0d9a37eb19f2415a062afb0501fc7fdbd80eabb5dd09
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.0448,
6
  "eval_steps": 500,
7
- "global_step": 210,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -228,6 +228,16 @@
228
  "mean_token_accuracy": 0.7302148967981339,
229
  "num_tokens": 983649.0,
230
  "step": 210
 
 
 
 
 
 
 
 
 
 
231
  }
232
  ],
233
  "logging_steps": 10,
@@ -247,7 +257,7 @@
247
  "attributes": {}
248
  }
249
  },
250
- "total_flos": 4708403989453824.0,
251
  "train_batch_size": 4,
252
  "trial_name": null,
253
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.046933333333333334,
6
  "eval_steps": 500,
7
+ "global_step": 220,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
228
  "mean_token_accuracy": 0.7302148967981339,
229
  "num_tokens": 983649.0,
230
  "step": 210
231
+ },
232
+ {
233
+ "entropy": 0.9056355074048043,
234
+ "epoch": 0.046933333333333334,
235
+ "grad_norm": 0.3005298674106598,
236
+ "learning_rate": 7.3e-05,
237
+ "loss": 0.977406120300293,
238
+ "mean_token_accuracy": 0.7774429574608803,
239
+ "num_tokens": 1024372.0,
240
+ "step": 220
241
  }
242
  ],
243
  "logging_steps": 10,
 
257
  "attributes": {}
258
  }
259
  },
260
+ "total_flos": 4893381605815296.0,
261
  "train_batch_size": 4,
262
  "trial_name": null,
263
  "trial_params": null