moos124 committed on
Commit
0638b78
·
verified ·
1 Parent(s): 670460a

Training in progress, step 230, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c639324ba0ecf5dcfa915db64acfef7123e45c70797c3826c38e9680bb24e0b
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da80b9b1c96897deacc2e2b81b1f3d751e5de0225d0cac792c7d92a720b0c97b
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ff2ed7971917eacc99dd176090dcfd31b94ae872087744276374cb5eb00ea11e
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f0fda44f5a53c2192d9b9426d2fc634d270b65ff0d6d12472042d964666d5ae
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a0a028ff291af398ddc3bed5e6a2643d2397c865af7ea7cfe8519879f952954e
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e40a5bd495036b6b31bffb6f87b8dae2537e6d4f85b155622d2349b4ac877c47
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ddf5ecaae5347236dfda0d9a37eb19f2415a062afb0501fc7fdbd80eabb5dd09
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a7715d4c5dbd313fc01167058631898636ca913b478cd95aae20a9964ae2bd8
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.046933333333333334,
6
  "eval_steps": 500,
7
- "global_step": 220,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -238,6 +238,16 @@
238
  "mean_token_accuracy": 0.7774429574608803,
239
  "num_tokens": 1024372.0,
240
  "step": 220
 
 
 
 
 
 
 
 
 
 
241
  }
242
  ],
243
  "logging_steps": 10,
@@ -257,7 +267,7 @@
257
  "attributes": {}
258
  }
259
  },
260
- "total_flos": 4893381605815296.0,
261
  "train_batch_size": 4,
262
  "trial_name": null,
263
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.04906666666666667,
6
  "eval_steps": 500,
7
+ "global_step": 230,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
238
  "mean_token_accuracy": 0.7774429574608803,
239
  "num_tokens": 1024372.0,
240
  "step": 220
241
+ },
242
+ {
243
+ "entropy": 1.019825778901577,
244
+ "epoch": 0.04906666666666667,
245
+ "grad_norm": 0.2753085494041443,
246
+ "learning_rate": 7.633333333333334e-05,
247
+ "loss": 1.1463540077209473,
248
+ "mean_token_accuracy": 0.7483755856752395,
249
+ "num_tokens": 1073667.0,
250
+ "step": 230
251
  }
252
  ],
253
  "logging_steps": 10,
 
267
  "attributes": {}
268
  }
269
  },
270
+ "total_flos": 5098446677446656.0,
271
  "train_batch_size": 4,
272
  "trial_name": null,
273
  "trial_params": null