moos124 committed on
Commit
29411d7
·
verified ·
1 Parent(s): 416bfc1

Training in progress, step 240, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:da80b9b1c96897deacc2e2b81b1f3d751e5de0225d0cac792c7d92a720b0c97b
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:efa549cc9e46f954ceaac48903df8e39114e0834855c6af701a2aeda68a30a74
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4f0fda44f5a53c2192d9b9426d2fc634d270b65ff0d6d12472042d964666d5ae
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af8bdf477675a0f24a1f9e4f449f1cb4dedfd9fbaaf50501f55743ef4db7aa62
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e40a5bd495036b6b31bffb6f87b8dae2537e6d4f85b155622d2349b4ac877c47
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dcbdf5759ee3cb46161fbfb21d4e2c88fd01680ca79c671485101a9fe0d5d622
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a7715d4c5dbd313fc01167058631898636ca913b478cd95aae20a9964ae2bd8
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:125a8ede8ffe66310e452b3d6fbb0e3a0a5fd4969d8f52f1acb13713cc04d4b8
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.04906666666666667,
6
  "eval_steps": 500,
7
- "global_step": 230,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -248,6 +248,16 @@
248
  "mean_token_accuracy": 0.7483755856752395,
249
  "num_tokens": 1073667.0,
250
  "step": 230
 
 
 
 
 
 
 
 
 
 
251
  }
252
  ],
253
  "logging_steps": 10,
@@ -267,7 +277,7 @@
267
  "attributes": {}
268
  }
269
  },
270
- "total_flos": 5098446677446656.0,
271
  "train_batch_size": 4,
272
  "trial_name": null,
273
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.0512,
6
  "eval_steps": 500,
7
+ "global_step": 240,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
248
  "mean_token_accuracy": 0.7483755856752395,
249
  "num_tokens": 1073667.0,
250
  "step": 230
251
+ },
252
+ {
253
+ "entropy": 1.042508740723133,
254
+ "epoch": 0.0512,
255
+ "grad_norm": 0.32724323868751526,
256
+ "learning_rate": 7.966666666666666e-05,
257
+ "loss": 1.098323440551758,
258
+ "mean_token_accuracy": 0.7490729346871376,
259
+ "num_tokens": 1117523.0,
260
+ "step": 240
261
  }
262
  ],
263
  "logging_steps": 10,
 
277
  "attributes": {}
278
  }
279
  },
280
+ "total_flos": 5321814854350848.0,
281
  "train_batch_size": 4,
282
  "trial_name": null,
283
  "trial_params": null