mgh6 commited on
Commit
a3d8d21
·
verified ·
1 Parent(s): 1594257

Training in progress, step 1700, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a548e85abc2ff3d129d10fed437dbdead9272cb44a66886e9e3ff230ed6e2d7
3
  size 136000488
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46b457501bc5907333638bb024ffe1e40962714ff7e5b7cc64dcb734c2f8e93c
3
  size 136000488
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2941bb931f849e81894f9fc2dac0f376bacd497144ebd1a1e5f60fee1892d7bf
3
  size 268176506
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8433d799db5d101b5d86ec86367f08b635f2cf5bc497050104e6d0318329b7e
3
  size 268176506
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:db97249fb2abce3310b9eff58ce2e1650afeaabc2a34f23519369128cb8bf65c
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2d05ca20ab7df9c06232e563b45438033eb0b1bfaefbcde420140fe91a9405d
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:41a51497602956bd9b9282bb348f82132dc02721f9d74081c5a4aadcaf218b9c
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f89a4c215894c27b954c3b19733fb77ef75ea172d2f0eef43b04b420be270617
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e1b76dc6c723cbb4062a49610014821f5a119c1ef21bb357c8bd77b2feb3f887
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1752b106c10c85178fea4c4f1e2a7bbed40d31590dc95b28b5276aa083b488cc
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b864370896443027245508af4848f87950729c2cd9ac0f4a6ca8c789d252276a
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:290023e4bbeb7e69da0adc1960fd5d5a4028e5a5b2d994d75ae6cf782fc2ca88
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c5aeeb3fdfc3b5c117df4adc83127f79aac59e6de065d632826c06a43c29056b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d3deb8e414140d506e7e9c8561a07889fae8315feae0f8113e39eb01ec533e1
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.8591811060905457,
3
- "best_model_checkpoint": "mgh6/TCS_MLM/checkpoint-1600",
4
- "epoch": 2.14190093708166,
5
  "eval_steps": 100,
6
- "global_step": 1600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -247,6 +247,21 @@
247
  "eval_samples_per_second": 889.671,
248
  "eval_steps_per_second": 3.595,
249
  "step": 1600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
250
  }
251
  ],
252
  "logging_steps": 100,
@@ -275,7 +290,7 @@
275
  "attributes": {}
276
  }
277
  },
278
- "total_flos": 5.8099633225728e+16,
279
  "train_batch_size": 64,
280
  "trial_name": null,
281
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.8537179231643677,
3
+ "best_model_checkpoint": "mgh6/TCS_MLM/checkpoint-1700",
4
+ "epoch": 2.2757697456492636,
5
  "eval_steps": 100,
6
+ "global_step": 1700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
247
  "eval_samples_per_second": 889.671,
248
  "eval_steps_per_second": 3.595,
249
  "step": 1600
250
+ },
251
+ {
252
+ "epoch": 2.2757697456492636,
253
+ "grad_norm": 0.1839440017938614,
254
+ "learning_rate": 0.0007724230254350736,
255
+ "loss": 0.8628,
256
+ "step": 1700
257
+ },
258
+ {
259
+ "epoch": 2.2757697456492636,
260
+ "eval_loss": 0.8537179231643677,
261
+ "eval_runtime": 6.3793,
262
+ "eval_samples_per_second": 892.265,
263
+ "eval_steps_per_second": 3.605,
264
+ "step": 1700
265
  }
266
  ],
267
  "logging_steps": 100,
 
290
  "attributes": {}
291
  }
292
  },
293
+ "total_flos": 6.1730860302336e+16,
294
  "train_batch_size": 64,
295
  "trial_name": null,
296
  "trial_params": null