moos124 committed on
Commit
6b387f9
·
verified ·
1 Parent(s): ffeb5fe

Training in progress, step 400, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:13c2c57fbae4818217e830a6b2a5199bacd87b34282b6045a212a29faecc5f3f
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b773ce2de6d7cc50ab369fe688147a42e0e791a508e666bce07706c4b35f10c
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:644a9e82f1d68f466b92ea8ceecb49c508519c4087995fbbf1b7a3f907025974
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db050a8e740f735f809fc576df4e37f292ec6a5314c77554aae6e4cf4e25267e
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e266e36d2c1315132304fa44b1ee997d4185c2595e485b892db661c97dec1be2
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b560efadabac5e38946943c60a5a9026b112488df1175e8c4d33d1ede001047c
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:09cfabe893797e3a051db538a6384259ef54abbc8f1944b05216eb8eccccb44a
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca62f923b6bedfedee2b94878f48259e4a79dad81665ba7ca91e99e993ab4fc9
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.0832,
6
  "eval_steps": 500,
7
- "global_step": 390,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -408,6 +408,16 @@
408
  "mean_token_accuracy": 0.76050655990839,
409
  "num_tokens": 1805002.0,
410
  "step": 390
 
 
 
 
 
 
 
 
 
 
411
  }
412
  ],
413
  "logging_steps": 10,
@@ -427,7 +437,7 @@
427
  "attributes": {}
428
  }
429
  },
430
- "total_flos": 8592167924207616.0,
431
  "train_batch_size": 4,
432
  "trial_name": null,
433
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.08533333333333333,
6
  "eval_steps": 500,
7
+ "global_step": 400,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
408
  "mean_token_accuracy": 0.76050655990839,
409
  "num_tokens": 1805002.0,
410
  "step": 390
411
+ },
412
+ {
413
+ "entropy": 0.8763241834938527,
414
+ "epoch": 0.08533333333333333,
415
+ "grad_norm": 0.24879010021686554,
416
+ "learning_rate": 9.997064523088384e-05,
417
+ "loss": 0.9313676834106446,
418
+ "mean_token_accuracy": 0.7820899412035942,
419
+ "num_tokens": 1844640.0,
420
+ "step": 400
421
  }
422
  ],
423
  "logging_steps": 10,
 
437
  "attributes": {}
438
  }
439
  },
440
+ "total_flos": 8772071858506752.0,
441
  "train_batch_size": 4,
442
  "trial_name": null,
443
  "trial_params": null