moos124 committed on
Commit
c2c4a34
·
verified ·
1 Parent(s): ba20d46

Training in progress, step 390, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ad536876db10f3be2615947ae36289b2dbe30b514d60d20ea96412b6dd985b33
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13c2c57fbae4818217e830a6b2a5199bacd87b34282b6045a212a29faecc5f3f
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ff5598f896ac9528751710208d2a0d0e4a213fdf99ac27490e5d357f470c3d3f
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:644a9e82f1d68f466b92ea8ceecb49c508519c4087995fbbf1b7a3f907025974
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a2c59dadccd9b6d5640db255df66c9397363ec90cea60aab1c876bf4272241a3
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e266e36d2c1315132304fa44b1ee997d4185c2595e485b892db661c97dec1be2
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ccc47a82d6b62116d16571a1dde9113e9078d976e8eda42384330abeea15865e
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09cfabe893797e3a051db538a6384259ef54abbc8f1944b05216eb8eccccb44a
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.08106666666666666,
6
  "eval_steps": 500,
7
- "global_step": 380,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -398,6 +398,16 @@
398
  "mean_token_accuracy": 0.7529133662581444,
399
  "num_tokens": 1762001.0,
400
  "step": 380
 
 
 
 
 
 
 
 
 
 
401
  }
402
  ],
403
  "logging_steps": 10,
@@ -417,7 +427,7 @@
417
  "attributes": {}
418
  }
419
  },
420
- "total_flos": 8390211947268096.0,
421
  "train_batch_size": 4,
422
  "trial_name": null,
423
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.0832,
6
  "eval_steps": 500,
7
+ "global_step": 390,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
398
  "mean_token_accuracy": 0.7529133662581444,
399
  "num_tokens": 1762001.0,
400
  "step": 380
401
+ },
402
+ {
403
+ "entropy": 0.95768673568964,
404
+ "epoch": 0.0832,
405
+ "grad_norm": 0.2921101748943329,
406
+ "learning_rate": 9.997627553403699e-05,
407
+ "loss": 1.0986035346984864,
408
+ "mean_token_accuracy": 0.76050655990839,
409
+ "num_tokens": 1805002.0,
410
+ "step": 390
411
  }
412
  ],
413
  "logging_steps": 10,
 
427
  "attributes": {}
428
  }
429
  },
430
+ "total_flos": 8592167924207616.0,
431
  "train_batch_size": 4,
432
  "trial_name": null,
433
  "trial_params": null