moos124 committed on
Commit
ffebacc
·
verified ·
1 Parent(s): 7abe979

Training in progress, step 1250, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee78fbe3e1e0371fbd7e1d99cd209484a0864e5963e372d87bd75f8e957f8b0d
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5616f477fd639cdf29ad5c7d118b3e915d2b27f07c4d208cd0ace5cd3bf5bc01
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:06beec4deae6ef65f2b37fdb301344580a5bc16b9931d6b1472ea6a07666ab85
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d948c44a079a96a9a9cdaeebea3b3583d465b51a14ea5d12d7942da62b9778c
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:986528bb1512d6f9428bb2e2b1ae3f65e21747eeffc9b252ab651061033a2945
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:280a4a0a7e9600df169eef0cfd95b920be72c06632c470a2a48e52ec9a82f22f
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c2912d9609fc2e0e0f874bc4a1c28fd4eb9c7ad4cf18690751be68d4c9f99c05
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de4e95e7c05aac68a45971509c4b26f22299fdf966b12971000c0752608308db
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.26453333333333334,
6
  "eval_steps": 500,
7
- "global_step": 1240,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1258,6 +1258,16 @@
1258
  "mean_token_accuracy": 0.7489309534430504,
1259
  "num_tokens": 5776891.0,
1260
  "step": 1240
 
 
 
 
 
 
 
 
 
 
1261
  }
1262
  ],
1263
  "logging_steps": 10,
@@ -1277,7 +1287,7 @@
1277
  "attributes": {}
1278
  }
1279
  },
1280
- "total_flos": 2.73719740141824e+16,
1281
  "train_batch_size": 4,
1282
  "trial_name": null,
1283
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.26666666666666666,
6
  "eval_steps": 500,
7
+ "global_step": 1250,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1258
  "mean_token_accuracy": 0.7489309534430504,
1259
  "num_tokens": 5776891.0,
1260
  "step": 1240
1261
+ },
1262
+ {
1263
+ "entropy": 0.9446908816695213,
1264
+ "epoch": 0.26666666666666666,
1265
+ "grad_norm": 0.2996140718460083,
1266
+ "learning_rate": 9.732653526357612e-05,
1267
+ "loss": 1.0297443389892578,
1268
+ "mean_token_accuracy": 0.7673991709947586,
1269
+ "num_tokens": 5817721.0,
1270
+ "step": 1250
1271
  }
1272
  ],
1273
  "logging_steps": 10,
 
1287
  "attributes": {}
1288
  }
1289
  },
1290
+ "total_flos": 2.7567098994668544e+16,
1291
  "train_batch_size": 4,
1292
  "trial_name": null,
1293
  "trial_params": null