moos124 committed on
Commit
6b88664
·
verified ·
1 Parent(s): 264c428

Training in progress, step 310, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:af425da52570a98f34d1a384e039e2e483ed4a5b53a2df4e79aaf16ceaac5d1a
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e194f8a417bfeb0d7fe313f3e3ce3e9281c81355a752e272d4a9f86904ff4e3
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:277e32b2650dca6189545354afd8579d4fd2e96eca617720f6f93ba03fb51750
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f482ae30d2e8525a1e39ed20a11830ddc2d28d3bf9119c743442c9a38dcfabcc
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bf93f0953cf38059935f8a861883443194c60319e053dc70b90e0b5d5053d6a0
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d3197e14fd71b872050bfcdf47b8047e40bb95283d15d709ca7cdc3d8dc2a56
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5e5c970667d2882cfe99c3bf0a16d854cd2e60422a1ab22dee6d08a0bcb0952b
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:afd6965293e39960c24ab2eb262951e9e03f24a87a65c4526c3d160cd3f52c1f
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.064,
6
  "eval_steps": 500,
7
- "global_step": 300,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -318,6 +318,16 @@
318
  "mean_token_accuracy": 0.7730094477534294,
319
  "num_tokens": 1375777.0,
320
  "step": 300
 
 
 
 
 
 
 
 
 
 
321
  }
322
  ],
323
  "logging_steps": 10,
@@ -337,7 +347,7 @@
337
  "attributes": {}
338
  }
339
  },
340
- "total_flos": 6542364323549184.0,
341
  "train_batch_size": 4,
342
  "trial_name": null,
343
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.06613333333333334,
6
  "eval_steps": 500,
7
+ "global_step": 310,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
318
  "mean_token_accuracy": 0.7730094477534294,
319
  "num_tokens": 1375777.0,
320
  "step": 300
321
+ },
322
+ {
323
+ "entropy": 1.0266294315457345,
324
+ "epoch": 0.06613333333333334,
325
+ "grad_norm": 0.23917347192764282,
326
+ "learning_rate": 9.999975737505649e-05,
327
+ "loss": 1.1334312438964844,
328
+ "mean_token_accuracy": 0.7540638357400894,
329
+ "num_tokens": 1421027.0,
330
+ "step": 310
331
  }
332
  ],
333
  "logging_steps": 10,
 
347
  "attributes": {}
348
  }
349
  },
350
+ "total_flos": 6755386875217920.0,
351
  "train_batch_size": 4,
352
  "trial_name": null,
353
  "trial_params": null