moos124 committed on
Commit
f99e315
·
verified ·
1 Parent(s): f763879

Training in progress, step 320, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0e194f8a417bfeb0d7fe313f3e3ce3e9281c81355a752e272d4a9f86904ff4e3
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3abf244d469473fa52891e3f31f2d5d1c5ff073f4d2b5bb817dc5d30dfa82332
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f482ae30d2e8525a1e39ed20a11830ddc2d28d3bf9119c743442c9a38dcfabcc
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfa5d8a7157df0a5eacdcc6bd5ccb31f0477ca14700a2671c6f678d1f94dac93
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d3197e14fd71b872050bfcdf47b8047e40bb95283d15d709ca7cdc3d8dc2a56
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0bec828ee85f5f35df8d0034d7d501451bd46f134d0d13918434bfd847feba5
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:afd6965293e39960c24ab2eb262951e9e03f24a87a65c4526c3d160cd3f52c1f
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c727c5faaaf0e8dbfe94f897b9183e692e39806ea1639c82f3d79733c504fc7
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.06613333333333334,
6
  "eval_steps": 500,
7
- "global_step": 310,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -328,6 +328,16 @@
328
  "mean_token_accuracy": 0.7540638357400894,
329
  "num_tokens": 1421027.0,
330
  "step": 310
 
 
 
 
 
 
 
 
 
 
331
  }
332
  ],
333
  "logging_steps": 10,
@@ -347,7 +357,7 @@
347
  "attributes": {}
348
  }
349
  },
350
- "total_flos": 6755386875217920.0,
351
  "train_batch_size": 4,
352
  "trial_name": null,
353
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.06826666666666667,
6
  "eval_steps": 500,
7
+ "global_step": 320,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
328
  "mean_token_accuracy": 0.7540638357400894,
329
  "num_tokens": 1421027.0,
330
  "step": 310
331
+ },
332
+ {
333
+ "entropy": 1.0486552365124227,
334
+ "epoch": 0.06826666666666667,
335
+ "grad_norm": 0.2840607762336731,
336
+ "learning_rate": 9.999891867457112e-05,
337
+ "loss": 1.1424532890319825,
338
+ "mean_token_accuracy": 0.7420963421463966,
339
+ "num_tokens": 1472539.0,
340
+ "step": 320
341
  }
342
  ],
343
  "logging_steps": 10,
 
357
  "attributes": {}
358
  }
359
  },
360
+ "total_flos": 6994408668005376.0,
361
  "train_batch_size": 4,
362
  "trial_name": null,
363
  "trial_params": null