moos124 committed on
Commit
1599cb2
·
verified ·
1 Parent(s): f37be4e

Training in progress, step 330, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3abf244d469473fa52891e3f31f2d5d1c5ff073f4d2b5bb817dc5d30dfa82332
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:080e280080559f2791d55f3e9b530866ef137afce957188c20fa52869278185a
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bfa5d8a7157df0a5eacdcc6bd5ccb31f0477ca14700a2671c6f678d1f94dac93
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9817785732336e82575cd690ac86eee871b24cb8b95746ad574d47c0f1ba7156
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f0bec828ee85f5f35df8d0034d7d501451bd46f134d0d13918434bfd847feba5
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:872fe76418f47a93b19f7178149bfab3a4c567f9bf8f19fe875e06de31f34354
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c727c5faaaf0e8dbfe94f897b9183e692e39806ea1639c82f3d79733c504fc7
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f133a959bfddbf7e52765c340dc6f5b0914229e18f54079bc6ff8b34af89bb5f
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.06826666666666667,
6
  "eval_steps": 500,
7
- "global_step": 320,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -338,6 +338,16 @@
338
  "mean_token_accuracy": 0.7420963421463966,
339
  "num_tokens": 1472539.0,
340
  "step": 320
 
 
 
 
 
 
 
 
 
 
341
  }
342
  ],
343
  "logging_steps": 10,
@@ -357,7 +367,7 @@
357
  "attributes": {}
358
  }
359
  },
360
- "total_flos": 6994408668005376.0,
361
  "train_batch_size": 4,
362
  "trial_name": null,
363
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.0704,
6
  "eval_steps": 500,
7
+ "global_step": 330,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
338
  "mean_token_accuracy": 0.7420963421463966,
339
  "num_tokens": 1472539.0,
340
  "step": 320
341
+ },
342
+ {
343
+ "entropy": 0.9518789499998093,
344
+ "epoch": 0.0704,
345
+ "grad_norm": 0.25352534651756287,
346
+ "learning_rate": 9.999748091322068e-05,
347
+ "loss": 0.9646738052368165,
348
+ "mean_token_accuracy": 0.7610545977950096,
349
+ "num_tokens": 1518725.0,
350
+ "step": 330
351
  }
352
  ],
353
  "logging_steps": 10,
 
367
  "attributes": {}
368
  }
369
  },
370
+ "total_flos": 7212522925473792.0,
371
  "train_batch_size": 4,
372
  "trial_name": null,
373
  "trial_params": null