moos124 commited on
Commit
89cbd6d
·
verified ·
1 Parent(s): bd29cee

Training in progress, step 2200, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d50b0db003b0e4849422a680436a6f85d8c430030fdef2b06905fbbc25300782
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:438f1b85e775f9e14db91d33aef57bceed98b4eb02d4610a459abebeee5b8135
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fdbd8173961b979bf5d5eeae90b6ded931991d909be7d028befb2d689150315d
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64b5ee38f74bd6a0b39ad70ed414f5fcf3911df9add14197c37d1fd9aeaf0650
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed7324b862753bcd4f637366d09b8a7b2affe05ad03ec9d02039705a73af90f1
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:675feaa56f6e6458f6d12a8760ab03628bfa72d997dee2fef99ec1c6e9455b67
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:762167d444d04ce74f8ab2b70e3ee8b57183dbd1bee3039ce9785a72ad322e71
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64c4d71e5cf8e2a30f4aa098b6b5f83acde56be61c2ac9ddff5603346452afe3
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.4672,
6
  "eval_steps": 500,
7
- "global_step": 2190,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -2208,6 +2208,16 @@
2208
  "mean_token_accuracy": 0.7760828763246537,
2209
  "num_tokens": 10159849.0,
2210
  "step": 2190
 
 
 
 
 
 
 
 
 
 
2211
  }
2212
  ],
2213
  "logging_steps": 10,
@@ -2227,7 +2237,7 @@
2227
  "attributes": {}
2228
  }
2229
  },
2230
- "total_flos": 4.818689435888026e+16,
2231
  "train_batch_size": 4,
2232
  "trial_name": null,
2233
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.4693333333333333,
6
  "eval_steps": 500,
7
+ "global_step": 2200,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
2208
  "mean_token_accuracy": 0.7760828763246537,
2209
  "num_tokens": 10159849.0,
2210
  "step": 2190
2211
+ },
2212
+ {
2213
+ "entropy": 0.9931552834808827,
2214
+ "epoch": 0.4693333333333333,
2215
+ "grad_norm": 0.2644927501678467,
2216
+ "learning_rate": 8.95814652801258e-05,
2217
+ "loss": 1.1035637855529785,
2218
+ "mean_token_accuracy": 0.755839766561985,
2219
+ "num_tokens": 10210375.0,
2220
+ "step": 2200
2221
  }
2222
  ],
2223
  "logging_steps": 10,
 
2237
  "attributes": {}
2238
  }
2239
  },
2240
+ "total_flos": 4.841430886481818e+16,
2241
  "train_batch_size": 4,
2242
  "trial_name": null,
2243
  "trial_params": null