moos124 committed on
Commit
772d572
·
verified ·
1 Parent(s): e352cb9

Training in progress, step 2160, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9af2591401db89c8bcf5627116d9e4981f723c431cde3b541c032e1eddd70fc4
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66710723d01236d9c96d2f18926db15f1f34385c49684ee70cd5c1b5e2bccce0
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9429dc5bc2ae496071061cab33503c51766e22eb97a42ba4de97e945fe500e9d
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b830f4bd5f10ddaa78c2ccd5bae960791c4296cf33c74d863b325c31d2ac5eec
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a82a2faeef8f44f548e688d0bfad38b4dc444eed1d9b5de630a36c31ad2022b8
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1dd591345e5a7f523e887729293c5e6719efb2817a7a7d69905e2063008bb6d9
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8193bdd060acd9eea78ef0a8acdfe4565aa2a7a858785f436b7cec03021cb345
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1952f51067506f4daa1c4e5ae09ad820e7ac7d72f46bcd41a257149deb5cd71
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.45866666666666667,
6
  "eval_steps": 500,
7
- "global_step": 2150,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -2168,6 +2168,16 @@
2168
  "mean_token_accuracy": 0.7358616881072522,
2169
  "num_tokens": 9972409.0,
2170
  "step": 2150
 
 
 
 
 
 
 
 
 
 
2171
  }
2172
  ],
2173
  "logging_steps": 10,
@@ -2187,7 +2197,7 @@
2187
  "attributes": {}
2188
  }
2189
  },
2190
- "total_flos": 4.731821330198016e+16,
2191
  "train_batch_size": 4,
2192
  "trial_name": null,
2193
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.4608,
6
  "eval_steps": 500,
7
+ "global_step": 2160,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
2168
  "mean_token_accuracy": 0.7358616881072522,
2169
  "num_tokens": 9972409.0,
2170
  "step": 2150
2171
+ },
2172
+ {
2173
+ "entropy": 0.9736781157553196,
2174
+ "epoch": 0.4608,
2175
+ "grad_norm": 0.32094913721084595,
2176
+ "learning_rate": 9.000064560681625e-05,
2177
+ "loss": 1.082399559020996,
2178
+ "mean_token_accuracy": 0.7566891044378281,
2179
+ "num_tokens": 10020194.0,
2180
+ "step": 2160
2181
  }
2182
  ],
2183
  "logging_steps": 10,
 
2197
  "attributes": {}
2198
  }
2199
  },
2200
+ "total_flos": 4.754142827711693e+16,
2201
  "train_batch_size": 4,
2202
  "trial_name": null,
2203
  "trial_params": null