Upload trainer_state.json with huggingface_hub
Browse files- trainer_state.json +45 -3
trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch":
|
| 6 |
"eval_steps": 10,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -176,6 +176,48 @@
|
|
| 176 |
"eval_samples_per_second": 4.363,
|
| 177 |
"eval_steps_per_second": 1.091,
|
| 178 |
"step": 80
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
}
|
| 180 |
],
|
| 181 |
"logging_steps": 10,
|
|
@@ -195,7 +237,7 @@
|
|
| 195 |
"attributes": {}
|
| 196 |
}
|
| 197 |
},
|
| 198 |
-
"total_flos":
|
| 199 |
"train_batch_size": 4,
|
| 200 |
"trial_name": null,
|
| 201 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 1.1242937853107344,
|
| 6 |
"eval_steps": 10,
|
| 7 |
+
"global_step": 100,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 176 |
"eval_samples_per_second": 4.363,
|
| 177 |
"eval_steps_per_second": 1.091,
|
| 178 |
"step": 80
|
| 179 |
+
},
|
| 180 |
+
{
|
| 181 |
+
"entropy": 6.3696688413619995,
|
| 182 |
+
"epoch": 1.0112994350282485,
|
| 183 |
+
"grad_norm": 0.64453125,
|
| 184 |
+
"learning_rate": 0.00010348837209302327,
|
| 185 |
+
"loss": 6.255178451538086,
|
| 186 |
+
"mean_token_accuracy": 0.24344109077202647,
|
| 187 |
+
"num_tokens": 1511764.0,
|
| 188 |
+
"step": 90
|
| 189 |
+
},
|
| 190 |
+
{
|
| 191 |
+
"epoch": 1.0112994350282485,
|
| 192 |
+
"eval_entropy": 6.723124821980794,
|
| 193 |
+
"eval_loss": 6.457315921783447,
|
| 194 |
+
"eval_mean_token_accuracy": 0.20087979889164367,
|
| 195 |
+
"eval_num_tokens": 1511764.0,
|
| 196 |
+
"eval_runtime": 43.8306,
|
| 197 |
+
"eval_samples_per_second": 4.381,
|
| 198 |
+
"eval_steps_per_second": 1.095,
|
| 199 |
+
"step": 90
|
| 200 |
+
},
|
| 201 |
+
{
|
| 202 |
+
"entropy": 6.43809232711792,
|
| 203 |
+
"epoch": 1.1242937853107344,
|
| 204 |
+
"grad_norm": 0.55859375,
|
| 205 |
+
"learning_rate": 9.186046511627907e-05,
|
| 206 |
+
"loss": 6.314236068725586,
|
| 207 |
+
"mean_token_accuracy": 0.23132331417873503,
|
| 208 |
+
"num_tokens": 1682798.0,
|
| 209 |
+
"step": 100
|
| 210 |
+
},
|
| 211 |
+
{
|
| 212 |
+
"epoch": 1.1242937853107344,
|
| 213 |
+
"eval_entropy": 6.687405467033386,
|
| 214 |
+
"eval_loss": 6.43255090713501,
|
| 215 |
+
"eval_mean_token_accuracy": 0.20094856123129526,
|
| 216 |
+
"eval_num_tokens": 1682798.0,
|
| 217 |
+
"eval_runtime": 44.0723,
|
| 218 |
+
"eval_samples_per_second": 4.356,
|
| 219 |
+
"eval_steps_per_second": 1.089,
|
| 220 |
+
"step": 100
|
| 221 |
}
|
| 222 |
],
|
| 223 |
"logging_steps": 10,
|
|
|
|
| 237 |
"attributes": {}
|
| 238 |
}
|
| 239 |
},
|
| 240 |
+
"total_flos": 1.2023018700050227e+17,
|
| 241 |
"train_batch_size": 4,
|
| 242 |
"trial_name": null,
|
| 243 |
"trial_params": null
|