RantiRepo committed on
Commit
234bf0f
·
verified ·
1 Parent(s): b02553c

Upload trainer_state.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. trainer_state.json +45 -3
trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 1.3502824858757063,
6
  "eval_steps": 10,
7
- "global_step": 120,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -260,6 +260,48 @@
260
  "eval_samples_per_second": 4.362,
261
  "eval_steps_per_second": 1.091,
262
  "step": 120
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
263
  }
264
  ],
265
  "logging_steps": 10,
@@ -279,7 +321,7 @@
279
  "attributes": {}
280
  }
281
  },
282
- "total_flos": 1.443790492223017e+17,
283
  "train_batch_size": 4,
284
  "trial_name": null,
285
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 1.576271186440678,
6
  "eval_steps": 10,
7
+ "global_step": 140,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
260
  "eval_samples_per_second": 4.362,
261
  "eval_steps_per_second": 1.091,
262
  "step": 120
263
+ },
264
+ {
265
+ "entropy": 6.290887945890427,
266
+ "epoch": 1.463276836158192,
267
+ "grad_norm": 0.7265625,
268
+ "learning_rate": 5.697674418604652e-05,
269
+ "loss": 6.218046951293945,
270
+ "mean_token_accuracy": 0.24080509012565016,
271
+ "num_tokens": 2183142.0,
272
+ "step": 130
273
+ },
274
+ {
275
+ "epoch": 1.463276836158192,
276
+ "eval_entropy": 6.626457552115123,
277
+ "eval_loss": 6.388693332672119,
278
+ "eval_mean_token_accuracy": 0.20111992427458367,
279
+ "eval_num_tokens": 2183142.0,
280
+ "eval_runtime": 43.8362,
281
+ "eval_samples_per_second": 4.38,
282
+ "eval_steps_per_second": 1.095,
283
+ "step": 130
284
+ },
285
+ {
286
+ "entropy": 6.49563906788826,
287
+ "epoch": 1.576271186440678,
288
+ "grad_norm": 0.6015625,
289
+ "learning_rate": 4.5348837209302326e-05,
290
+ "loss": 6.376762771606446,
291
+ "mean_token_accuracy": 0.21492539951577783,
292
+ "num_tokens": 2364034.0,
293
+ "step": 140
294
+ },
295
+ {
296
+ "epoch": 1.576271186440678,
297
+ "eval_entropy": 6.611844847599666,
298
+ "eval_loss": 6.380344390869141,
299
+ "eval_mean_token_accuracy": 0.20116183906793594,
300
+ "eval_num_tokens": 2364034.0,
301
+ "eval_runtime": 44.3142,
302
+ "eval_samples_per_second": 4.333,
303
+ "eval_steps_per_second": 1.083,
304
+ "step": 140
305
  }
306
  ],
307
  "logging_steps": 10,
 
321
  "attributes": {}
322
  }
323
  },
324
+ "total_flos": 1.6869770537105203e+17,
325
  "train_batch_size": 4,
326
  "trial_name": null,
327
  "trial_params": null