RantiRepo commited on
Commit
eef3cba
·
verified ·
1 Parent(s): 7a91c99

Upload trainer_state.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. trainer_state.json +45 -3
trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 1.576271186440678,
6
  "eval_steps": 10,
7
- "global_step": 140,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -302,6 +302,48 @@
302
  "eval_samples_per_second": 4.333,
303
  "eval_steps_per_second": 1.083,
304
  "step": 140
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
305
  }
306
  ],
307
  "logging_steps": 10,
@@ -321,7 +363,7 @@
321
  "attributes": {}
322
  }
323
  },
324
- "total_flos": 1.6869770537105203e+17,
325
  "train_batch_size": 4,
326
  "trial_name": null,
327
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 1.8022598870056497,
6
  "eval_steps": 10,
7
+ "global_step": 160,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
302
  "eval_samples_per_second": 4.333,
303
  "eval_steps_per_second": 1.083,
304
  "step": 140
305
+ },
306
+ {
307
+ "entropy": 6.287807840108871,
308
+ "epoch": 1.689265536723164,
309
+ "grad_norm": 0.26171875,
310
+ "learning_rate": 3.372093023255814e-05,
311
+ "loss": 6.183982086181641,
312
+ "mean_token_accuracy": 0.2382544383406639,
313
+ "num_tokens": 2527248.0,
314
+ "step": 150
315
+ },
316
+ {
317
+ "epoch": 1.689265536723164,
318
+ "eval_entropy": 6.602698942025502,
319
+ "eval_loss": 6.374426364898682,
320
+ "eval_mean_token_accuracy": 0.20121282618492842,
321
+ "eval_num_tokens": 2527248.0,
322
+ "eval_runtime": 43.9903,
323
+ "eval_samples_per_second": 4.365,
324
+ "eval_steps_per_second": 1.091,
325
+ "step": 150
326
+ },
327
+ {
328
+ "entropy": 6.482830649614334,
329
+ "epoch": 1.8022598870056497,
330
+ "grad_norm": 0.37890625,
331
+ "learning_rate": 2.2093023255813955e-05,
332
+ "loss": 6.326276779174805,
333
+ "mean_token_accuracy": 0.2152696281671524,
334
+ "num_tokens": 2704860.0,
335
+ "step": 160
336
+ },
337
+ {
338
+ "epoch": 1.8022598870056497,
339
+ "eval_entropy": 6.604644636313121,
340
+ "eval_loss": 6.371755123138428,
341
+ "eval_mean_token_accuracy": 0.20118677647163472,
342
+ "eval_num_tokens": 2704860.0,
343
+ "eval_runtime": 44.2676,
344
+ "eval_samples_per_second": 4.337,
345
+ "eval_steps_per_second": 1.084,
346
+ "step": 160
347
  }
348
  ],
349
  "logging_steps": 10,
 
363
  "attributes": {}
364
  }
365
  },
366
+ "total_flos": 1.931435674847908e+17,
367
  "train_batch_size": 4,
368
  "trial_name": null,
369
  "trial_params": null