| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 100, |
| "global_step": 135, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.22535211267605634, |
| "grad_norm": 2.90373158454895, |
| "learning_rate": 6.4285714285714295e-06, |
| "loss": 0.8135, |
| "mean_token_accuracy": 0.3537426620721817, |
| "num_tokens": 81920.0, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.4507042253521127, |
| "grad_norm": 2.4647960662841797, |
| "learning_rate": 9.586776859504134e-06, |
| "loss": 0.5292, |
| "mean_token_accuracy": 0.4071795493364334, |
| "num_tokens": 163840.0, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.676056338028169, |
| "grad_norm": 2.641406297683716, |
| "learning_rate": 8.760330578512397e-06, |
| "loss": 0.5129, |
| "mean_token_accuracy": 0.41124021336436273, |
| "num_tokens": 245760.0, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.9014084507042254, |
| "grad_norm": 2.4985663890838623, |
| "learning_rate": 7.933884297520661e-06, |
| "loss": 0.5123, |
| "mean_token_accuracy": 0.40974804051220415, |
| "num_tokens": 327680.0, |
| "step": 40 |
| }, |
| { |
| "epoch": 1.1126760563380282, |
| "grad_norm": 2.1662192344665527, |
| "learning_rate": 7.107438016528926e-06, |
| "loss": 0.4303, |
| "mean_token_accuracy": 0.4234572724501292, |
| "num_tokens": 404480.0, |
| "step": 50 |
| }, |
| { |
| "epoch": 1.3380281690140845, |
| "grad_norm": 2.3111164569854736, |
| "learning_rate": 6.280991735537191e-06, |
| "loss": 0.4318, |
| "mean_token_accuracy": 0.42314089871942995, |
| "num_tokens": 486400.0, |
| "step": 60 |
| }, |
| { |
| "epoch": 1.563380281690141, |
| "grad_norm": 2.6036767959594727, |
| "learning_rate": 5.4545454545454545e-06, |
| "loss": 0.4583, |
| "mean_token_accuracy": 0.41168052703142166, |
| "num_tokens": 568320.0, |
| "step": 70 |
| }, |
| { |
| "epoch": 1.788732394366197, |
| "grad_norm": 2.100604772567749, |
| "learning_rate": 4.62809917355372e-06, |
| "loss": 0.4329, |
| "mean_token_accuracy": 0.4169642850756645, |
| "num_tokens": 650240.0, |
| "step": 80 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 4.51124382019043, |
| "learning_rate": 3.801652892561984e-06, |
| "loss": 0.4272, |
| "mean_token_accuracy": 0.4280365296204885, |
| "num_tokens": 727040.0, |
| "step": 90 |
| }, |
| { |
| "epoch": 2.2253521126760565, |
| "grad_norm": 2.14109468460083, |
| "learning_rate": 2.9752066115702483e-06, |
| "loss": 0.3818, |
| "step": 100 |
| }, |
| { |
| "epoch": 2.2253521126760565, |
| "eval_loss": 0.9899753332138062, |
| "eval_mean_token_accuracy": 0.7401255607604981, |
| "eval_num_tokens": 808960.0, |
| "eval_runtime": 2.7529, |
| "eval_samples_per_second": 28.697, |
| "eval_steps_per_second": 1.816, |
| "step": 100 |
| }, |
| { |
| "epoch": 2.4507042253521125, |
| "grad_norm": 2.369915723800659, |
| "learning_rate": 2.1487603305785124e-06, |
| "loss": 0.3967, |
| "mean_token_accuracy": 0.4355430521070957, |
| "num_tokens": 890880.0, |
| "step": 110 |
| }, |
| { |
| "epoch": 2.676056338028169, |
| "grad_norm": 2.0789310932159424, |
| "learning_rate": 1.322314049586777e-06, |
| "loss": 0.3508, |
| "mean_token_accuracy": 0.44587817750871184, |
| "num_tokens": 972800.0, |
| "step": 120 |
| }, |
| { |
| "epoch": 2.9014084507042255, |
| "grad_norm": 2.4127914905548096, |
| "learning_rate": 4.958677685950413e-07, |
| "loss": 0.3784, |
| "mean_token_accuracy": 0.42572162225842475, |
| "num_tokens": 1054720.0, |
| "step": 130 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 135, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2882137132892160.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|