{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.892857142857143,
  "eval_steps": 500,
  "global_step": 27,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.10714285714285714,
      "grad_norm": 6.38134225738387,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 0.8639,
      "step": 1
    },
    {
      "epoch": 0.21428571428571427,
      "grad_norm": 6.427662887763939,
      "learning_rate": 6.666666666666667e-06,
      "loss": 0.8681,
      "step": 2
    },
    {
      "epoch": 0.32142857142857145,
      "grad_norm": 5.789679210219034,
      "learning_rate": 1e-05,
      "loss": 0.8274,
      "step": 3
    },
    {
      "epoch": 0.42857142857142855,
      "grad_norm": 2.715427948155473,
      "learning_rate": 9.957224306869053e-06,
      "loss": 0.7307,
      "step": 4
    },
    {
      "epoch": 0.5357142857142857,
      "grad_norm": 4.590973911185041,
      "learning_rate": 9.829629131445342e-06,
      "loss": 0.7966,
      "step": 5
    },
    {
      "epoch": 0.6428571428571429,
      "grad_norm": 4.432507923465467,
      "learning_rate": 9.619397662556434e-06,
      "loss": 0.7316,
      "step": 6
    },
    {
      "epoch": 0.75,
      "grad_norm": 5.306261563213631,
      "learning_rate": 9.330127018922195e-06,
      "loss": 0.7056,
      "step": 7
    },
    {
      "epoch": 0.8571428571428571,
      "grad_norm": 4.169018233306645,
      "learning_rate": 8.966766701456177e-06,
      "loss": 0.6871,
      "step": 8
    },
    {
      "epoch": 0.9642857142857143,
      "grad_norm": 2.604291092600148,
      "learning_rate": 8.535533905932739e-06,
      "loss": 0.6827,
      "step": 9
    },
    {
      "epoch": 1.0714285714285714,
      "grad_norm": 1.6640185227412452,
      "learning_rate": 8.043807145043604e-06,
      "loss": 0.6352,
      "step": 10
    },
    {
      "epoch": 1.1785714285714286,
      "grad_norm": 1.8223282776265066,
      "learning_rate": 7.500000000000001e-06,
      "loss": 0.6048,
      "step": 11
    },
    {
      "epoch": 1.2857142857142856,
      "grad_norm": 1.7690767247619352,
      "learning_rate": 6.913417161825449e-06,
      "loss": 0.6075,
      "step": 12
    },
    {
      "epoch": 1.3928571428571428,
      "grad_norm": 1.2297684666814992,
      "learning_rate": 6.294095225512604e-06,
      "loss": 0.5608,
      "step": 13
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.070630517465832,
      "learning_rate": 5.65263096110026e-06,
      "loss": 0.6357,
      "step": 14
    },
    {
      "epoch": 1.6071428571428572,
      "grad_norm": 1.007789087281265,
      "learning_rate": 5e-06,
      "loss": 0.5734,
      "step": 15
    },
    {
      "epoch": 1.7142857142857144,
      "grad_norm": 0.9958655806019134,
      "learning_rate": 4.347369038899744e-06,
      "loss": 0.5387,
      "step": 16
    },
    {
      "epoch": 1.8214285714285714,
      "grad_norm": 0.9382716443564832,
      "learning_rate": 3.705904774487396e-06,
      "loss": 0.5875,
      "step": 17
    },
    {
      "epoch": 1.9285714285714286,
      "grad_norm": 0.6927647016669346,
      "learning_rate": 3.0865828381745515e-06,
      "loss": 0.5431,
      "step": 18
    },
    {
      "epoch": 2.0357142857142856,
      "grad_norm": 0.7217038911956491,
      "learning_rate": 2.5000000000000015e-06,
      "loss": 0.5459,
      "step": 19
    },
    {
      "epoch": 2.142857142857143,
      "grad_norm": 0.6908510203254233,
      "learning_rate": 1.956192854956397e-06,
      "loss": 0.5234,
      "step": 20
    },
    {
      "epoch": 2.25,
      "grad_norm": 0.7471539664195086,
      "learning_rate": 1.4644660940672628e-06,
      "loss": 0.5342,
      "step": 21
    },
    {
      "epoch": 2.357142857142857,
      "grad_norm": 0.6734174560619429,
      "learning_rate": 1.0332332985438248e-06,
      "loss": 0.5331,
      "step": 22
    },
    {
      "epoch": 2.4642857142857144,
      "grad_norm": 0.6359505084389541,
      "learning_rate": 6.698729810778065e-07,
      "loss": 0.5339,
      "step": 23
    },
    {
      "epoch": 2.571428571428571,
      "grad_norm": 0.48694859883342007,
      "learning_rate": 3.8060233744356634e-07,
      "loss": 0.4833,
      "step": 24
    },
    {
      "epoch": 2.678571428571429,
      "grad_norm": 0.5111742609060286,
      "learning_rate": 1.7037086855465902e-07,
      "loss": 0.5057,
      "step": 25
    },
    {
      "epoch": 2.7857142857142856,
      "grad_norm": 0.48315315504784084,
      "learning_rate": 4.2775693130948094e-08,
      "loss": 0.5156,
      "step": 26
    },
    {
      "epoch": 2.892857142857143,
      "grad_norm": 0.47858711201269805,
      "learning_rate": 0.0,
      "loss": 0.5448,
      "step": 27
    },
    {
      "epoch": 2.892857142857143,
      "step": 27,
      "total_flos": 2.895182914859827e+16,
      "train_loss": 0.625937306218677,
      "train_runtime": 2995.925,
      "train_samples_per_second": 0.888,
      "train_steps_per_second": 0.009
    }
  ],
  "logging_steps": 1,
  "max_steps": 27,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 10,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.895182914859827e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}