Invalid JSON: Unexpected token 'N', ..."ad_norm": NaN,
"... is not valid JSON
| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.857142857142857, | |
| "eval_steps": 500, | |
| "global_step": 15, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.19, | |
| "grad_norm": 6.906898498535156, | |
| "learning_rate": 0.0001, | |
| "loss": 5.0884, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "grad_norm": 7.150900840759277, | |
| "learning_rate": 0.0002, | |
| "loss": 4.6143, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "grad_norm": 5.832034111022949, | |
| "learning_rate": 0.00018461538461538463, | |
| "loss": 4.4776, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 5.760493755340576, | |
| "learning_rate": 0.00016923076923076923, | |
| "loss": 4.4845, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "grad_norm": null, | |
| "learning_rate": 0.00016923076923076923, | |
| "loss": 4.8731, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "grad_norm": 5.546196937561035, | |
| "learning_rate": 0.00015384615384615385, | |
| "loss": 4.0771, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "grad_norm": 10.478511810302734, | |
| "learning_rate": 0.00013846153846153847, | |
| "loss": 4.4343, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "grad_norm": 6.995128154754639, | |
| "learning_rate": 0.0001230769230769231, | |
| "loss": 3.6025, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "grad_norm": 6.940845012664795, | |
| "learning_rate": 0.0001076923076923077, | |
| "loss": 3.7716, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "grad_norm": 4.949404239654541, | |
| "learning_rate": 9.230769230769232e-05, | |
| "loss": 3.7752, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "grad_norm": 5.935202121734619, | |
| "learning_rate": 7.692307692307693e-05, | |
| "loss": 3.5705, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "grad_norm": 5.354796886444092, | |
| "learning_rate": 6.153846153846155e-05, | |
| "loss": 3.4207, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "grad_norm": 5.133244514465332, | |
| "learning_rate": 4.615384615384616e-05, | |
| "loss": 3.5064, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "grad_norm": 5.422821521759033, | |
| "learning_rate": 3.0769230769230774e-05, | |
| "loss": 3.3041, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "grad_norm": 6.032769203186035, | |
| "learning_rate": 1.5384615384615387e-05, | |
| "loss": 3.2407, | |
| "step": 15 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 15, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "total_flos": 203616748830720.0, | |
| "train_batch_size": 5, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |