{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 100,
  "global_step": 163,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.030745580322828592,
      "grad_norm": 6.350613102744608,
      "learning_rate": 1.0204081632653061e-07,
      "loss": 6.5284,
      "step": 5
    },
    {
      "epoch": 0.061491160645657184,
      "grad_norm": 5.249984394687515,
      "learning_rate": 2.0408163265306121e-07,
      "loss": 6.4945,
      "step": 10
    },
    {
      "epoch": 0.09223674096848578,
      "grad_norm": 5.352306724137368,
      "learning_rate": 3.0612244897959183e-07,
      "loss": 6.6828,
      "step": 15
    },
    {
      "epoch": 0.12298232129131437,
      "grad_norm": 5.486794769339902,
      "learning_rate": 4.0816326530612243e-07,
      "loss": 6.467,
      "step": 20
    },
    {
      "epoch": 0.15372790161414296,
      "grad_norm": 4.077117426584361,
      "learning_rate": 5.10204081632653e-07,
      "loss": 6.1826,
      "step": 25
    },
    {
      "epoch": 0.18447348193697155,
      "grad_norm": 3.119167144675083,
      "learning_rate": 6.122448979591837e-07,
      "loss": 5.949,
      "step": 30
    },
    {
      "epoch": 0.21521906225980014,
      "grad_norm": 2.6950432819564356,
      "learning_rate": 7.142857142857143e-07,
      "loss": 5.4738,
      "step": 35
    },
    {
      "epoch": 0.24596464258262873,
      "grad_norm": 1.5382799926978608,
      "learning_rate": 8.163265306122449e-07,
      "loss": 5.2285,
      "step": 40
    },
    {
      "epoch": 0.2767102229054573,
      "grad_norm": 1.2825119089431523,
      "learning_rate": 9.183673469387755e-07,
      "loss": 5.0174,
      "step": 45
    },
    {
      "epoch": 0.3074558032282859,
      "grad_norm": 1.14912166210836,
      "learning_rate": 9.99987079628245e-07,
      "loss": 4.7716,
      "step": 50
    },
    {
      "epoch": 0.3382013835511145,
      "grad_norm": 1.285686948508955,
      "learning_rate": 9.995349367260807e-07,
      "loss": 4.5585,
      "step": 55
    },
    {
      "epoch": 0.3689469638739431,
      "grad_norm": 1.054957564577766,
      "learning_rate": 9.984374428250894e-07,
      "loss": 4.3855,
      "step": 60
    },
    {
      "epoch": 0.3996925441967717,
      "grad_norm": 0.9925990274913604,
      "learning_rate": 9.966960157816278e-07,
      "loss": 4.1541,
      "step": 65
    },
    {
      "epoch": 0.4304381245196003,
      "grad_norm": 0.8744404881743455,
      "learning_rate": 9.943129053516174e-07,
      "loss": 4.1257,
      "step": 70
    },
    {
      "epoch": 0.4611837048424289,
      "grad_norm": 0.7463667798296578,
      "learning_rate": 9.91291190284077e-07,
      "loss": 3.8852,
      "step": 75
    },
    {
      "epoch": 0.49192928516525747,
      "grad_norm": 0.7902691532243193,
      "learning_rate": 9.876347743436758e-07,
      "loss": 3.8846,
      "step": 80
    },
    {
      "epoch": 0.5226748654880861,
      "grad_norm": 0.926879835692504,
      "learning_rate": 9.833483812674452e-07,
      "loss": 3.9166,
      "step": 85
    },
    {
      "epoch": 0.5534204458109147,
      "grad_norm": 0.803407155130827,
      "learning_rate": 9.784375486621668e-07,
      "loss": 3.7194,
      "step": 90
    },
    {
      "epoch": 0.5841660261337432,
      "grad_norm": 0.8117888501469026,
      "learning_rate": 9.729086208503173e-07,
      "loss": 3.7475,
      "step": 95
    },
    {
      "epoch": 0.6149116064565718,
      "grad_norm": 0.7821755674358282,
      "learning_rate": 9.66768740673815e-07,
      "loss": 3.6407,
      "step": 100
    },
    {
      "epoch": 0.6149116064565718,
      "eval_loss": null,
      "eval_runtime": 349.4965,
      "eval_samples_per_second": 15.285,
      "eval_steps_per_second": 0.956,
      "step": 100
    },
    {
      "epoch": 0.6456571867794004,
      "grad_norm": 0.6822137175921249,
      "learning_rate": 9.600258402661569e-07,
      "loss": 3.6531,
      "step": 105
    },
    {
      "epoch": 0.676402767102229,
      "grad_norm": 0.7500359822402426,
      "learning_rate": 9.526886308048668e-07,
      "loss": 3.6282,
      "step": 110
    },
    {
      "epoch": 0.7071483474250576,
      "grad_norm": 0.8622924385168957,
      "learning_rate": 9.447665912574929e-07,
      "loss": 3.5622,
      "step": 115
    },
    {
      "epoch": 0.7378939277478862,
      "grad_norm": 0.7766220833707687,
      "learning_rate": 9.362699561356956e-07,
      "loss": 3.4787,
      "step": 120
    },
    {
      "epoch": 0.7686395080707148,
      "grad_norm": 0.7590912417940764,
      "learning_rate": 9.272097022732443e-07,
      "loss": 3.4078,
      "step": 125
    },
    {
      "epoch": 0.7993850883935434,
      "grad_norm": 0.822433668441714,
      "learning_rate": 9.175975346450062e-07,
      "loss": 3.3673,
      "step": 130
    },
    {
      "epoch": 0.830130668716372,
      "grad_norm": 0.7129904612641743,
      "learning_rate": 9.074458712452475e-07,
      "loss": 3.4122,
      "step": 135
    },
    {
      "epoch": 0.8608762490392006,
      "grad_norm": 0.8733565752917316,
      "learning_rate": 8.967678270447798e-07,
      "loss": 3.3817,
      "step": 140
    },
    {
      "epoch": 0.8916218293620292,
      "grad_norm": 0.7851319189409072,
      "learning_rate": 8.855771970476833e-07,
      "loss": 3.2653,
      "step": 145
    },
    {
      "epoch": 0.9223674096848578,
      "grad_norm": 0.8962287259777225,
      "learning_rate": 8.738884384694905e-07,
      "loss": 3.3029,
      "step": 150
    },
    {
      "epoch": 0.9531129900076863,
      "grad_norm": 0.9466863901587774,
      "learning_rate": 8.617166520598562e-07,
      "loss": 3.2964,
      "step": 155
    },
    {
      "epoch": 0.9838585703305149,
      "grad_norm": 0.8018084383307482,
      "learning_rate": 8.490775625938451e-07,
      "loss": 3.2516,
      "step": 160
    }
  ],
  "logging_steps": 5,
  "max_steps": 486,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 51242899865600.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}