{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 19.428571428571427,
  "eval_steps": 500,
  "global_step": 60,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.14,
      "learning_rate": 1.4285714285714285e-05,
      "loss": 1.7815,
      "step": 1
    },
    {
      "epoch": 0.29,
      "learning_rate": 2.857142857142857e-05,
      "loss": 1.8831,
      "step": 2
    },
    {
      "epoch": 0.43,
      "learning_rate": 4.2857142857142856e-05,
      "loss": 1.825,
      "step": 3
    },
    {
      "epoch": 1.14,
      "learning_rate": 5.714285714285714e-05,
      "loss": 1.951,
      "step": 4
    },
    {
      "epoch": 1.29,
      "learning_rate": 7.142857142857143e-05,
      "loss": 1.5805,
      "step": 5
    },
    {
      "epoch": 1.43,
      "learning_rate": 8.571428571428571e-05,
      "loss": 1.859,
      "step": 6
    },
    {
      "epoch": 2.14,
      "learning_rate": 0.0001,
      "loss": 1.7268,
      "step": 7
    },
    {
      "epoch": 2.29,
      "learning_rate": 0.00011428571428571428,
      "loss": 1.4927,
      "step": 8
    },
    {
      "epoch": 2.43,
      "learning_rate": 0.00012857142857142858,
      "loss": 1.8618,
      "step": 9
    },
    {
      "epoch": 3.14,
      "learning_rate": 0.00014285714285714287,
      "loss": 1.6209,
      "step": 10
    },
    {
      "epoch": 3.29,
      "learning_rate": 0.00015714285714285716,
      "loss": 1.5352,
      "step": 11
    },
    {
      "epoch": 3.43,
      "learning_rate": 0.00017142857142857143,
      "loss": 1.3475,
      "step": 12
    },
    {
      "epoch": 4.14,
      "learning_rate": 0.00018571428571428572,
      "loss": 1.6543,
      "step": 13
    },
    {
      "epoch": 4.29,
      "learning_rate": 0.0002,
      "loss": 1.3424,
      "step": 14
    },
    {
      "epoch": 4.43,
      "learning_rate": 0.00019841269841269844,
      "loss": 1.0813,
      "step": 15
    },
    {
      "epoch": 5.14,
      "learning_rate": 0.00019682539682539682,
      "loss": 1.2542,
      "step": 16
    },
    {
      "epoch": 5.29,
      "learning_rate": 0.00019523809523809525,
      "loss": 1.3668,
      "step": 17
    },
    {
      "epoch": 5.43,
      "learning_rate": 0.00019365079365079365,
      "loss": 1.2575,
      "step": 18
    },
    {
      "epoch": 6.14,
      "learning_rate": 0.00019206349206349208,
      "loss": 1.2714,
      "step": 19
    },
    {
      "epoch": 6.29,
      "learning_rate": 0.00019047619047619048,
      "loss": 1.0502,
      "step": 20
    },
    {
      "epoch": 6.43,
      "learning_rate": 0.00018888888888888888,
      "loss": 1.2165,
      "step": 21
    },
    {
      "epoch": 7.14,
      "learning_rate": 0.00018730158730158731,
      "loss": 1.1839,
      "step": 22
    },
    {
      "epoch": 7.29,
      "learning_rate": 0.00018571428571428572,
      "loss": 1.0632,
      "step": 23
    },
    {
      "epoch": 7.43,
      "learning_rate": 0.00018412698412698412,
      "loss": 0.9129,
      "step": 24
    },
    {
      "epoch": 8.14,
      "learning_rate": 0.00018253968253968255,
      "loss": 0.9559,
      "step": 25
    },
    {
      "epoch": 8.29,
      "learning_rate": 0.00018095238095238095,
      "loss": 0.9037,
      "step": 26
    },
    {
      "epoch": 8.43,
      "learning_rate": 0.00017936507936507938,
      "loss": 0.99,
      "step": 27
    },
    {
      "epoch": 9.14,
      "learning_rate": 0.00017777777777777779,
      "loss": 0.7489,
      "step": 28
    },
    {
      "epoch": 9.29,
      "learning_rate": 0.0001761904761904762,
      "loss": 0.9113,
      "step": 29
    },
    {
      "epoch": 9.43,
      "learning_rate": 0.00017460317460317462,
      "loss": 0.8181,
      "step": 30
    },
    {
      "epoch": 10.14,
      "learning_rate": 0.00017301587301587302,
      "loss": 0.825,
      "step": 31
    },
    {
      "epoch": 10.29,
      "learning_rate": 0.00017142857142857143,
      "loss": 0.785,
      "step": 32
    },
    {
      "epoch": 10.43,
      "learning_rate": 0.00016984126984126986,
      "loss": 0.4748,
      "step": 33
    },
    {
      "epoch": 11.14,
      "learning_rate": 0.00016825396825396826,
      "loss": 0.6036,
      "step": 34
    },
    {
      "epoch": 11.29,
      "learning_rate": 0.0001666666666666667,
      "loss": 0.7164,
      "step": 35
    },
    {
      "epoch": 11.43,
      "learning_rate": 0.0001650793650793651,
      "loss": 0.4889,
      "step": 36
    },
    {
      "epoch": 12.14,
      "learning_rate": 0.0001634920634920635,
      "loss": 0.5426,
      "step": 37
    },
    {
      "epoch": 12.29,
      "learning_rate": 0.00016190476190476192,
      "loss": 0.6201,
      "step": 38
    },
    {
      "epoch": 12.43,
      "learning_rate": 0.00016031746031746033,
      "loss": 0.3853,
      "step": 39
    },
    {
      "epoch": 13.14,
      "learning_rate": 0.00015873015873015873,
      "loss": 0.4882,
      "step": 40
    },
    {
      "epoch": 13.29,
      "learning_rate": 0.00015714285714285716,
      "loss": 0.5028,
      "step": 41
    },
    {
      "epoch": 13.43,
      "learning_rate": 0.00015555555555555556,
      "loss": 0.3693,
      "step": 42
    },
    {
      "epoch": 14.14,
      "learning_rate": 0.000153968253968254,
      "loss": 0.3257,
      "step": 43
    },
    {
      "epoch": 14.29,
      "learning_rate": 0.00015238095238095237,
      "loss": 0.4502,
      "step": 44
    },
    {
      "epoch": 14.43,
      "learning_rate": 0.0001507936507936508,
      "loss": 0.4277,
      "step": 45
    },
    {
      "epoch": 15.14,
      "learning_rate": 0.00014920634920634923,
      "loss": 0.4595,
      "step": 46
    },
    {
      "epoch": 15.29,
      "learning_rate": 0.00014761904761904763,
      "loss": 0.3251,
      "step": 47
    },
    {
      "epoch": 15.43,
      "learning_rate": 0.00014603174603174603,
      "loss": 0.2482,
      "step": 48
    },
    {
      "epoch": 16.14,
      "learning_rate": 0.00014444444444444444,
      "loss": 0.3022,
      "step": 49
    },
    {
      "epoch": 16.29,
      "learning_rate": 0.00014285714285714287,
      "loss": 0.2293,
      "step": 50
    },
    {
      "epoch": 16.43,
      "learning_rate": 0.0001412698412698413,
      "loss": 0.3855,
      "step": 51
    },
    {
      "epoch": 17.14,
      "learning_rate": 0.00013968253968253967,
      "loss": 0.2593,
      "step": 52
    },
    {
      "epoch": 17.29,
      "learning_rate": 0.0001380952380952381,
      "loss": 0.2478,
      "step": 53
    },
    {
      "epoch": 17.43,
      "learning_rate": 0.0001365079365079365,
      "loss": 0.279,
      "step": 54
    },
    {
      "epoch": 18.14,
      "learning_rate": 0.00013492063492063494,
      "loss": 0.2797,
      "step": 55
    },
    {
      "epoch": 18.29,
      "learning_rate": 0.00013333333333333334,
      "loss": 0.1801,
      "step": 56
    },
    {
      "epoch": 18.43,
      "learning_rate": 0.00013174603174603174,
      "loss": 0.2129,
      "step": 57
    },
    {
      "epoch": 19.14,
      "learning_rate": 0.00013015873015873017,
      "loss": 0.1864,
      "step": 58
    },
    {
      "epoch": 19.29,
      "learning_rate": 0.00012857142857142858,
      "loss": 0.2059,
      "step": 59
    },
    {
      "epoch": 19.43,
      "learning_rate": 0.00012698412698412698,
      "loss": 0.1823,
      "step": 60
    }
  ],
  "logging_steps": 1,
  "max_steps": 140,
  "num_train_epochs": 20,
  "save_steps": 500,
  "total_flos": 1.13811835060224e+16,
  "trial_name": null,
  "trial_params": null
}