{
  "best_metric": 3.5057616233825684,
  "best_model_checkpoint": "MIReADNeuro_3e-05/checkpoint-66500",
  "epoch": 6.0,
  "global_step": 99750,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.03, "learning_rate": 2.9849624060150376e-05, "loss": 7.0752, "step": 500 },
    { "epoch": 0.06, "learning_rate": 2.9699248120300752e-05, "loss": 6.588, "step": 1000 },
    { "epoch": 0.09, "learning_rate": 2.9548872180451128e-05, "loss": 6.3893, "step": 1500 },
    { "epoch": 0.12, "learning_rate": 2.9398496240601503e-05, "loss": 6.1736, "step": 2000 },
    { "epoch": 0.15, "learning_rate": 2.924812030075188e-05, "loss": 6.0491, "step": 2500 },
    { "epoch": 0.18, "learning_rate": 2.9097744360902258e-05, "loss": 5.8392, "step": 3000 },
    { "epoch": 0.21, "learning_rate": 2.8947368421052634e-05, "loss": 5.7665, "step": 3500 },
    { "epoch": 0.24, "learning_rate": 2.879699248120301e-05, "loss": 5.6406, "step": 4000 },
    { "epoch": 0.27, "learning_rate": 2.8646616541353385e-05, "loss": 5.4408, "step": 4500 },
    { "epoch": 0.3, "learning_rate": 2.849624060150376e-05, "loss": 5.381, "step": 5000 },
    { "epoch": 0.33, "learning_rate": 2.8345864661654136e-05, "loss": 5.2809, "step": 5500 },
    { "epoch": 0.36, "learning_rate": 2.819548872180451e-05, "loss": 5.1722, "step": 6000 },
    { "epoch": 0.39, "learning_rate": 2.8045112781954887e-05, "loss": 5.2068, "step": 6500 },
    { "epoch": 0.42, "learning_rate": 2.7894736842105263e-05, "loss": 5.0432, "step": 7000 },
    { "epoch": 0.45, "learning_rate": 2.774436090225564e-05, "loss": 4.9497, "step": 7500 },
    { "epoch": 0.48, "learning_rate": 2.7593984962406017e-05, "loss": 4.9384, "step": 8000 },
    { "epoch": 0.51, "learning_rate": 2.7443609022556393e-05, "loss": 4.847, "step": 8500 },
    { "epoch": 0.54, "learning_rate": 2.729323308270677e-05, "loss": 4.8078, "step": 9000 },
    { "epoch": 0.57, "learning_rate": 2.7142857142857144e-05, "loss": 4.7734, "step": 9500 },
    { "epoch": 0.6, "learning_rate": 2.699248120300752e-05, "loss": 4.6804, "step": 10000 },
    { "epoch": 0.63, "learning_rate": 2.6842105263157896e-05, "loss": 4.6768, "step": 10500 },
    { "epoch": 0.66, "learning_rate": 2.669172932330827e-05, "loss": 4.6783, "step": 11000 },
    { "epoch": 0.69, "learning_rate": 2.6541353383458647e-05, "loss": 4.5171, "step": 11500 },
    { "epoch": 0.72, "learning_rate": 2.6390977443609022e-05, "loss": 4.5836, "step": 12000 },
    { "epoch": 0.75, "learning_rate": 2.6240601503759398e-05, "loss": 4.4883, "step": 12500 },
    { "epoch": 0.78, "learning_rate": 2.6090225563909774e-05, "loss": 4.4158, "step": 13000 },
    { "epoch": 0.81, "learning_rate": 2.5939849624060153e-05, "loss": 4.4106, "step": 13500 },
    { "epoch": 0.84, "learning_rate": 2.578947368421053e-05, "loss": 4.3406, "step": 14000 },
    { "epoch": 0.87, "learning_rate": 2.5639097744360904e-05, "loss": 4.3537, "step": 14500 },
    { "epoch": 0.9, "learning_rate": 2.548872180451128e-05, "loss": 4.3262, "step": 15000 },
    { "epoch": 0.93, "learning_rate": 2.5338345864661655e-05, "loss": 4.314, "step": 15500 },
    { "epoch": 0.96, "learning_rate": 2.518796992481203e-05, "loss": 4.2858, "step": 16000 },
    { "epoch": 0.99, "learning_rate": 2.5037593984962406e-05, "loss": 4.2601, "step": 16500 },
    { "epoch": 1.0, "eval_accuracy": 0.21903355138214814, "eval_f1": 0.04313887931948169, "eval_loss": 4.24895715713501, "eval_precision": 0.0479982600718407, "eval_recall": 0.05640984310450227, "eval_runtime": 241.3717, "eval_samples_per_second": 117.802, "eval_steps_per_second": 14.728, "step": 16625 },
    { "epoch": 1.02, "learning_rate": 2.4887218045112782e-05, "loss": 4.131, "step": 17000 },
    { "epoch": 1.05, "learning_rate": 2.4736842105263158e-05, "loss": 4.0202, "step": 17500 },
    { "epoch": 1.08, "learning_rate": 2.4586466165413533e-05, "loss": 4.0053, "step": 18000 },
    { "epoch": 1.11, "learning_rate": 2.4436090225563912e-05, "loss": 3.9312, "step": 18500 },
    { "epoch": 1.14, "learning_rate": 2.4285714285714288e-05, "loss": 3.9707, "step": 19000 },
    { "epoch": 1.17, "learning_rate": 2.4135338345864664e-05, "loss": 3.9047, "step": 19500 },
    { "epoch": 1.2, "learning_rate": 2.398496240601504e-05, "loss": 3.8578, "step": 20000 },
    { "epoch": 1.23, "learning_rate": 2.3834586466165415e-05, "loss": 3.8515, "step": 20500 },
    { "epoch": 1.26, "learning_rate": 2.368421052631579e-05, "loss": 3.7787, "step": 21000 },
    { "epoch": 1.29, "learning_rate": 2.3533834586466166e-05, "loss": 3.8549, "step": 21500 },
    { "epoch": 1.32, "learning_rate": 2.338345864661654e-05, "loss": 3.8012, "step": 22000 },
    { "epoch": 1.35, "learning_rate": 2.3233082706766917e-05, "loss": 3.7865, "step": 22500 },
    { "epoch": 1.38, "learning_rate": 2.3082706766917293e-05, "loss": 3.7387, "step": 23000 },
    { "epoch": 1.41, "learning_rate": 2.293233082706767e-05, "loss": 3.7936, "step": 23500 },
    { "epoch": 1.44, "learning_rate": 2.2781954887218048e-05, "loss": 3.7485, "step": 24000 },
    { "epoch": 1.47, "learning_rate": 2.2631578947368423e-05, "loss": 3.6973, "step": 24500 },
    { "epoch": 1.5, "learning_rate": 2.24812030075188e-05, "loss": 3.7168, "step": 25000 },
    { "epoch": 1.53, "learning_rate": 2.2330827067669174e-05, "loss": 3.677, "step": 25500 },
    { "epoch": 1.56, "learning_rate": 2.218045112781955e-05, "loss": 3.731, "step": 26000 },
    { "epoch": 1.59, "learning_rate": 2.2030075187969926e-05, "loss": 3.7097, "step": 26500 },
    { "epoch": 1.62, "learning_rate": 2.18796992481203e-05, "loss": 3.6737, "step": 27000 },
    { "epoch": 1.65, "learning_rate": 2.1729323308270677e-05, "loss": 3.6045, "step": 27500 },
    { "epoch": 1.68, "learning_rate": 2.1578947368421053e-05, "loss": 3.6213, "step": 28000 },
    { "epoch": 1.71, "learning_rate": 2.1428571428571428e-05, "loss": 3.6703, "step": 28500 },
    { "epoch": 1.74, "learning_rate": 2.1278195488721807e-05, "loss": 3.6156, "step": 29000 },
    { "epoch": 1.77, "learning_rate": 2.1127819548872183e-05, "loss": 3.6843, "step": 29500 },
    { "epoch": 1.8, "learning_rate": 2.097744360902256e-05, "loss": 3.703, "step": 30000 },
    { "epoch": 1.83, "learning_rate": 2.0827067669172934e-05, "loss": 3.5849, "step": 30500 },
    { "epoch": 1.86, "learning_rate": 2.067669172932331e-05, "loss": 3.5669, "step": 31000 },
    { "epoch": 1.89, "learning_rate": 2.0526315789473685e-05, "loss": 3.5471, "step": 31500 },
    { "epoch": 1.92, "learning_rate": 2.037593984962406e-05, "loss": 3.5496, "step": 32000 },
    { "epoch": 1.95, "learning_rate": 2.0225563909774437e-05, "loss": 3.5342, "step": 32500 },
    { "epoch": 1.98, "learning_rate": 2.0075187969924812e-05, "loss": 3.5937, "step": 33000 },
    { "epoch": 2.0, "eval_accuracy": 0.2797355278891468, "eval_f1": 0.0903492692549776, "eval_loss": 3.727938413619995, "eval_precision": 0.09982434922230758, "eval_recall": 0.10664659921675672, "eval_runtime": 238.7563, "eval_samples_per_second": 119.092, "eval_steps_per_second": 14.89, "step": 33250 },
    { "epoch": 2.02, "learning_rate": 1.9924812030075188e-05, "loss": 3.2646, "step": 33500 },
    { "epoch": 2.05, "learning_rate": 1.9774436090225563e-05, "loss": 3.2013, "step": 34000 },
    { "epoch": 2.08, "learning_rate": 1.9624060150375942e-05, "loss": 3.1563, "step": 34500 },
    { "epoch": 2.11, "learning_rate": 1.9473684210526318e-05, "loss": 3.1373, "step": 35000 },
    { "epoch": 2.14, "learning_rate": 1.9323308270676694e-05, "loss": 3.1235, "step": 35500 },
    { "epoch": 2.17, "learning_rate": 1.917293233082707e-05, "loss": 3.1204, "step": 36000 },
    { "epoch": 2.2, "learning_rate": 1.9022556390977445e-05, "loss": 3.1349, "step": 36500 },
    { "epoch": 2.23, "learning_rate": 1.887218045112782e-05, "loss": 3.0985, "step": 37000 },
    { "epoch": 2.26, "learning_rate": 1.8721804511278196e-05, "loss": 3.0835, "step": 37500 },
    { "epoch": 2.29, "learning_rate": 1.8571428571428572e-05, "loss": 3.1539, "step": 38000 },
    { "epoch": 2.32, "learning_rate": 1.8421052631578947e-05, "loss": 3.0683, "step": 38500 },
    { "epoch": 2.35, "learning_rate": 1.8270676691729323e-05, "loss": 3.079, "step": 39000 },
    { "epoch": 2.38, "learning_rate": 1.8120300751879702e-05, "loss": 3.0684, "step": 39500 },
    { "epoch": 2.41, "learning_rate": 1.7969924812030078e-05, "loss": 3.1205, "step": 40000 },
    { "epoch": 2.44, "learning_rate": 1.7819548872180453e-05, "loss": 3.0898, "step": 40500 },
    { "epoch": 2.47, "learning_rate": 1.766917293233083e-05, "loss": 3.1235, "step": 41000 },
    { "epoch": 2.5, "learning_rate": 1.7518796992481204e-05, "loss": 3.0515, "step": 41500 },
    { "epoch": 2.53, "learning_rate": 1.736842105263158e-05, "loss": 3.0574, "step": 42000 },
    { "epoch": 2.56, "learning_rate": 1.7218045112781956e-05, "loss": 3.0646, "step": 42500 },
    { "epoch": 2.59, "learning_rate": 1.706766917293233e-05, "loss": 3.0787, "step": 43000 },
    { "epoch": 2.62, "learning_rate": 1.6917293233082707e-05, "loss": 3.0776, "step": 43500 },
    { "epoch": 2.65, "learning_rate": 1.6766917293233083e-05, "loss": 3.0509, "step": 44000 },
    { "epoch": 2.68, "learning_rate": 1.6616541353383458e-05, "loss": 3.0462, "step": 44500 },
    { "epoch": 2.71, "learning_rate": 1.6466165413533837e-05, "loss": 3.0383, "step": 45000 },
    { "epoch": 2.74, "learning_rate": 1.6315789473684213e-05, "loss": 3.0674, "step": 45500 },
    { "epoch": 2.77, "learning_rate": 1.616541353383459e-05, "loss": 3.0155, "step": 46000 },
    { "epoch": 2.8, "learning_rate": 1.6015037593984964e-05, "loss": 3.0936, "step": 46500 },
    { "epoch": 2.83, "learning_rate": 1.586466165413534e-05, "loss": 3.0657, "step": 47000 },
    { "epoch": 2.86, "learning_rate": 1.5714285714285715e-05, "loss": 3.0275, "step": 47500 },
    { "epoch": 2.89, "learning_rate": 1.556390977443609e-05, "loss": 3.0265, "step": 48000 },
    { "epoch": 2.92, "learning_rate": 1.5413533834586467e-05, "loss": 3.0118, "step": 48500 },
    { "epoch": 2.95, "learning_rate": 1.5263157894736842e-05, "loss": 2.9924, "step": 49000 },
    { "epoch": 2.98, "learning_rate": 1.511278195488722e-05, "loss": 3.0082, "step": 49500 },
    { "epoch": 3.0, "eval_accuracy": 0.30794119715833157, "eval_f1": 0.1317337990094712, "eval_loss": 3.523235559463501, "eval_precision": 0.14522851171454873, "eval_recall": 0.1484531543448168, "eval_runtime": 241.6864, "eval_samples_per_second": 117.648, "eval_steps_per_second": 14.709, "step": 49875 },
    { "epoch": 3.01, "learning_rate": 1.4962406015037593e-05, "loss": 2.9208, "step": 50000 },
    { "epoch": 3.04, "learning_rate": 1.4812030075187969e-05, "loss": 2.599, "step": 50500 },
    { "epoch": 3.07, "learning_rate": 1.4661654135338345e-05, "loss": 2.5956, "step": 51000 },
    { "epoch": 3.1, "learning_rate": 1.4511278195488722e-05, "loss": 2.554, "step": 51500 },
    { "epoch": 3.13, "learning_rate": 1.4360902255639098e-05, "loss": 2.5659, "step": 52000 },
    { "epoch": 3.16, "learning_rate": 1.4210526315789473e-05, "loss": 2.5728, "step": 52500 },
    { "epoch": 3.19, "learning_rate": 1.4060150375939849e-05, "loss": 2.5427, "step": 53000 },
    { "epoch": 3.22, "learning_rate": 1.3909774436090224e-05, "loss": 2.5317, "step": 53500 },
    { "epoch": 3.25, "learning_rate": 1.3759398496240602e-05, "loss": 2.5205, "step": 54000 },
    { "epoch": 3.28, "learning_rate": 1.3609022556390977e-05, "loss": 2.5857, "step": 54500 },
    { "epoch": 3.31, "learning_rate": 1.3458646616541353e-05, "loss": 2.5955, "step": 55000 },
    { "epoch": 3.34, "learning_rate": 1.3308270676691729e-05, "loss": 2.5892, "step": 55500 },
    { "epoch": 3.37, "learning_rate": 1.3157894736842104e-05, "loss": 2.5631, "step": 56000 },
    { "epoch": 3.4, "learning_rate": 1.3007518796992482e-05, "loss": 2.6229, "step": 56500 },
    { "epoch": 3.43, "learning_rate": 1.2857142857142857e-05, "loss": 2.6204, "step": 57000 },
    { "epoch": 3.46, "learning_rate": 1.2706766917293233e-05, "loss": 2.5644, "step": 57500 },
    { "epoch": 3.49, "learning_rate": 1.2556390977443608e-05, "loss": 2.489, "step": 58000 },
    { "epoch": 3.52, "learning_rate": 1.2406015037593984e-05, "loss": 2.5243, "step": 58500 },
    { "epoch": 3.55, "learning_rate": 1.225563909774436e-05, "loss": 2.5323, "step": 59000 },
    { "epoch": 3.58, "learning_rate": 1.2105263157894737e-05, "loss": 2.5484, "step": 59500 },
    { "epoch": 3.61, "learning_rate": 1.1954887218045113e-05, "loss": 2.5526, "step": 60000 },
    { "epoch": 3.64, "learning_rate": 1.1804511278195488e-05, "loss": 2.546, "step": 60500 },
    { "epoch": 3.67, "learning_rate": 1.1654135338345864e-05, "loss": 2.4828, "step": 61000 },
    { "epoch": 3.7, "learning_rate": 1.150375939849624e-05, "loss": 2.5012, "step": 61500 },
    { "epoch": 3.73, "learning_rate": 1.1353383458646617e-05, "loss": 2.6209, "step": 62000 },
    { "epoch": 3.76, "learning_rate": 1.1203007518796992e-05, "loss": 2.474, "step": 62500 },
    { "epoch": 3.79, "learning_rate": 1.1052631578947368e-05, "loss": 2.5335, "step": 63000 },
    { "epoch": 3.82, "learning_rate": 1.0902255639097744e-05, "loss": 2.5015, "step": 63500 },
    { "epoch": 3.85, "learning_rate": 1.075187969924812e-05, "loss": 2.4746, "step": 64000 },
    { "epoch": 3.88, "learning_rate": 1.0601503759398497e-05, "loss": 2.4877, "step": 64500 },
    { "epoch": 3.91, "learning_rate": 1.0451127819548872e-05, "loss": 2.4325, "step": 65000 },
    { "epoch": 3.94, "learning_rate": 1.0300751879699248e-05, "loss": 2.5299, "step": 65500 },
    { "epoch": 3.97, "learning_rate": 1.0150375939849624e-05, "loss": 2.5546, "step": 66000 },
    { "epoch": 4.0, "learning_rate": 9.999999999999999e-06, "loss": 2.4999, "step": 66500 },
    { "epoch": 4.0, "eval_accuracy": 0.3128648800731519, "eval_f1": 0.15025379830203955, "eval_loss": 3.5057616233825684, "eval_precision": 0.16130440997164983, "eval_recall": 0.16584776990015507, "eval_runtime": 243.3854, "eval_samples_per_second": 116.827, "eval_steps_per_second": 14.606, "step": 66500 },
    { "epoch": 4.03, "learning_rate": 9.849624060150376e-06, "loss": 2.1264, "step": 67000 },
    { "epoch": 4.06, "learning_rate": 9.699248120300752e-06, "loss": 2.1454, "step": 67500 },
    { "epoch": 4.09, "learning_rate": 9.548872180451128e-06, "loss": 2.1236, "step": 68000 },
    { "epoch": 4.12, "learning_rate": 9.398496240601503e-06, "loss": 2.1299, "step": 68500 },
    { "epoch": 4.15, "learning_rate": 9.248120300751879e-06, "loss": 2.1006, "step": 69000 },
    { "epoch": 4.18, "learning_rate": 9.097744360902255e-06, "loss": 2.105, "step": 69500 },
    { "epoch": 4.21, "learning_rate": 8.947368421052632e-06, "loss": 2.1096, "step": 70000 },
    { "epoch": 4.24, "learning_rate": 8.796992481203007e-06, "loss": 2.0984, "step": 70500 },
    { "epoch": 4.27, "learning_rate": 8.646616541353383e-06, "loss": 2.1179, "step": 71000 },
    { "epoch": 4.3, "learning_rate": 8.496240601503759e-06, "loss": 2.1057, "step": 71500 },
    { "epoch": 4.33, "learning_rate": 8.345864661654134e-06, "loss": 2.0953, "step": 72000 },
    { "epoch": 4.36, "learning_rate": 8.195488721804512e-06, "loss": 2.0757, "step": 72500 },
    { "epoch": 4.39, "learning_rate": 8.045112781954887e-06, "loss": 2.1196, "step": 73000 },
    { "epoch": 4.42, "learning_rate": 7.894736842105263e-06, "loss": 2.1006, "step": 73500 },
    { "epoch": 4.45, "learning_rate": 7.744360902255639e-06, "loss": 2.0635, "step": 74000 },
    { "epoch": 4.48, "learning_rate": 7.593984962406015e-06, "loss": 2.1135, "step": 74500 },
    { "epoch": 4.51, "learning_rate": 7.443609022556391e-06, "loss": 2.0996, "step": 75000 },
    { "epoch": 4.54, "learning_rate": 7.293233082706767e-06, "loss": 2.1062, "step": 75500 },
    { "epoch": 4.57, "learning_rate": 7.142857142857143e-06, "loss": 2.0833, "step": 76000 },
    { "epoch": 4.6, "learning_rate": 6.992481203007518e-06, "loss": 2.0442, "step": 76500 },
    { "epoch": 4.63, "learning_rate": 6.842105263157895e-06, "loss": 2.126, "step": 77000 },
    { "epoch": 4.66, "learning_rate": 6.6917293233082704e-06, "loss": 2.1046, "step": 77500 },
    { "epoch": 4.69, "learning_rate": 6.541353383458646e-06, "loss": 2.0697, "step": 78000 },
    { "epoch": 4.72, "learning_rate": 6.3909774436090225e-06, "loss": 2.0902, "step": 78500 },
    { "epoch": 4.75, "learning_rate": 6.240601503759398e-06, "loss": 2.09, "step": 79000 },
    { "epoch": 4.78, "learning_rate": 6.090225563909775e-06, "loss": 2.0961, "step": 79500 },
    { "epoch": 4.81, "learning_rate": 5.93984962406015e-06, "loss": 2.0841, "step": 80000 },
    { "epoch": 4.84, "learning_rate": 5.789473684210526e-06, "loss": 2.0838, "step": 80500 },
    { "epoch": 4.87, "learning_rate": 5.639097744360902e-06, "loss": 2.0492, "step": 81000 },
    { "epoch": 4.9, "learning_rate": 5.488721804511278e-06, "loss": 2.0804, "step": 81500 },
    { "epoch": 4.93, "learning_rate": 5.3383458646616536e-06, "loss": 2.105, "step": 82000 },
    { "epoch": 4.96, "learning_rate": 5.18796992481203e-06, "loss": 2.1231, "step": 82500 },
    { "epoch": 4.99, "learning_rate": 5.037593984962406e-06, "loss": 2.0048, "step": 83000 },
    { "epoch": 5.0, "eval_accuracy": 0.3194063445171274, "eval_f1": 0.16919354439457027, "eval_loss": 3.5483460426330566, "eval_precision": 0.1821520394597461, "eval_recall": 0.18324334265649903, "eval_runtime": 239.2709, "eval_samples_per_second": 118.836, "eval_steps_per_second": 14.858, "step": 83125 },
    { "epoch": 5.02, "learning_rate": 4.887218045112782e-06, "loss": 1.8463, "step": 83500 },
    { "epoch": 5.05, "learning_rate": 4.736842105263158e-06, "loss": 1.8074, "step": 84000 },
    { "epoch": 5.08, "learning_rate": 4.586466165413533e-06, "loss": 1.7579, "step": 84500 },
    { "epoch": 5.11, "learning_rate": 4.43609022556391e-06, "loss": 1.7725, "step": 85000 },
    { "epoch": 5.14, "learning_rate": 4.2857142857142855e-06, "loss": 1.7747, "step": 85500 },
    { "epoch": 5.17, "learning_rate": 4.135338345864662e-06, "loss": 1.7711, "step": 86000 },
    { "epoch": 5.2, "learning_rate": 3.9849624060150376e-06, "loss": 1.8442, "step": 86500 },
    { "epoch": 5.23, "learning_rate": 3.834586466165413e-06, "loss": 1.8341, "step": 87000 },
    { "epoch": 5.26, "learning_rate": 3.6842105263157892e-06, "loss": 1.7366, "step": 87500 },
    { "epoch": 5.29, "learning_rate": 3.5338345864661653e-06, "loss": 1.7525, "step": 88000 },
    { "epoch": 5.32, "learning_rate": 3.3834586466165413e-06, "loss": 1.7782, "step": 88500 },
    { "epoch": 5.35, "learning_rate": 3.2330827067669174e-06, "loss": 1.8131, "step": 89000 },
    { "epoch": 5.38, "learning_rate": 3.082706766917293e-06, "loss": 1.7687, "step": 89500 },
    { "epoch": 5.41, "learning_rate": 2.932330827067669e-06, "loss": 1.7685, "step": 90000 },
    { "epoch": 5.44, "learning_rate": 2.781954887218045e-06, "loss": 1.7978, "step": 90500 },
    { "epoch": 5.47, "learning_rate": 2.631578947368421e-06, "loss": 1.8071, "step": 91000 },
    { "epoch": 5.5, "learning_rate": 2.4812030075187967e-06, "loss": 1.7622, "step": 91500 },
    { "epoch": 5.53, "learning_rate": 2.3308270676691728e-06, "loss": 1.756, "step": 92000 },
    { "epoch": 5.56, "learning_rate": 2.180451127819549e-06, "loss": 1.8045, "step": 92500 },
    { "epoch": 5.59, "learning_rate": 2.030075187969925e-06, "loss": 1.6949, "step": 93000 },
    { "epoch": 5.62, "learning_rate": 1.8796992481203007e-06, "loss": 1.7255, "step": 93500 },
    { "epoch": 5.65, "learning_rate": 1.7293233082706765e-06, "loss": 1.7631, "step": 94000 },
    { "epoch": 5.68, "learning_rate": 1.5789473684210526e-06, "loss": 1.7723, "step": 94500 },
    { "epoch": 5.71, "learning_rate": 1.4285714285714286e-06, "loss": 1.7405, "step": 95000 },
    { "epoch": 5.74, "learning_rate": 1.2781954887218045e-06, "loss": 1.8144, "step": 95500 },
    { "epoch": 5.77, "learning_rate": 1.1278195488721805e-06, "loss": 1.7847, "step": 96000 },
    { "epoch": 5.8, "learning_rate": 9.774436090225563e-07, "loss": 1.7579, "step": 96500 },
    { "epoch": 5.83, "learning_rate": 8.270676691729323e-07, "loss": 1.8196, "step": 97000 },
    { "epoch": 5.86, "learning_rate": 6.766917293233082e-07, "loss": 1.744, "step": 97500 },
    { "epoch": 5.89, "learning_rate": 5.263157894736842e-07, "loss": 1.7594, "step": 98000 },
    { "epoch": 5.92, "learning_rate": 3.7593984962406015e-07, "loss": 1.7179, "step": 98500 },
    { "epoch": 5.95, "learning_rate": 2.255639097744361e-07, "loss": 1.7427, "step": 99000 },
    { "epoch": 5.98, "learning_rate": 7.518796992481203e-08, "loss": 1.7591, "step": 99500 },
    { "epoch": 6.0, "eval_accuracy": 0.317647886333263, "eval_f1": 0.1720864596377023, "eval_loss": 3.6147267818450928, "eval_precision": 0.18196956006589313, "eval_recall": 0.18618078808353178, "eval_runtime": 239.2791, "eval_samples_per_second": 118.832, "eval_steps_per_second": 14.857, "step": 99750 }
  ],
  "max_steps": 99750,
  "num_train_epochs": 6,
  "total_flos": 2.142716050814976e+17,
  "trial_name": null,
  "trial_params": null
}