| { |
| "best_metric": 0.08917281776666641, |
| "best_model_checkpoint": "/content/drive/MyDrive/vit-cifar10/checkpoint-263043", |
| "epoch": 100.0, |
| "global_step": 265700, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.04, |
| "learning_rate": 1.9992472713586754e-05, |
| "loss": 0.3607, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 1.9984945427173507e-05, |
| "loss": 0.3215, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 1.997741814076026e-05, |
| "loss": 0.3187, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 1.9969890854347008e-05, |
| "loss": 0.3123, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.996236356793376e-05, |
| "loss": 0.3007, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 1.9954836281520513e-05, |
| "loss": 0.3073, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 1.9947308995107266e-05, |
| "loss": 0.3031, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 1.993978170869402e-05, |
| "loss": 0.3008, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 1.993225442228077e-05, |
| "loss": 0.2996, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 1.992472713586752e-05, |
| "loss": 0.3026, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 1.9917199849454273e-05, |
| "loss": 0.3003, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 1.9909672563041025e-05, |
| "loss": 0.2994, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 1.9902145276627778e-05, |
| "loss": 0.2974, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 1.989461799021453e-05, |
| "loss": 0.2973, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 1.9887090703801283e-05, |
| "loss": 0.2995, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 1.987956341738803e-05, |
| "loss": 0.2945, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 1.9872036130974784e-05, |
| "loss": 0.296, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 1.9864508844561537e-05, |
| "loss": 0.2904, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 1.985698155814829e-05, |
| "loss": 0.2931, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 1.9849454271735042e-05, |
| "loss": 0.2952, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 1.9841926985321794e-05, |
| "loss": 0.2935, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 1.9834399698908543e-05, |
| "loss": 0.2912, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 1.9826872412495296e-05, |
| "loss": 0.2951, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 1.981934512608205e-05, |
| "loss": 0.2873, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 1.98118178396688e-05, |
| "loss": 0.2906, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 1.9804290553255553e-05, |
| "loss": 0.289, |
| "step": 2600 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 0.2940625846385956, |
| "eval_runtime": 45.1121, |
| "eval_samples_per_second": 166.253, |
| "eval_steps_per_second": 10.396, |
| "step": 2657 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 1.9796763266842306e-05, |
| "loss": 0.2831, |
| "step": 2700 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 1.978923598042906e-05, |
| "loss": 0.2941, |
| "step": 2800 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 1.9781708694015808e-05, |
| "loss": 0.2866, |
| "step": 2900 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 1.977418140760256e-05, |
| "loss": 0.2811, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 1.9766654121189313e-05, |
| "loss": 0.2912, |
| "step": 3100 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 1.9759126834776065e-05, |
| "loss": 0.289, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 1.9751599548362818e-05, |
| "loss": 0.2907, |
| "step": 3300 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 1.974407226194957e-05, |
| "loss": 0.2913, |
| "step": 3400 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 1.973654497553632e-05, |
| "loss": 0.2863, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 1.9729017689123072e-05, |
| "loss": 0.2822, |
| "step": 3600 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 1.9721490402709824e-05, |
| "loss": 0.2808, |
| "step": 3700 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 1.9713963116296577e-05, |
| "loss": 0.2836, |
| "step": 3800 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 1.970643582988333e-05, |
| "loss": 0.2796, |
| "step": 3900 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 1.9698908543470082e-05, |
| "loss": 0.2831, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 1.969138125705683e-05, |
| "loss": 0.2878, |
| "step": 4100 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 1.9683853970643583e-05, |
| "loss": 0.2767, |
| "step": 4200 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 1.9676326684230336e-05, |
| "loss": 0.2835, |
| "step": 4300 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 1.966879939781709e-05, |
| "loss": 0.2823, |
| "step": 4400 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 1.966127211140384e-05, |
| "loss": 0.284, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 1.9653744824990594e-05, |
| "loss": 0.2813, |
| "step": 4600 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 1.9646217538577343e-05, |
| "loss": 0.2806, |
| "step": 4700 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 1.9638690252164095e-05, |
| "loss": 0.2846, |
| "step": 4800 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 1.9631162965750848e-05, |
| "loss": 0.2834, |
| "step": 4900 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 1.96236356793376e-05, |
| "loss": 0.2821, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 1.9616108392924353e-05, |
| "loss": 0.2837, |
| "step": 5100 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 1.9608581106511105e-05, |
| "loss": 0.2782, |
| "step": 5200 |
| }, |
| { |
| "epoch": 1.99, |
| "learning_rate": 1.9601053820097858e-05, |
| "loss": 0.2858, |
| "step": 5300 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_loss": 0.28088775277137756, |
| "eval_runtime": 45.112, |
| "eval_samples_per_second": 166.253, |
| "eval_steps_per_second": 10.396, |
| "step": 5314 |
| }, |
| { |
| "epoch": 2.03, |
| "learning_rate": 1.9593526533684607e-05, |
| "loss": 0.2826, |
| "step": 5400 |
| }, |
| { |
| "epoch": 2.07, |
| "learning_rate": 1.9585999247271363e-05, |
| "loss": 0.2858, |
| "step": 5500 |
| }, |
| { |
| "epoch": 2.11, |
| "learning_rate": 1.9578471960858112e-05, |
| "loss": 0.2782, |
| "step": 5600 |
| }, |
| { |
| "epoch": 2.15, |
| "learning_rate": 1.9570944674444864e-05, |
| "loss": 0.2779, |
| "step": 5700 |
| }, |
| { |
| "epoch": 2.18, |
| "learning_rate": 1.9563417388031617e-05, |
| "loss": 0.2792, |
| "step": 5800 |
| }, |
| { |
| "epoch": 2.22, |
| "learning_rate": 1.955589010161837e-05, |
| "loss": 0.2837, |
| "step": 5900 |
| }, |
| { |
| "epoch": 2.26, |
| "learning_rate": 1.954836281520512e-05, |
| "loss": 0.2792, |
| "step": 6000 |
| }, |
| { |
| "epoch": 2.3, |
| "learning_rate": 1.954083552879187e-05, |
| "loss": 0.2825, |
| "step": 6100 |
| }, |
| { |
| "epoch": 2.33, |
| "learning_rate": 1.9533308242378624e-05, |
| "loss": 0.2762, |
| "step": 6200 |
| }, |
| { |
| "epoch": 2.37, |
| "learning_rate": 1.9525780955965376e-05, |
| "loss": 0.283, |
| "step": 6300 |
| }, |
| { |
| "epoch": 2.41, |
| "learning_rate": 1.951825366955213e-05, |
| "loss": 0.2824, |
| "step": 6400 |
| }, |
| { |
| "epoch": 2.45, |
| "learning_rate": 1.951072638313888e-05, |
| "loss": 0.2787, |
| "step": 6500 |
| }, |
| { |
| "epoch": 2.48, |
| "learning_rate": 1.950319909672563e-05, |
| "loss": 0.2769, |
| "step": 6600 |
| }, |
| { |
| "epoch": 2.52, |
| "learning_rate": 1.9495671810312383e-05, |
| "loss": 0.2791, |
| "step": 6700 |
| }, |
| { |
| "epoch": 2.56, |
| "learning_rate": 1.9488144523899135e-05, |
| "loss": 0.2754, |
| "step": 6800 |
| }, |
| { |
| "epoch": 2.6, |
| "learning_rate": 1.9480617237485888e-05, |
| "loss": 0.2721, |
| "step": 6900 |
| }, |
| { |
| "epoch": 2.63, |
| "learning_rate": 1.947308995107264e-05, |
| "loss": 0.2796, |
| "step": 7000 |
| }, |
| { |
| "epoch": 2.67, |
| "learning_rate": 1.9465562664659393e-05, |
| "loss": 0.2771, |
| "step": 7100 |
| }, |
| { |
| "epoch": 2.71, |
| "learning_rate": 1.9458035378246142e-05, |
| "loss": 0.2778, |
| "step": 7200 |
| }, |
| { |
| "epoch": 2.75, |
| "learning_rate": 1.9450508091832894e-05, |
| "loss": 0.2773, |
| "step": 7300 |
| }, |
| { |
| "epoch": 2.79, |
| "learning_rate": 1.9442980805419647e-05, |
| "loss": 0.2771, |
| "step": 7400 |
| }, |
| { |
| "epoch": 2.82, |
| "learning_rate": 1.94354535190064e-05, |
| "loss": 0.2764, |
| "step": 7500 |
| }, |
| { |
| "epoch": 2.86, |
| "learning_rate": 1.9427926232593152e-05, |
| "loss": 0.2789, |
| "step": 7600 |
| }, |
| { |
| "epoch": 2.9, |
| "learning_rate": 1.9420398946179905e-05, |
| "loss": 0.2768, |
| "step": 7700 |
| }, |
| { |
| "epoch": 2.94, |
| "learning_rate": 1.9412871659766657e-05, |
| "loss": 0.2765, |
| "step": 7800 |
| }, |
| { |
| "epoch": 2.97, |
| "learning_rate": 1.9405344373353406e-05, |
| "loss": 0.2693, |
| "step": 7900 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_loss": 0.2738477289676666, |
| "eval_runtime": 44.824, |
| "eval_samples_per_second": 167.321, |
| "eval_steps_per_second": 10.463, |
| "step": 7971 |
| }, |
| { |
| "epoch": 3.01, |
| "learning_rate": 1.9397817086940162e-05, |
| "loss": 0.277, |
| "step": 8000 |
| }, |
| { |
| "epoch": 3.05, |
| "learning_rate": 1.939028980052691e-05, |
| "loss": 0.2734, |
| "step": 8100 |
| }, |
| { |
| "epoch": 3.09, |
| "learning_rate": 1.9382762514113664e-05, |
| "loss": 0.2767, |
| "step": 8200 |
| }, |
| { |
| "epoch": 3.12, |
| "learning_rate": 1.9375235227700416e-05, |
| "loss": 0.2767, |
| "step": 8300 |
| }, |
| { |
| "epoch": 3.16, |
| "learning_rate": 1.936770794128717e-05, |
| "loss": 0.271, |
| "step": 8400 |
| }, |
| { |
| "epoch": 3.2, |
| "learning_rate": 1.9360180654873918e-05, |
| "loss": 0.2747, |
| "step": 8500 |
| }, |
| { |
| "epoch": 3.24, |
| "learning_rate": 1.9352653368460674e-05, |
| "loss": 0.2758, |
| "step": 8600 |
| }, |
| { |
| "epoch": 3.27, |
| "learning_rate": 1.9345126082047423e-05, |
| "loss": 0.2706, |
| "step": 8700 |
| }, |
| { |
| "epoch": 3.31, |
| "learning_rate": 1.9337598795634175e-05, |
| "loss": 0.2734, |
| "step": 8800 |
| }, |
| { |
| "epoch": 3.35, |
| "learning_rate": 1.9330071509220928e-05, |
| "loss": 0.2732, |
| "step": 8900 |
| }, |
| { |
| "epoch": 3.39, |
| "learning_rate": 1.932254422280768e-05, |
| "loss": 0.2724, |
| "step": 9000 |
| }, |
| { |
| "epoch": 3.42, |
| "learning_rate": 1.931501693639443e-05, |
| "loss": 0.2678, |
| "step": 9100 |
| }, |
| { |
| "epoch": 3.46, |
| "learning_rate": 1.9307489649981182e-05, |
| "loss": 0.2755, |
| "step": 9200 |
| }, |
| { |
| "epoch": 3.5, |
| "learning_rate": 1.9299962363567935e-05, |
| "loss": 0.264, |
| "step": 9300 |
| }, |
| { |
| "epoch": 3.54, |
| "learning_rate": 1.9292435077154687e-05, |
| "loss": 0.2706, |
| "step": 9400 |
| }, |
| { |
| "epoch": 3.58, |
| "learning_rate": 1.928490779074144e-05, |
| "loss": 0.2686, |
| "step": 9500 |
| }, |
| { |
| "epoch": 3.61, |
| "learning_rate": 1.9277380504328192e-05, |
| "loss": 0.2681, |
| "step": 9600 |
| }, |
| { |
| "epoch": 3.65, |
| "learning_rate": 1.926985321791494e-05, |
| "loss": 0.2671, |
| "step": 9700 |
| }, |
| { |
| "epoch": 3.69, |
| "learning_rate": 1.9262325931501694e-05, |
| "loss": 0.2668, |
| "step": 9800 |
| }, |
| { |
| "epoch": 3.73, |
| "learning_rate": 1.9254798645088446e-05, |
| "loss": 0.2621, |
| "step": 9900 |
| }, |
| { |
| "epoch": 3.76, |
| "learning_rate": 1.92472713586752e-05, |
| "loss": 0.2641, |
| "step": 10000 |
| }, |
| { |
| "epoch": 3.8, |
| "learning_rate": 1.923974407226195e-05, |
| "loss": 0.2645, |
| "step": 10100 |
| }, |
| { |
| "epoch": 3.84, |
| "learning_rate": 1.9232216785848704e-05, |
| "loss": 0.2593, |
| "step": 10200 |
| }, |
| { |
| "epoch": 3.88, |
| "learning_rate": 1.9224689499435456e-05, |
| "loss": 0.2604, |
| "step": 10300 |
| }, |
| { |
| "epoch": 3.91, |
| "learning_rate": 1.9217162213022205e-05, |
| "loss": 0.2614, |
| "step": 10400 |
| }, |
| { |
| "epoch": 3.95, |
| "learning_rate": 1.920963492660896e-05, |
| "loss": 0.2627, |
| "step": 10500 |
| }, |
| { |
| "epoch": 3.99, |
| "learning_rate": 1.920210764019571e-05, |
| "loss": 0.2578, |
| "step": 10600 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_loss": 0.2545997202396393, |
| "eval_runtime": 44.2178, |
| "eval_samples_per_second": 169.615, |
| "eval_steps_per_second": 10.607, |
| "step": 10628 |
| }, |
| { |
| "epoch": 4.03, |
| "learning_rate": 1.9194580353782463e-05, |
| "loss": 0.2567, |
| "step": 10700 |
| }, |
| { |
| "epoch": 4.06, |
| "learning_rate": 1.9187053067369215e-05, |
| "loss": 0.2547, |
| "step": 10800 |
| }, |
| { |
| "epoch": 4.1, |
| "learning_rate": 1.9179525780955968e-05, |
| "loss": 0.2564, |
| "step": 10900 |
| }, |
| { |
| "epoch": 4.14, |
| "learning_rate": 1.9171998494542717e-05, |
| "loss": 0.2603, |
| "step": 11000 |
| }, |
| { |
| "epoch": 4.18, |
| "learning_rate": 1.9164471208129473e-05, |
| "loss": 0.2514, |
| "step": 11100 |
| }, |
| { |
| "epoch": 4.22, |
| "learning_rate": 1.9156943921716222e-05, |
| "loss": 0.2571, |
| "step": 11200 |
| }, |
| { |
| "epoch": 4.25, |
| "learning_rate": 1.9149416635302975e-05, |
| "loss": 0.2513, |
| "step": 11300 |
| }, |
| { |
| "epoch": 4.29, |
| "learning_rate": 1.9141889348889727e-05, |
| "loss": 0.2531, |
| "step": 11400 |
| }, |
| { |
| "epoch": 4.33, |
| "learning_rate": 1.913436206247648e-05, |
| "loss": 0.2485, |
| "step": 11500 |
| }, |
| { |
| "epoch": 4.37, |
| "learning_rate": 1.912683477606323e-05, |
| "loss": 0.2519, |
| "step": 11600 |
| }, |
| { |
| "epoch": 4.4, |
| "learning_rate": 1.9119307489649985e-05, |
| "loss": 0.2506, |
| "step": 11700 |
| }, |
| { |
| "epoch": 4.44, |
| "learning_rate": 1.9111780203236734e-05, |
| "loss": 0.2487, |
| "step": 11800 |
| }, |
| { |
| "epoch": 4.48, |
| "learning_rate": 1.9104252916823486e-05, |
| "loss": 0.2456, |
| "step": 11900 |
| }, |
| { |
| "epoch": 4.52, |
| "learning_rate": 1.909672563041024e-05, |
| "loss": 0.2493, |
| "step": 12000 |
| }, |
| { |
| "epoch": 4.55, |
| "learning_rate": 1.908919834399699e-05, |
| "loss": 0.2439, |
| "step": 12100 |
| }, |
| { |
| "epoch": 4.59, |
| "learning_rate": 1.908167105758374e-05, |
| "loss": 0.2434, |
| "step": 12200 |
| }, |
| { |
| "epoch": 4.63, |
| "learning_rate": 1.9074143771170493e-05, |
| "loss": 0.2454, |
| "step": 12300 |
| }, |
| { |
| "epoch": 4.67, |
| "learning_rate": 1.9066616484757246e-05, |
| "loss": 0.2406, |
| "step": 12400 |
| }, |
| { |
| "epoch": 4.7, |
| "learning_rate": 1.9059089198343998e-05, |
| "loss": 0.2392, |
| "step": 12500 |
| }, |
| { |
| "epoch": 4.74, |
| "learning_rate": 1.905156191193075e-05, |
| "loss": 0.2394, |
| "step": 12600 |
| }, |
| { |
| "epoch": 4.78, |
| "learning_rate": 1.9044034625517503e-05, |
| "loss": 0.2351, |
| "step": 12700 |
| }, |
| { |
| "epoch": 4.82, |
| "learning_rate": 1.9036507339104256e-05, |
| "loss": 0.2367, |
| "step": 12800 |
| }, |
| { |
| "epoch": 4.86, |
| "learning_rate": 1.9028980052691005e-05, |
| "loss": 0.233, |
| "step": 12900 |
| }, |
| { |
| "epoch": 4.89, |
| "learning_rate": 1.902145276627776e-05, |
| "loss": 0.2276, |
| "step": 13000 |
| }, |
| { |
| "epoch": 4.93, |
| "learning_rate": 1.901392547986451e-05, |
| "loss": 0.2323, |
| "step": 13100 |
| }, |
| { |
| "epoch": 4.97, |
| "learning_rate": 1.9006398193451262e-05, |
| "loss": 0.2211, |
| "step": 13200 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_loss": 0.21532748639583588, |
| "eval_runtime": 44.0312, |
| "eval_samples_per_second": 170.334, |
| "eval_steps_per_second": 10.652, |
| "step": 13285 |
| }, |
| { |
| "epoch": 5.01, |
| "learning_rate": 1.8998870907038015e-05, |
| "loss": 0.2225, |
| "step": 13300 |
| }, |
| { |
| "epoch": 5.04, |
| "learning_rate": 1.8991343620624767e-05, |
| "loss": 0.2211, |
| "step": 13400 |
| }, |
| { |
| "epoch": 5.08, |
| "learning_rate": 1.8983816334211516e-05, |
| "loss": 0.2178, |
| "step": 13500 |
| }, |
| { |
| "epoch": 5.12, |
| "learning_rate": 1.8976289047798272e-05, |
| "loss": 0.2164, |
| "step": 13600 |
| }, |
| { |
| "epoch": 5.16, |
| "learning_rate": 1.896876176138502e-05, |
| "loss": 0.2144, |
| "step": 13700 |
| }, |
| { |
| "epoch": 5.19, |
| "learning_rate": 1.8961234474971774e-05, |
| "loss": 0.2093, |
| "step": 13800 |
| }, |
| { |
| "epoch": 5.23, |
| "learning_rate": 1.8953707188558526e-05, |
| "loss": 0.2087, |
| "step": 13900 |
| }, |
| { |
| "epoch": 5.27, |
| "learning_rate": 1.894617990214528e-05, |
| "loss": 0.2111, |
| "step": 14000 |
| }, |
| { |
| "epoch": 5.31, |
| "learning_rate": 1.8938652615732028e-05, |
| "loss": 0.2041, |
| "step": 14100 |
| }, |
| { |
| "epoch": 5.34, |
| "learning_rate": 1.8931125329318784e-05, |
| "loss": 0.2023, |
| "step": 14200 |
| }, |
| { |
| "epoch": 5.38, |
| "learning_rate": 1.8923598042905533e-05, |
| "loss": 0.2011, |
| "step": 14300 |
| }, |
| { |
| "epoch": 5.42, |
| "learning_rate": 1.8916070756492286e-05, |
| "loss": 0.2028, |
| "step": 14400 |
| }, |
| { |
| "epoch": 5.46, |
| "learning_rate": 1.8908543470079038e-05, |
| "loss": 0.198, |
| "step": 14500 |
| }, |
| { |
| "epoch": 5.49, |
| "learning_rate": 1.890101618366579e-05, |
| "loss": 0.1986, |
| "step": 14600 |
| }, |
| { |
| "epoch": 5.53, |
| "learning_rate": 1.889348889725254e-05, |
| "loss": 0.1959, |
| "step": 14700 |
| }, |
| { |
| "epoch": 5.57, |
| "learning_rate": 1.8885961610839296e-05, |
| "loss": 0.1953, |
| "step": 14800 |
| }, |
| { |
| "epoch": 5.61, |
| "learning_rate": 1.8878434324426048e-05, |
| "loss": 0.193, |
| "step": 14900 |
| }, |
| { |
| "epoch": 5.65, |
| "learning_rate": 1.8870907038012797e-05, |
| "loss": 0.1918, |
| "step": 15000 |
| }, |
| { |
| "epoch": 5.68, |
| "learning_rate": 1.886337975159955e-05, |
| "loss": 0.1911, |
| "step": 15100 |
| }, |
| { |
| "epoch": 5.72, |
| "learning_rate": 1.8855852465186302e-05, |
| "loss": 0.1889, |
| "step": 15200 |
| }, |
| { |
| "epoch": 5.76, |
| "learning_rate": 1.8848325178773055e-05, |
| "loss": 0.1879, |
| "step": 15300 |
| }, |
| { |
| "epoch": 5.8, |
| "learning_rate": 1.8840797892359804e-05, |
| "loss": 0.1844, |
| "step": 15400 |
| }, |
| { |
| "epoch": 5.83, |
| "learning_rate": 1.883327060594656e-05, |
| "loss": 0.1821, |
| "step": 15500 |
| }, |
| { |
| "epoch": 5.87, |
| "learning_rate": 1.882574331953331e-05, |
| "loss": 0.1808, |
| "step": 15600 |
| }, |
| { |
| "epoch": 5.91, |
| "learning_rate": 1.881821603312006e-05, |
| "loss": 0.1863, |
| "step": 15700 |
| }, |
| { |
| "epoch": 5.95, |
| "learning_rate": 1.8810688746706814e-05, |
| "loss": 0.1806, |
| "step": 15800 |
| }, |
| { |
| "epoch": 5.98, |
| "learning_rate": 1.8803161460293567e-05, |
| "loss": 0.1799, |
| "step": 15900 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_loss": 0.1794862300157547, |
| "eval_runtime": 44.3468, |
| "eval_samples_per_second": 169.121, |
| "eval_steps_per_second": 10.576, |
| "step": 15942 |
| }, |
| { |
| "epoch": 6.02, |
| "learning_rate": 1.8795634173880316e-05, |
| "loss": 0.1773, |
| "step": 16000 |
| }, |
| { |
| "epoch": 6.06, |
| "learning_rate": 1.878810688746707e-05, |
| "loss": 0.1746, |
| "step": 16100 |
| }, |
| { |
| "epoch": 6.1, |
| "learning_rate": 1.878057960105382e-05, |
| "loss": 0.1772, |
| "step": 16200 |
| }, |
| { |
| "epoch": 6.13, |
| "learning_rate": 1.8773052314640573e-05, |
| "loss": 0.1724, |
| "step": 16300 |
| }, |
| { |
| "epoch": 6.17, |
| "learning_rate": 1.8765525028227326e-05, |
| "loss": 0.17, |
| "step": 16400 |
| }, |
| { |
| "epoch": 6.21, |
| "learning_rate": 1.8757997741814078e-05, |
| "loss": 0.1737, |
| "step": 16500 |
| }, |
| { |
| "epoch": 6.25, |
| "learning_rate": 1.8750470455400827e-05, |
| "loss": 0.1732, |
| "step": 16600 |
| }, |
| { |
| "epoch": 6.29, |
| "learning_rate": 1.8742943168987583e-05, |
| "loss": 0.1689, |
| "step": 16700 |
| }, |
| { |
| "epoch": 6.32, |
| "learning_rate": 1.8735415882574332e-05, |
| "loss": 0.1704, |
| "step": 16800 |
| }, |
| { |
| "epoch": 6.36, |
| "learning_rate": 1.8727888596161085e-05, |
| "loss": 0.1664, |
| "step": 16900 |
| }, |
| { |
| "epoch": 6.4, |
| "learning_rate": 1.8720361309747837e-05, |
| "loss": 0.1651, |
| "step": 17000 |
| }, |
| { |
| "epoch": 6.44, |
| "learning_rate": 1.871283402333459e-05, |
| "loss": 0.1678, |
| "step": 17100 |
| }, |
| { |
| "epoch": 6.47, |
| "learning_rate": 1.870530673692134e-05, |
| "loss": 0.1674, |
| "step": 17200 |
| }, |
| { |
| "epoch": 6.51, |
| "learning_rate": 1.8697779450508095e-05, |
| "loss": 0.1674, |
| "step": 17300 |
| }, |
| { |
| "epoch": 6.55, |
| "learning_rate": 1.8690252164094847e-05, |
| "loss": 0.1643, |
| "step": 17400 |
| }, |
| { |
| "epoch": 6.59, |
| "learning_rate": 1.8682724877681597e-05, |
| "loss": 0.1667, |
| "step": 17500 |
| }, |
| { |
| "epoch": 6.62, |
| "learning_rate": 1.867519759126835e-05, |
| "loss": 0.1601, |
| "step": 17600 |
| }, |
| { |
| "epoch": 6.66, |
| "learning_rate": 1.86676703048551e-05, |
| "loss": 0.1645, |
| "step": 17700 |
| }, |
| { |
| "epoch": 6.7, |
| "learning_rate": 1.8660143018441854e-05, |
| "loss": 0.1637, |
| "step": 17800 |
| }, |
| { |
| "epoch": 6.74, |
| "learning_rate": 1.8652615732028607e-05, |
| "loss": 0.1633, |
| "step": 17900 |
| }, |
| { |
| "epoch": 6.77, |
| "learning_rate": 1.864508844561536e-05, |
| "loss": 0.1651, |
| "step": 18000 |
| }, |
| { |
| "epoch": 6.81, |
| "learning_rate": 1.863756115920211e-05, |
| "loss": 0.1589, |
| "step": 18100 |
| }, |
| { |
| "epoch": 6.85, |
| "learning_rate": 1.863003387278886e-05, |
| "loss": 0.1612, |
| "step": 18200 |
| }, |
| { |
| "epoch": 6.89, |
| "learning_rate": 1.8622506586375613e-05, |
| "loss": 0.1629, |
| "step": 18300 |
| }, |
| { |
| "epoch": 6.93, |
| "learning_rate": 1.8614979299962366e-05, |
| "loss": 0.1571, |
| "step": 18400 |
| }, |
| { |
| "epoch": 6.96, |
| "learning_rate": 1.8607452013549115e-05, |
| "loss": 0.158, |
| "step": 18500 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_loss": 0.162311390042305, |
| "eval_runtime": 44.6199, |
| "eval_samples_per_second": 168.087, |
| "eval_steps_per_second": 10.511, |
| "step": 18599 |
| }, |
| { |
| "epoch": 7.0, |
| "learning_rate": 1.859992472713587e-05, |
| "loss": 0.158, |
| "step": 18600 |
| }, |
| { |
| "epoch": 7.04, |
| "learning_rate": 1.859239744072262e-05, |
| "loss": 0.1588, |
| "step": 18700 |
| }, |
| { |
| "epoch": 7.08, |
| "learning_rate": 1.8584870154309373e-05, |
| "loss": 0.1556, |
| "step": 18800 |
| }, |
| { |
| "epoch": 7.11, |
| "learning_rate": 1.8577342867896125e-05, |
| "loss": 0.1552, |
| "step": 18900 |
| }, |
| { |
| "epoch": 7.15, |
| "learning_rate": 1.8569815581482878e-05, |
| "loss": 0.1567, |
| "step": 19000 |
| }, |
| { |
| "epoch": 7.19, |
| "learning_rate": 1.8562288295069627e-05, |
| "loss": 0.1533, |
| "step": 19100 |
| }, |
| { |
| "epoch": 7.23, |
| "learning_rate": 1.8554761008656383e-05, |
| "loss": 0.1543, |
| "step": 19200 |
| }, |
| { |
| "epoch": 7.26, |
| "learning_rate": 1.854723372224313e-05, |
| "loss": 0.1545, |
| "step": 19300 |
| }, |
| { |
| "epoch": 7.3, |
| "learning_rate": 1.8539706435829884e-05, |
| "loss": 0.1557, |
| "step": 19400 |
| }, |
| { |
| "epoch": 7.34, |
| "learning_rate": 1.8532179149416637e-05, |
| "loss": 0.1524, |
| "step": 19500 |
| }, |
| { |
| "epoch": 7.38, |
| "learning_rate": 1.852465186300339e-05, |
| "loss": 0.1538, |
| "step": 19600 |
| }, |
| { |
| "epoch": 7.41, |
| "learning_rate": 1.851712457659014e-05, |
| "loss": 0.1533, |
| "step": 19700 |
| }, |
| { |
| "epoch": 7.45, |
| "learning_rate": 1.8509597290176894e-05, |
| "loss": 0.1506, |
| "step": 19800 |
| }, |
| { |
| "epoch": 7.49, |
| "learning_rate": 1.8502070003763647e-05, |
| "loss": 0.1499, |
| "step": 19900 |
| }, |
| { |
| "epoch": 7.53, |
| "learning_rate": 1.8494542717350396e-05, |
| "loss": 0.1514, |
| "step": 20000 |
| }, |
| { |
| "epoch": 7.56, |
| "learning_rate": 1.848701543093715e-05, |
| "loss": 0.1497, |
| "step": 20100 |
| }, |
| { |
| "epoch": 7.6, |
| "learning_rate": 1.84794881445239e-05, |
| "loss": 0.151, |
| "step": 20200 |
| }, |
| { |
| "epoch": 7.64, |
| "learning_rate": 1.8471960858110653e-05, |
| "loss": 0.1523, |
| "step": 20300 |
| }, |
| { |
| "epoch": 7.68, |
| "learning_rate": 1.8464433571697406e-05, |
| "loss": 0.1484, |
| "step": 20400 |
| }, |
| { |
| "epoch": 7.72, |
| "learning_rate": 1.845690628528416e-05, |
| "loss": 0.15, |
| "step": 20500 |
| }, |
| { |
| "epoch": 7.75, |
| "learning_rate": 1.8449378998870908e-05, |
| "loss": 0.1467, |
| "step": 20600 |
| }, |
| { |
| "epoch": 7.79, |
| "learning_rate": 1.844185171245766e-05, |
| "loss": 0.1467, |
| "step": 20700 |
| }, |
| { |
| "epoch": 7.83, |
| "learning_rate": 1.8434324426044413e-05, |
| "loss": 0.1469, |
| "step": 20800 |
| }, |
| { |
| "epoch": 7.87, |
| "learning_rate": 1.8426797139631165e-05, |
| "loss": 0.148, |
| "step": 20900 |
| }, |
| { |
| "epoch": 7.9, |
| "learning_rate": 1.8419269853217914e-05, |
| "loss": 0.1484, |
| "step": 21000 |
| }, |
| { |
| "epoch": 7.94, |
| "learning_rate": 1.841174256680467e-05, |
| "loss": 0.1459, |
| "step": 21100 |
| }, |
| { |
| "epoch": 7.98, |
| "learning_rate": 1.840421528039142e-05, |
| "loss": 0.1481, |
| "step": 21200 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_loss": 0.14529532194137573, |
| "eval_runtime": 44.2737, |
| "eval_samples_per_second": 169.401, |
| "eval_steps_per_second": 10.593, |
| "step": 21256 |
| }, |
| { |
| "epoch": 8.02, |
| "learning_rate": 1.8396687993978172e-05, |
| "loss": 0.1446, |
| "step": 21300 |
| }, |
| { |
| "epoch": 8.05, |
| "learning_rate": 1.8389160707564924e-05, |
| "loss": 0.1465, |
| "step": 21400 |
| }, |
| { |
| "epoch": 8.09, |
| "learning_rate": 1.8381633421151677e-05, |
| "loss": 0.1458, |
| "step": 21500 |
| }, |
| { |
| "epoch": 8.13, |
| "learning_rate": 1.8374106134738426e-05, |
| "loss": 0.145, |
| "step": 21600 |
| }, |
| { |
| "epoch": 8.17, |
| "learning_rate": 1.8366578848325182e-05, |
| "loss": 0.1457, |
| "step": 21700 |
| }, |
| { |
| "epoch": 8.2, |
| "learning_rate": 1.835905156191193e-05, |
| "loss": 0.1422, |
| "step": 21800 |
| }, |
| { |
| "epoch": 8.24, |
| "learning_rate": 1.8351524275498683e-05, |
| "loss": 0.1435, |
| "step": 21900 |
| }, |
| { |
| "epoch": 8.28, |
| "learning_rate": 1.8343996989085436e-05, |
| "loss": 0.1456, |
| "step": 22000 |
| }, |
| { |
| "epoch": 8.32, |
| "learning_rate": 1.833646970267219e-05, |
| "loss": 0.1472, |
| "step": 22100 |
| }, |
| { |
| "epoch": 8.36, |
| "learning_rate": 1.8328942416258938e-05, |
| "loss": 0.1434, |
| "step": 22200 |
| }, |
| { |
| "epoch": 8.39, |
| "learning_rate": 1.8321415129845694e-05, |
| "loss": 0.1446, |
| "step": 22300 |
| }, |
| { |
| "epoch": 8.43, |
| "learning_rate": 1.8313887843432446e-05, |
| "loss": 0.1464, |
| "step": 22400 |
| }, |
| { |
| "epoch": 8.47, |
| "learning_rate": 1.8306360557019195e-05, |
| "loss": 0.1446, |
| "step": 22500 |
| }, |
| { |
| "epoch": 8.51, |
| "learning_rate": 1.8298833270605948e-05, |
| "loss": 0.1428, |
| "step": 22600 |
| }, |
| { |
| "epoch": 8.54, |
| "learning_rate": 1.82913059841927e-05, |
| "loss": 0.1437, |
| "step": 22700 |
| }, |
| { |
| "epoch": 8.58, |
| "learning_rate": 1.8283778697779453e-05, |
| "loss": 0.1412, |
| "step": 22800 |
| }, |
| { |
| "epoch": 8.62, |
| "learning_rate": 1.8276251411366205e-05, |
| "loss": 0.1396, |
| "step": 22900 |
| }, |
| { |
| "epoch": 8.66, |
| "learning_rate": 1.8268724124952958e-05, |
| "loss": 0.1426, |
| "step": 23000 |
| }, |
| { |
| "epoch": 8.69, |
| "learning_rate": 1.8261196838539707e-05, |
| "loss": 0.1421, |
| "step": 23100 |
| }, |
| { |
| "epoch": 8.73, |
| "learning_rate": 1.825366955212646e-05, |
| "loss": 0.1418, |
| "step": 23200 |
| }, |
| { |
| "epoch": 8.77, |
| "learning_rate": 1.8246142265713212e-05, |
| "loss": 0.1403, |
| "step": 23300 |
| }, |
| { |
| "epoch": 8.81, |
| "learning_rate": 1.8238614979299964e-05, |
| "loss": 0.1416, |
| "step": 23400 |
| }, |
| { |
| "epoch": 8.84, |
| "learning_rate": 1.8231087692886717e-05, |
| "loss": 0.1403, |
| "step": 23500 |
| }, |
| { |
| "epoch": 8.88, |
| "learning_rate": 1.822356040647347e-05, |
| "loss": 0.1396, |
| "step": 23600 |
| }, |
| { |
| "epoch": 8.92, |
| "learning_rate": 1.821603312006022e-05, |
| "loss": 0.1378, |
| "step": 23700 |
| }, |
| { |
| "epoch": 8.96, |
| "learning_rate": 1.820850583364697e-05, |
| "loss": 0.1392, |
| "step": 23800 |
| }, |
| { |
| "epoch": 9.0, |
| "learning_rate": 1.8200978547233724e-05, |
| "loss": 0.1391, |
| "step": 23900 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_loss": 0.13683784008026123, |
| "eval_runtime": 44.0792, |
| "eval_samples_per_second": 170.148, |
| "eval_steps_per_second": 10.64, |
| "step": 23913 |
| }, |
| { |
| "epoch": 9.03, |
| "learning_rate": 1.8193451260820476e-05, |
| "loss": 0.1407, |
| "step": 24000 |
| }, |
| { |
| "epoch": 9.07, |
| "learning_rate": 1.8185923974407225e-05, |
| "loss": 0.1386, |
| "step": 24100 |
| }, |
| { |
| "epoch": 9.11, |
| "learning_rate": 1.817839668799398e-05, |
| "loss": 0.1385, |
| "step": 24200 |
| }, |
| { |
| "epoch": 9.15, |
| "learning_rate": 1.817086940158073e-05, |
| "loss": 0.1403, |
| "step": 24300 |
| }, |
| { |
| "epoch": 9.18, |
| "learning_rate": 1.8163342115167483e-05, |
| "loss": 0.1395, |
| "step": 24400 |
| }, |
| { |
| "epoch": 9.22, |
| "learning_rate": 1.8155814828754235e-05, |
| "loss": 0.1374, |
| "step": 24500 |
| }, |
| { |
| "epoch": 9.26, |
| "learning_rate": 1.8148287542340988e-05, |
| "loss": 0.1354, |
| "step": 24600 |
| }, |
| { |
| "epoch": 9.3, |
| "learning_rate": 1.8140760255927737e-05, |
| "loss": 0.1367, |
| "step": 24700 |
| }, |
| { |
| "epoch": 9.33, |
| "learning_rate": 1.8133232969514493e-05, |
| "loss": 0.1389, |
| "step": 24800 |
| }, |
| { |
| "epoch": 9.37, |
| "learning_rate": 1.8125705683101245e-05, |
| "loss": 0.1355, |
| "step": 24900 |
| }, |
| { |
| "epoch": 9.41, |
| "learning_rate": 1.8118178396687994e-05, |
| "loss": 0.1359, |
| "step": 25000 |
| }, |
| { |
| "epoch": 9.45, |
| "learning_rate": 1.8110651110274747e-05, |
| "loss": 0.1351, |
| "step": 25100 |
| }, |
| { |
| "epoch": 9.48, |
| "learning_rate": 1.81031238238615e-05, |
| "loss": 0.1381, |
| "step": 25200 |
| }, |
| { |
| "epoch": 9.52, |
| "learning_rate": 1.8095596537448252e-05, |
| "loss": 0.1364, |
| "step": 25300 |
| }, |
| { |
| "epoch": 9.56, |
| "learning_rate": 1.8088069251035005e-05, |
| "loss": 0.1339, |
| "step": 25400 |
| }, |
| { |
| "epoch": 9.6, |
| "learning_rate": 1.8080541964621757e-05, |
| "loss": 0.1346, |
| "step": 25500 |
| }, |
| { |
| "epoch": 9.63, |
| "learning_rate": 1.8073014678208506e-05, |
| "loss": 0.1338, |
| "step": 25600 |
| }, |
| { |
| "epoch": 9.67, |
| "learning_rate": 1.806548739179526e-05, |
| "loss": 0.1332, |
| "step": 25700 |
| }, |
| { |
| "epoch": 9.71, |
| "learning_rate": 1.805796010538201e-05, |
| "loss": 0.1344, |
| "step": 25800 |
| }, |
| { |
| "epoch": 9.75, |
| "learning_rate": 1.8050432818968764e-05, |
| "loss": 0.134, |
| "step": 25900 |
| }, |
| { |
| "epoch": 9.79, |
| "learning_rate": 1.8042905532555516e-05, |
| "loss": 0.1365, |
| "step": 26000 |
| }, |
| { |
| "epoch": 9.82, |
| "learning_rate": 1.803537824614227e-05, |
| "loss": 0.1354, |
| "step": 26100 |
| }, |
| { |
| "epoch": 9.86, |
| "learning_rate": 1.8027850959729018e-05, |
| "loss": 0.1327, |
| "step": 26200 |
| }, |
| { |
| "epoch": 9.9, |
| "learning_rate": 1.802032367331577e-05, |
| "loss": 0.1366, |
| "step": 26300 |
| }, |
| { |
| "epoch": 9.94, |
| "learning_rate": 1.8012796386902523e-05, |
| "loss": 0.1343, |
| "step": 26400 |
| }, |
| { |
| "epoch": 9.97, |
| "learning_rate": 1.8005269100489275e-05, |
| "loss": 0.1348, |
| "step": 26500 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_loss": 0.13540224730968475, |
| "eval_runtime": 43.8422, |
| "eval_samples_per_second": 171.068, |
| "eval_steps_per_second": 10.697, |
| "step": 26570 |
| }, |
| { |
| "epoch": 10.01, |
| "learning_rate": 1.7997741814076028e-05, |
| "loss": 0.134, |
| "step": 26600 |
| }, |
| { |
| "epoch": 10.05, |
| "learning_rate": 1.799021452766278e-05, |
| "loss": 0.1327, |
| "step": 26700 |
| }, |
| { |
| "epoch": 10.09, |
| "learning_rate": 1.798268724124953e-05, |
| "loss": 0.1303, |
| "step": 26800 |
| }, |
| { |
| "epoch": 10.12, |
| "learning_rate": 1.7975159954836282e-05, |
| "loss": 0.1343, |
| "step": 26900 |
| }, |
| { |
| "epoch": 10.16, |
| "learning_rate": 1.7967632668423035e-05, |
| "loss": 0.1323, |
| "step": 27000 |
| }, |
| { |
| "epoch": 10.2, |
| "learning_rate": 1.7960105382009787e-05, |
| "loss": 0.1318, |
| "step": 27100 |
| }, |
| { |
| "epoch": 10.24, |
| "learning_rate": 1.7952578095596536e-05, |
| "loss": 0.1322, |
| "step": 27200 |
| }, |
| { |
| "epoch": 10.27, |
| "learning_rate": 1.7945050809183292e-05, |
| "loss": 0.1318, |
| "step": 27300 |
| }, |
| { |
| "epoch": 10.31, |
| "learning_rate": 1.7937523522770045e-05, |
| "loss": 0.1324, |
| "step": 27400 |
| }, |
| { |
| "epoch": 10.35, |
| "learning_rate": 1.7929996236356794e-05, |
| "loss": 0.1306, |
| "step": 27500 |
| }, |
| { |
| "epoch": 10.39, |
| "learning_rate": 1.7922468949943546e-05, |
| "loss": 0.1283, |
| "step": 27600 |
| }, |
| { |
| "epoch": 10.43, |
| "learning_rate": 1.79149416635303e-05, |
| "loss": 0.1311, |
| "step": 27700 |
| }, |
| { |
| "epoch": 10.46, |
| "learning_rate": 1.790741437711705e-05, |
| "loss": 0.1313, |
| "step": 27800 |
| }, |
| { |
| "epoch": 10.5, |
| "learning_rate": 1.7899887090703804e-05, |
| "loss": 0.1291, |
| "step": 27900 |
| }, |
| { |
| "epoch": 10.54, |
| "learning_rate": 1.7892359804290556e-05, |
| "loss": 0.1291, |
| "step": 28000 |
| }, |
| { |
| "epoch": 10.58, |
| "learning_rate": 1.7884832517877305e-05, |
| "loss": 0.1314, |
| "step": 28100 |
| }, |
| { |
| "epoch": 10.61, |
| "learning_rate": 1.7877305231464058e-05, |
| "loss": 0.1294, |
| "step": 28200 |
| }, |
| { |
| "epoch": 10.65, |
| "learning_rate": 1.786977794505081e-05, |
| "loss": 0.1308, |
| "step": 28300 |
| }, |
| { |
| "epoch": 10.69, |
| "learning_rate": 1.7862250658637563e-05, |
| "loss": 0.1317, |
| "step": 28400 |
| }, |
| { |
| "epoch": 10.73, |
| "learning_rate": 1.7854723372224315e-05, |
| "loss": 0.129, |
| "step": 28500 |
| }, |
| { |
| "epoch": 10.76, |
| "learning_rate": 1.7847196085811068e-05, |
| "loss": 0.1307, |
| "step": 28600 |
| }, |
| { |
| "epoch": 10.8, |
| "learning_rate": 1.7839668799397817e-05, |
| "loss": 0.1302, |
| "step": 28700 |
| }, |
| { |
| "epoch": 10.84, |
| "learning_rate": 1.783214151298457e-05, |
| "loss": 0.1283, |
| "step": 28800 |
| }, |
| { |
| "epoch": 10.88, |
| "learning_rate": 1.7824614226571322e-05, |
| "loss": 0.1277, |
| "step": 28900 |
| }, |
| { |
| "epoch": 10.91, |
| "learning_rate": 1.7817086940158075e-05, |
| "loss": 0.1287, |
| "step": 29000 |
| }, |
| { |
| "epoch": 10.95, |
| "learning_rate": 1.7809559653744827e-05, |
| "loss": 0.13, |
| "step": 29100 |
| }, |
| { |
| "epoch": 10.99, |
| "learning_rate": 1.780203236733158e-05, |
| "loss": 0.129, |
| "step": 29200 |
| }, |
| { |
| "epoch": 11.0, |
| "eval_loss": 0.12486864626407623, |
| "eval_runtime": 44.5752, |
| "eval_samples_per_second": 168.255, |
| "eval_steps_per_second": 10.522, |
| "step": 29227 |
| }, |
| { |
| "epoch": 11.03, |
| "learning_rate": 1.779450508091833e-05, |
| "loss": 0.1259, |
| "step": 29300 |
| }, |
| { |
| "epoch": 11.07, |
| "learning_rate": 1.778697779450508e-05, |
| "loss": 0.128, |
| "step": 29400 |
| }, |
| { |
| "epoch": 11.1, |
| "learning_rate": 1.7779450508091834e-05, |
| "loss": 0.127, |
| "step": 29500 |
| }, |
| { |
| "epoch": 11.14, |
| "learning_rate": 1.7771923221678586e-05, |
| "loss": 0.1277, |
| "step": 29600 |
| }, |
| { |
| "epoch": 11.18, |
| "learning_rate": 1.776439593526534e-05, |
| "loss": 0.1271, |
| "step": 29700 |
| }, |
| { |
| "epoch": 11.22, |
| "learning_rate": 1.775686864885209e-05, |
| "loss": 0.1263, |
| "step": 29800 |
| }, |
| { |
| "epoch": 11.25, |
| "learning_rate": 1.7749341362438844e-05, |
| "loss": 0.1259, |
| "step": 29900 |
| }, |
| { |
| "epoch": 11.29, |
| "learning_rate": 1.7741814076025593e-05, |
| "loss": 0.1257, |
| "step": 30000 |
| }, |
| { |
| "epoch": 11.33, |
| "learning_rate": 1.7734286789612346e-05, |
| "loss": 0.125, |
| "step": 30100 |
| }, |
| { |
| "epoch": 11.37, |
| "learning_rate": 1.7726759503199098e-05, |
| "loss": 0.128, |
| "step": 30200 |
| }, |
| { |
| "epoch": 11.4, |
| "learning_rate": 1.771923221678585e-05, |
| "loss": 0.1271, |
| "step": 30300 |
| }, |
| { |
| "epoch": 11.44, |
| "learning_rate": 1.7711704930372603e-05, |
| "loss": 0.1268, |
| "step": 30400 |
| }, |
| { |
| "epoch": 11.48, |
| "learning_rate": 1.7704177643959356e-05, |
| "loss": 0.1262, |
| "step": 30500 |
| }, |
| { |
| "epoch": 11.52, |
| "learning_rate": 1.7696650357546105e-05, |
| "loss": 0.1247, |
| "step": 30600 |
| }, |
| { |
| "epoch": 11.55, |
| "learning_rate": 1.7689123071132857e-05, |
| "loss": 0.1243, |
| "step": 30700 |
| }, |
| { |
| "epoch": 11.59, |
| "learning_rate": 1.768159578471961e-05, |
| "loss": 0.1249, |
| "step": 30800 |
| }, |
| { |
| "epoch": 11.63, |
| "learning_rate": 1.7674068498306362e-05, |
| "loss": 0.1255, |
| "step": 30900 |
| }, |
| { |
| "epoch": 11.67, |
| "learning_rate": 1.7666541211893115e-05, |
| "loss": 0.1258, |
| "step": 31000 |
| }, |
| { |
| "epoch": 11.7, |
| "learning_rate": 1.7659013925479867e-05, |
| "loss": 0.1234, |
| "step": 31100 |
| }, |
| { |
| "epoch": 11.74, |
| "learning_rate": 1.7651486639066616e-05, |
| "loss": 0.1226, |
| "step": 31200 |
| }, |
| { |
| "epoch": 11.78, |
| "learning_rate": 1.764395935265337e-05, |
| "loss": 0.1251, |
| "step": 31300 |
| }, |
| { |
| "epoch": 11.82, |
| "learning_rate": 1.763643206624012e-05, |
| "loss": 0.1238, |
| "step": 31400 |
| }, |
| { |
| "epoch": 11.86, |
| "learning_rate": 1.7628904779826874e-05, |
| "loss": 0.1262, |
| "step": 31500 |
| }, |
| { |
| "epoch": 11.89, |
| "learning_rate": 1.7621377493413626e-05, |
| "loss": 0.1249, |
| "step": 31600 |
| }, |
| { |
| "epoch": 11.93, |
| "learning_rate": 1.761385020700038e-05, |
| "loss": 0.1224, |
| "step": 31700 |
| }, |
| { |
| "epoch": 11.97, |
| "learning_rate": 1.7606322920587128e-05, |
| "loss": 0.126, |
| "step": 31800 |
| }, |
| { |
| "epoch": 12.0, |
| "eval_loss": 0.12289831042289734, |
| "eval_runtime": 44.279, |
| "eval_samples_per_second": 169.381, |
| "eval_steps_per_second": 10.592, |
| "step": 31884 |
| }, |
| { |
| "epoch": 12.01, |
| "learning_rate": 1.759879563417388e-05, |
| "loss": 0.1247, |
| "step": 31900 |
| }, |
| { |
| "epoch": 12.04, |
| "learning_rate": 1.7591268347760633e-05, |
| "loss": 0.1224, |
| "step": 32000 |
| }, |
| { |
| "epoch": 12.08, |
| "learning_rate": 1.7583741061347386e-05, |
| "loss": 0.1225, |
| "step": 32100 |
| }, |
| { |
| "epoch": 12.12, |
| "learning_rate": 1.7576213774934138e-05, |
| "loss": 0.1247, |
| "step": 32200 |
| }, |
| { |
| "epoch": 12.16, |
| "learning_rate": 1.756868648852089e-05, |
| "loss": 0.1244, |
| "step": 32300 |
| }, |
| { |
| "epoch": 12.19, |
| "learning_rate": 1.7561159202107643e-05, |
| "loss": 0.1231, |
| "step": 32400 |
| }, |
| { |
| "epoch": 12.23, |
| "learning_rate": 1.7553631915694392e-05, |
| "loss": 0.1215, |
| "step": 32500 |
| }, |
| { |
| "epoch": 12.27, |
| "learning_rate": 1.7546104629281148e-05, |
| "loss": 0.1238, |
| "step": 32600 |
| }, |
| { |
| "epoch": 12.31, |
| "learning_rate": 1.7538577342867897e-05, |
| "loss": 0.1251, |
| "step": 32700 |
| }, |
| { |
| "epoch": 12.34, |
| "learning_rate": 1.753105005645465e-05, |
| "loss": 0.1248, |
| "step": 32800 |
| }, |
| { |
| "epoch": 12.38, |
| "learning_rate": 1.7523522770041402e-05, |
| "loss": 0.1227, |
| "step": 32900 |
| }, |
| { |
| "epoch": 12.42, |
| "learning_rate": 1.7515995483628155e-05, |
| "loss": 0.1219, |
| "step": 33000 |
| }, |
| { |
| "epoch": 12.46, |
| "learning_rate": 1.7508468197214904e-05, |
| "loss": 0.121, |
| "step": 33100 |
| }, |
| { |
| "epoch": 12.5, |
| "learning_rate": 1.7500940910801657e-05, |
| "loss": 0.1225, |
| "step": 33200 |
| }, |
| { |
| "epoch": 12.53, |
| "learning_rate": 1.749341362438841e-05, |
| "loss": 0.1224, |
| "step": 33300 |
| }, |
| { |
| "epoch": 12.57, |
| "learning_rate": 1.748588633797516e-05, |
| "loss": 0.1214, |
| "step": 33400 |
| }, |
| { |
| "epoch": 12.61, |
| "learning_rate": 1.7478359051561914e-05, |
| "loss": 0.1217, |
| "step": 33500 |
| }, |
| { |
| "epoch": 12.65, |
| "learning_rate": 1.7470831765148667e-05, |
| "loss": 0.1201, |
| "step": 33600 |
| }, |
| { |
| "epoch": 12.68, |
| "learning_rate": 1.7463304478735416e-05, |
| "loss": 0.1212, |
| "step": 33700 |
| }, |
| { |
| "epoch": 12.72, |
| "learning_rate": 1.7455777192322168e-05, |
| "loss": 0.1218, |
| "step": 33800 |
| }, |
| { |
| "epoch": 12.76, |
| "learning_rate": 1.744824990590892e-05, |
| "loss": 0.1221, |
| "step": 33900 |
| }, |
| { |
| "epoch": 12.8, |
| "learning_rate": 1.7440722619495673e-05, |
| "loss": 0.1199, |
| "step": 34000 |
| }, |
| { |
| "epoch": 12.83, |
| "learning_rate": 1.7433195333082426e-05, |
| "loss": 0.122, |
| "step": 34100 |
| }, |
| { |
| "epoch": 12.87, |
| "learning_rate": 1.7425668046669178e-05, |
| "loss": 0.12, |
| "step": 34200 |
| }, |
| { |
| "epoch": 12.91, |
| "learning_rate": 1.7418140760255927e-05, |
| "loss": 0.1222, |
| "step": 34300 |
| }, |
| { |
| "epoch": 12.95, |
| "learning_rate": 1.741061347384268e-05, |
| "loss": 0.1228, |
| "step": 34400 |
| }, |
| { |
| "epoch": 12.98, |
| "learning_rate": 1.7403086187429432e-05, |
| "loss": 0.1216, |
| "step": 34500 |
| }, |
| { |
| "epoch": 13.0, |
| "eval_loss": 0.11841125041246414, |
| "eval_runtime": 44.6411, |
| "eval_samples_per_second": 168.006, |
| "eval_steps_per_second": 10.506, |
| "step": 34541 |
| }, |
| { |
| "epoch": 13.02, |
| "learning_rate": 1.7395558901016185e-05, |
| "loss": 0.1195, |
| "step": 34600 |
| }, |
| { |
| "epoch": 13.06, |
| "learning_rate": 1.7388031614602937e-05, |
| "loss": 0.1211, |
| "step": 34700 |
| }, |
| { |
| "epoch": 13.1, |
| "learning_rate": 1.738050432818969e-05, |
| "loss": 0.1209, |
| "step": 34800 |
| }, |
| { |
| "epoch": 13.14, |
| "learning_rate": 1.7372977041776442e-05, |
| "loss": 0.122, |
| "step": 34900 |
| }, |
| { |
| "epoch": 13.17, |
| "learning_rate": 1.736544975536319e-05, |
| "loss": 0.1206, |
| "step": 35000 |
| }, |
| { |
| "epoch": 13.21, |
| "learning_rate": 1.7357922468949947e-05, |
| "loss": 0.1198, |
| "step": 35100 |
| }, |
| { |
| "epoch": 13.25, |
| "learning_rate": 1.7350395182536697e-05, |
| "loss": 0.1196, |
| "step": 35200 |
| }, |
| { |
| "epoch": 13.29, |
| "learning_rate": 1.734286789612345e-05, |
| "loss": 0.1192, |
| "step": 35300 |
| }, |
| { |
| "epoch": 13.32, |
| "learning_rate": 1.73353406097102e-05, |
| "loss": 0.12, |
| "step": 35400 |
| }, |
| { |
| "epoch": 13.36, |
| "learning_rate": 1.7327813323296954e-05, |
| "loss": 0.1179, |
| "step": 35500 |
| }, |
| { |
| "epoch": 13.4, |
| "learning_rate": 1.7320286036883703e-05, |
| "loss": 0.1201, |
| "step": 35600 |
| }, |
| { |
| "epoch": 13.44, |
| "learning_rate": 1.731275875047046e-05, |
| "loss": 0.1181, |
| "step": 35700 |
| }, |
| { |
| "epoch": 13.47, |
| "learning_rate": 1.730523146405721e-05, |
| "loss": 0.1185, |
| "step": 35800 |
| }, |
| { |
| "epoch": 13.51, |
| "learning_rate": 1.729770417764396e-05, |
| "loss": 0.1171, |
| "step": 35900 |
| }, |
| { |
| "epoch": 13.55, |
| "learning_rate": 1.7290176891230713e-05, |
| "loss": 0.1202, |
| "step": 36000 |
| }, |
| { |
| "epoch": 13.59, |
| "learning_rate": 1.7282649604817466e-05, |
| "loss": 0.1191, |
| "step": 36100 |
| }, |
| { |
| "epoch": 13.62, |
| "learning_rate": 1.7275122318404215e-05, |
| "loss": 0.1201, |
| "step": 36200 |
| }, |
| { |
| "epoch": 13.66, |
| "learning_rate": 1.7267595031990967e-05, |
| "loss": 0.1188, |
| "step": 36300 |
| }, |
| { |
| "epoch": 13.7, |
| "learning_rate": 1.726006774557772e-05, |
| "loss": 0.1191, |
| "step": 36400 |
| }, |
| { |
| "epoch": 13.74, |
| "learning_rate": 1.7252540459164473e-05, |
| "loss": 0.117, |
| "step": 36500 |
| }, |
| { |
| "epoch": 13.77, |
| "learning_rate": 1.7245013172751225e-05, |
| "loss": 0.1177, |
| "step": 36600 |
| }, |
| { |
| "epoch": 13.81, |
| "learning_rate": 1.7237485886337978e-05, |
| "loss": 0.1173, |
| "step": 36700 |
| }, |
| { |
| "epoch": 13.85, |
| "learning_rate": 1.7229958599924727e-05, |
| "loss": 0.1176, |
| "step": 36800 |
| }, |
| { |
| "epoch": 13.89, |
| "learning_rate": 1.722243131351148e-05, |
| "loss": 0.1177, |
| "step": 36900 |
| }, |
| { |
| "epoch": 13.93, |
| "learning_rate": 1.7214904027098232e-05, |
| "loss": 0.1168, |
| "step": 37000 |
| }, |
| { |
| "epoch": 13.96, |
| "learning_rate": 1.7207376740684984e-05, |
| "loss": 0.1175, |
| "step": 37100 |
| }, |
| { |
| "epoch": 14.0, |
| "eval_loss": 0.1184767335653305, |
| "eval_runtime": 44.9958, |
| "eval_samples_per_second": 166.682, |
| "eval_steps_per_second": 10.423, |
| "step": 37198 |
| }, |
| { |
| "epoch": 14.0, |
| "learning_rate": 1.7199849454271737e-05, |
| "loss": 0.1172, |
| "step": 37200 |
| }, |
| { |
| "epoch": 14.04, |
| "learning_rate": 1.719232216785849e-05, |
| "loss": 0.117, |
| "step": 37300 |
| }, |
| { |
| "epoch": 14.08, |
| "learning_rate": 1.7184794881445242e-05, |
| "loss": 0.1175, |
| "step": 37400 |
| }, |
| { |
| "epoch": 14.11, |
| "learning_rate": 1.717726759503199e-05, |
| "loss": 0.1164, |
| "step": 37500 |
| }, |
| { |
| "epoch": 14.15, |
| "learning_rate": 1.7169740308618747e-05, |
| "loss": 0.1161, |
| "step": 37600 |
| }, |
| { |
| "epoch": 14.19, |
| "learning_rate": 1.7162213022205496e-05, |
| "loss": 0.1183, |
| "step": 37700 |
| }, |
| { |
| "epoch": 14.23, |
| "learning_rate": 1.715468573579225e-05, |
| "loss": 0.1161, |
| "step": 37800 |
| }, |
| { |
| "epoch": 14.26, |
| "learning_rate": 1.7147158449379e-05, |
| "loss": 0.117, |
| "step": 37900 |
| }, |
| { |
| "epoch": 14.3, |
| "learning_rate": 1.7139631162965753e-05, |
| "loss": 0.1169, |
| "step": 38000 |
| }, |
| { |
| "epoch": 14.34, |
| "learning_rate": 1.7132103876552503e-05, |
| "loss": 0.1171, |
| "step": 38100 |
| }, |
| { |
| "epoch": 14.38, |
| "learning_rate": 1.712457659013926e-05, |
| "loss": 0.1165, |
| "step": 38200 |
| }, |
| { |
| "epoch": 14.41, |
| "learning_rate": 1.7117049303726008e-05, |
| "loss": 0.115, |
| "step": 38300 |
| }, |
| { |
| "epoch": 14.45, |
| "learning_rate": 1.710952201731276e-05, |
| "loss": 0.1164, |
| "step": 38400 |
| }, |
| { |
| "epoch": 14.49, |
| "learning_rate": 1.7101994730899513e-05, |
| "loss": 0.1151, |
| "step": 38500 |
| }, |
| { |
| "epoch": 14.53, |
| "learning_rate": 1.7094467444486265e-05, |
| "loss": 0.1163, |
| "step": 38600 |
| }, |
| { |
| "epoch": 14.57, |
| "learning_rate": 1.7086940158073014e-05, |
| "loss": 0.1157, |
| "step": 38700 |
| }, |
| { |
| "epoch": 14.6, |
| "learning_rate": 1.7079412871659767e-05, |
| "loss": 0.1154, |
| "step": 38800 |
| }, |
| { |
| "epoch": 14.64, |
| "learning_rate": 1.707188558524652e-05, |
| "loss": 0.1145, |
| "step": 38900 |
| }, |
| { |
| "epoch": 14.68, |
| "learning_rate": 1.7064358298833272e-05, |
| "loss": 0.1153, |
| "step": 39000 |
| }, |
| { |
| "epoch": 14.72, |
| "learning_rate": 1.7056831012420024e-05, |
| "loss": 0.1163, |
| "step": 39100 |
| }, |
| { |
| "epoch": 14.75, |
| "learning_rate": 1.7049303726006777e-05, |
| "loss": 0.1173, |
| "step": 39200 |
| }, |
| { |
| "epoch": 14.79, |
| "learning_rate": 1.7041776439593526e-05, |
| "loss": 0.1161, |
| "step": 39300 |
| }, |
| { |
| "epoch": 14.83, |
| "learning_rate": 1.703424915318028e-05, |
| "loss": 0.1144, |
| "step": 39400 |
| }, |
| { |
| "epoch": 14.87, |
| "learning_rate": 1.702672186676703e-05, |
| "loss": 0.1152, |
| "step": 39500 |
| }, |
| { |
| "epoch": 14.9, |
| "learning_rate": 1.7019194580353783e-05, |
| "loss": 0.1129, |
| "step": 39600 |
| }, |
| { |
| "epoch": 14.94, |
| "learning_rate": 1.7011667293940536e-05, |
| "loss": 0.1159, |
| "step": 39700 |
| }, |
| { |
| "epoch": 14.98, |
| "learning_rate": 1.700414000752729e-05, |
| "loss": 0.1137, |
| "step": 39800 |
| }, |
| { |
| "epoch": 15.0, |
| "eval_loss": 0.11463519930839539, |
| "eval_runtime": 44.7206, |
| "eval_samples_per_second": 167.708, |
| "eval_steps_per_second": 10.487, |
| "step": 39855 |
| }, |
| { |
| "epoch": 15.02, |
| "learning_rate": 1.699661272111404e-05, |
| "loss": 0.1136, |
| "step": 39900 |
| }, |
| { |
| "epoch": 15.05, |
| "learning_rate": 1.698908543470079e-05, |
| "loss": 0.1154, |
| "step": 40000 |
| }, |
| { |
| "epoch": 15.09, |
| "learning_rate": 1.6981558148287546e-05, |
| "loss": 0.1119, |
| "step": 40100 |
| }, |
| { |
| "epoch": 15.13, |
| "learning_rate": 1.6974030861874295e-05, |
| "loss": 0.1147, |
| "step": 40200 |
| }, |
| { |
| "epoch": 15.17, |
| "learning_rate": 1.6966503575461048e-05, |
| "loss": 0.1133, |
| "step": 40300 |
| }, |
| { |
| "epoch": 15.21, |
| "learning_rate": 1.69589762890478e-05, |
| "loss": 0.1159, |
| "step": 40400 |
| }, |
| { |
| "epoch": 15.24, |
| "learning_rate": 1.6951449002634553e-05, |
| "loss": 0.1123, |
| "step": 40500 |
| }, |
| { |
| "epoch": 15.28, |
| "learning_rate": 1.6943921716221302e-05, |
| "loss": 0.1144, |
| "step": 40600 |
| }, |
| { |
| "epoch": 15.32, |
| "learning_rate": 1.6936394429808058e-05, |
| "loss": 0.1156, |
| "step": 40700 |
| }, |
| { |
| "epoch": 15.36, |
| "learning_rate": 1.6928867143394807e-05, |
| "loss": 0.115, |
| "step": 40800 |
| }, |
| { |
| "epoch": 15.39, |
| "learning_rate": 1.692133985698156e-05, |
| "loss": 0.1129, |
| "step": 40900 |
| }, |
| { |
| "epoch": 15.43, |
| "learning_rate": 1.6913812570568312e-05, |
| "loss": 0.1136, |
| "step": 41000 |
| }, |
| { |
| "epoch": 15.47, |
| "learning_rate": 1.6906285284155064e-05, |
| "loss": 0.1127, |
| "step": 41100 |
| }, |
| { |
| "epoch": 15.51, |
| "learning_rate": 1.6898757997741814e-05, |
| "loss": 0.1119, |
| "step": 41200 |
| }, |
| { |
| "epoch": 15.54, |
| "learning_rate": 1.689123071132857e-05, |
| "loss": 0.1131, |
| "step": 41300 |
| }, |
| { |
| "epoch": 15.58, |
| "learning_rate": 1.688370342491532e-05, |
| "loss": 0.1112, |
| "step": 41400 |
| }, |
| { |
| "epoch": 15.62, |
| "learning_rate": 1.687617613850207e-05, |
| "loss": 0.1149, |
| "step": 41500 |
| }, |
| { |
| "epoch": 15.66, |
| "learning_rate": 1.6868648852088824e-05, |
| "loss": 0.1133, |
| "step": 41600 |
| }, |
| { |
| "epoch": 15.69, |
| "learning_rate": 1.6861121565675576e-05, |
| "loss": 0.1123, |
| "step": 41700 |
| }, |
| { |
| "epoch": 15.73, |
| "learning_rate": 1.6853594279262325e-05, |
| "loss": 0.1138, |
| "step": 41800 |
| }, |
| { |
| "epoch": 15.77, |
| "learning_rate": 1.6846066992849078e-05, |
| "loss": 0.1134, |
| "step": 41900 |
| }, |
| { |
| "epoch": 15.81, |
| "learning_rate": 1.683853970643583e-05, |
| "loss": 0.1138, |
| "step": 42000 |
| }, |
| { |
| "epoch": 15.84, |
| "learning_rate": 1.6831012420022583e-05, |
| "loss": 0.1122, |
| "step": 42100 |
| }, |
| { |
| "epoch": 15.88, |
| "learning_rate": 1.6823485133609335e-05, |
| "loss": 0.1127, |
| "step": 42200 |
| }, |
| { |
| "epoch": 15.92, |
| "learning_rate": 1.6815957847196088e-05, |
| "loss": 0.1147, |
| "step": 42300 |
| }, |
| { |
| "epoch": 15.96, |
| "learning_rate": 1.680843056078284e-05, |
| "loss": 0.1127, |
| "step": 42400 |
| }, |
| { |
| "epoch": 16.0, |
| "learning_rate": 1.680090327436959e-05, |
| "loss": 0.1125, |
| "step": 42500 |
| }, |
| { |
| "epoch": 16.0, |
| "eval_loss": 0.1117386743426323, |
| "eval_runtime": 44.2589, |
| "eval_samples_per_second": 169.457, |
| "eval_steps_per_second": 10.597, |
| "step": 42512 |
| }, |
| { |
| "epoch": 16.03, |
| "learning_rate": 1.6793375987956345e-05, |
| "loss": 0.1115, |
| "step": 42600 |
| }, |
| { |
| "epoch": 16.07, |
| "learning_rate": 1.6785848701543094e-05, |
| "loss": 0.1128, |
| "step": 42700 |
| }, |
| { |
| "epoch": 16.11, |
| "learning_rate": 1.6778321415129847e-05, |
| "loss": 0.1115, |
| "step": 42800 |
| }, |
| { |
| "epoch": 16.15, |
| "learning_rate": 1.67707941287166e-05, |
| "loss": 0.1119, |
| "step": 42900 |
| }, |
| { |
| "epoch": 16.18, |
| "learning_rate": 1.6763266842303352e-05, |
| "loss": 0.1126, |
| "step": 43000 |
| }, |
| { |
| "epoch": 16.22, |
| "learning_rate": 1.67557395558901e-05, |
| "loss": 0.1127, |
| "step": 43100 |
| }, |
| { |
| "epoch": 16.26, |
| "learning_rate": 1.6748212269476857e-05, |
| "loss": 0.1145, |
| "step": 43200 |
| }, |
| { |
| "epoch": 16.3, |
| "learning_rate": 1.6740684983063606e-05, |
| "loss": 0.1118, |
| "step": 43300 |
| }, |
| { |
| "epoch": 16.33, |
| "learning_rate": 1.673315769665036e-05, |
| "loss": 0.1129, |
| "step": 43400 |
| }, |
| { |
| "epoch": 16.37, |
| "learning_rate": 1.672563041023711e-05, |
| "loss": 0.113, |
| "step": 43500 |
| }, |
| { |
| "epoch": 16.41, |
| "learning_rate": 1.6718103123823864e-05, |
| "loss": 0.1129, |
| "step": 43600 |
| }, |
| { |
| "epoch": 16.45, |
| "learning_rate": 1.6710575837410613e-05, |
| "loss": 0.1098, |
| "step": 43700 |
| }, |
| { |
| "epoch": 16.48, |
| "learning_rate": 1.670304855099737e-05, |
| "loss": 0.112, |
| "step": 43800 |
| }, |
| { |
| "epoch": 16.52, |
| "learning_rate": 1.6695521264584118e-05, |
| "loss": 0.1117, |
| "step": 43900 |
| }, |
| { |
| "epoch": 16.56, |
| "learning_rate": 1.668799397817087e-05, |
| "loss": 0.1128, |
| "step": 44000 |
| }, |
| { |
| "epoch": 16.6, |
| "learning_rate": 1.6680466691757623e-05, |
| "loss": 0.1133, |
| "step": 44100 |
| }, |
| { |
| "epoch": 16.64, |
| "learning_rate": 1.6672939405344375e-05, |
| "loss": 0.11, |
| "step": 44200 |
| }, |
| { |
| "epoch": 16.67, |
| "learning_rate": 1.6665412118931125e-05, |
| "loss": 0.1138, |
| "step": 44300 |
| }, |
| { |
| "epoch": 16.71, |
| "learning_rate": 1.665788483251788e-05, |
| "loss": 0.1108, |
| "step": 44400 |
| }, |
| { |
| "epoch": 16.75, |
| "learning_rate": 1.665035754610463e-05, |
| "loss": 0.1096, |
| "step": 44500 |
| }, |
| { |
| "epoch": 16.79, |
| "learning_rate": 1.6642830259691382e-05, |
| "loss": 0.1092, |
| "step": 44600 |
| }, |
| { |
| "epoch": 16.82, |
| "learning_rate": 1.6635302973278135e-05, |
| "loss": 0.1106, |
| "step": 44700 |
| }, |
| { |
| "epoch": 16.86, |
| "learning_rate": 1.6627775686864887e-05, |
| "loss": 0.1118, |
| "step": 44800 |
| }, |
| { |
| "epoch": 16.9, |
| "learning_rate": 1.662024840045164e-05, |
| "loss": 0.1116, |
| "step": 44900 |
| }, |
| { |
| "epoch": 16.94, |
| "learning_rate": 1.661272111403839e-05, |
| "loss": 0.1095, |
| "step": 45000 |
| }, |
| { |
| "epoch": 16.97, |
| "learning_rate": 1.6605193827625145e-05, |
| "loss": 0.1112, |
| "step": 45100 |
| }, |
| { |
| "epoch": 17.0, |
| "eval_loss": 0.10999125987291336, |
| "eval_runtime": 44.3431, |
| "eval_samples_per_second": 169.136, |
| "eval_steps_per_second": 10.577, |
| "step": 45169 |
| }, |
| { |
| "epoch": 17.01, |
| "learning_rate": 1.6597666541211894e-05, |
| "loss": 0.11, |
| "step": 45200 |
| }, |
| { |
| "epoch": 17.05, |
| "learning_rate": 1.6590139254798646e-05, |
| "loss": 0.1107, |
| "step": 45300 |
| }, |
| { |
| "epoch": 17.09, |
| "learning_rate": 1.65826119683854e-05, |
| "loss": 0.1127, |
| "step": 45400 |
| }, |
| { |
| "epoch": 17.12, |
| "learning_rate": 1.657508468197215e-05, |
| "loss": 0.1106, |
| "step": 45500 |
| }, |
| { |
| "epoch": 17.16, |
| "learning_rate": 1.65675573955589e-05, |
| "loss": 0.1098, |
| "step": 45600 |
| }, |
| { |
| "epoch": 17.2, |
| "learning_rate": 1.6560030109145656e-05, |
| "loss": 0.111, |
| "step": 45700 |
| }, |
| { |
| "epoch": 17.24, |
| "learning_rate": 1.6552502822732405e-05, |
| "loss": 0.1094, |
| "step": 45800 |
| }, |
| { |
| "epoch": 17.28, |
| "learning_rate": 1.6544975536319158e-05, |
| "loss": 0.1098, |
| "step": 45900 |
| }, |
| { |
| "epoch": 17.31, |
| "learning_rate": 1.653744824990591e-05, |
| "loss": 0.11, |
| "step": 46000 |
| }, |
| { |
| "epoch": 17.35, |
| "learning_rate": 1.6529920963492663e-05, |
| "loss": 0.1101, |
| "step": 46100 |
| }, |
| { |
| "epoch": 17.39, |
| "learning_rate": 1.6522393677079412e-05, |
| "loss": 0.1113, |
| "step": 46200 |
| }, |
| { |
| "epoch": 17.43, |
| "learning_rate": 1.6514866390666168e-05, |
| "loss": 0.112, |
| "step": 46300 |
| }, |
| { |
| "epoch": 17.46, |
| "learning_rate": 1.6507339104252917e-05, |
| "loss": 0.1109, |
| "step": 46400 |
| }, |
| { |
| "epoch": 17.5, |
| "learning_rate": 1.649981181783967e-05, |
| "loss": 0.1108, |
| "step": 46500 |
| }, |
| { |
| "epoch": 17.54, |
| "learning_rate": 1.6492284531426422e-05, |
| "loss": 0.1092, |
| "step": 46600 |
| }, |
| { |
| "epoch": 17.58, |
| "learning_rate": 1.6484757245013175e-05, |
| "loss": 0.1097, |
| "step": 46700 |
| }, |
| { |
| "epoch": 17.61, |
| "learning_rate": 1.6477229958599924e-05, |
| "loss": 0.1098, |
| "step": 46800 |
| }, |
| { |
| "epoch": 17.65, |
| "learning_rate": 1.646970267218668e-05, |
| "loss": 0.1081, |
| "step": 46900 |
| }, |
| { |
| "epoch": 17.69, |
| "learning_rate": 1.646217538577343e-05, |
| "loss": 0.1085, |
| "step": 47000 |
| }, |
| { |
| "epoch": 17.73, |
| "learning_rate": 1.645464809936018e-05, |
| "loss": 0.1089, |
| "step": 47100 |
| }, |
| { |
| "epoch": 17.76, |
| "learning_rate": 1.6447120812946934e-05, |
| "loss": 0.1084, |
| "step": 47200 |
| }, |
| { |
| "epoch": 17.8, |
| "learning_rate": 1.6439593526533686e-05, |
| "loss": 0.1093, |
| "step": 47300 |
| }, |
| { |
| "epoch": 17.84, |
| "learning_rate": 1.643206624012044e-05, |
| "loss": 0.1115, |
| "step": 47400 |
| }, |
| { |
| "epoch": 17.88, |
| "learning_rate": 1.642453895370719e-05, |
| "loss": 0.1091, |
| "step": 47500 |
| }, |
| { |
| "epoch": 17.91, |
| "learning_rate": 1.6417011667293944e-05, |
| "loss": 0.11, |
| "step": 47600 |
| }, |
| { |
| "epoch": 17.95, |
| "learning_rate": 1.6409484380880693e-05, |
| "loss": 0.1096, |
| "step": 47700 |
| }, |
| { |
| "epoch": 17.99, |
| "learning_rate": 1.6401957094467446e-05, |
| "loss": 0.1108, |
| "step": 47800 |
| }, |
| { |
| "epoch": 18.0, |
| "eval_loss": 0.10891053080558777, |
| "eval_runtime": 44.4735, |
| "eval_samples_per_second": 168.64, |
| "eval_steps_per_second": 10.546, |
| "step": 47826 |
| }, |
| { |
| "epoch": 18.03, |
| "learning_rate": 1.6394429808054198e-05, |
| "loss": 0.1076, |
| "step": 47900 |
| }, |
| { |
| "epoch": 18.07, |
| "learning_rate": 1.638690252164095e-05, |
| "loss": 0.1092, |
| "step": 48000 |
| }, |
| { |
| "epoch": 18.1, |
| "learning_rate": 1.63793752352277e-05, |
| "loss": 0.1081, |
| "step": 48100 |
| }, |
| { |
| "epoch": 18.14, |
| "learning_rate": 1.6371847948814456e-05, |
| "loss": 0.1089, |
| "step": 48200 |
| }, |
| { |
| "epoch": 18.18, |
| "learning_rate": 1.6364320662401205e-05, |
| "loss": 0.1097, |
| "step": 48300 |
| }, |
| { |
| "epoch": 18.22, |
| "learning_rate": 1.6356793375987957e-05, |
| "loss": 0.11, |
| "step": 48400 |
| }, |
| { |
| "epoch": 18.25, |
| "learning_rate": 1.634926608957471e-05, |
| "loss": 0.107, |
| "step": 48500 |
| }, |
| { |
| "epoch": 18.29, |
| "learning_rate": 1.6341738803161462e-05, |
| "loss": 0.109, |
| "step": 48600 |
| }, |
| { |
| "epoch": 18.33, |
| "learning_rate": 1.633421151674821e-05, |
| "loss": 0.1092, |
| "step": 48700 |
| }, |
| { |
| "epoch": 18.37, |
| "learning_rate": 1.6326684230334967e-05, |
| "loss": 0.109, |
| "step": 48800 |
| }, |
| { |
| "epoch": 18.4, |
| "learning_rate": 1.6319156943921716e-05, |
| "loss": 0.1068, |
| "step": 48900 |
| }, |
| { |
| "epoch": 18.44, |
| "learning_rate": 1.631162965750847e-05, |
| "loss": 0.1091, |
| "step": 49000 |
| }, |
| { |
| "epoch": 18.48, |
| "learning_rate": 1.630410237109522e-05, |
| "loss": 0.1087, |
| "step": 49100 |
| }, |
| { |
| "epoch": 18.52, |
| "learning_rate": 1.6296575084681974e-05, |
| "loss": 0.1078, |
| "step": 49200 |
| }, |
| { |
| "epoch": 18.55, |
| "learning_rate": 1.6289047798268723e-05, |
| "loss": 0.109, |
| "step": 49300 |
| }, |
| { |
| "epoch": 18.59, |
| "learning_rate": 1.628152051185548e-05, |
| "loss": 0.1112, |
| "step": 49400 |
| }, |
| { |
| "epoch": 18.63, |
| "learning_rate": 1.6273993225442228e-05, |
| "loss": 0.1098, |
| "step": 49500 |
| }, |
| { |
| "epoch": 18.67, |
| "learning_rate": 1.626646593902898e-05, |
| "loss": 0.11, |
| "step": 49600 |
| }, |
| { |
| "epoch": 18.71, |
| "learning_rate": 1.6258938652615733e-05, |
| "loss": 0.1085, |
| "step": 49700 |
| }, |
| { |
| "epoch": 18.74, |
| "learning_rate": 1.6251411366202486e-05, |
| "loss": 0.1088, |
| "step": 49800 |
| }, |
| { |
| "epoch": 18.78, |
| "learning_rate": 1.6243884079789238e-05, |
| "loss": 0.1093, |
| "step": 49900 |
| }, |
| { |
| "epoch": 18.82, |
| "learning_rate": 1.623635679337599e-05, |
| "loss": 0.1069, |
| "step": 50000 |
| }, |
| { |
| "epoch": 18.86, |
| "learning_rate": 1.6228829506962743e-05, |
| "loss": 0.1082, |
| "step": 50100 |
| }, |
| { |
| "epoch": 18.89, |
| "learning_rate": 1.6221302220549492e-05, |
| "loss": 0.1093, |
| "step": 50200 |
| }, |
| { |
| "epoch": 18.93, |
| "learning_rate": 1.6213774934136245e-05, |
| "loss": 0.1074, |
| "step": 50300 |
| }, |
| { |
| "epoch": 18.97, |
| "learning_rate": 1.6206247647722997e-05, |
| "loss": 0.1061, |
| "step": 50400 |
| }, |
| { |
| "epoch": 19.0, |
| "eval_loss": 0.10703522711992264, |
| "eval_runtime": 45.2943, |
| "eval_samples_per_second": 165.584, |
| "eval_steps_per_second": 10.354, |
| "step": 50483 |
| }, |
| { |
| "epoch": 19.01, |
| "learning_rate": 1.619872036130975e-05, |
| "loss": 0.1082, |
| "step": 50500 |
| }, |
| { |
| "epoch": 19.04, |
| "learning_rate": 1.6191193074896502e-05, |
| "loss": 0.1093, |
| "step": 50600 |
| }, |
| { |
| "epoch": 19.08, |
| "learning_rate": 1.6183665788483255e-05, |
| "loss": 0.1078, |
| "step": 50700 |
| }, |
| { |
| "epoch": 19.12, |
| "learning_rate": 1.6176138502070004e-05, |
| "loss": 0.1069, |
| "step": 50800 |
| }, |
| { |
| "epoch": 19.16, |
| "learning_rate": 1.6168611215656757e-05, |
| "loss": 0.1092, |
| "step": 50900 |
| }, |
| { |
| "epoch": 19.19, |
| "learning_rate": 1.616108392924351e-05, |
| "loss": 0.1064, |
| "step": 51000 |
| }, |
| { |
| "epoch": 19.23, |
| "learning_rate": 1.615355664283026e-05, |
| "loss": 0.1063, |
| "step": 51100 |
| }, |
| { |
| "epoch": 19.27, |
| "learning_rate": 1.614602935641701e-05, |
| "loss": 0.1071, |
| "step": 51200 |
| }, |
| { |
| "epoch": 19.31, |
| "learning_rate": 1.6138502070003767e-05, |
| "loss": 0.1083, |
| "step": 51300 |
| }, |
| { |
| "epoch": 19.35, |
| "learning_rate": 1.6130974783590516e-05, |
| "loss": 0.1079, |
| "step": 51400 |
| }, |
| { |
| "epoch": 19.38, |
| "learning_rate": 1.6123447497177268e-05, |
| "loss": 0.1081, |
| "step": 51500 |
| }, |
| { |
| "epoch": 19.42, |
| "learning_rate": 1.611592021076402e-05, |
| "loss": 0.1086, |
| "step": 51600 |
| }, |
| { |
| "epoch": 19.46, |
| "learning_rate": 1.6108392924350773e-05, |
| "loss": 0.1073, |
| "step": 51700 |
| }, |
| { |
| "epoch": 19.5, |
| "learning_rate": 1.6100865637937522e-05, |
| "loss": 0.1082, |
| "step": 51800 |
| }, |
| { |
| "epoch": 19.53, |
| "learning_rate": 1.6093338351524278e-05, |
| "loss": 0.1081, |
| "step": 51900 |
| }, |
| { |
| "epoch": 19.57, |
| "learning_rate": 1.6085811065111027e-05, |
| "loss": 0.1081, |
| "step": 52000 |
| }, |
| { |
| "epoch": 19.61, |
| "learning_rate": 1.607828377869778e-05, |
| "loss": 0.1077, |
| "step": 52100 |
| }, |
| { |
| "epoch": 19.65, |
| "learning_rate": 1.6070756492284532e-05, |
| "loss": 0.108, |
| "step": 52200 |
| }, |
| { |
| "epoch": 19.68, |
| "learning_rate": 1.6063229205871285e-05, |
| "loss": 0.107, |
| "step": 52300 |
| }, |
| { |
| "epoch": 19.72, |
| "learning_rate": 1.6055701919458037e-05, |
| "loss": 0.1082, |
| "step": 52400 |
| }, |
| { |
| "epoch": 19.76, |
| "learning_rate": 1.604817463304479e-05, |
| "loss": 0.1082, |
| "step": 52500 |
| }, |
| { |
| "epoch": 19.8, |
| "learning_rate": 1.6040647346631542e-05, |
| "loss": 0.1067, |
| "step": 52600 |
| }, |
| { |
| "epoch": 19.83, |
| "learning_rate": 1.603312006021829e-05, |
| "loss": 0.1071, |
| "step": 52700 |
| }, |
| { |
| "epoch": 19.87, |
| "learning_rate": 1.6025592773805044e-05, |
| "loss": 0.1068, |
| "step": 52800 |
| }, |
| { |
| "epoch": 19.91, |
| "learning_rate": 1.6018065487391797e-05, |
| "loss": 0.1086, |
| "step": 52900 |
| }, |
| { |
| "epoch": 19.95, |
| "learning_rate": 1.601053820097855e-05, |
| "loss": 0.1085, |
| "step": 53000 |
| }, |
| { |
| "epoch": 19.98, |
| "learning_rate": 1.60030109145653e-05, |
| "loss": 0.1073, |
| "step": 53100 |
| }, |
| { |
| "epoch": 20.0, |
| "eval_loss": 0.10757213830947876, |
| "eval_runtime": 45.7115, |
| "eval_samples_per_second": 164.072, |
| "eval_steps_per_second": 10.26, |
| "step": 53140 |
| }, |
| { |
| "epoch": 20.02, |
| "learning_rate": 1.5995483628152054e-05, |
| "loss": 0.1084, |
| "step": 53200 |
| }, |
| { |
| "epoch": 20.06, |
| "learning_rate": 1.5987956341738803e-05, |
| "loss": 0.1089, |
| "step": 53300 |
| }, |
| { |
| "epoch": 20.1, |
| "learning_rate": 1.5980429055325556e-05, |
| "loss": 0.1085, |
| "step": 53400 |
| }, |
| { |
| "epoch": 20.14, |
| "learning_rate": 1.597290176891231e-05, |
| "loss": 0.1092, |
| "step": 53500 |
| }, |
| { |
| "epoch": 20.17, |
| "learning_rate": 1.596537448249906e-05, |
| "loss": 0.1073, |
| "step": 53600 |
| }, |
| { |
| "epoch": 20.21, |
| "learning_rate": 1.5957847196085813e-05, |
| "loss": 0.1087, |
| "step": 53700 |
| }, |
| { |
| "epoch": 20.25, |
| "learning_rate": 1.5950319909672566e-05, |
| "loss": 0.1071, |
| "step": 53800 |
| }, |
| { |
| "epoch": 20.29, |
| "learning_rate": 1.5942792623259315e-05, |
| "loss": 0.1061, |
| "step": 53900 |
| }, |
| { |
| "epoch": 20.32, |
| "learning_rate": 1.5935265336846068e-05, |
| "loss": 0.1055, |
| "step": 54000 |
| }, |
| { |
| "epoch": 20.36, |
| "learning_rate": 1.592773805043282e-05, |
| "loss": 0.1077, |
| "step": 54100 |
| }, |
| { |
| "epoch": 20.4, |
| "learning_rate": 1.5920210764019573e-05, |
| "loss": 0.108, |
| "step": 54200 |
| }, |
| { |
| "epoch": 20.44, |
| "learning_rate": 1.591268347760632e-05, |
| "loss": 0.1075, |
| "step": 54300 |
| }, |
| { |
| "epoch": 20.47, |
| "learning_rate": 1.5905156191193078e-05, |
| "loss": 0.1066, |
| "step": 54400 |
| }, |
| { |
| "epoch": 20.51, |
| "learning_rate": 1.5897628904779827e-05, |
| "loss": 0.1055, |
| "step": 54500 |
| }, |
| { |
| "epoch": 20.55, |
| "learning_rate": 1.589010161836658e-05, |
| "loss": 0.1069, |
| "step": 54600 |
| }, |
| { |
| "epoch": 20.59, |
| "learning_rate": 1.5882574331953332e-05, |
| "loss": 0.1068, |
| "step": 54700 |
| }, |
| { |
| "epoch": 20.62, |
| "learning_rate": 1.5875047045540084e-05, |
| "loss": 0.1065, |
| "step": 54800 |
| }, |
| { |
| "epoch": 20.66, |
| "learning_rate": 1.5867519759126837e-05, |
| "loss": 0.1053, |
| "step": 54900 |
| }, |
| { |
| "epoch": 20.7, |
| "learning_rate": 1.585999247271359e-05, |
| "loss": 0.1055, |
| "step": 55000 |
| }, |
| { |
| "epoch": 20.74, |
| "learning_rate": 1.5852465186300342e-05, |
| "loss": 0.107, |
| "step": 55100 |
| }, |
| { |
| "epoch": 20.78, |
| "learning_rate": 1.584493789988709e-05, |
| "loss": 0.1058, |
| "step": 55200 |
| }, |
| { |
| "epoch": 20.81, |
| "learning_rate": 1.5837410613473843e-05, |
| "loss": 0.1088, |
| "step": 55300 |
| }, |
| { |
| "epoch": 20.85, |
| "learning_rate": 1.5829883327060596e-05, |
| "loss": 0.1061, |
| "step": 55400 |
| }, |
| { |
| "epoch": 20.89, |
| "learning_rate": 1.582235604064735e-05, |
| "loss": 0.1066, |
| "step": 55500 |
| }, |
| { |
| "epoch": 20.93, |
| "learning_rate": 1.58148287542341e-05, |
| "loss": 0.1071, |
| "step": 55600 |
| }, |
| { |
| "epoch": 20.96, |
| "learning_rate": 1.5807301467820853e-05, |
| "loss": 0.1066, |
| "step": 55700 |
| }, |
| { |
| "epoch": 21.0, |
| "eval_loss": 0.1060996800661087, |
| "eval_runtime": 45.0415, |
| "eval_samples_per_second": 166.513, |
| "eval_steps_per_second": 10.413, |
| "step": 55797 |
| }, |
| { |
| "epoch": 21.0, |
| "learning_rate": 1.5799774181407603e-05, |
| "loss": 0.1054, |
| "step": 55800 |
| }, |
| { |
| "epoch": 21.04, |
| "learning_rate": 1.5792246894994355e-05, |
| "loss": 0.1053, |
| "step": 55900 |
| }, |
| { |
| "epoch": 21.08, |
| "learning_rate": 1.5784719608581108e-05, |
| "loss": 0.1055, |
| "step": 56000 |
| }, |
| { |
| "epoch": 21.11, |
| "learning_rate": 1.577719232216786e-05, |
| "loss": 0.1056, |
| "step": 56100 |
| }, |
| { |
| "epoch": 21.15, |
| "learning_rate": 1.5769665035754613e-05, |
| "loss": 0.106, |
| "step": 56200 |
| }, |
| { |
| "epoch": 21.19, |
| "learning_rate": 1.5762137749341365e-05, |
| "loss": 0.106, |
| "step": 56300 |
| }, |
| { |
| "epoch": 21.23, |
| "learning_rate": 1.5754610462928114e-05, |
| "loss": 0.1056, |
| "step": 56400 |
| }, |
| { |
| "epoch": 21.26, |
| "learning_rate": 1.5747083176514867e-05, |
| "loss": 0.1069, |
| "step": 56500 |
| }, |
| { |
| "epoch": 21.3, |
| "learning_rate": 1.573955589010162e-05, |
| "loss": 0.1054, |
| "step": 56600 |
| }, |
| { |
| "epoch": 21.34, |
| "learning_rate": 1.5732028603688372e-05, |
| "loss": 0.1087, |
| "step": 56700 |
| }, |
| { |
| "epoch": 21.38, |
| "learning_rate": 1.5724501317275124e-05, |
| "loss": 0.1085, |
| "step": 56800 |
| }, |
| { |
| "epoch": 21.42, |
| "learning_rate": 1.5716974030861877e-05, |
| "loss": 0.1052, |
| "step": 56900 |
| }, |
| { |
| "epoch": 21.45, |
| "learning_rate": 1.5709446744448626e-05, |
| "loss": 0.1072, |
| "step": 57000 |
| }, |
| { |
| "epoch": 21.49, |
| "learning_rate": 1.570191945803538e-05, |
| "loss": 0.1055, |
| "step": 57100 |
| }, |
| { |
| "epoch": 21.53, |
| "learning_rate": 1.569439217162213e-05, |
| "loss": 0.1067, |
| "step": 57200 |
| }, |
| { |
| "epoch": 21.57, |
| "learning_rate": 1.5686864885208884e-05, |
| "loss": 0.1057, |
| "step": 57300 |
| }, |
| { |
| "epoch": 21.6, |
| "learning_rate": 1.5679337598795636e-05, |
| "loss": 0.1057, |
| "step": 57400 |
| }, |
| { |
| "epoch": 21.64, |
| "learning_rate": 1.567181031238239e-05, |
| "loss": 0.106, |
| "step": 57500 |
| }, |
| { |
| "epoch": 21.68, |
| "learning_rate": 1.566428302596914e-05, |
| "loss": 0.1022, |
| "step": 57600 |
| }, |
| { |
| "epoch": 21.72, |
| "learning_rate": 1.565675573955589e-05, |
| "loss": 0.1066, |
| "step": 57700 |
| }, |
| { |
| "epoch": 21.75, |
| "learning_rate": 1.5649228453142643e-05, |
| "loss": 0.1063, |
| "step": 57800 |
| }, |
| { |
| "epoch": 21.79, |
| "learning_rate": 1.5641701166729395e-05, |
| "loss": 0.1072, |
| "step": 57900 |
| }, |
| { |
| "epoch": 21.83, |
| "learning_rate": 1.5634173880316148e-05, |
| "loss": 0.1044, |
| "step": 58000 |
| }, |
| { |
| "epoch": 21.87, |
| "learning_rate": 1.56266465939029e-05, |
| "loss": 0.1071, |
| "step": 58100 |
| }, |
| { |
| "epoch": 21.9, |
| "learning_rate": 1.5619119307489653e-05, |
| "loss": 0.1058, |
| "step": 58200 |
| }, |
| { |
| "epoch": 21.94, |
| "learning_rate": 1.5611592021076402e-05, |
| "loss": 0.1072, |
| "step": 58300 |
| }, |
| { |
| "epoch": 21.98, |
| "learning_rate": 1.5604064734663154e-05, |
| "loss": 0.1065, |
| "step": 58400 |
| }, |
| { |
| "epoch": 22.0, |
| "eval_loss": 0.10562047362327576, |
| "eval_runtime": 44.6592, |
| "eval_samples_per_second": 167.938, |
| "eval_steps_per_second": 10.502, |
| "step": 58454 |
| }, |
| { |
| "epoch": 22.02, |
| "learning_rate": 1.5596537448249907e-05, |
| "loss": 0.1042, |
| "step": 58500 |
| }, |
| { |
| "epoch": 22.05, |
| "learning_rate": 1.558901016183666e-05, |
| "loss": 0.1051, |
| "step": 58600 |
| }, |
| { |
| "epoch": 22.09, |
| "learning_rate": 1.5581482875423412e-05, |
| "loss": 0.1075, |
| "step": 58700 |
| }, |
| { |
| "epoch": 22.13, |
| "learning_rate": 1.5573955589010164e-05, |
| "loss": 0.1052, |
| "step": 58800 |
| }, |
| { |
| "epoch": 22.17, |
| "learning_rate": 1.5566428302596914e-05, |
| "loss": 0.1039, |
| "step": 58900 |
| }, |
| { |
| "epoch": 22.21, |
| "learning_rate": 1.5558901016183666e-05, |
| "loss": 0.1063, |
| "step": 59000 |
| }, |
| { |
| "epoch": 22.24, |
| "learning_rate": 1.555137372977042e-05, |
| "loss": 0.1048, |
| "step": 59100 |
| }, |
| { |
| "epoch": 22.28, |
| "learning_rate": 1.554384644335717e-05, |
| "loss": 0.1056, |
| "step": 59200 |
| }, |
| { |
| "epoch": 22.32, |
| "learning_rate": 1.5536319156943924e-05, |
| "loss": 0.1064, |
| "step": 59300 |
| }, |
| { |
| "epoch": 22.36, |
| "learning_rate": 1.5528791870530676e-05, |
| "loss": 0.1055, |
| "step": 59400 |
| }, |
| { |
| "epoch": 22.39, |
| "learning_rate": 1.5521264584117425e-05, |
| "loss": 0.1065, |
| "step": 59500 |
| }, |
| { |
| "epoch": 22.43, |
| "learning_rate": 1.5513737297704178e-05, |
| "loss": 0.1063, |
| "step": 59600 |
| }, |
| { |
| "epoch": 22.47, |
| "learning_rate": 1.550621001129093e-05, |
| "loss": 0.1045, |
| "step": 59700 |
| }, |
| { |
| "epoch": 22.51, |
| "learning_rate": 1.5498682724877683e-05, |
| "loss": 0.1041, |
| "step": 59800 |
| }, |
| { |
| "epoch": 22.54, |
| "learning_rate": 1.5491155438464435e-05, |
| "loss": 0.1057, |
| "step": 59900 |
| }, |
| { |
| "epoch": 22.58, |
| "learning_rate": 1.5483628152051188e-05, |
| "loss": 0.1048, |
| "step": 60000 |
| }, |
| { |
| "epoch": 22.62, |
| "learning_rate": 1.547610086563794e-05, |
| "loss": 0.1055, |
| "step": 60100 |
| }, |
| { |
| "epoch": 22.66, |
| "learning_rate": 1.546857357922469e-05, |
| "loss": 0.1021, |
| "step": 60200 |
| }, |
| { |
| "epoch": 22.69, |
| "learning_rate": 1.5461046292811442e-05, |
| "loss": 0.1044, |
| "step": 60300 |
| }, |
| { |
| "epoch": 22.73, |
| "learning_rate": 1.5453519006398194e-05, |
| "loss": 0.1056, |
| "step": 60400 |
| }, |
| { |
| "epoch": 22.77, |
| "learning_rate": 1.5445991719984947e-05, |
| "loss": 0.1057, |
| "step": 60500 |
| }, |
| { |
| "epoch": 22.81, |
| "learning_rate": 1.54384644335717e-05, |
| "loss": 0.1058, |
| "step": 60600 |
| }, |
| { |
| "epoch": 22.85, |
| "learning_rate": 1.5430937147158452e-05, |
| "loss": 0.1059, |
| "step": 60700 |
| }, |
| { |
| "epoch": 22.88, |
| "learning_rate": 1.54234098607452e-05, |
| "loss": 0.1058, |
| "step": 60800 |
| }, |
| { |
| "epoch": 22.92, |
| "learning_rate": 1.5415882574331954e-05, |
| "loss": 0.1052, |
| "step": 60900 |
| }, |
| { |
| "epoch": 22.96, |
| "learning_rate": 1.5408355287918706e-05, |
| "loss": 0.1062, |
| "step": 61000 |
| }, |
| { |
| "epoch": 23.0, |
| "learning_rate": 1.540082800150546e-05, |
| "loss": 0.1045, |
| "step": 61100 |
| }, |
| { |
| "epoch": 23.0, |
| "eval_loss": 0.10369115322828293, |
| "eval_runtime": 44.6965, |
| "eval_samples_per_second": 167.798, |
| "eval_steps_per_second": 10.493, |
| "step": 61111 |
| }, |
| { |
| "epoch": 23.03, |
| "learning_rate": 1.539330071509221e-05, |
| "loss": 0.1056, |
| "step": 61200 |
| }, |
| { |
| "epoch": 23.07, |
| "learning_rate": 1.5385773428678964e-05, |
| "loss": 0.1034, |
| "step": 61300 |
| }, |
| { |
| "epoch": 23.11, |
| "learning_rate": 1.5378246142265713e-05, |
| "loss": 0.1056, |
| "step": 61400 |
| }, |
| { |
| "epoch": 23.15, |
| "learning_rate": 1.5370718855852465e-05, |
| "loss": 0.1055, |
| "step": 61500 |
| }, |
| { |
| "epoch": 23.18, |
| "learning_rate": 1.5363191569439218e-05, |
| "loss": 0.1053, |
| "step": 61600 |
| }, |
| { |
| "epoch": 23.22, |
| "learning_rate": 1.535566428302597e-05, |
| "loss": 0.1049, |
| "step": 61700 |
| }, |
| { |
| "epoch": 23.26, |
| "learning_rate": 1.5348136996612723e-05, |
| "loss": 0.1062, |
| "step": 61800 |
| }, |
| { |
| "epoch": 23.3, |
| "learning_rate": 1.5340609710199475e-05, |
| "loss": 0.1038, |
| "step": 61900 |
| }, |
| { |
| "epoch": 23.33, |
| "learning_rate": 1.5333082423786225e-05, |
| "loss": 0.1048, |
| "step": 62000 |
| }, |
| { |
| "epoch": 23.37, |
| "learning_rate": 1.5325555137372977e-05, |
| "loss": 0.1054, |
| "step": 62100 |
| }, |
| { |
| "epoch": 23.41, |
| "learning_rate": 1.5318027850959733e-05, |
| "loss": 0.1041, |
| "step": 62200 |
| }, |
| { |
| "epoch": 23.45, |
| "learning_rate": 1.5310500564546482e-05, |
| "loss": 0.1048, |
| "step": 62300 |
| }, |
| { |
| "epoch": 23.49, |
| "learning_rate": 1.5302973278133235e-05, |
| "loss": 0.105, |
| "step": 62400 |
| }, |
| { |
| "epoch": 23.52, |
| "learning_rate": 1.5295445991719987e-05, |
| "loss": 0.1023, |
| "step": 62500 |
| }, |
| { |
| "epoch": 23.56, |
| "learning_rate": 1.528791870530674e-05, |
| "loss": 0.105, |
| "step": 62600 |
| }, |
| { |
| "epoch": 23.6, |
| "learning_rate": 1.528039141889349e-05, |
| "loss": 0.1046, |
| "step": 62700 |
| }, |
| { |
| "epoch": 23.64, |
| "learning_rate": 1.527286413248024e-05, |
| "loss": 0.1033, |
| "step": 62800 |
| }, |
| { |
| "epoch": 23.67, |
| "learning_rate": 1.5265336846066994e-05, |
| "loss": 0.1057, |
| "step": 62900 |
| }, |
| { |
| "epoch": 23.71, |
| "learning_rate": 1.5257809559653746e-05, |
| "loss": 0.1056, |
| "step": 63000 |
| }, |
| { |
| "epoch": 23.75, |
| "learning_rate": 1.5250282273240497e-05, |
| "loss": 0.1056, |
| "step": 63100 |
| }, |
| { |
| "epoch": 23.79, |
| "learning_rate": 1.5242754986827251e-05, |
| "loss": 0.1032, |
| "step": 63200 |
| }, |
| { |
| "epoch": 23.82, |
| "learning_rate": 1.5235227700414002e-05, |
| "loss": 0.1025, |
| "step": 63300 |
| }, |
| { |
| "epoch": 23.86, |
| "learning_rate": 1.5227700414000755e-05, |
| "loss": 0.1063, |
| "step": 63400 |
| }, |
| { |
| "epoch": 23.9, |
| "learning_rate": 1.5220173127587505e-05, |
| "loss": 0.1035, |
| "step": 63500 |
| }, |
| { |
| "epoch": 23.94, |
| "learning_rate": 1.5212645841174258e-05, |
| "loss": 0.104, |
| "step": 63600 |
| }, |
| { |
| "epoch": 23.97, |
| "learning_rate": 1.5205118554761009e-05, |
| "loss": 0.1052, |
| "step": 63700 |
| }, |
| { |
| "epoch": 24.0, |
| "eval_loss": 0.10549741983413696, |
| "eval_runtime": 43.683, |
| "eval_samples_per_second": 171.691, |
| "eval_steps_per_second": 10.736, |
| "step": 63768 |
| }, |
| { |
| "epoch": 24.01, |
| "learning_rate": 1.5197591268347763e-05, |
| "loss": 0.1051, |
| "step": 63800 |
| }, |
| { |
| "epoch": 24.05, |
| "learning_rate": 1.5190063981934514e-05, |
| "loss": 0.1031, |
| "step": 63900 |
| }, |
| { |
| "epoch": 24.09, |
| "learning_rate": 1.5182536695521266e-05, |
| "loss": 0.1057, |
| "step": 64000 |
| }, |
| { |
| "epoch": 24.12, |
| "learning_rate": 1.5175009409108017e-05, |
| "loss": 0.1048, |
| "step": 64100 |
| }, |
| { |
| "epoch": 24.16, |
| "learning_rate": 1.516748212269477e-05, |
| "loss": 0.1054, |
| "step": 64200 |
| }, |
| { |
| "epoch": 24.2, |
| "learning_rate": 1.515995483628152e-05, |
| "loss": 0.1042, |
| "step": 64300 |
| }, |
| { |
| "epoch": 24.24, |
| "learning_rate": 1.5152427549868273e-05, |
| "loss": 0.1049, |
| "step": 64400 |
| }, |
| { |
| "epoch": 24.28, |
| "learning_rate": 1.5144900263455024e-05, |
| "loss": 0.1039, |
| "step": 64500 |
| }, |
| { |
| "epoch": 24.31, |
| "learning_rate": 1.5137372977041778e-05, |
| "loss": 0.1039, |
| "step": 64600 |
| }, |
| { |
| "epoch": 24.35, |
| "learning_rate": 1.512984569062853e-05, |
| "loss": 0.104, |
| "step": 64700 |
| }, |
| { |
| "epoch": 24.39, |
| "learning_rate": 1.5122318404215281e-05, |
| "loss": 0.1039, |
| "step": 64800 |
| }, |
| { |
| "epoch": 24.43, |
| "learning_rate": 1.5114791117802034e-05, |
| "loss": 0.1031, |
| "step": 64900 |
| }, |
| { |
| "epoch": 24.46, |
| "learning_rate": 1.5107263831388785e-05, |
| "loss": 0.1019, |
| "step": 65000 |
| }, |
| { |
| "epoch": 24.5, |
| "learning_rate": 1.5099736544975539e-05, |
| "loss": 0.1041, |
| "step": 65100 |
| }, |
| { |
| "epoch": 24.54, |
| "learning_rate": 1.509220925856229e-05, |
| "loss": 0.1049, |
| "step": 65200 |
| }, |
| { |
| "epoch": 24.58, |
| "learning_rate": 1.5084681972149042e-05, |
| "loss": 0.1029, |
| "step": 65300 |
| }, |
| { |
| "epoch": 24.61, |
| "learning_rate": 1.5077154685735793e-05, |
| "loss": 0.105, |
| "step": 65400 |
| }, |
| { |
| "epoch": 24.65, |
| "learning_rate": 1.5069627399322546e-05, |
| "loss": 0.1041, |
| "step": 65500 |
| }, |
| { |
| "epoch": 24.69, |
| "learning_rate": 1.5062100112909296e-05, |
| "loss": 0.1032, |
| "step": 65600 |
| }, |
| { |
| "epoch": 24.73, |
| "learning_rate": 1.505457282649605e-05, |
| "loss": 0.1033, |
| "step": 65700 |
| }, |
| { |
| "epoch": 24.76, |
| "learning_rate": 1.5047045540082801e-05, |
| "loss": 0.1036, |
| "step": 65800 |
| }, |
| { |
| "epoch": 24.8, |
| "learning_rate": 1.5039518253669554e-05, |
| "loss": 0.1031, |
| "step": 65900 |
| }, |
| { |
| "epoch": 24.84, |
| "learning_rate": 1.5031990967256305e-05, |
| "loss": 0.1031, |
| "step": 66000 |
| }, |
| { |
| "epoch": 24.88, |
| "learning_rate": 1.5024463680843057e-05, |
| "loss": 0.103, |
| "step": 66100 |
| }, |
| { |
| "epoch": 24.92, |
| "learning_rate": 1.5016936394429808e-05, |
| "loss": 0.103, |
| "step": 66200 |
| }, |
| { |
| "epoch": 24.95, |
| "learning_rate": 1.5009409108016562e-05, |
| "loss": 0.103, |
| "step": 66300 |
| }, |
| { |
| "epoch": 24.99, |
| "learning_rate": 1.5001881821603313e-05, |
| "loss": 0.102, |
| "step": 66400 |
| }, |
| { |
| "epoch": 25.0, |
| "eval_loss": 0.10278935730457306, |
| "eval_runtime": 43.9205, |
| "eval_samples_per_second": 170.763, |
| "eval_steps_per_second": 10.678, |
| "step": 66425 |
| }, |
| { |
| "epoch": 25.03, |
| "learning_rate": 1.4994354535190066e-05, |
| "loss": 0.1033, |
| "step": 66500 |
| }, |
| { |
| "epoch": 25.07, |
| "learning_rate": 1.4986827248776816e-05, |
| "loss": 0.1022, |
| "step": 66600 |
| }, |
| { |
| "epoch": 25.1, |
| "learning_rate": 1.4979299962363569e-05, |
| "loss": 0.1028, |
| "step": 66700 |
| }, |
| { |
| "epoch": 25.14, |
| "learning_rate": 1.497177267595032e-05, |
| "loss": 0.1044, |
| "step": 66800 |
| }, |
| { |
| "epoch": 25.18, |
| "learning_rate": 1.4964245389537074e-05, |
| "loss": 0.1038, |
| "step": 66900 |
| }, |
| { |
| "epoch": 25.22, |
| "learning_rate": 1.4956718103123825e-05, |
| "loss": 0.1052, |
| "step": 67000 |
| }, |
| { |
| "epoch": 25.25, |
| "learning_rate": 1.4949190816710577e-05, |
| "loss": 0.1039, |
| "step": 67100 |
| }, |
| { |
| "epoch": 25.29, |
| "learning_rate": 1.494166353029733e-05, |
| "loss": 0.1039, |
| "step": 67200 |
| }, |
| { |
| "epoch": 25.33, |
| "learning_rate": 1.493413624388408e-05, |
| "loss": 0.1025, |
| "step": 67300 |
| }, |
| { |
| "epoch": 25.37, |
| "learning_rate": 1.4926608957470833e-05, |
| "loss": 0.1037, |
| "step": 67400 |
| }, |
| { |
| "epoch": 25.4, |
| "learning_rate": 1.4919081671057584e-05, |
| "loss": 0.1019, |
| "step": 67500 |
| }, |
| { |
| "epoch": 25.44, |
| "learning_rate": 1.4911554384644338e-05, |
| "loss": 0.1027, |
| "step": 67600 |
| }, |
| { |
| "epoch": 25.48, |
| "learning_rate": 1.4904027098231089e-05, |
| "loss": 0.1037, |
| "step": 67700 |
| }, |
| { |
| "epoch": 25.52, |
| "learning_rate": 1.4896499811817842e-05, |
| "loss": 0.1031, |
| "step": 67800 |
| }, |
| { |
| "epoch": 25.56, |
| "learning_rate": 1.4888972525404592e-05, |
| "loss": 0.1035, |
| "step": 67900 |
| }, |
| { |
| "epoch": 25.59, |
| "learning_rate": 1.4881445238991345e-05, |
| "loss": 0.1031, |
| "step": 68000 |
| }, |
| { |
| "epoch": 25.63, |
| "learning_rate": 1.4873917952578096e-05, |
| "loss": 0.1034, |
| "step": 68100 |
| }, |
| { |
| "epoch": 25.67, |
| "learning_rate": 1.486639066616485e-05, |
| "loss": 0.1037, |
| "step": 68200 |
| }, |
| { |
| "epoch": 25.71, |
| "learning_rate": 1.48588633797516e-05, |
| "loss": 0.104, |
| "step": 68300 |
| }, |
| { |
| "epoch": 25.74, |
| "learning_rate": 1.4851336093338353e-05, |
| "loss": 0.1036, |
| "step": 68400 |
| }, |
| { |
| "epoch": 25.78, |
| "learning_rate": 1.4843808806925104e-05, |
| "loss": 0.1031, |
| "step": 68500 |
| }, |
| { |
| "epoch": 25.82, |
| "learning_rate": 1.4836281520511857e-05, |
| "loss": 0.1027, |
| "step": 68600 |
| }, |
| { |
| "epoch": 25.86, |
| "learning_rate": 1.4828754234098607e-05, |
| "loss": 0.1036, |
| "step": 68700 |
| }, |
| { |
| "epoch": 25.89, |
| "learning_rate": 1.4821226947685362e-05, |
| "loss": 0.1023, |
| "step": 68800 |
| }, |
| { |
| "epoch": 25.93, |
| "learning_rate": 1.4813699661272112e-05, |
| "loss": 0.1015, |
| "step": 68900 |
| }, |
| { |
| "epoch": 25.97, |
| "learning_rate": 1.4806172374858865e-05, |
| "loss": 0.1025, |
| "step": 69000 |
| }, |
| { |
| "epoch": 26.0, |
| "eval_loss": 0.10342206060886383, |
| "eval_runtime": 45.4198, |
| "eval_samples_per_second": 165.126, |
| "eval_steps_per_second": 10.326, |
| "step": 69082 |
| }, |
| { |
| "epoch": 26.01, |
| "learning_rate": 1.4798645088445616e-05, |
| "loss": 0.1011, |
| "step": 69100 |
| }, |
| { |
| "epoch": 26.04, |
| "learning_rate": 1.4791117802032368e-05, |
| "loss": 0.1021, |
| "step": 69200 |
| }, |
| { |
| "epoch": 26.08, |
| "learning_rate": 1.4783590515619119e-05, |
| "loss": 0.1036, |
| "step": 69300 |
| }, |
| { |
| "epoch": 26.12, |
| "learning_rate": 1.4776063229205873e-05, |
| "loss": 0.1041, |
| "step": 69400 |
| }, |
| { |
| "epoch": 26.16, |
| "learning_rate": 1.4768535942792624e-05, |
| "loss": 0.1028, |
| "step": 69500 |
| }, |
| { |
| "epoch": 26.19, |
| "learning_rate": 1.4761008656379377e-05, |
| "loss": 0.1024, |
| "step": 69600 |
| }, |
| { |
| "epoch": 26.23, |
| "learning_rate": 1.4753481369966129e-05, |
| "loss": 0.1019, |
| "step": 69700 |
| }, |
| { |
| "epoch": 26.27, |
| "learning_rate": 1.474595408355288e-05, |
| "loss": 0.1022, |
| "step": 69800 |
| }, |
| { |
| "epoch": 26.31, |
| "learning_rate": 1.4738426797139634e-05, |
| "loss": 0.102, |
| "step": 69900 |
| }, |
| { |
| "epoch": 26.35, |
| "learning_rate": 1.4730899510726385e-05, |
| "loss": 0.1008, |
| "step": 70000 |
| }, |
| { |
| "epoch": 26.38, |
| "learning_rate": 1.4723372224313137e-05, |
| "loss": 0.1032, |
| "step": 70100 |
| }, |
| { |
| "epoch": 26.42, |
| "learning_rate": 1.4715844937899888e-05, |
| "loss": 0.1036, |
| "step": 70200 |
| }, |
| { |
| "epoch": 26.46, |
| "learning_rate": 1.470831765148664e-05, |
| "loss": 0.103, |
| "step": 70300 |
| }, |
| { |
| "epoch": 26.5, |
| "learning_rate": 1.4700790365073392e-05, |
| "loss": 0.1016, |
| "step": 70400 |
| }, |
| { |
| "epoch": 26.53, |
| "learning_rate": 1.4693263078660144e-05, |
| "loss": 0.1022, |
| "step": 70500 |
| }, |
| { |
| "epoch": 26.57, |
| "learning_rate": 1.4685735792246895e-05, |
| "loss": 0.1033, |
| "step": 70600 |
| }, |
| { |
| "epoch": 26.61, |
| "learning_rate": 1.4678208505833649e-05, |
| "loss": 0.1032, |
| "step": 70700 |
| }, |
| { |
| "epoch": 26.65, |
| "learning_rate": 1.46706812194204e-05, |
| "loss": 0.1026, |
| "step": 70800 |
| }, |
| { |
| "epoch": 26.68, |
| "learning_rate": 1.4663153933007152e-05, |
| "loss": 0.1019, |
| "step": 70900 |
| }, |
| { |
| "epoch": 26.72, |
| "learning_rate": 1.4655626646593903e-05, |
| "loss": 0.1035, |
| "step": 71000 |
| }, |
| { |
| "epoch": 26.76, |
| "learning_rate": 1.4648099360180656e-05, |
| "loss": 0.102, |
| "step": 71100 |
| }, |
| { |
| "epoch": 26.8, |
| "learning_rate": 1.4640572073767407e-05, |
| "loss": 0.1026, |
| "step": 71200 |
| }, |
| { |
| "epoch": 26.83, |
| "learning_rate": 1.463304478735416e-05, |
| "loss": 0.1023, |
| "step": 71300 |
| }, |
| { |
| "epoch": 26.87, |
| "learning_rate": 1.4625517500940912e-05, |
| "loss": 0.1011, |
| "step": 71400 |
| }, |
| { |
| "epoch": 26.91, |
| "learning_rate": 1.4617990214527664e-05, |
| "loss": 0.1037, |
| "step": 71500 |
| }, |
| { |
| "epoch": 26.95, |
| "learning_rate": 1.4610462928114415e-05, |
| "loss": 0.1036, |
| "step": 71600 |
| }, |
| { |
| "epoch": 26.99, |
| "learning_rate": 1.4602935641701168e-05, |
| "loss": 0.1037, |
| "step": 71700 |
| }, |
| { |
| "epoch": 27.0, |
| "eval_loss": 0.10246068239212036, |
| "eval_runtime": 45.3187, |
| "eval_samples_per_second": 165.495, |
| "eval_steps_per_second": 10.349, |
| "step": 71739 |
| }, |
| { |
| "epoch": 27.02, |
| "learning_rate": 1.4595408355287918e-05, |
| "loss": 0.1032, |
| "step": 71800 |
| }, |
| { |
| "epoch": 27.06, |
| "learning_rate": 1.4587881068874673e-05, |
| "loss": 0.1021, |
| "step": 71900 |
| }, |
| { |
| "epoch": 27.1, |
| "learning_rate": 1.4580353782461423e-05, |
| "loss": 0.1034, |
| "step": 72000 |
| }, |
| { |
| "epoch": 27.14, |
| "learning_rate": 1.4572826496048176e-05, |
| "loss": 0.1029, |
| "step": 72100 |
| }, |
| { |
| "epoch": 27.17, |
| "learning_rate": 1.4565299209634928e-05, |
| "loss": 0.1024, |
| "step": 72200 |
| }, |
| { |
| "epoch": 27.21, |
| "learning_rate": 1.455777192322168e-05, |
| "loss": 0.1028, |
| "step": 72300 |
| }, |
| { |
| "epoch": 27.25, |
| "learning_rate": 1.4550244636808433e-05, |
| "loss": 0.1053, |
| "step": 72400 |
| }, |
| { |
| "epoch": 27.29, |
| "learning_rate": 1.4542717350395184e-05, |
| "loss": 0.1013, |
| "step": 72500 |
| }, |
| { |
| "epoch": 27.32, |
| "learning_rate": 1.4535190063981937e-05, |
| "loss": 0.1012, |
| "step": 72600 |
| }, |
| { |
| "epoch": 27.36, |
| "learning_rate": 1.4527662777568688e-05, |
| "loss": 0.1024, |
| "step": 72700 |
| }, |
| { |
| "epoch": 27.4, |
| "learning_rate": 1.452013549115544e-05, |
| "loss": 0.1003, |
| "step": 72800 |
| }, |
| { |
| "epoch": 27.44, |
| "learning_rate": 1.4512608204742191e-05, |
| "loss": 0.103, |
| "step": 72900 |
| }, |
| { |
| "epoch": 27.47, |
| "learning_rate": 1.4505080918328945e-05, |
| "loss": 0.1012, |
| "step": 73000 |
| }, |
| { |
| "epoch": 27.51, |
| "learning_rate": 1.4497553631915696e-05, |
| "loss": 0.1015, |
| "step": 73100 |
| }, |
| { |
| "epoch": 27.55, |
| "learning_rate": 1.4490026345502448e-05, |
| "loss": 0.1032, |
| "step": 73200 |
| }, |
| { |
| "epoch": 27.59, |
| "learning_rate": 1.44824990590892e-05, |
| "loss": 0.1038, |
| "step": 73300 |
| }, |
| { |
| "epoch": 27.63, |
| "learning_rate": 1.4474971772675952e-05, |
| "loss": 0.0996, |
| "step": 73400 |
| }, |
| { |
| "epoch": 27.66, |
| "learning_rate": 1.4467444486262703e-05, |
| "loss": 0.1024, |
| "step": 73500 |
| }, |
| { |
| "epoch": 27.7, |
| "learning_rate": 1.4459917199849455e-05, |
| "loss": 0.1019, |
| "step": 73600 |
| }, |
| { |
| "epoch": 27.74, |
| "learning_rate": 1.4452389913436206e-05, |
| "loss": 0.1024, |
| "step": 73700 |
| }, |
| { |
| "epoch": 27.78, |
| "learning_rate": 1.444486262702296e-05, |
| "loss": 0.1006, |
| "step": 73800 |
| }, |
| { |
| "epoch": 27.81, |
| "learning_rate": 1.4437335340609711e-05, |
| "loss": 0.1014, |
| "step": 73900 |
| }, |
| { |
| "epoch": 27.85, |
| "learning_rate": 1.4429808054196463e-05, |
| "loss": 0.1024, |
| "step": 74000 |
| }, |
| { |
| "epoch": 27.89, |
| "learning_rate": 1.4422280767783214e-05, |
| "loss": 0.1023, |
| "step": 74100 |
| }, |
| { |
| "epoch": 27.93, |
| "learning_rate": 1.4414753481369967e-05, |
| "loss": 0.1034, |
| "step": 74200 |
| }, |
| { |
| "epoch": 27.96, |
| "learning_rate": 1.4407226194956718e-05, |
| "loss": 0.1022, |
| "step": 74300 |
| }, |
| { |
| "epoch": 28.0, |
| "eval_loss": 0.10144173353910446, |
| "eval_runtime": 45.583, |
| "eval_samples_per_second": 164.535, |
| "eval_steps_per_second": 10.289, |
| "step": 74396 |
| }, |
| { |
| "epoch": 28.0, |
| "learning_rate": 1.4399698908543472e-05, |
| "loss": 0.1023, |
| "step": 74400 |
| }, |
| { |
| "epoch": 28.04, |
| "learning_rate": 1.4392171622130224e-05, |
| "loss": 0.1017, |
| "step": 74500 |
| }, |
| { |
| "epoch": 28.08, |
| "learning_rate": 1.4384644335716975e-05, |
| "loss": 0.1017, |
| "step": 74600 |
| }, |
| { |
| "epoch": 28.11, |
| "learning_rate": 1.4377117049303728e-05, |
| "loss": 0.1008, |
| "step": 74700 |
| }, |
| { |
| "epoch": 28.15, |
| "learning_rate": 1.4369589762890478e-05, |
| "loss": 0.1023, |
| "step": 74800 |
| }, |
| { |
| "epoch": 28.19, |
| "learning_rate": 1.4362062476477233e-05, |
| "loss": 0.1022, |
| "step": 74900 |
| }, |
| { |
| "epoch": 28.23, |
| "learning_rate": 1.4354535190063984e-05, |
| "loss": 0.1017, |
| "step": 75000 |
| }, |
| { |
| "epoch": 28.26, |
| "learning_rate": 1.4347007903650736e-05, |
| "loss": 0.1012, |
| "step": 75100 |
| }, |
| { |
| "epoch": 28.3, |
| "learning_rate": 1.4339480617237487e-05, |
| "loss": 0.103, |
| "step": 75200 |
| }, |
| { |
| "epoch": 28.34, |
| "learning_rate": 1.433195333082424e-05, |
| "loss": 0.1025, |
| "step": 75300 |
| }, |
| { |
| "epoch": 28.38, |
| "learning_rate": 1.432442604441099e-05, |
| "loss": 0.1013, |
| "step": 75400 |
| }, |
| { |
| "epoch": 28.42, |
| "learning_rate": 1.4316898757997744e-05, |
| "loss": 0.1021, |
| "step": 75500 |
| }, |
| { |
| "epoch": 28.45, |
| "learning_rate": 1.4309371471584495e-05, |
| "loss": 0.1026, |
| "step": 75600 |
| }, |
| { |
| "epoch": 28.49, |
| "learning_rate": 1.4301844185171248e-05, |
| "loss": 0.1027, |
| "step": 75700 |
| }, |
| { |
| "epoch": 28.53, |
| "learning_rate": 1.4294316898757999e-05, |
| "loss": 0.1014, |
| "step": 75800 |
| }, |
| { |
| "epoch": 28.57, |
| "learning_rate": 1.4286789612344751e-05, |
| "loss": 0.1013, |
| "step": 75900 |
| }, |
| { |
| "epoch": 28.6, |
| "learning_rate": 1.4279262325931502e-05, |
| "loss": 0.1018, |
| "step": 76000 |
| }, |
| { |
| "epoch": 28.64, |
| "learning_rate": 1.4271735039518254e-05, |
| "loss": 0.102, |
| "step": 76100 |
| }, |
| { |
| "epoch": 28.68, |
| "learning_rate": 1.4264207753105005e-05, |
| "loss": 0.1013, |
| "step": 76200 |
| }, |
| { |
| "epoch": 28.72, |
| "learning_rate": 1.425668046669176e-05, |
| "loss": 0.1027, |
| "step": 76300 |
| }, |
| { |
| "epoch": 28.75, |
| "learning_rate": 1.424915318027851e-05, |
| "loss": 0.1004, |
| "step": 76400 |
| }, |
| { |
| "epoch": 28.79, |
| "learning_rate": 1.4241625893865263e-05, |
| "loss": 0.1013, |
| "step": 76500 |
| }, |
| { |
| "epoch": 28.83, |
| "learning_rate": 1.4234098607452014e-05, |
| "loss": 0.1021, |
| "step": 76600 |
| }, |
| { |
| "epoch": 28.87, |
| "learning_rate": 1.4226571321038766e-05, |
| "loss": 0.1009, |
| "step": 76700 |
| }, |
| { |
| "epoch": 28.9, |
| "learning_rate": 1.4219044034625517e-05, |
| "loss": 0.1012, |
| "step": 76800 |
| }, |
| { |
| "epoch": 28.94, |
| "learning_rate": 1.4211516748212271e-05, |
| "loss": 0.1015, |
| "step": 76900 |
| }, |
| { |
| "epoch": 28.98, |
| "learning_rate": 1.4203989461799024e-05, |
| "loss": 0.1026, |
| "step": 77000 |
| }, |
| { |
| "epoch": 29.0, |
| "eval_loss": 0.10109123587608337, |
| "eval_runtime": 45.9148, |
| "eval_samples_per_second": 163.346, |
| "eval_steps_per_second": 10.215, |
| "step": 77053 |
| }, |
| { |
| "epoch": 29.02, |
| "learning_rate": 1.4196462175385774e-05, |
| "loss": 0.102, |
| "step": 77100 |
| }, |
| { |
| "epoch": 29.06, |
| "learning_rate": 1.4188934888972527e-05, |
| "loss": 0.1028, |
| "step": 77200 |
| }, |
| { |
| "epoch": 29.09, |
| "learning_rate": 1.4181407602559278e-05, |
| "loss": 0.1016, |
| "step": 77300 |
| }, |
| { |
| "epoch": 29.13, |
| "learning_rate": 1.4173880316146032e-05, |
| "loss": 0.1021, |
| "step": 77400 |
| }, |
| { |
| "epoch": 29.17, |
| "learning_rate": 1.4166353029732783e-05, |
| "loss": 0.1018, |
| "step": 77500 |
| }, |
| { |
| "epoch": 29.21, |
| "learning_rate": 1.4158825743319535e-05, |
| "loss": 0.1004, |
| "step": 77600 |
| }, |
| { |
| "epoch": 29.24, |
| "learning_rate": 1.4151298456906286e-05, |
| "loss": 0.102, |
| "step": 77700 |
| }, |
| { |
| "epoch": 29.28, |
| "learning_rate": 1.4143771170493039e-05, |
| "loss": 0.1013, |
| "step": 77800 |
| }, |
| { |
| "epoch": 29.32, |
| "learning_rate": 1.413624388407979e-05, |
| "loss": 0.1014, |
| "step": 77900 |
| }, |
| { |
| "epoch": 29.36, |
| "learning_rate": 1.4128716597666544e-05, |
| "loss": 0.1003, |
| "step": 78000 |
| }, |
| { |
| "epoch": 29.39, |
| "learning_rate": 1.4121189311253294e-05, |
| "loss": 0.1009, |
| "step": 78100 |
| }, |
| { |
| "epoch": 29.43, |
| "learning_rate": 1.4113662024840047e-05, |
| "loss": 0.1008, |
| "step": 78200 |
| }, |
| { |
| "epoch": 29.47, |
| "learning_rate": 1.4106134738426798e-05, |
| "loss": 0.1015, |
| "step": 78300 |
| }, |
| { |
| "epoch": 29.51, |
| "learning_rate": 1.409860745201355e-05, |
| "loss": 0.1019, |
| "step": 78400 |
| }, |
| { |
| "epoch": 29.54, |
| "learning_rate": 1.4091080165600301e-05, |
| "loss": 0.1014, |
| "step": 78500 |
| }, |
| { |
| "epoch": 29.58, |
| "learning_rate": 1.4083552879187055e-05, |
| "loss": 0.1009, |
| "step": 78600 |
| }, |
| { |
| "epoch": 29.62, |
| "learning_rate": 1.4076025592773806e-05, |
| "loss": 0.1013, |
| "step": 78700 |
| }, |
| { |
| "epoch": 29.66, |
| "learning_rate": 1.4068498306360559e-05, |
| "loss": 0.1018, |
| "step": 78800 |
| }, |
| { |
| "epoch": 29.7, |
| "learning_rate": 1.406097101994731e-05, |
| "loss": 0.1026, |
| "step": 78900 |
| }, |
| { |
| "epoch": 29.73, |
| "learning_rate": 1.4053443733534062e-05, |
| "loss": 0.1005, |
| "step": 79000 |
| }, |
| { |
| "epoch": 29.77, |
| "learning_rate": 1.4045916447120813e-05, |
| "loss": 0.1009, |
| "step": 79100 |
| }, |
| { |
| "epoch": 29.81, |
| "learning_rate": 1.4038389160707565e-05, |
| "loss": 0.1001, |
| "step": 79200 |
| }, |
| { |
| "epoch": 29.85, |
| "learning_rate": 1.4030861874294316e-05, |
| "loss": 0.102, |
| "step": 79300 |
| }, |
| { |
| "epoch": 29.88, |
| "learning_rate": 1.402333458788107e-05, |
| "loss": 0.1011, |
| "step": 79400 |
| }, |
| { |
| "epoch": 29.92, |
| "learning_rate": 1.4015807301467823e-05, |
| "loss": 0.1011, |
| "step": 79500 |
| }, |
| { |
| "epoch": 29.96, |
| "learning_rate": 1.4008280015054574e-05, |
| "loss": 0.1011, |
| "step": 79600 |
| }, |
| { |
| "epoch": 30.0, |
| "learning_rate": 1.4000752728641326e-05, |
| "loss": 0.1022, |
| "step": 79700 |
| }, |
| { |
| "epoch": 30.0, |
| "eval_loss": 0.10009202361106873, |
| "eval_runtime": 45.5608, |
| "eval_samples_per_second": 164.615, |
| "eval_steps_per_second": 10.294, |
| "step": 79710 |
| }, |
| { |
| "epoch": 30.03, |
| "learning_rate": 1.3993225442228077e-05, |
| "loss": 0.1027, |
| "step": 79800 |
| }, |
| { |
| "epoch": 30.07, |
| "learning_rate": 1.3985698155814831e-05, |
| "loss": 0.1015, |
| "step": 79900 |
| }, |
| { |
| "epoch": 30.11, |
| "learning_rate": 1.3978170869401582e-05, |
| "loss": 0.1018, |
| "step": 80000 |
| }, |
| { |
| "epoch": 30.15, |
| "learning_rate": 1.3970643582988335e-05, |
| "loss": 0.1014, |
| "step": 80100 |
| }, |
| { |
| "epoch": 30.18, |
| "learning_rate": 1.3963116296575085e-05, |
| "loss": 0.1013, |
| "step": 80200 |
| }, |
| { |
| "epoch": 30.22, |
| "learning_rate": 1.3955589010161838e-05, |
| "loss": 0.0992, |
| "step": 80300 |
| }, |
| { |
| "epoch": 30.26, |
| "learning_rate": 1.3948061723748589e-05, |
| "loss": 0.1029, |
| "step": 80400 |
| }, |
| { |
| "epoch": 30.3, |
| "learning_rate": 1.3940534437335343e-05, |
| "loss": 0.1009, |
| "step": 80500 |
| }, |
| { |
| "epoch": 30.33, |
| "learning_rate": 1.3933007150922094e-05, |
| "loss": 0.1011, |
| "step": 80600 |
| }, |
| { |
| "epoch": 30.37, |
| "learning_rate": 1.3925479864508846e-05, |
| "loss": 0.1003, |
| "step": 80700 |
| }, |
| { |
| "epoch": 30.41, |
| "learning_rate": 1.3917952578095597e-05, |
| "loss": 0.1011, |
| "step": 80800 |
| }, |
| { |
| "epoch": 30.45, |
| "learning_rate": 1.391042529168235e-05, |
| "loss": 0.1003, |
| "step": 80900 |
| }, |
| { |
| "epoch": 30.49, |
| "learning_rate": 1.39028980052691e-05, |
| "loss": 0.1011, |
| "step": 81000 |
| }, |
| { |
| "epoch": 30.52, |
| "learning_rate": 1.3895370718855855e-05, |
| "loss": 0.1004, |
| "step": 81100 |
| }, |
| { |
| "epoch": 30.56, |
| "learning_rate": 1.3887843432442605e-05, |
| "loss": 0.1006, |
| "step": 81200 |
| }, |
| { |
| "epoch": 30.6, |
| "learning_rate": 1.3880316146029358e-05, |
| "loss": 0.1019, |
| "step": 81300 |
| }, |
| { |
| "epoch": 30.64, |
| "learning_rate": 1.3872788859616109e-05, |
| "loss": 0.1013, |
| "step": 81400 |
| }, |
| { |
| "epoch": 30.67, |
| "learning_rate": 1.3865261573202861e-05, |
| "loss": 0.0997, |
| "step": 81500 |
| }, |
| { |
| "epoch": 30.71, |
| "learning_rate": 1.3857734286789612e-05, |
| "loss": 0.1, |
| "step": 81600 |
| }, |
| { |
| "epoch": 30.75, |
| "learning_rate": 1.3850207000376366e-05, |
| "loss": 0.1004, |
| "step": 81700 |
| }, |
| { |
| "epoch": 30.79, |
| "learning_rate": 1.3842679713963117e-05, |
| "loss": 0.101, |
| "step": 81800 |
| }, |
| { |
| "epoch": 30.82, |
| "learning_rate": 1.383515242754987e-05, |
| "loss": 0.1003, |
| "step": 81900 |
| }, |
| { |
| "epoch": 30.86, |
| "learning_rate": 1.3827625141136622e-05, |
| "loss": 0.1, |
| "step": 82000 |
| }, |
| { |
| "epoch": 30.9, |
| "learning_rate": 1.3820097854723373e-05, |
| "loss": 0.1003, |
| "step": 82100 |
| }, |
| { |
| "epoch": 30.94, |
| "learning_rate": 1.3812570568310126e-05, |
| "loss": 0.0997, |
| "step": 82200 |
| }, |
| { |
| "epoch": 30.97, |
| "learning_rate": 1.3805043281896876e-05, |
| "loss": 0.0997, |
| "step": 82300 |
| }, |
| { |
| "epoch": 31.0, |
| "eval_loss": 0.10071013867855072, |
| "eval_runtime": 45.6947, |
| "eval_samples_per_second": 164.133, |
| "eval_steps_per_second": 10.264, |
| "step": 82367 |
| }, |
| { |
| "epoch": 31.01, |
| "learning_rate": 1.379751599548363e-05, |
| "loss": 0.0994, |
| "step": 82400 |
| }, |
| { |
| "epoch": 31.05, |
| "learning_rate": 1.3789988709070381e-05, |
| "loss": 0.1028, |
| "step": 82500 |
| }, |
| { |
| "epoch": 31.09, |
| "learning_rate": 1.3782461422657134e-05, |
| "loss": 0.1002, |
| "step": 82600 |
| }, |
| { |
| "epoch": 31.13, |
| "learning_rate": 1.3774934136243885e-05, |
| "loss": 0.0998, |
| "step": 82700 |
| }, |
| { |
| "epoch": 31.16, |
| "learning_rate": 1.3767406849830637e-05, |
| "loss": 0.1013, |
| "step": 82800 |
| }, |
| { |
| "epoch": 31.2, |
| "learning_rate": 1.3759879563417388e-05, |
| "loss": 0.1021, |
| "step": 82900 |
| }, |
| { |
| "epoch": 31.24, |
| "learning_rate": 1.3752352277004142e-05, |
| "loss": 0.1008, |
| "step": 83000 |
| }, |
| { |
| "epoch": 31.28, |
| "learning_rate": 1.3744824990590893e-05, |
| "loss": 0.101, |
| "step": 83100 |
| }, |
| { |
| "epoch": 31.31, |
| "learning_rate": 1.3737297704177646e-05, |
| "loss": 0.1016, |
| "step": 83200 |
| }, |
| { |
| "epoch": 31.35, |
| "learning_rate": 1.3729770417764396e-05, |
| "loss": 0.0993, |
| "step": 83300 |
| }, |
| { |
| "epoch": 31.39, |
| "learning_rate": 1.3722243131351149e-05, |
| "loss": 0.0993, |
| "step": 83400 |
| }, |
| { |
| "epoch": 31.43, |
| "learning_rate": 1.37147158449379e-05, |
| "loss": 0.0998, |
| "step": 83500 |
| }, |
| { |
| "epoch": 31.46, |
| "learning_rate": 1.3707188558524654e-05, |
| "loss": 0.1008, |
| "step": 83600 |
| }, |
| { |
| "epoch": 31.5, |
| "learning_rate": 1.3699661272111405e-05, |
| "loss": 0.0989, |
| "step": 83700 |
| }, |
| { |
| "epoch": 31.54, |
| "learning_rate": 1.3692133985698157e-05, |
| "loss": 0.1027, |
| "step": 83800 |
| }, |
| { |
| "epoch": 31.58, |
| "learning_rate": 1.3684606699284908e-05, |
| "loss": 0.1001, |
| "step": 83900 |
| }, |
| { |
| "epoch": 31.61, |
| "learning_rate": 1.367707941287166e-05, |
| "loss": 0.1006, |
| "step": 84000 |
| }, |
| { |
| "epoch": 31.65, |
| "learning_rate": 1.3669552126458411e-05, |
| "loss": 0.0991, |
| "step": 84100 |
| }, |
| { |
| "epoch": 31.69, |
| "learning_rate": 1.3662024840045166e-05, |
| "loss": 0.1005, |
| "step": 84200 |
| }, |
| { |
| "epoch": 31.73, |
| "learning_rate": 1.3654497553631916e-05, |
| "loss": 0.099, |
| "step": 84300 |
| }, |
| { |
| "epoch": 31.77, |
| "learning_rate": 1.3646970267218669e-05, |
| "loss": 0.1002, |
| "step": 84400 |
| }, |
| { |
| "epoch": 31.8, |
| "learning_rate": 1.3639442980805421e-05, |
| "loss": 0.1001, |
| "step": 84500 |
| }, |
| { |
| "epoch": 31.84, |
| "learning_rate": 1.3631915694392172e-05, |
| "loss": 0.0988, |
| "step": 84600 |
| }, |
| { |
| "epoch": 31.88, |
| "learning_rate": 1.3624388407978926e-05, |
| "loss": 0.0998, |
| "step": 84700 |
| }, |
| { |
| "epoch": 31.92, |
| "learning_rate": 1.3616861121565677e-05, |
| "loss": 0.0996, |
| "step": 84800 |
| }, |
| { |
| "epoch": 31.95, |
| "learning_rate": 1.360933383515243e-05, |
| "loss": 0.1022, |
| "step": 84900 |
| }, |
| { |
| "epoch": 31.99, |
| "learning_rate": 1.360180654873918e-05, |
| "loss": 0.0998, |
| "step": 85000 |
| }, |
| { |
| "epoch": 32.0, |
| "eval_loss": 0.10160314291715622, |
| "eval_runtime": 45.6356, |
| "eval_samples_per_second": 164.345, |
| "eval_steps_per_second": 10.277, |
| "step": 85024 |
| }, |
| { |
| "epoch": 32.03, |
| "learning_rate": 1.3594279262325933e-05, |
| "loss": 0.1026, |
| "step": 85100 |
| }, |
| { |
| "epoch": 32.07, |
| "learning_rate": 1.3586751975912684e-05, |
| "loss": 0.0999, |
| "step": 85200 |
| }, |
| { |
| "epoch": 32.1, |
| "learning_rate": 1.3579224689499436e-05, |
| "loss": 0.101, |
| "step": 85300 |
| }, |
| { |
| "epoch": 32.14, |
| "learning_rate": 1.3571697403086187e-05, |
| "loss": 0.1007, |
| "step": 85400 |
| }, |
| { |
| "epoch": 32.18, |
| "learning_rate": 1.3564170116672942e-05, |
| "loss": 0.1001, |
| "step": 85500 |
| }, |
| { |
| "epoch": 32.22, |
| "learning_rate": 1.3556642830259692e-05, |
| "loss": 0.1014, |
| "step": 85600 |
| }, |
| { |
| "epoch": 32.25, |
| "learning_rate": 1.3549115543846445e-05, |
| "loss": 0.0993, |
| "step": 85700 |
| }, |
| { |
| "epoch": 32.29, |
| "learning_rate": 1.3541588257433196e-05, |
| "loss": 0.1004, |
| "step": 85800 |
| }, |
| { |
| "epoch": 32.33, |
| "learning_rate": 1.3534060971019948e-05, |
| "loss": 0.1015, |
| "step": 85900 |
| }, |
| { |
| "epoch": 32.37, |
| "learning_rate": 1.3526533684606699e-05, |
| "loss": 0.0991, |
| "step": 86000 |
| }, |
| { |
| "epoch": 32.4, |
| "learning_rate": 1.3519006398193453e-05, |
| "loss": 0.0984, |
| "step": 86100 |
| }, |
| { |
| "epoch": 32.44, |
| "learning_rate": 1.3511479111780204e-05, |
| "loss": 0.1011, |
| "step": 86200 |
| }, |
| { |
| "epoch": 32.48, |
| "learning_rate": 1.3503951825366957e-05, |
| "loss": 0.1, |
| "step": 86300 |
| }, |
| { |
| "epoch": 32.52, |
| "learning_rate": 1.3496424538953707e-05, |
| "loss": 0.1009, |
| "step": 86400 |
| }, |
| { |
| "epoch": 32.56, |
| "learning_rate": 1.348889725254046e-05, |
| "loss": 0.0996, |
| "step": 86500 |
| }, |
| { |
| "epoch": 32.59, |
| "learning_rate": 1.348136996612721e-05, |
| "loss": 0.1003, |
| "step": 86600 |
| }, |
| { |
| "epoch": 32.63, |
| "learning_rate": 1.3473842679713965e-05, |
| "loss": 0.0997, |
| "step": 86700 |
| }, |
| { |
| "epoch": 32.67, |
| "learning_rate": 1.3466315393300716e-05, |
| "loss": 0.1016, |
| "step": 86800 |
| }, |
| { |
| "epoch": 32.71, |
| "learning_rate": 1.3458788106887468e-05, |
| "loss": 0.1003, |
| "step": 86900 |
| }, |
| { |
| "epoch": 32.74, |
| "learning_rate": 1.345126082047422e-05, |
| "loss": 0.0984, |
| "step": 87000 |
| }, |
| { |
| "epoch": 32.78, |
| "learning_rate": 1.3443733534060972e-05, |
| "loss": 0.1008, |
| "step": 87100 |
| }, |
| { |
| "epoch": 32.82, |
| "learning_rate": 1.3436206247647726e-05, |
| "loss": 0.0999, |
| "step": 87200 |
| }, |
| { |
| "epoch": 32.86, |
| "learning_rate": 1.3428678961234477e-05, |
| "loss": 0.0996, |
| "step": 87300 |
| }, |
| { |
| "epoch": 32.89, |
| "learning_rate": 1.3421151674821229e-05, |
| "loss": 0.101, |
| "step": 87400 |
| }, |
| { |
| "epoch": 32.93, |
| "learning_rate": 1.341362438840798e-05, |
| "loss": 0.099, |
| "step": 87500 |
| }, |
| { |
| "epoch": 32.97, |
| "learning_rate": 1.3406097101994732e-05, |
| "loss": 0.1019, |
| "step": 87600 |
| }, |
| { |
| "epoch": 33.0, |
| "eval_loss": 0.10076244920492172, |
| "eval_runtime": 45.2424, |
| "eval_samples_per_second": 165.774, |
| "eval_steps_per_second": 10.366, |
| "step": 87681 |
| }, |
| { |
| "epoch": 33.01, |
| "learning_rate": 1.3398569815581483e-05, |
| "loss": 0.1001, |
| "step": 87700 |
| }, |
| { |
| "epoch": 33.04, |
| "learning_rate": 1.3391042529168237e-05, |
| "loss": 0.1017, |
| "step": 87800 |
| }, |
| { |
| "epoch": 33.08, |
| "learning_rate": 1.3383515242754988e-05, |
| "loss": 0.1, |
| "step": 87900 |
| }, |
| { |
| "epoch": 33.12, |
| "learning_rate": 1.337598795634174e-05, |
| "loss": 0.0983, |
| "step": 88000 |
| }, |
| { |
| "epoch": 33.16, |
| "learning_rate": 1.3368460669928492e-05, |
| "loss": 0.1, |
| "step": 88100 |
| }, |
| { |
| "epoch": 33.2, |
| "learning_rate": 1.3360933383515244e-05, |
| "loss": 0.1008, |
| "step": 88200 |
| }, |
| { |
| "epoch": 33.23, |
| "learning_rate": 1.3353406097101995e-05, |
| "loss": 0.0998, |
| "step": 88300 |
| }, |
| { |
| "epoch": 33.27, |
| "learning_rate": 1.3345878810688747e-05, |
| "loss": 0.1, |
| "step": 88400 |
| }, |
| { |
| "epoch": 33.31, |
| "learning_rate": 1.3338351524275498e-05, |
| "loss": 0.0997, |
| "step": 88500 |
| }, |
| { |
| "epoch": 33.35, |
| "learning_rate": 1.3330824237862252e-05, |
| "loss": 0.0995, |
| "step": 88600 |
| }, |
| { |
| "epoch": 33.38, |
| "learning_rate": 1.3323296951449003e-05, |
| "loss": 0.1015, |
| "step": 88700 |
| }, |
| { |
| "epoch": 33.42, |
| "learning_rate": 1.3315769665035756e-05, |
| "loss": 0.0993, |
| "step": 88800 |
| }, |
| { |
| "epoch": 33.46, |
| "learning_rate": 1.3308242378622507e-05, |
| "loss": 0.0986, |
| "step": 88900 |
| }, |
| { |
| "epoch": 33.5, |
| "learning_rate": 1.330071509220926e-05, |
| "loss": 0.1003, |
| "step": 89000 |
| }, |
| { |
| "epoch": 33.53, |
| "learning_rate": 1.329318780579601e-05, |
| "loss": 0.0997, |
| "step": 89100 |
| }, |
| { |
| "epoch": 33.57, |
| "learning_rate": 1.3285660519382764e-05, |
| "loss": 0.0993, |
| "step": 89200 |
| }, |
| { |
| "epoch": 33.61, |
| "learning_rate": 1.3278133232969515e-05, |
| "loss": 0.1017, |
| "step": 89300 |
| }, |
| { |
| "epoch": 33.65, |
| "learning_rate": 1.3270605946556268e-05, |
| "loss": 0.1003, |
| "step": 89400 |
| }, |
| { |
| "epoch": 33.68, |
| "learning_rate": 1.326307866014302e-05, |
| "loss": 0.1012, |
| "step": 89500 |
| }, |
| { |
| "epoch": 33.72, |
| "learning_rate": 1.3255551373729771e-05, |
| "loss": 0.1006, |
| "step": 89600 |
| }, |
| { |
| "epoch": 33.76, |
| "learning_rate": 1.3248024087316525e-05, |
| "loss": 0.0976, |
| "step": 89700 |
| }, |
| { |
| "epoch": 33.8, |
| "learning_rate": 1.3240496800903276e-05, |
| "loss": 0.1002, |
| "step": 89800 |
| }, |
| { |
| "epoch": 33.84, |
| "learning_rate": 1.3232969514490028e-05, |
| "loss": 0.0984, |
| "step": 89900 |
| }, |
| { |
| "epoch": 33.87, |
| "learning_rate": 1.322544222807678e-05, |
| "loss": 0.0987, |
| "step": 90000 |
| }, |
| { |
| "epoch": 33.91, |
| "learning_rate": 1.3217914941663532e-05, |
| "loss": 0.1008, |
| "step": 90100 |
| }, |
| { |
| "epoch": 33.95, |
| "learning_rate": 1.3210387655250283e-05, |
| "loss": 0.1031, |
| "step": 90200 |
| }, |
| { |
| "epoch": 33.99, |
| "learning_rate": 1.3202860368837037e-05, |
| "loss": 0.0999, |
| "step": 90300 |
| }, |
| { |
| "epoch": 34.0, |
| "eval_loss": 0.10001099109649658, |
| "eval_runtime": 45.4099, |
| "eval_samples_per_second": 165.162, |
| "eval_steps_per_second": 10.328, |
| "step": 90338 |
| }, |
| { |
| "epoch": 34.02, |
| "learning_rate": 1.3195333082423788e-05, |
| "loss": 0.0995, |
| "step": 90400 |
| }, |
| { |
| "epoch": 34.06, |
| "learning_rate": 1.318780579601054e-05, |
| "loss": 0.1017, |
| "step": 90500 |
| }, |
| { |
| "epoch": 34.1, |
| "learning_rate": 1.3180278509597291e-05, |
| "loss": 0.101, |
| "step": 90600 |
| }, |
| { |
| "epoch": 34.14, |
| "learning_rate": 1.3172751223184043e-05, |
| "loss": 0.1002, |
| "step": 90700 |
| }, |
| { |
| "epoch": 34.17, |
| "learning_rate": 1.3165223936770794e-05, |
| "loss": 0.0988, |
| "step": 90800 |
| }, |
| { |
| "epoch": 34.21, |
| "learning_rate": 1.3157696650357548e-05, |
| "loss": 0.101, |
| "step": 90900 |
| }, |
| { |
| "epoch": 34.25, |
| "learning_rate": 1.31501693639443e-05, |
| "loss": 0.0999, |
| "step": 91000 |
| }, |
| { |
| "epoch": 34.29, |
| "learning_rate": 1.3142642077531052e-05, |
| "loss": 0.0994, |
| "step": 91100 |
| }, |
| { |
| "epoch": 34.32, |
| "learning_rate": 1.3135114791117803e-05, |
| "loss": 0.1002, |
| "step": 91200 |
| }, |
| { |
| "epoch": 34.36, |
| "learning_rate": 1.3127587504704555e-05, |
| "loss": 0.0987, |
| "step": 91300 |
| }, |
| { |
| "epoch": 34.4, |
| "learning_rate": 1.3120060218291306e-05, |
| "loss": 0.1001, |
| "step": 91400 |
| }, |
| { |
| "epoch": 34.44, |
| "learning_rate": 1.3112532931878058e-05, |
| "loss": 0.099, |
| "step": 91500 |
| }, |
| { |
| "epoch": 34.47, |
| "learning_rate": 1.310500564546481e-05, |
| "loss": 0.0987, |
| "step": 91600 |
| }, |
| { |
| "epoch": 34.51, |
| "learning_rate": 1.3097478359051563e-05, |
| "loss": 0.0997, |
| "step": 91700 |
| }, |
| { |
| "epoch": 34.55, |
| "learning_rate": 1.3089951072638314e-05, |
| "loss": 0.1008, |
| "step": 91800 |
| }, |
| { |
| "epoch": 34.59, |
| "learning_rate": 1.3082423786225067e-05, |
| "loss": 0.0991, |
| "step": 91900 |
| }, |
| { |
| "epoch": 34.63, |
| "learning_rate": 1.307489649981182e-05, |
| "loss": 0.0995, |
| "step": 92000 |
| }, |
| { |
| "epoch": 34.66, |
| "learning_rate": 1.306736921339857e-05, |
| "loss": 0.1001, |
| "step": 92100 |
| }, |
| { |
| "epoch": 34.7, |
| "learning_rate": 1.3059841926985324e-05, |
| "loss": 0.0991, |
| "step": 92200 |
| }, |
| { |
| "epoch": 34.74, |
| "learning_rate": 1.3052314640572075e-05, |
| "loss": 0.0977, |
| "step": 92300 |
| }, |
| { |
| "epoch": 34.78, |
| "learning_rate": 1.3044787354158828e-05, |
| "loss": 0.0993, |
| "step": 92400 |
| }, |
| { |
| "epoch": 34.81, |
| "learning_rate": 1.3037260067745578e-05, |
| "loss": 0.1001, |
| "step": 92500 |
| }, |
| { |
| "epoch": 34.85, |
| "learning_rate": 1.3029732781332331e-05, |
| "loss": 0.1, |
| "step": 92600 |
| }, |
| { |
| "epoch": 34.89, |
| "learning_rate": 1.3022205494919082e-05, |
| "loss": 0.0998, |
| "step": 92700 |
| }, |
| { |
| "epoch": 34.93, |
| "learning_rate": 1.3014678208505836e-05, |
| "loss": 0.0996, |
| "step": 92800 |
| }, |
| { |
| "epoch": 34.96, |
| "learning_rate": 1.3007150922092587e-05, |
| "loss": 0.0998, |
| "step": 92900 |
| }, |
| { |
| "epoch": 35.0, |
| "eval_loss": 0.09930834919214249, |
| "eval_runtime": 45.6646, |
| "eval_samples_per_second": 164.241, |
| "eval_steps_per_second": 10.271, |
| "step": 92995 |
| }, |
| { |
| "epoch": 35.0, |
| "learning_rate": 1.299962363567934e-05, |
| "loss": 0.1003, |
| "step": 93000 |
| }, |
| { |
| "epoch": 35.04, |
| "learning_rate": 1.299209634926609e-05, |
| "loss": 0.0996, |
| "step": 93100 |
| }, |
| { |
| "epoch": 35.08, |
| "learning_rate": 1.2984569062852843e-05, |
| "loss": 0.0986, |
| "step": 93200 |
| }, |
| { |
| "epoch": 35.11, |
| "learning_rate": 1.2977041776439594e-05, |
| "loss": 0.0999, |
| "step": 93300 |
| }, |
| { |
| "epoch": 35.15, |
| "learning_rate": 1.2969514490026348e-05, |
| "loss": 0.1006, |
| "step": 93400 |
| }, |
| { |
| "epoch": 35.19, |
| "learning_rate": 1.2961987203613099e-05, |
| "loss": 0.0999, |
| "step": 93500 |
| }, |
| { |
| "epoch": 35.23, |
| "learning_rate": 1.2954459917199851e-05, |
| "loss": 0.0984, |
| "step": 93600 |
| }, |
| { |
| "epoch": 35.27, |
| "learning_rate": 1.2946932630786602e-05, |
| "loss": 0.0981, |
| "step": 93700 |
| }, |
| { |
| "epoch": 35.3, |
| "learning_rate": 1.2939405344373354e-05, |
| "loss": 0.1004, |
| "step": 93800 |
| }, |
| { |
| "epoch": 35.34, |
| "learning_rate": 1.2931878057960105e-05, |
| "loss": 0.0994, |
| "step": 93900 |
| }, |
| { |
| "epoch": 35.38, |
| "learning_rate": 1.292435077154686e-05, |
| "loss": 0.0984, |
| "step": 94000 |
| }, |
| { |
| "epoch": 35.42, |
| "learning_rate": 1.291682348513361e-05, |
| "loss": 0.1002, |
| "step": 94100 |
| }, |
| { |
| "epoch": 35.45, |
| "learning_rate": 1.2909296198720363e-05, |
| "loss": 0.0997, |
| "step": 94200 |
| }, |
| { |
| "epoch": 35.49, |
| "learning_rate": 1.2901768912307114e-05, |
| "loss": 0.0977, |
| "step": 94300 |
| }, |
| { |
| "epoch": 35.53, |
| "learning_rate": 1.2894241625893866e-05, |
| "loss": 0.0991, |
| "step": 94400 |
| }, |
| { |
| "epoch": 35.57, |
| "learning_rate": 1.2886714339480619e-05, |
| "loss": 0.0981, |
| "step": 94500 |
| }, |
| { |
| "epoch": 35.6, |
| "learning_rate": 1.287918705306737e-05, |
| "loss": 0.0998, |
| "step": 94600 |
| }, |
| { |
| "epoch": 35.64, |
| "learning_rate": 1.2871659766654124e-05, |
| "loss": 0.0999, |
| "step": 94700 |
| }, |
| { |
| "epoch": 35.68, |
| "learning_rate": 1.2864132480240874e-05, |
| "loss": 0.0989, |
| "step": 94800 |
| }, |
| { |
| "epoch": 35.72, |
| "learning_rate": 1.2856605193827627e-05, |
| "loss": 0.1, |
| "step": 94900 |
| }, |
| { |
| "epoch": 35.75, |
| "learning_rate": 1.2849077907414378e-05, |
| "loss": 0.1003, |
| "step": 95000 |
| }, |
| { |
| "epoch": 35.79, |
| "learning_rate": 1.284155062100113e-05, |
| "loss": 0.0997, |
| "step": 95100 |
| }, |
| { |
| "epoch": 35.83, |
| "learning_rate": 1.2834023334587881e-05, |
| "loss": 0.1002, |
| "step": 95200 |
| }, |
| { |
| "epoch": 35.87, |
| "learning_rate": 1.2826496048174635e-05, |
| "loss": 0.0986, |
| "step": 95300 |
| }, |
| { |
| "epoch": 35.91, |
| "learning_rate": 1.2818968761761386e-05, |
| "loss": 0.0999, |
| "step": 95400 |
| }, |
| { |
| "epoch": 35.94, |
| "learning_rate": 1.2811441475348139e-05, |
| "loss": 0.1005, |
| "step": 95500 |
| }, |
| { |
| "epoch": 35.98, |
| "learning_rate": 1.280391418893489e-05, |
| "loss": 0.0994, |
| "step": 95600 |
| }, |
| { |
| "epoch": 36.0, |
| "eval_loss": 0.09918170422315598, |
| "eval_runtime": 45.6422, |
| "eval_samples_per_second": 164.321, |
| "eval_steps_per_second": 10.276, |
| "step": 95652 |
| }, |
| { |
| "epoch": 36.02, |
| "learning_rate": 1.2796386902521642e-05, |
| "loss": 0.0979, |
| "step": 95700 |
| }, |
| { |
| "epoch": 36.06, |
| "learning_rate": 1.2788859616108393e-05, |
| "loss": 0.0981, |
| "step": 95800 |
| }, |
| { |
| "epoch": 36.09, |
| "learning_rate": 1.2781332329695147e-05, |
| "loss": 0.0992, |
| "step": 95900 |
| }, |
| { |
| "epoch": 36.13, |
| "learning_rate": 1.2773805043281898e-05, |
| "loss": 0.0991, |
| "step": 96000 |
| }, |
| { |
| "epoch": 36.17, |
| "learning_rate": 1.276627775686865e-05, |
| "loss": 0.1003, |
| "step": 96100 |
| }, |
| { |
| "epoch": 36.21, |
| "learning_rate": 1.2758750470455401e-05, |
| "loss": 0.0991, |
| "step": 96200 |
| }, |
| { |
| "epoch": 36.24, |
| "learning_rate": 1.2751223184042154e-05, |
| "loss": 0.0985, |
| "step": 96300 |
| }, |
| { |
| "epoch": 36.28, |
| "learning_rate": 1.2743695897628904e-05, |
| "loss": 0.0993, |
| "step": 96400 |
| }, |
| { |
| "epoch": 36.32, |
| "learning_rate": 1.2736168611215659e-05, |
| "loss": 0.0998, |
| "step": 96500 |
| }, |
| { |
| "epoch": 36.36, |
| "learning_rate": 1.272864132480241e-05, |
| "loss": 0.1014, |
| "step": 96600 |
| }, |
| { |
| "epoch": 36.39, |
| "learning_rate": 1.2721114038389162e-05, |
| "loss": 0.1005, |
| "step": 96700 |
| }, |
| { |
| "epoch": 36.43, |
| "learning_rate": 1.2713586751975913e-05, |
| "loss": 0.0992, |
| "step": 96800 |
| }, |
| { |
| "epoch": 36.47, |
| "learning_rate": 1.2706059465562665e-05, |
| "loss": 0.0981, |
| "step": 96900 |
| }, |
| { |
| "epoch": 36.51, |
| "learning_rate": 1.2698532179149418e-05, |
| "loss": 0.0988, |
| "step": 97000 |
| }, |
| { |
| "epoch": 36.54, |
| "learning_rate": 1.2691004892736169e-05, |
| "loss": 0.0978, |
| "step": 97100 |
| }, |
| { |
| "epoch": 36.58, |
| "learning_rate": 1.2683477606322923e-05, |
| "loss": 0.0989, |
| "step": 97200 |
| }, |
| { |
| "epoch": 36.62, |
| "learning_rate": 1.2675950319909674e-05, |
| "loss": 0.098, |
| "step": 97300 |
| }, |
| { |
| "epoch": 36.66, |
| "learning_rate": 1.2668423033496426e-05, |
| "loss": 0.0979, |
| "step": 97400 |
| }, |
| { |
| "epoch": 36.7, |
| "learning_rate": 1.2660895747083177e-05, |
| "loss": 0.0989, |
| "step": 97500 |
| }, |
| { |
| "epoch": 36.73, |
| "learning_rate": 1.265336846066993e-05, |
| "loss": 0.0993, |
| "step": 97600 |
| }, |
| { |
| "epoch": 36.77, |
| "learning_rate": 1.264584117425668e-05, |
| "loss": 0.0988, |
| "step": 97700 |
| }, |
| { |
| "epoch": 36.81, |
| "learning_rate": 1.2638313887843435e-05, |
| "loss": 0.0979, |
| "step": 97800 |
| }, |
| { |
| "epoch": 36.85, |
| "learning_rate": 1.2630786601430185e-05, |
| "loss": 0.0982, |
| "step": 97900 |
| }, |
| { |
| "epoch": 36.88, |
| "learning_rate": 1.2623259315016938e-05, |
| "loss": 0.0992, |
| "step": 98000 |
| }, |
| { |
| "epoch": 36.92, |
| "learning_rate": 1.2615732028603689e-05, |
| "loss": 0.1, |
| "step": 98100 |
| }, |
| { |
| "epoch": 36.96, |
| "learning_rate": 1.2608204742190441e-05, |
| "loss": 0.0977, |
| "step": 98200 |
| }, |
| { |
| "epoch": 37.0, |
| "learning_rate": 1.2600677455777192e-05, |
| "loss": 0.0966, |
| "step": 98300 |
| }, |
| { |
| "epoch": 37.0, |
| "eval_loss": 0.09910181164741516, |
| "eval_runtime": 45.3338, |
| "eval_samples_per_second": 165.439, |
| "eval_steps_per_second": 10.345, |
| "step": 98309 |
| }, |
| { |
| "epoch": 37.03, |
| "learning_rate": 1.2593150169363946e-05, |
| "loss": 0.0961, |
| "step": 98400 |
| }, |
| { |
| "epoch": 37.07, |
| "learning_rate": 1.2585622882950697e-05, |
| "loss": 0.0995, |
| "step": 98500 |
| }, |
| { |
| "epoch": 37.11, |
| "learning_rate": 1.257809559653745e-05, |
| "loss": 0.0996, |
| "step": 98600 |
| }, |
| { |
| "epoch": 37.15, |
| "learning_rate": 1.25705683101242e-05, |
| "loss": 0.0982, |
| "step": 98700 |
| }, |
| { |
| "epoch": 37.18, |
| "learning_rate": 1.2563041023710953e-05, |
| "loss": 0.0985, |
| "step": 98800 |
| }, |
| { |
| "epoch": 37.22, |
| "learning_rate": 1.2555513737297704e-05, |
| "loss": 0.0968, |
| "step": 98900 |
| }, |
| { |
| "epoch": 37.26, |
| "learning_rate": 1.2547986450884458e-05, |
| "loss": 0.0982, |
| "step": 99000 |
| }, |
| { |
| "epoch": 37.3, |
| "learning_rate": 1.2540459164471209e-05, |
| "loss": 0.0994, |
| "step": 99100 |
| }, |
| { |
| "epoch": 37.34, |
| "learning_rate": 1.2532931878057961e-05, |
| "loss": 0.1004, |
| "step": 99200 |
| }, |
| { |
| "epoch": 37.37, |
| "learning_rate": 1.2525404591644712e-05, |
| "loss": 0.0988, |
| "step": 99300 |
| }, |
| { |
| "epoch": 37.41, |
| "learning_rate": 1.2517877305231465e-05, |
| "loss": 0.099, |
| "step": 99400 |
| }, |
| { |
| "epoch": 37.45, |
| "learning_rate": 1.2510350018818219e-05, |
| "loss": 0.0991, |
| "step": 99500 |
| }, |
| { |
| "epoch": 37.49, |
| "learning_rate": 1.250282273240497e-05, |
| "loss": 0.0978, |
| "step": 99600 |
| }, |
| { |
| "epoch": 37.52, |
| "learning_rate": 1.2495295445991722e-05, |
| "loss": 0.0981, |
| "step": 99700 |
| }, |
| { |
| "epoch": 37.56, |
| "learning_rate": 1.2487768159578473e-05, |
| "loss": 0.0989, |
| "step": 99800 |
| }, |
| { |
| "epoch": 37.6, |
| "learning_rate": 1.2480240873165226e-05, |
| "loss": 0.0982, |
| "step": 99900 |
| }, |
| { |
| "epoch": 37.64, |
| "learning_rate": 1.2472713586751976e-05, |
| "loss": 0.1001, |
| "step": 100000 |
| }, |
| { |
| "epoch": 37.67, |
| "learning_rate": 1.2465186300338729e-05, |
| "loss": 0.0972, |
| "step": 100100 |
| }, |
| { |
| "epoch": 37.71, |
| "learning_rate": 1.245765901392548e-05, |
| "loss": 0.0993, |
| "step": 100200 |
| }, |
| { |
| "epoch": 37.75, |
| "learning_rate": 1.2450131727512234e-05, |
| "loss": 0.0997, |
| "step": 100300 |
| }, |
| { |
| "epoch": 37.79, |
| "learning_rate": 1.2442604441098985e-05, |
| "loss": 0.0978, |
| "step": 100400 |
| }, |
| { |
| "epoch": 37.82, |
| "learning_rate": 1.2435077154685737e-05, |
| "loss": 0.0992, |
| "step": 100500 |
| }, |
| { |
| "epoch": 37.86, |
| "learning_rate": 1.2427549868272488e-05, |
| "loss": 0.0992, |
| "step": 100600 |
| }, |
| { |
| "epoch": 37.9, |
| "learning_rate": 1.242002258185924e-05, |
| "loss": 0.0984, |
| "step": 100700 |
| }, |
| { |
| "epoch": 37.94, |
| "learning_rate": 1.2412495295445991e-05, |
| "loss": 0.0983, |
| "step": 100800 |
| }, |
| { |
| "epoch": 37.98, |
| "learning_rate": 1.2404968009032746e-05, |
| "loss": 0.0997, |
| "step": 100900 |
| }, |
| { |
| "epoch": 38.0, |
| "eval_loss": 0.09699103981256485, |
| "eval_runtime": 45.3352, |
| "eval_samples_per_second": 165.435, |
| "eval_steps_per_second": 10.345, |
| "step": 100966 |
| }, |
| { |
| "epoch": 38.01, |
| "learning_rate": 1.2397440722619496e-05, |
| "loss": 0.0983, |
| "step": 101000 |
| }, |
| { |
| "epoch": 38.05, |
| "learning_rate": 1.2389913436206249e-05, |
| "loss": 0.0984, |
| "step": 101100 |
| }, |
| { |
| "epoch": 38.09, |
| "learning_rate": 1.2382386149793e-05, |
| "loss": 0.0971, |
| "step": 101200 |
| }, |
| { |
| "epoch": 38.13, |
| "learning_rate": 1.2374858863379752e-05, |
| "loss": 0.0979, |
| "step": 101300 |
| }, |
| { |
| "epoch": 38.16, |
| "learning_rate": 1.2367331576966503e-05, |
| "loss": 0.0992, |
| "step": 101400 |
| }, |
| { |
| "epoch": 38.2, |
| "learning_rate": 1.2359804290553257e-05, |
| "loss": 0.0989, |
| "step": 101500 |
| }, |
| { |
| "epoch": 38.24, |
| "learning_rate": 1.2352277004140008e-05, |
| "loss": 0.0988, |
| "step": 101600 |
| }, |
| { |
| "epoch": 38.28, |
| "learning_rate": 1.234474971772676e-05, |
| "loss": 0.098, |
| "step": 101700 |
| }, |
| { |
| "epoch": 38.31, |
| "learning_rate": 1.2337222431313511e-05, |
| "loss": 0.0961, |
| "step": 101800 |
| }, |
| { |
| "epoch": 38.35, |
| "learning_rate": 1.2329695144900264e-05, |
| "loss": 0.0978, |
| "step": 101900 |
| }, |
| { |
| "epoch": 38.39, |
| "learning_rate": 1.2322167858487018e-05, |
| "loss": 0.1003, |
| "step": 102000 |
| }, |
| { |
| "epoch": 38.43, |
| "learning_rate": 1.2314640572073769e-05, |
| "loss": 0.0989, |
| "step": 102100 |
| }, |
| { |
| "epoch": 38.46, |
| "learning_rate": 1.2307113285660521e-05, |
| "loss": 0.0984, |
| "step": 102200 |
| }, |
| { |
| "epoch": 38.5, |
| "learning_rate": 1.2299585999247272e-05, |
| "loss": 0.0985, |
| "step": 102300 |
| }, |
| { |
| "epoch": 38.54, |
| "learning_rate": 1.2292058712834025e-05, |
| "loss": 0.1008, |
| "step": 102400 |
| }, |
| { |
| "epoch": 38.58, |
| "learning_rate": 1.2284531426420776e-05, |
| "loss": 0.0969, |
| "step": 102500 |
| }, |
| { |
| "epoch": 38.61, |
| "learning_rate": 1.227700414000753e-05, |
| "loss": 0.0981, |
| "step": 102600 |
| }, |
| { |
| "epoch": 38.65, |
| "learning_rate": 1.226947685359428e-05, |
| "loss": 0.0979, |
| "step": 102700 |
| }, |
| { |
| "epoch": 38.69, |
| "learning_rate": 1.2261949567181033e-05, |
| "loss": 0.0994, |
| "step": 102800 |
| }, |
| { |
| "epoch": 38.73, |
| "learning_rate": 1.2254422280767784e-05, |
| "loss": 0.0992, |
| "step": 102900 |
| }, |
| { |
| "epoch": 38.77, |
| "learning_rate": 1.2246894994354536e-05, |
| "loss": 0.0981, |
| "step": 103000 |
| }, |
| { |
| "epoch": 38.8, |
| "learning_rate": 1.2239367707941287e-05, |
| "loss": 0.0991, |
| "step": 103100 |
| }, |
| { |
| "epoch": 38.84, |
| "learning_rate": 1.223184042152804e-05, |
| "loss": 0.0977, |
| "step": 103200 |
| }, |
| { |
| "epoch": 38.88, |
| "learning_rate": 1.222431313511479e-05, |
| "loss": 0.0979, |
| "step": 103300 |
| }, |
| { |
| "epoch": 38.92, |
| "learning_rate": 1.2216785848701545e-05, |
| "loss": 0.0976, |
| "step": 103400 |
| }, |
| { |
| "epoch": 38.95, |
| "learning_rate": 1.2209258562288296e-05, |
| "loss": 0.0996, |
| "step": 103500 |
| }, |
| { |
| "epoch": 38.99, |
| "learning_rate": 1.2201731275875048e-05, |
| "loss": 0.0991, |
| "step": 103600 |
| }, |
| { |
| "epoch": 39.0, |
| "eval_loss": 0.09791671484708786, |
| "eval_runtime": 45.4451, |
| "eval_samples_per_second": 165.034, |
| "eval_steps_per_second": 10.32, |
| "step": 103623 |
| }, |
| { |
| "epoch": 39.03, |
| "learning_rate": 1.2194203989461799e-05, |
| "loss": 0.098, |
| "step": 103700 |
| }, |
| { |
| "epoch": 39.07, |
| "learning_rate": 1.2186676703048552e-05, |
| "loss": 0.0965, |
| "step": 103800 |
| }, |
| { |
| "epoch": 39.1, |
| "learning_rate": 1.2179149416635302e-05, |
| "loss": 0.0979, |
| "step": 103900 |
| }, |
| { |
| "epoch": 39.14, |
| "learning_rate": 1.2171622130222057e-05, |
| "loss": 0.0978, |
| "step": 104000 |
| }, |
| { |
| "epoch": 39.18, |
| "learning_rate": 1.2164094843808807e-05, |
| "loss": 0.0996, |
| "step": 104100 |
| }, |
| { |
| "epoch": 39.22, |
| "learning_rate": 1.215656755739556e-05, |
| "loss": 0.0995, |
| "step": 104200 |
| }, |
| { |
| "epoch": 39.25, |
| "learning_rate": 1.2149040270982312e-05, |
| "loss": 0.0988, |
| "step": 104300 |
| }, |
| { |
| "epoch": 39.29, |
| "learning_rate": 1.2141512984569063e-05, |
| "loss": 0.0975, |
| "step": 104400 |
| }, |
| { |
| "epoch": 39.33, |
| "learning_rate": 1.2133985698155817e-05, |
| "loss": 0.098, |
| "step": 104500 |
| }, |
| { |
| "epoch": 39.37, |
| "learning_rate": 1.2126458411742568e-05, |
| "loss": 0.098, |
| "step": 104600 |
| }, |
| { |
| "epoch": 39.41, |
| "learning_rate": 1.211893112532932e-05, |
| "loss": 0.098, |
| "step": 104700 |
| }, |
| { |
| "epoch": 39.44, |
| "learning_rate": 1.2111403838916072e-05, |
| "loss": 0.0995, |
| "step": 104800 |
| }, |
| { |
| "epoch": 39.48, |
| "learning_rate": 1.2103876552502824e-05, |
| "loss": 0.0977, |
| "step": 104900 |
| }, |
| { |
| "epoch": 39.52, |
| "learning_rate": 1.2096349266089575e-05, |
| "loss": 0.0988, |
| "step": 105000 |
| }, |
| { |
| "epoch": 39.56, |
| "learning_rate": 1.2088821979676329e-05, |
| "loss": 0.0986, |
| "step": 105100 |
| }, |
| { |
| "epoch": 39.59, |
| "learning_rate": 1.208129469326308e-05, |
| "loss": 0.0987, |
| "step": 105200 |
| }, |
| { |
| "epoch": 39.63, |
| "learning_rate": 1.2073767406849832e-05, |
| "loss": 0.0979, |
| "step": 105300 |
| }, |
| { |
| "epoch": 39.67, |
| "learning_rate": 1.2066240120436583e-05, |
| "loss": 0.0963, |
| "step": 105400 |
| }, |
| { |
| "epoch": 39.71, |
| "learning_rate": 1.2058712834023336e-05, |
| "loss": 0.0978, |
| "step": 105500 |
| }, |
| { |
| "epoch": 39.74, |
| "learning_rate": 1.2051185547610087e-05, |
| "loss": 0.0989, |
| "step": 105600 |
| }, |
| { |
| "epoch": 39.78, |
| "learning_rate": 1.204365826119684e-05, |
| "loss": 0.0971, |
| "step": 105700 |
| }, |
| { |
| "epoch": 39.82, |
| "learning_rate": 1.2036130974783592e-05, |
| "loss": 0.0985, |
| "step": 105800 |
| }, |
| { |
| "epoch": 39.86, |
| "learning_rate": 1.2028603688370344e-05, |
| "loss": 0.0964, |
| "step": 105900 |
| }, |
| { |
| "epoch": 39.89, |
| "learning_rate": 1.2021076401957095e-05, |
| "loss": 0.0968, |
| "step": 106000 |
| }, |
| { |
| "epoch": 39.93, |
| "learning_rate": 1.2013549115543847e-05, |
| "loss": 0.0965, |
| "step": 106100 |
| }, |
| { |
| "epoch": 39.97, |
| "learning_rate": 1.2006021829130598e-05, |
| "loss": 0.099, |
| "step": 106200 |
| }, |
| { |
| "epoch": 40.0, |
| "eval_loss": 0.09832270443439484, |
| "eval_runtime": 45.1549, |
| "eval_samples_per_second": 166.095, |
| "eval_steps_per_second": 10.386, |
| "step": 106280 |
| }, |
| { |
| "epoch": 40.01, |
| "learning_rate": 1.199849454271735e-05, |
| "loss": 0.098, |
| "step": 106300 |
| }, |
| { |
| "epoch": 40.05, |
| "learning_rate": 1.1990967256304102e-05, |
| "loss": 0.098, |
| "step": 106400 |
| }, |
| { |
| "epoch": 40.08, |
| "learning_rate": 1.1983439969890856e-05, |
| "loss": 0.0977, |
| "step": 106500 |
| }, |
| { |
| "epoch": 40.12, |
| "learning_rate": 1.1975912683477607e-05, |
| "loss": 0.0994, |
| "step": 106600 |
| }, |
| { |
| "epoch": 40.16, |
| "learning_rate": 1.196838539706436e-05, |
| "loss": 0.0976, |
| "step": 106700 |
| }, |
| { |
| "epoch": 40.2, |
| "learning_rate": 1.1960858110651112e-05, |
| "loss": 0.098, |
| "step": 106800 |
| }, |
| { |
| "epoch": 40.23, |
| "learning_rate": 1.1953330824237862e-05, |
| "loss": 0.0978, |
| "step": 106900 |
| }, |
| { |
| "epoch": 40.27, |
| "learning_rate": 1.1945803537824617e-05, |
| "loss": 0.0981, |
| "step": 107000 |
| }, |
| { |
| "epoch": 40.31, |
| "learning_rate": 1.1938276251411368e-05, |
| "loss": 0.0971, |
| "step": 107100 |
| }, |
| { |
| "epoch": 40.35, |
| "learning_rate": 1.193074896499812e-05, |
| "loss": 0.0985, |
| "step": 107200 |
| }, |
| { |
| "epoch": 40.38, |
| "learning_rate": 1.1923221678584871e-05, |
| "loss": 0.0996, |
| "step": 107300 |
| }, |
| { |
| "epoch": 40.42, |
| "learning_rate": 1.1915694392171623e-05, |
| "loss": 0.0981, |
| "step": 107400 |
| }, |
| { |
| "epoch": 40.46, |
| "learning_rate": 1.1908167105758374e-05, |
| "loss": 0.0996, |
| "step": 107500 |
| }, |
| { |
| "epoch": 40.5, |
| "learning_rate": 1.1900639819345128e-05, |
| "loss": 0.0989, |
| "step": 107600 |
| }, |
| { |
| "epoch": 40.53, |
| "learning_rate": 1.189311253293188e-05, |
| "loss": 0.0992, |
| "step": 107700 |
| }, |
| { |
| "epoch": 40.57, |
| "learning_rate": 1.1885585246518632e-05, |
| "loss": 0.0974, |
| "step": 107800 |
| }, |
| { |
| "epoch": 40.61, |
| "learning_rate": 1.1878057960105383e-05, |
| "loss": 0.0984, |
| "step": 107900 |
| }, |
| { |
| "epoch": 40.65, |
| "learning_rate": 1.1870530673692135e-05, |
| "loss": 0.0984, |
| "step": 108000 |
| }, |
| { |
| "epoch": 40.68, |
| "learning_rate": 1.1863003387278886e-05, |
| "loss": 0.099, |
| "step": 108100 |
| }, |
| { |
| "epoch": 40.72, |
| "learning_rate": 1.185547610086564e-05, |
| "loss": 0.0984, |
| "step": 108200 |
| }, |
| { |
| "epoch": 40.76, |
| "learning_rate": 1.1847948814452391e-05, |
| "loss": 0.0966, |
| "step": 108300 |
| }, |
| { |
| "epoch": 40.8, |
| "learning_rate": 1.1840421528039143e-05, |
| "loss": 0.1, |
| "step": 108400 |
| }, |
| { |
| "epoch": 40.84, |
| "learning_rate": 1.1832894241625894e-05, |
| "loss": 0.0979, |
| "step": 108500 |
| }, |
| { |
| "epoch": 40.87, |
| "learning_rate": 1.1825366955212647e-05, |
| "loss": 0.0981, |
| "step": 108600 |
| }, |
| { |
| "epoch": 40.91, |
| "learning_rate": 1.1817839668799398e-05, |
| "loss": 0.0973, |
| "step": 108700 |
| }, |
| { |
| "epoch": 40.95, |
| "learning_rate": 1.1810312382386152e-05, |
| "loss": 0.0978, |
| "step": 108800 |
| }, |
| { |
| "epoch": 40.99, |
| "learning_rate": 1.1802785095972903e-05, |
| "loss": 0.0974, |
| "step": 108900 |
| }, |
| { |
| "epoch": 41.0, |
| "eval_loss": 0.09795571118593216, |
| "eval_runtime": 45.3802, |
| "eval_samples_per_second": 165.27, |
| "eval_steps_per_second": 10.335, |
| "step": 108937 |
| }, |
| { |
| "epoch": 41.02, |
| "learning_rate": 1.1795257809559655e-05, |
| "loss": 0.0961, |
| "step": 109000 |
| }, |
| { |
| "epoch": 41.06, |
| "learning_rate": 1.1787730523146406e-05, |
| "loss": 0.0989, |
| "step": 109100 |
| }, |
| { |
| "epoch": 41.1, |
| "learning_rate": 1.1780203236733158e-05, |
| "loss": 0.0988, |
| "step": 109200 |
| }, |
| { |
| "epoch": 41.14, |
| "learning_rate": 1.1772675950319911e-05, |
| "loss": 0.0979, |
| "step": 109300 |
| }, |
| { |
| "epoch": 41.17, |
| "learning_rate": 1.1765148663906662e-05, |
| "loss": 0.0971, |
| "step": 109400 |
| }, |
| { |
| "epoch": 41.21, |
| "learning_rate": 1.1757621377493416e-05, |
| "loss": 0.0965, |
| "step": 109500 |
| }, |
| { |
| "epoch": 41.25, |
| "learning_rate": 1.1750094091080167e-05, |
| "loss": 0.0982, |
| "step": 109600 |
| }, |
| { |
| "epoch": 41.29, |
| "learning_rate": 1.174256680466692e-05, |
| "loss": 0.0974, |
| "step": 109700 |
| }, |
| { |
| "epoch": 41.32, |
| "learning_rate": 1.173503951825367e-05, |
| "loss": 0.097, |
| "step": 109800 |
| }, |
| { |
| "epoch": 41.36, |
| "learning_rate": 1.1727512231840423e-05, |
| "loss": 0.0974, |
| "step": 109900 |
| }, |
| { |
| "epoch": 41.4, |
| "learning_rate": 1.1719984945427173e-05, |
| "loss": 0.0969, |
| "step": 110000 |
| }, |
| { |
| "epoch": 41.44, |
| "learning_rate": 1.1712457659013928e-05, |
| "loss": 0.0983, |
| "step": 110100 |
| }, |
| { |
| "epoch": 41.48, |
| "learning_rate": 1.1704930372600679e-05, |
| "loss": 0.0978, |
| "step": 110200 |
| }, |
| { |
| "epoch": 41.51, |
| "learning_rate": 1.1697403086187431e-05, |
| "loss": 0.0962, |
| "step": 110300 |
| }, |
| { |
| "epoch": 41.55, |
| "learning_rate": 1.1689875799774182e-05, |
| "loss": 0.0985, |
| "step": 110400 |
| }, |
| { |
| "epoch": 41.59, |
| "learning_rate": 1.1682348513360934e-05, |
| "loss": 0.0992, |
| "step": 110500 |
| }, |
| { |
| "epoch": 41.63, |
| "learning_rate": 1.1674821226947685e-05, |
| "loss": 0.0972, |
| "step": 110600 |
| }, |
| { |
| "epoch": 41.66, |
| "learning_rate": 1.166729394053444e-05, |
| "loss": 0.098, |
| "step": 110700 |
| }, |
| { |
| "epoch": 41.7, |
| "learning_rate": 1.165976665412119e-05, |
| "loss": 0.0991, |
| "step": 110800 |
| }, |
| { |
| "epoch": 41.74, |
| "learning_rate": 1.1652239367707943e-05, |
| "loss": 0.0963, |
| "step": 110900 |
| }, |
| { |
| "epoch": 41.78, |
| "learning_rate": 1.1644712081294694e-05, |
| "loss": 0.0983, |
| "step": 111000 |
| }, |
| { |
| "epoch": 41.81, |
| "learning_rate": 1.1637184794881446e-05, |
| "loss": 0.0988, |
| "step": 111100 |
| }, |
| { |
| "epoch": 41.85, |
| "learning_rate": 1.1629657508468197e-05, |
| "loss": 0.0977, |
| "step": 111200 |
| }, |
| { |
| "epoch": 41.89, |
| "learning_rate": 1.1622130222054951e-05, |
| "loss": 0.0984, |
| "step": 111300 |
| }, |
| { |
| "epoch": 41.93, |
| "learning_rate": 1.1614602935641702e-05, |
| "loss": 0.0974, |
| "step": 111400 |
| }, |
| { |
| "epoch": 41.96, |
| "learning_rate": 1.1607075649228454e-05, |
| "loss": 0.0974, |
| "step": 111500 |
| }, |
| { |
| "epoch": 42.0, |
| "eval_loss": 0.0971272811293602, |
| "eval_runtime": 45.4607, |
| "eval_samples_per_second": 164.978, |
| "eval_steps_per_second": 10.317, |
| "step": 111594 |
| }, |
| { |
| "epoch": 42.0, |
| "learning_rate": 1.1599548362815205e-05, |
| "loss": 0.0983, |
| "step": 111600 |
| }, |
| { |
| "epoch": 42.04, |
| "learning_rate": 1.1592021076401958e-05, |
| "loss": 0.0968, |
| "step": 111700 |
| }, |
| { |
| "epoch": 42.08, |
| "learning_rate": 1.1584493789988712e-05, |
| "loss": 0.0984, |
| "step": 111800 |
| }, |
| { |
| "epoch": 42.12, |
| "learning_rate": 1.1576966503575463e-05, |
| "loss": 0.0991, |
| "step": 111900 |
| }, |
| { |
| "epoch": 42.15, |
| "learning_rate": 1.1569439217162215e-05, |
| "loss": 0.0965, |
| "step": 112000 |
| }, |
| { |
| "epoch": 42.19, |
| "learning_rate": 1.1561911930748966e-05, |
| "loss": 0.098, |
| "step": 112100 |
| }, |
| { |
| "epoch": 42.23, |
| "learning_rate": 1.1554384644335719e-05, |
| "loss": 0.0979, |
| "step": 112200 |
| }, |
| { |
| "epoch": 42.27, |
| "learning_rate": 1.154685735792247e-05, |
| "loss": 0.0971, |
| "step": 112300 |
| }, |
| { |
| "epoch": 42.3, |
| "learning_rate": 1.1539330071509222e-05, |
| "loss": 0.0974, |
| "step": 112400 |
| }, |
| { |
| "epoch": 42.34, |
| "learning_rate": 1.1531802785095973e-05, |
| "loss": 0.0963, |
| "step": 112500 |
| }, |
| { |
| "epoch": 42.38, |
| "learning_rate": 1.1524275498682727e-05, |
| "loss": 0.097, |
| "step": 112600 |
| }, |
| { |
| "epoch": 42.42, |
| "learning_rate": 1.1516748212269478e-05, |
| "loss": 0.1001, |
| "step": 112700 |
| }, |
| { |
| "epoch": 42.45, |
| "learning_rate": 1.150922092585623e-05, |
| "loss": 0.0972, |
| "step": 112800 |
| }, |
| { |
| "epoch": 42.49, |
| "learning_rate": 1.1501693639442981e-05, |
| "loss": 0.0976, |
| "step": 112900 |
| }, |
| { |
| "epoch": 42.53, |
| "learning_rate": 1.1494166353029734e-05, |
| "loss": 0.0963, |
| "step": 113000 |
| }, |
| { |
| "epoch": 42.57, |
| "learning_rate": 1.1486639066616484e-05, |
| "loss": 0.0965, |
| "step": 113100 |
| }, |
| { |
| "epoch": 42.6, |
| "learning_rate": 1.1479111780203239e-05, |
| "loss": 0.0978, |
| "step": 113200 |
| }, |
| { |
| "epoch": 42.64, |
| "learning_rate": 1.147158449378999e-05, |
| "loss": 0.0996, |
| "step": 113300 |
| }, |
| { |
| "epoch": 42.68, |
| "learning_rate": 1.1464057207376742e-05, |
| "loss": 0.0965, |
| "step": 113400 |
| }, |
| { |
| "epoch": 42.72, |
| "learning_rate": 1.1456529920963493e-05, |
| "loss": 0.0964, |
| "step": 113500 |
| }, |
| { |
| "epoch": 42.75, |
| "learning_rate": 1.1449002634550245e-05, |
| "loss": 0.0979, |
| "step": 113600 |
| }, |
| { |
| "epoch": 42.79, |
| "learning_rate": 1.1441475348136996e-05, |
| "loss": 0.0982, |
| "step": 113700 |
| }, |
| { |
| "epoch": 42.83, |
| "learning_rate": 1.143394806172375e-05, |
| "loss": 0.0975, |
| "step": 113800 |
| }, |
| { |
| "epoch": 42.87, |
| "learning_rate": 1.1426420775310501e-05, |
| "loss": 0.0956, |
| "step": 113900 |
| }, |
| { |
| "epoch": 42.91, |
| "learning_rate": 1.1418893488897254e-05, |
| "loss": 0.0964, |
| "step": 114000 |
| }, |
| { |
| "epoch": 42.94, |
| "learning_rate": 1.1411366202484005e-05, |
| "loss": 0.0984, |
| "step": 114100 |
| }, |
| { |
| "epoch": 42.98, |
| "learning_rate": 1.1403838916070757e-05, |
| "loss": 0.0972, |
| "step": 114200 |
| }, |
| { |
| "epoch": 43.0, |
| "eval_loss": 0.09703872352838516, |
| "eval_runtime": 45.2306, |
| "eval_samples_per_second": 165.817, |
| "eval_steps_per_second": 10.369, |
| "step": 114251 |
| }, |
| { |
| "epoch": 43.02, |
| "learning_rate": 1.1396311629657511e-05, |
| "loss": 0.0971, |
| "step": 114300 |
| }, |
| { |
| "epoch": 43.06, |
| "learning_rate": 1.1388784343244262e-05, |
| "loss": 0.0988, |
| "step": 114400 |
| }, |
| { |
| "epoch": 43.09, |
| "learning_rate": 1.1381257056831015e-05, |
| "loss": 0.0966, |
| "step": 114500 |
| }, |
| { |
| "epoch": 43.13, |
| "learning_rate": 1.1373729770417765e-05, |
| "loss": 0.0964, |
| "step": 114600 |
| }, |
| { |
| "epoch": 43.17, |
| "learning_rate": 1.1366202484004518e-05, |
| "loss": 0.0984, |
| "step": 114700 |
| }, |
| { |
| "epoch": 43.21, |
| "learning_rate": 1.1358675197591269e-05, |
| "loss": 0.0978, |
| "step": 114800 |
| }, |
| { |
| "epoch": 43.24, |
| "learning_rate": 1.1351147911178021e-05, |
| "loss": 0.0975, |
| "step": 114900 |
| }, |
| { |
| "epoch": 43.28, |
| "learning_rate": 1.1343620624764772e-05, |
| "loss": 0.0976, |
| "step": 115000 |
| }, |
| { |
| "epoch": 43.32, |
| "learning_rate": 1.1336093338351526e-05, |
| "loss": 0.0965, |
| "step": 115100 |
| }, |
| { |
| "epoch": 43.36, |
| "learning_rate": 1.1328566051938277e-05, |
| "loss": 0.0961, |
| "step": 115200 |
| }, |
| { |
| "epoch": 43.39, |
| "learning_rate": 1.132103876552503e-05, |
| "loss": 0.0964, |
| "step": 115300 |
| }, |
| { |
| "epoch": 43.43, |
| "learning_rate": 1.131351147911178e-05, |
| "loss": 0.0969, |
| "step": 115400 |
| }, |
| { |
| "epoch": 43.47, |
| "learning_rate": 1.1305984192698533e-05, |
| "loss": 0.097, |
| "step": 115500 |
| }, |
| { |
| "epoch": 43.51, |
| "learning_rate": 1.1298456906285284e-05, |
| "loss": 0.0977, |
| "step": 115600 |
| }, |
| { |
| "epoch": 43.55, |
| "learning_rate": 1.1290929619872038e-05, |
| "loss": 0.0981, |
| "step": 115700 |
| }, |
| { |
| "epoch": 43.58, |
| "learning_rate": 1.1283402333458789e-05, |
| "loss": 0.0973, |
| "step": 115800 |
| }, |
| { |
| "epoch": 43.62, |
| "learning_rate": 1.1275875047045541e-05, |
| "loss": 0.0968, |
| "step": 115900 |
| }, |
| { |
| "epoch": 43.66, |
| "learning_rate": 1.1268347760632292e-05, |
| "loss": 0.0962, |
| "step": 116000 |
| }, |
| { |
| "epoch": 43.7, |
| "learning_rate": 1.1260820474219045e-05, |
| "loss": 0.098, |
| "step": 116100 |
| }, |
| { |
| "epoch": 43.73, |
| "learning_rate": 1.1253293187805795e-05, |
| "loss": 0.0974, |
| "step": 116200 |
| }, |
| { |
| "epoch": 43.77, |
| "learning_rate": 1.124576590139255e-05, |
| "loss": 0.0957, |
| "step": 116300 |
| }, |
| { |
| "epoch": 43.81, |
| "learning_rate": 1.12382386149793e-05, |
| "loss": 0.0974, |
| "step": 116400 |
| }, |
| { |
| "epoch": 43.85, |
| "learning_rate": 1.1230711328566053e-05, |
| "loss": 0.096, |
| "step": 116500 |
| }, |
| { |
| "epoch": 43.88, |
| "learning_rate": 1.1223184042152804e-05, |
| "loss": 0.0979, |
| "step": 116600 |
| }, |
| { |
| "epoch": 43.92, |
| "learning_rate": 1.1215656755739556e-05, |
| "loss": 0.0958, |
| "step": 116700 |
| }, |
| { |
| "epoch": 43.96, |
| "learning_rate": 1.120812946932631e-05, |
| "loss": 0.097, |
| "step": 116800 |
| }, |
| { |
| "epoch": 44.0, |
| "learning_rate": 1.1200602182913061e-05, |
| "loss": 0.0991, |
| "step": 116900 |
| }, |
| { |
| "epoch": 44.0, |
| "eval_loss": 0.09703505784273148, |
| "eval_runtime": 45.5627, |
| "eval_samples_per_second": 164.608, |
| "eval_steps_per_second": 10.294, |
| "step": 116908 |
| }, |
| { |
| "epoch": 44.03, |
| "learning_rate": 1.1193074896499814e-05, |
| "loss": 0.0984, |
| "step": 117000 |
| }, |
| { |
| "epoch": 44.07, |
| "learning_rate": 1.1185547610086565e-05, |
| "loss": 0.0965, |
| "step": 117100 |
| }, |
| { |
| "epoch": 44.11, |
| "learning_rate": 1.1178020323673317e-05, |
| "loss": 0.0957, |
| "step": 117200 |
| }, |
| { |
| "epoch": 44.15, |
| "learning_rate": 1.1170493037260068e-05, |
| "loss": 0.0971, |
| "step": 117300 |
| }, |
| { |
| "epoch": 44.19, |
| "learning_rate": 1.1162965750846822e-05, |
| "loss": 0.096, |
| "step": 117400 |
| }, |
| { |
| "epoch": 44.22, |
| "learning_rate": 1.1155438464433573e-05, |
| "loss": 0.0959, |
| "step": 117500 |
| }, |
| { |
| "epoch": 44.26, |
| "learning_rate": 1.1147911178020326e-05, |
| "loss": 0.0965, |
| "step": 117600 |
| }, |
| { |
| "epoch": 44.3, |
| "learning_rate": 1.1140383891607076e-05, |
| "loss": 0.0987, |
| "step": 117700 |
| }, |
| { |
| "epoch": 44.34, |
| "learning_rate": 1.1132856605193829e-05, |
| "loss": 0.0972, |
| "step": 117800 |
| }, |
| { |
| "epoch": 44.37, |
| "learning_rate": 1.112532931878058e-05, |
| "loss": 0.0962, |
| "step": 117900 |
| }, |
| { |
| "epoch": 44.41, |
| "learning_rate": 1.1117802032367332e-05, |
| "loss": 0.0956, |
| "step": 118000 |
| }, |
| { |
| "epoch": 44.45, |
| "learning_rate": 1.1110274745954083e-05, |
| "loss": 0.0974, |
| "step": 118100 |
| }, |
| { |
| "epoch": 44.49, |
| "learning_rate": 1.1102747459540837e-05, |
| "loss": 0.0973, |
| "step": 118200 |
| }, |
| { |
| "epoch": 44.52, |
| "learning_rate": 1.1095220173127588e-05, |
| "loss": 0.0952, |
| "step": 118300 |
| }, |
| { |
| "epoch": 44.56, |
| "learning_rate": 1.108769288671434e-05, |
| "loss": 0.0968, |
| "step": 118400 |
| }, |
| { |
| "epoch": 44.6, |
| "learning_rate": 1.1080165600301091e-05, |
| "loss": 0.0971, |
| "step": 118500 |
| }, |
| { |
| "epoch": 44.64, |
| "learning_rate": 1.1072638313887844e-05, |
| "loss": 0.1, |
| "step": 118600 |
| }, |
| { |
| "epoch": 44.67, |
| "learning_rate": 1.1065111027474595e-05, |
| "loss": 0.0966, |
| "step": 118700 |
| }, |
| { |
| "epoch": 44.71, |
| "learning_rate": 1.1057583741061349e-05, |
| "loss": 0.0973, |
| "step": 118800 |
| }, |
| { |
| "epoch": 44.75, |
| "learning_rate": 1.10500564546481e-05, |
| "loss": 0.0957, |
| "step": 118900 |
| }, |
| { |
| "epoch": 44.79, |
| "learning_rate": 1.1042529168234852e-05, |
| "loss": 0.0964, |
| "step": 119000 |
| }, |
| { |
| "epoch": 44.82, |
| "learning_rate": 1.1035001881821603e-05, |
| "loss": 0.0954, |
| "step": 119100 |
| }, |
| { |
| "epoch": 44.86, |
| "learning_rate": 1.1027474595408356e-05, |
| "loss": 0.0971, |
| "step": 119200 |
| }, |
| { |
| "epoch": 44.9, |
| "learning_rate": 1.101994730899511e-05, |
| "loss": 0.0973, |
| "step": 119300 |
| }, |
| { |
| "epoch": 44.94, |
| "learning_rate": 1.101242002258186e-05, |
| "loss": 0.0975, |
| "step": 119400 |
| }, |
| { |
| "epoch": 44.98, |
| "learning_rate": 1.1004892736168613e-05, |
| "loss": 0.0979, |
| "step": 119500 |
| }, |
| { |
| "epoch": 45.0, |
| "eval_loss": 0.09719178825616837, |
| "eval_runtime": 45.3083, |
| "eval_samples_per_second": 165.532, |
| "eval_steps_per_second": 10.351, |
| "step": 119565 |
| }, |
| { |
| "epoch": 45.01, |
| "learning_rate": 1.0997365449755364e-05, |
| "loss": 0.0972, |
| "step": 119600 |
| }, |
| { |
| "epoch": 45.05, |
| "learning_rate": 1.0989838163342116e-05, |
| "loss": 0.0977, |
| "step": 119700 |
| }, |
| { |
| "epoch": 45.09, |
| "learning_rate": 1.0982310876928867e-05, |
| "loss": 0.0973, |
| "step": 119800 |
| }, |
| { |
| "epoch": 45.13, |
| "learning_rate": 1.0974783590515621e-05, |
| "loss": 0.0954, |
| "step": 119900 |
| }, |
| { |
| "epoch": 45.16, |
| "learning_rate": 1.0967256304102372e-05, |
| "loss": 0.0969, |
| "step": 120000 |
| }, |
| { |
| "epoch": 45.2, |
| "learning_rate": 1.0959729017689125e-05, |
| "loss": 0.0958, |
| "step": 120100 |
| }, |
| { |
| "epoch": 45.24, |
| "learning_rate": 1.0952201731275876e-05, |
| "loss": 0.0964, |
| "step": 120200 |
| }, |
| { |
| "epoch": 45.28, |
| "learning_rate": 1.0944674444862628e-05, |
| "loss": 0.0961, |
| "step": 120300 |
| }, |
| { |
| "epoch": 45.31, |
| "learning_rate": 1.0937147158449379e-05, |
| "loss": 0.0968, |
| "step": 120400 |
| }, |
| { |
| "epoch": 45.35, |
| "learning_rate": 1.0929619872036133e-05, |
| "loss": 0.0973, |
| "step": 120500 |
| }, |
| { |
| "epoch": 45.39, |
| "learning_rate": 1.0922092585622884e-05, |
| "loss": 0.0983, |
| "step": 120600 |
| }, |
| { |
| "epoch": 45.43, |
| "learning_rate": 1.0914565299209637e-05, |
| "loss": 0.0979, |
| "step": 120700 |
| }, |
| { |
| "epoch": 45.46, |
| "learning_rate": 1.0907038012796387e-05, |
| "loss": 0.0959, |
| "step": 120800 |
| }, |
| { |
| "epoch": 45.5, |
| "learning_rate": 1.089951072638314e-05, |
| "loss": 0.0967, |
| "step": 120900 |
| }, |
| { |
| "epoch": 45.54, |
| "learning_rate": 1.089198343996989e-05, |
| "loss": 0.0978, |
| "step": 121000 |
| }, |
| { |
| "epoch": 45.58, |
| "learning_rate": 1.0884456153556643e-05, |
| "loss": 0.0966, |
| "step": 121100 |
| }, |
| { |
| "epoch": 45.62, |
| "learning_rate": 1.0876928867143394e-05, |
| "loss": 0.0967, |
| "step": 121200 |
| }, |
| { |
| "epoch": 45.65, |
| "learning_rate": 1.0869401580730148e-05, |
| "loss": 0.0981, |
| "step": 121300 |
| }, |
| { |
| "epoch": 45.69, |
| "learning_rate": 1.0861874294316899e-05, |
| "loss": 0.0955, |
| "step": 121400 |
| }, |
| { |
| "epoch": 45.73, |
| "learning_rate": 1.0854347007903652e-05, |
| "loss": 0.0981, |
| "step": 121500 |
| }, |
| { |
| "epoch": 45.77, |
| "learning_rate": 1.0846819721490402e-05, |
| "loss": 0.0963, |
| "step": 121600 |
| }, |
| { |
| "epoch": 45.8, |
| "learning_rate": 1.0839292435077155e-05, |
| "loss": 0.096, |
| "step": 121700 |
| }, |
| { |
| "epoch": 45.84, |
| "learning_rate": 1.0831765148663909e-05, |
| "loss": 0.0969, |
| "step": 121800 |
| }, |
| { |
| "epoch": 45.88, |
| "learning_rate": 1.082423786225066e-05, |
| "loss": 0.0959, |
| "step": 121900 |
| }, |
| { |
| "epoch": 45.92, |
| "learning_rate": 1.0816710575837412e-05, |
| "loss": 0.0969, |
| "step": 122000 |
| }, |
| { |
| "epoch": 45.95, |
| "learning_rate": 1.0809183289424163e-05, |
| "loss": 0.0968, |
| "step": 122100 |
| }, |
| { |
| "epoch": 45.99, |
| "learning_rate": 1.0801656003010916e-05, |
| "loss": 0.097, |
| "step": 122200 |
| }, |
| { |
| "epoch": 46.0, |
| "eval_loss": 0.09704454988241196, |
| "eval_runtime": 45.4128, |
| "eval_samples_per_second": 165.152, |
| "eval_steps_per_second": 10.327, |
| "step": 122222 |
| }, |
| { |
| "epoch": 46.03, |
| "learning_rate": 1.0794128716597667e-05, |
| "loss": 0.0977, |
| "step": 122300 |
| }, |
| { |
| "epoch": 46.07, |
| "learning_rate": 1.078660143018442e-05, |
| "loss": 0.0966, |
| "step": 122400 |
| }, |
| { |
| "epoch": 46.1, |
| "learning_rate": 1.0779074143771172e-05, |
| "loss": 0.0972, |
| "step": 122500 |
| }, |
| { |
| "epoch": 46.14, |
| "learning_rate": 1.0771546857357924e-05, |
| "loss": 0.0965, |
| "step": 122600 |
| }, |
| { |
| "epoch": 46.18, |
| "learning_rate": 1.0764019570944675e-05, |
| "loss": 0.0959, |
| "step": 122700 |
| }, |
| { |
| "epoch": 46.22, |
| "learning_rate": 1.0756492284531427e-05, |
| "loss": 0.0955, |
| "step": 122800 |
| }, |
| { |
| "epoch": 46.26, |
| "learning_rate": 1.0748964998118178e-05, |
| "loss": 0.0963, |
| "step": 122900 |
| }, |
| { |
| "epoch": 46.29, |
| "learning_rate": 1.0741437711704932e-05, |
| "loss": 0.0965, |
| "step": 123000 |
| }, |
| { |
| "epoch": 46.33, |
| "learning_rate": 1.0733910425291683e-05, |
| "loss": 0.0966, |
| "step": 123100 |
| }, |
| { |
| "epoch": 46.37, |
| "learning_rate": 1.0726383138878436e-05, |
| "loss": 0.0959, |
| "step": 123200 |
| }, |
| { |
| "epoch": 46.41, |
| "learning_rate": 1.0718855852465187e-05, |
| "loss": 0.0975, |
| "step": 123300 |
| }, |
| { |
| "epoch": 46.44, |
| "learning_rate": 1.0711328566051939e-05, |
| "loss": 0.0957, |
| "step": 123400 |
| }, |
| { |
| "epoch": 46.48, |
| "learning_rate": 1.070380127963869e-05, |
| "loss": 0.0966, |
| "step": 123500 |
| }, |
| { |
| "epoch": 46.52, |
| "learning_rate": 1.0696273993225444e-05, |
| "loss": 0.0973, |
| "step": 123600 |
| }, |
| { |
| "epoch": 46.56, |
| "learning_rate": 1.0688746706812195e-05, |
| "loss": 0.0979, |
| "step": 123700 |
| }, |
| { |
| "epoch": 46.59, |
| "learning_rate": 1.0681219420398947e-05, |
| "loss": 0.0983, |
| "step": 123800 |
| }, |
| { |
| "epoch": 46.63, |
| "learning_rate": 1.0673692133985698e-05, |
| "loss": 0.0969, |
| "step": 123900 |
| }, |
| { |
| "epoch": 46.67, |
| "learning_rate": 1.066616484757245e-05, |
| "loss": 0.097, |
| "step": 124000 |
| }, |
| { |
| "epoch": 46.71, |
| "learning_rate": 1.0658637561159202e-05, |
| "loss": 0.0969, |
| "step": 124100 |
| }, |
| { |
| "epoch": 46.74, |
| "learning_rate": 1.0651110274745954e-05, |
| "loss": 0.0967, |
| "step": 124200 |
| }, |
| { |
| "epoch": 46.78, |
| "learning_rate": 1.0643582988332708e-05, |
| "loss": 0.0955, |
| "step": 124300 |
| }, |
| { |
| "epoch": 46.82, |
| "learning_rate": 1.063605570191946e-05, |
| "loss": 0.0967, |
| "step": 124400 |
| }, |
| { |
| "epoch": 46.86, |
| "learning_rate": 1.0628528415506212e-05, |
| "loss": 0.0972, |
| "step": 124500 |
| }, |
| { |
| "epoch": 46.89, |
| "learning_rate": 1.0621001129092963e-05, |
| "loss": 0.0975, |
| "step": 124600 |
| }, |
| { |
| "epoch": 46.93, |
| "learning_rate": 1.0613473842679715e-05, |
| "loss": 0.0956, |
| "step": 124700 |
| }, |
| { |
| "epoch": 46.97, |
| "learning_rate": 1.0605946556266466e-05, |
| "loss": 0.0936, |
| "step": 124800 |
| }, |
| { |
| "epoch": 47.0, |
| "eval_loss": 0.096713587641716, |
| "eval_runtime": 45.4283, |
| "eval_samples_per_second": 165.095, |
| "eval_steps_per_second": 10.324, |
| "step": 124879 |
| }, |
| { |
| "epoch": 47.01, |
| "learning_rate": 1.059841926985322e-05, |
| "loss": 0.0966, |
| "step": 124900 |
| }, |
| { |
| "epoch": 47.05, |
| "learning_rate": 1.0590891983439971e-05, |
| "loss": 0.0958, |
| "step": 125000 |
| }, |
| { |
| "epoch": 47.08, |
| "learning_rate": 1.0583364697026723e-05, |
| "loss": 0.0961, |
| "step": 125100 |
| }, |
| { |
| "epoch": 47.12, |
| "learning_rate": 1.0575837410613474e-05, |
| "loss": 0.0955, |
| "step": 125200 |
| }, |
| { |
| "epoch": 47.16, |
| "learning_rate": 1.0568310124200227e-05, |
| "loss": 0.0958, |
| "step": 125300 |
| }, |
| { |
| "epoch": 47.2, |
| "learning_rate": 1.0560782837786978e-05, |
| "loss": 0.0961, |
| "step": 125400 |
| }, |
| { |
| "epoch": 47.23, |
| "learning_rate": 1.0553255551373732e-05, |
| "loss": 0.0959, |
| "step": 125500 |
| }, |
| { |
| "epoch": 47.27, |
| "learning_rate": 1.0545728264960483e-05, |
| "loss": 0.0954, |
| "step": 125600 |
| }, |
| { |
| "epoch": 47.31, |
| "learning_rate": 1.0538200978547235e-05, |
| "loss": 0.0992, |
| "step": 125700 |
| }, |
| { |
| "epoch": 47.35, |
| "learning_rate": 1.0530673692133986e-05, |
| "loss": 0.0967, |
| "step": 125800 |
| }, |
| { |
| "epoch": 47.38, |
| "learning_rate": 1.0523146405720738e-05, |
| "loss": 0.0957, |
| "step": 125900 |
| }, |
| { |
| "epoch": 47.42, |
| "learning_rate": 1.051561911930749e-05, |
| "loss": 0.0963, |
| "step": 126000 |
| }, |
| { |
| "epoch": 47.46, |
| "learning_rate": 1.0508091832894243e-05, |
| "loss": 0.0959, |
| "step": 126100 |
| }, |
| { |
| "epoch": 47.5, |
| "learning_rate": 1.0500564546480994e-05, |
| "loss": 0.0956, |
| "step": 126200 |
| }, |
| { |
| "epoch": 47.53, |
| "learning_rate": 1.0493037260067747e-05, |
| "loss": 0.0976, |
| "step": 126300 |
| }, |
| { |
| "epoch": 47.57, |
| "learning_rate": 1.0485509973654498e-05, |
| "loss": 0.0981, |
| "step": 126400 |
| }, |
| { |
| "epoch": 47.61, |
| "learning_rate": 1.047798268724125e-05, |
| "loss": 0.0982, |
| "step": 126500 |
| }, |
| { |
| "epoch": 47.65, |
| "learning_rate": 1.0470455400828001e-05, |
| "loss": 0.0959, |
| "step": 126600 |
| }, |
| { |
| "epoch": 47.69, |
| "learning_rate": 1.0462928114414755e-05, |
| "loss": 0.098, |
| "step": 126700 |
| }, |
| { |
| "epoch": 47.72, |
| "learning_rate": 1.0455400828001508e-05, |
| "loss": 0.0958, |
| "step": 126800 |
| }, |
| { |
| "epoch": 47.76, |
| "learning_rate": 1.0447873541588258e-05, |
| "loss": 0.0977, |
| "step": 126900 |
| }, |
| { |
| "epoch": 47.8, |
| "learning_rate": 1.0440346255175011e-05, |
| "loss": 0.0944, |
| "step": 127000 |
| }, |
| { |
| "epoch": 47.84, |
| "learning_rate": 1.0432818968761762e-05, |
| "loss": 0.0953, |
| "step": 127100 |
| }, |
| { |
| "epoch": 47.87, |
| "learning_rate": 1.0425291682348514e-05, |
| "loss": 0.0949, |
| "step": 127200 |
| }, |
| { |
| "epoch": 47.91, |
| "learning_rate": 1.0417764395935265e-05, |
| "loss": 0.0957, |
| "step": 127300 |
| }, |
| { |
| "epoch": 47.95, |
| "learning_rate": 1.041023710952202e-05, |
| "loss": 0.0976, |
| "step": 127400 |
| }, |
| { |
| "epoch": 47.99, |
| "learning_rate": 1.040270982310877e-05, |
| "loss": 0.0948, |
| "step": 127500 |
| }, |
| { |
| "epoch": 48.0, |
| "eval_loss": 0.0966743603348732, |
| "eval_runtime": 45.5618, |
| "eval_samples_per_second": 164.611, |
| "eval_steps_per_second": 10.294, |
| "step": 127536 |
| }, |
| { |
| "epoch": 48.02, |
| "learning_rate": 1.0395182536695523e-05, |
| "loss": 0.096, |
| "step": 127600 |
| }, |
| { |
| "epoch": 48.06, |
| "learning_rate": 1.0387655250282273e-05, |
| "loss": 0.0958, |
| "step": 127700 |
| }, |
| { |
| "epoch": 48.1, |
| "learning_rate": 1.0380127963869026e-05, |
| "loss": 0.0963, |
| "step": 127800 |
| }, |
| { |
| "epoch": 48.14, |
| "learning_rate": 1.0372600677455777e-05, |
| "loss": 0.095, |
| "step": 127900 |
| }, |
| { |
| "epoch": 48.17, |
| "learning_rate": 1.0365073391042531e-05, |
| "loss": 0.0973, |
| "step": 128000 |
| }, |
| { |
| "epoch": 48.21, |
| "learning_rate": 1.0357546104629282e-05, |
| "loss": 0.0958, |
| "step": 128100 |
| }, |
| { |
| "epoch": 48.25, |
| "learning_rate": 1.0350018818216034e-05, |
| "loss": 0.094, |
| "step": 128200 |
| }, |
| { |
| "epoch": 48.29, |
| "learning_rate": 1.0342491531802785e-05, |
| "loss": 0.0965, |
| "step": 128300 |
| }, |
| { |
| "epoch": 48.33, |
| "learning_rate": 1.0334964245389538e-05, |
| "loss": 0.0962, |
| "step": 128400 |
| }, |
| { |
| "epoch": 48.36, |
| "learning_rate": 1.0327436958976289e-05, |
| "loss": 0.0973, |
| "step": 128500 |
| }, |
| { |
| "epoch": 48.4, |
| "learning_rate": 1.0319909672563043e-05, |
| "loss": 0.0966, |
| "step": 128600 |
| }, |
| { |
| "epoch": 48.44, |
| "learning_rate": 1.0312382386149794e-05, |
| "loss": 0.0967, |
| "step": 128700 |
| }, |
| { |
| "epoch": 48.48, |
| "learning_rate": 1.0304855099736546e-05, |
| "loss": 0.0983, |
| "step": 128800 |
| }, |
| { |
| "epoch": 48.51, |
| "learning_rate": 1.0297327813323297e-05, |
| "loss": 0.0953, |
| "step": 128900 |
| }, |
| { |
| "epoch": 48.55, |
| "learning_rate": 1.028980052691005e-05, |
| "loss": 0.0951, |
| "step": 129000 |
| }, |
| { |
| "epoch": 48.59, |
| "learning_rate": 1.02822732404968e-05, |
| "loss": 0.0988, |
| "step": 129100 |
| }, |
| { |
| "epoch": 48.63, |
| "learning_rate": 1.0274745954083554e-05, |
| "loss": 0.095, |
| "step": 129200 |
| }, |
| { |
| "epoch": 48.66, |
| "learning_rate": 1.0267218667670307e-05, |
| "loss": 0.0956, |
| "step": 129300 |
| }, |
| { |
| "epoch": 48.7, |
| "learning_rate": 1.0259691381257058e-05, |
| "loss": 0.0962, |
| "step": 129400 |
| }, |
| { |
| "epoch": 48.74, |
| "learning_rate": 1.025216409484381e-05, |
| "loss": 0.0964, |
| "step": 129500 |
| }, |
| { |
| "epoch": 48.78, |
| "learning_rate": 1.0244636808430561e-05, |
| "loss": 0.0952, |
| "step": 129600 |
| }, |
| { |
| "epoch": 48.81, |
| "learning_rate": 1.0237109522017315e-05, |
| "loss": 0.0959, |
| "step": 129700 |
| }, |
| { |
| "epoch": 48.85, |
| "learning_rate": 1.0229582235604066e-05, |
| "loss": 0.0965, |
| "step": 129800 |
| }, |
| { |
| "epoch": 48.89, |
| "learning_rate": 1.0222054949190819e-05, |
| "loss": 0.0949, |
| "step": 129900 |
| }, |
| { |
| "epoch": 48.93, |
| "learning_rate": 1.021452766277757e-05, |
| "loss": 0.0966, |
| "step": 130000 |
| }, |
| { |
| "epoch": 48.96, |
| "learning_rate": 1.0207000376364322e-05, |
| "loss": 0.0974, |
| "step": 130100 |
| }, |
| { |
| "epoch": 49.0, |
| "eval_loss": 0.09543051570653915, |
| "eval_runtime": 45.608, |
| "eval_samples_per_second": 164.445, |
| "eval_steps_per_second": 10.283, |
| "step": 130193 |
| }, |
| { |
| "epoch": 49.0, |
| "learning_rate": 1.0199473089951073e-05, |
| "loss": 0.0959, |
| "step": 130200 |
| }, |
| { |
| "epoch": 49.04, |
| "learning_rate": 1.0191945803537825e-05, |
| "loss": 0.0962, |
| "step": 130300 |
| }, |
| { |
| "epoch": 49.08, |
| "learning_rate": 1.0184418517124576e-05, |
| "loss": 0.0954, |
| "step": 130400 |
| }, |
| { |
| "epoch": 49.12, |
| "learning_rate": 1.017689123071133e-05, |
| "loss": 0.0959, |
| "step": 130500 |
| }, |
| { |
| "epoch": 49.15, |
| "learning_rate": 1.0169363944298081e-05, |
| "loss": 0.0949, |
| "step": 130600 |
| }, |
| { |
| "epoch": 49.19, |
| "learning_rate": 1.0161836657884834e-05, |
| "loss": 0.0967, |
| "step": 130700 |
| }, |
| { |
| "epoch": 49.23, |
| "learning_rate": 1.0154309371471584e-05, |
| "loss": 0.0965, |
| "step": 130800 |
| }, |
| { |
| "epoch": 49.27, |
| "learning_rate": 1.0146782085058337e-05, |
| "loss": 0.0963, |
| "step": 130900 |
| }, |
| { |
| "epoch": 49.3, |
| "learning_rate": 1.0139254798645088e-05, |
| "loss": 0.0956, |
| "step": 131000 |
| }, |
| { |
| "epoch": 49.34, |
| "learning_rate": 1.0131727512231842e-05, |
| "loss": 0.0959, |
| "step": 131100 |
| }, |
| { |
| "epoch": 49.38, |
| "learning_rate": 1.0124200225818593e-05, |
| "loss": 0.0958, |
| "step": 131200 |
| }, |
| { |
| "epoch": 49.42, |
| "learning_rate": 1.0116672939405345e-05, |
| "loss": 0.0974, |
| "step": 131300 |
| }, |
| { |
| "epoch": 49.45, |
| "learning_rate": 1.0109145652992096e-05, |
| "loss": 0.0984, |
| "step": 131400 |
| }, |
| { |
| "epoch": 49.49, |
| "learning_rate": 1.0101618366578849e-05, |
| "loss": 0.0946, |
| "step": 131500 |
| }, |
| { |
| "epoch": 49.53, |
| "learning_rate": 1.00940910801656e-05, |
| "loss": 0.0967, |
| "step": 131600 |
| }, |
| { |
| "epoch": 49.57, |
| "learning_rate": 1.0086563793752354e-05, |
| "loss": 0.0968, |
| "step": 131700 |
| }, |
| { |
| "epoch": 49.6, |
| "learning_rate": 1.0079036507339106e-05, |
| "loss": 0.0961, |
| "step": 131800 |
| }, |
| { |
| "epoch": 49.64, |
| "learning_rate": 1.0071509220925857e-05, |
| "loss": 0.0946, |
| "step": 131900 |
| }, |
| { |
| "epoch": 49.68, |
| "learning_rate": 1.006398193451261e-05, |
| "loss": 0.0959, |
| "step": 132000 |
| }, |
| { |
| "epoch": 49.72, |
| "learning_rate": 1.005645464809936e-05, |
| "loss": 0.0959, |
| "step": 132100 |
| }, |
| { |
| "epoch": 49.76, |
| "learning_rate": 1.0048927361686115e-05, |
| "loss": 0.0974, |
| "step": 132200 |
| }, |
| { |
| "epoch": 49.79, |
| "learning_rate": 1.0041400075272865e-05, |
| "loss": 0.095, |
| "step": 132300 |
| }, |
| { |
| "epoch": 49.83, |
| "learning_rate": 1.0033872788859618e-05, |
| "loss": 0.0954, |
| "step": 132400 |
| }, |
| { |
| "epoch": 49.87, |
| "learning_rate": 1.0026345502446369e-05, |
| "loss": 0.0958, |
| "step": 132500 |
| }, |
| { |
| "epoch": 49.91, |
| "learning_rate": 1.0018818216033121e-05, |
| "loss": 0.0959, |
| "step": 132600 |
| }, |
| { |
| "epoch": 49.94, |
| "learning_rate": 1.0011290929619872e-05, |
| "loss": 0.095, |
| "step": 132700 |
| }, |
| { |
| "epoch": 49.98, |
| "learning_rate": 1.0003763643206625e-05, |
| "loss": 0.0958, |
| "step": 132800 |
| }, |
| { |
| "epoch": 50.0, |
| "eval_loss": 0.09539712220430374, |
| "eval_runtime": 45.0771, |
| "eval_samples_per_second": 166.382, |
| "eval_steps_per_second": 10.404, |
| "step": 132850 |
| }, |
| { |
| "epoch": 50.02, |
| "learning_rate": 9.996236356793377e-06, |
| "loss": 0.0956, |
| "step": 132900 |
| }, |
| { |
| "epoch": 50.06, |
| "learning_rate": 9.98870907038013e-06, |
| "loss": 0.0943, |
| "step": 133000 |
| }, |
| { |
| "epoch": 50.09, |
| "learning_rate": 9.98118178396688e-06, |
| "loss": 0.0958, |
| "step": 133100 |
| }, |
| { |
| "epoch": 50.13, |
| "learning_rate": 9.973654497553633e-06, |
| "loss": 0.0952, |
| "step": 133200 |
| }, |
| { |
| "epoch": 50.17, |
| "learning_rate": 9.966127211140385e-06, |
| "loss": 0.0969, |
| "step": 133300 |
| }, |
| { |
| "epoch": 50.21, |
| "learning_rate": 9.958599924727136e-06, |
| "loss": 0.0952, |
| "step": 133400 |
| }, |
| { |
| "epoch": 50.24, |
| "learning_rate": 9.951072638313889e-06, |
| "loss": 0.0959, |
| "step": 133500 |
| }, |
| { |
| "epoch": 50.28, |
| "learning_rate": 9.943545351900641e-06, |
| "loss": 0.0949, |
| "step": 133600 |
| }, |
| { |
| "epoch": 50.32, |
| "learning_rate": 9.936018065487392e-06, |
| "loss": 0.0964, |
| "step": 133700 |
| }, |
| { |
| "epoch": 50.36, |
| "learning_rate": 9.928490779074145e-06, |
| "loss": 0.0952, |
| "step": 133800 |
| }, |
| { |
| "epoch": 50.4, |
| "learning_rate": 9.920963492660897e-06, |
| "loss": 0.0944, |
| "step": 133900 |
| }, |
| { |
| "epoch": 50.43, |
| "learning_rate": 9.913436206247648e-06, |
| "loss": 0.0973, |
| "step": 134000 |
| }, |
| { |
| "epoch": 50.47, |
| "learning_rate": 9.9059089198344e-06, |
| "loss": 0.0949, |
| "step": 134100 |
| }, |
| { |
| "epoch": 50.51, |
| "learning_rate": 9.898381633421153e-06, |
| "loss": 0.096, |
| "step": 134200 |
| }, |
| { |
| "epoch": 50.55, |
| "learning_rate": 9.890854347007904e-06, |
| "loss": 0.0962, |
| "step": 134300 |
| }, |
| { |
| "epoch": 50.58, |
| "learning_rate": 9.883327060594656e-06, |
| "loss": 0.0972, |
| "step": 134400 |
| }, |
| { |
| "epoch": 50.62, |
| "learning_rate": 9.875799774181409e-06, |
| "loss": 0.0931, |
| "step": 134500 |
| }, |
| { |
| "epoch": 50.66, |
| "learning_rate": 9.86827248776816e-06, |
| "loss": 0.0961, |
| "step": 134600 |
| }, |
| { |
| "epoch": 50.7, |
| "learning_rate": 9.860745201354912e-06, |
| "loss": 0.0947, |
| "step": 134700 |
| }, |
| { |
| "epoch": 50.73, |
| "learning_rate": 9.853217914941665e-06, |
| "loss": 0.0968, |
| "step": 134800 |
| }, |
| { |
| "epoch": 50.77, |
| "learning_rate": 9.845690628528415e-06, |
| "loss": 0.095, |
| "step": 134900 |
| }, |
| { |
| "epoch": 50.81, |
| "learning_rate": 9.838163342115168e-06, |
| "loss": 0.0966, |
| "step": 135000 |
| }, |
| { |
| "epoch": 50.85, |
| "learning_rate": 9.83063605570192e-06, |
| "loss": 0.0966, |
| "step": 135100 |
| }, |
| { |
| "epoch": 50.88, |
| "learning_rate": 9.823108769288671e-06, |
| "loss": 0.0957, |
| "step": 135200 |
| }, |
| { |
| "epoch": 50.92, |
| "learning_rate": 9.815581482875424e-06, |
| "loss": 0.0948, |
| "step": 135300 |
| }, |
| { |
| "epoch": 50.96, |
| "learning_rate": 9.808054196462176e-06, |
| "loss": 0.0942, |
| "step": 135400 |
| }, |
| { |
| "epoch": 51.0, |
| "learning_rate": 9.800526910048929e-06, |
| "loss": 0.0948, |
| "step": 135500 |
| }, |
| { |
| "epoch": 51.0, |
| "eval_loss": 0.09547575563192368, |
| "eval_runtime": 45.2525, |
| "eval_samples_per_second": 165.737, |
| "eval_steps_per_second": 10.364, |
| "step": 135507 |
| }, |
| { |
| "epoch": 51.04, |
| "learning_rate": 9.792999623635681e-06, |
| "loss": 0.0973, |
| "step": 135600 |
| }, |
| { |
| "epoch": 51.07, |
| "learning_rate": 9.785472337222432e-06, |
| "loss": 0.0962, |
| "step": 135700 |
| }, |
| { |
| "epoch": 51.11, |
| "learning_rate": 9.777945050809185e-06, |
| "loss": 0.0957, |
| "step": 135800 |
| }, |
| { |
| "epoch": 51.15, |
| "learning_rate": 9.770417764395936e-06, |
| "loss": 0.0953, |
| "step": 135900 |
| }, |
| { |
| "epoch": 51.19, |
| "learning_rate": 9.762890477982688e-06, |
| "loss": 0.0948, |
| "step": 136000 |
| }, |
| { |
| "epoch": 51.22, |
| "learning_rate": 9.75536319156944e-06, |
| "loss": 0.0958, |
| "step": 136100 |
| }, |
| { |
| "epoch": 51.26, |
| "learning_rate": 9.747835905156191e-06, |
| "loss": 0.0956, |
| "step": 136200 |
| }, |
| { |
| "epoch": 51.3, |
| "learning_rate": 9.740308618742944e-06, |
| "loss": 0.0957, |
| "step": 136300 |
| }, |
| { |
| "epoch": 51.34, |
| "learning_rate": 9.732781332329696e-06, |
| "loss": 0.0953, |
| "step": 136400 |
| }, |
| { |
| "epoch": 51.37, |
| "learning_rate": 9.725254045916447e-06, |
| "loss": 0.0942, |
| "step": 136500 |
| }, |
| { |
| "epoch": 51.41, |
| "learning_rate": 9.7177267595032e-06, |
| "loss": 0.0952, |
| "step": 136600 |
| }, |
| { |
| "epoch": 51.45, |
| "learning_rate": 9.710199473089952e-06, |
| "loss": 0.0949, |
| "step": 136700 |
| }, |
| { |
| "epoch": 51.49, |
| "learning_rate": 9.702672186676703e-06, |
| "loss": 0.0961, |
| "step": 136800 |
| }, |
| { |
| "epoch": 51.52, |
| "learning_rate": 9.695144900263456e-06, |
| "loss": 0.0949, |
| "step": 136900 |
| }, |
| { |
| "epoch": 51.56, |
| "learning_rate": 9.687617613850208e-06, |
| "loss": 0.0955, |
| "step": 137000 |
| }, |
| { |
| "epoch": 51.6, |
| "learning_rate": 9.680090327436959e-06, |
| "loss": 0.0954, |
| "step": 137100 |
| }, |
| { |
| "epoch": 51.64, |
| "learning_rate": 9.672563041023711e-06, |
| "loss": 0.0958, |
| "step": 137200 |
| }, |
| { |
| "epoch": 51.67, |
| "learning_rate": 9.665035754610464e-06, |
| "loss": 0.0961, |
| "step": 137300 |
| }, |
| { |
| "epoch": 51.71, |
| "learning_rate": 9.657508468197215e-06, |
| "loss": 0.0971, |
| "step": 137400 |
| }, |
| { |
| "epoch": 51.75, |
| "learning_rate": 9.649981181783967e-06, |
| "loss": 0.0952, |
| "step": 137500 |
| }, |
| { |
| "epoch": 51.79, |
| "learning_rate": 9.64245389537072e-06, |
| "loss": 0.0961, |
| "step": 137600 |
| }, |
| { |
| "epoch": 51.83, |
| "learning_rate": 9.63492660895747e-06, |
| "loss": 0.0963, |
| "step": 137700 |
| }, |
| { |
| "epoch": 51.86, |
| "learning_rate": 9.627399322544223e-06, |
| "loss": 0.0952, |
| "step": 137800 |
| }, |
| { |
| "epoch": 51.9, |
| "learning_rate": 9.619872036130976e-06, |
| "loss": 0.0944, |
| "step": 137900 |
| }, |
| { |
| "epoch": 51.94, |
| "learning_rate": 9.612344749717728e-06, |
| "loss": 0.0952, |
| "step": 138000 |
| }, |
| { |
| "epoch": 51.98, |
| "learning_rate": 9.60481746330448e-06, |
| "loss": 0.095, |
| "step": 138100 |
| }, |
| { |
| "epoch": 52.0, |
| "eval_loss": 0.0952862873673439, |
| "eval_runtime": 45.261, |
| "eval_samples_per_second": 165.706, |
| "eval_steps_per_second": 10.362, |
| "step": 138164 |
| }, |
| { |
| "epoch": 52.01, |
| "learning_rate": 9.597290176891231e-06, |
| "loss": 0.0958, |
| "step": 138200 |
| }, |
| { |
| "epoch": 52.05, |
| "learning_rate": 9.589762890477984e-06, |
| "loss": 0.0955, |
| "step": 138300 |
| }, |
| { |
| "epoch": 52.09, |
| "learning_rate": 9.582235604064737e-06, |
| "loss": 0.0959, |
| "step": 138400 |
| }, |
| { |
| "epoch": 52.13, |
| "learning_rate": 9.574708317651487e-06, |
| "loss": 0.0971, |
| "step": 138500 |
| }, |
| { |
| "epoch": 52.16, |
| "learning_rate": 9.56718103123824e-06, |
| "loss": 0.0952, |
| "step": 138600 |
| }, |
| { |
| "epoch": 52.2, |
| "learning_rate": 9.559653744824992e-06, |
| "loss": 0.0955, |
| "step": 138700 |
| }, |
| { |
| "epoch": 52.24, |
| "learning_rate": 9.552126458411743e-06, |
| "loss": 0.0946, |
| "step": 138800 |
| }, |
| { |
| "epoch": 52.28, |
| "learning_rate": 9.544599171998496e-06, |
| "loss": 0.0965, |
| "step": 138900 |
| }, |
| { |
| "epoch": 52.31, |
| "learning_rate": 9.537071885585247e-06, |
| "loss": 0.0941, |
| "step": 139000 |
| }, |
| { |
| "epoch": 52.35, |
| "learning_rate": 9.529544599171999e-06, |
| "loss": 0.096, |
| "step": 139100 |
| }, |
| { |
| "epoch": 52.39, |
| "learning_rate": 9.522017312758752e-06, |
| "loss": 0.0928, |
| "step": 139200 |
| }, |
| { |
| "epoch": 52.43, |
| "learning_rate": 9.514490026345502e-06, |
| "loss": 0.096, |
| "step": 139300 |
| }, |
| { |
| "epoch": 52.47, |
| "learning_rate": 9.506962739932255e-06, |
| "loss": 0.0953, |
| "step": 139400 |
| }, |
| { |
| "epoch": 52.5, |
| "learning_rate": 9.499435453519007e-06, |
| "loss": 0.0943, |
| "step": 139500 |
| }, |
| { |
| "epoch": 52.54, |
| "learning_rate": 9.491908167105758e-06, |
| "loss": 0.0948, |
| "step": 139600 |
| }, |
| { |
| "epoch": 52.58, |
| "learning_rate": 9.48438088069251e-06, |
| "loss": 0.0953, |
| "step": 139700 |
| }, |
| { |
| "epoch": 52.62, |
| "learning_rate": 9.476853594279263e-06, |
| "loss": 0.0952, |
| "step": 139800 |
| }, |
| { |
| "epoch": 52.65, |
| "learning_rate": 9.469326307866014e-06, |
| "loss": 0.0934, |
| "step": 139900 |
| }, |
| { |
| "epoch": 52.69, |
| "learning_rate": 9.461799021452767e-06, |
| "loss": 0.0965, |
| "step": 140000 |
| }, |
| { |
| "epoch": 52.73, |
| "learning_rate": 9.454271735039519e-06, |
| "loss": 0.0962, |
| "step": 140100 |
| }, |
| { |
| "epoch": 52.77, |
| "learning_rate": 9.44674444862627e-06, |
| "loss": 0.0954, |
| "step": 140200 |
| }, |
| { |
| "epoch": 52.8, |
| "learning_rate": 9.439217162213024e-06, |
| "loss": 0.095, |
| "step": 140300 |
| }, |
| { |
| "epoch": 52.84, |
| "learning_rate": 9.431689875799775e-06, |
| "loss": 0.0941, |
| "step": 140400 |
| }, |
| { |
| "epoch": 52.88, |
| "learning_rate": 9.424162589386527e-06, |
| "loss": 0.0953, |
| "step": 140500 |
| }, |
| { |
| "epoch": 52.92, |
| "learning_rate": 9.41663530297328e-06, |
| "loss": 0.0963, |
| "step": 140600 |
| }, |
| { |
| "epoch": 52.95, |
| "learning_rate": 9.40910801656003e-06, |
| "loss": 0.0958, |
| "step": 140700 |
| }, |
| { |
| "epoch": 52.99, |
| "learning_rate": 9.401580730146783e-06, |
| "loss": 0.0939, |
| "step": 140800 |
| }, |
| { |
| "epoch": 53.0, |
| "eval_loss": 0.09453196078538895, |
| "eval_runtime": 45.2882, |
| "eval_samples_per_second": 165.606, |
| "eval_steps_per_second": 10.356, |
| "step": 140821 |
| }, |
| { |
| "epoch": 53.03, |
| "learning_rate": 9.394053443733536e-06, |
| "loss": 0.095, |
| "step": 140900 |
| }, |
| { |
| "epoch": 53.07, |
| "learning_rate": 9.386526157320287e-06, |
| "loss": 0.0955, |
| "step": 141000 |
| }, |
| { |
| "epoch": 53.11, |
| "learning_rate": 9.378998870907039e-06, |
| "loss": 0.0944, |
| "step": 141100 |
| }, |
| { |
| "epoch": 53.14, |
| "learning_rate": 9.371471584493792e-06, |
| "loss": 0.0953, |
| "step": 141200 |
| }, |
| { |
| "epoch": 53.18, |
| "learning_rate": 9.363944298080542e-06, |
| "loss": 0.0945, |
| "step": 141300 |
| }, |
| { |
| "epoch": 53.22, |
| "learning_rate": 9.356417011667295e-06, |
| "loss": 0.0959, |
| "step": 141400 |
| }, |
| { |
| "epoch": 53.26, |
| "learning_rate": 9.348889725254047e-06, |
| "loss": 0.0938, |
| "step": 141500 |
| }, |
| { |
| "epoch": 53.29, |
| "learning_rate": 9.341362438840798e-06, |
| "loss": 0.0956, |
| "step": 141600 |
| }, |
| { |
| "epoch": 53.33, |
| "learning_rate": 9.33383515242755e-06, |
| "loss": 0.0962, |
| "step": 141700 |
| }, |
| { |
| "epoch": 53.37, |
| "learning_rate": 9.326307866014303e-06, |
| "loss": 0.0959, |
| "step": 141800 |
| }, |
| { |
| "epoch": 53.41, |
| "learning_rate": 9.318780579601054e-06, |
| "loss": 0.0953, |
| "step": 141900 |
| }, |
| { |
| "epoch": 53.44, |
| "learning_rate": 9.311253293187807e-06, |
| "loss": 0.0952, |
| "step": 142000 |
| }, |
| { |
| "epoch": 53.48, |
| "learning_rate": 9.303726006774557e-06, |
| "loss": 0.0945, |
| "step": 142100 |
| }, |
| { |
| "epoch": 53.52, |
| "learning_rate": 9.29619872036131e-06, |
| "loss": 0.0958, |
| "step": 142200 |
| }, |
| { |
| "epoch": 53.56, |
| "learning_rate": 9.288671433948063e-06, |
| "loss": 0.0949, |
| "step": 142300 |
| }, |
| { |
| "epoch": 53.59, |
| "learning_rate": 9.281144147534813e-06, |
| "loss": 0.0943, |
| "step": 142400 |
| }, |
| { |
| "epoch": 53.63, |
| "learning_rate": 9.273616861121566e-06, |
| "loss": 0.0957, |
| "step": 142500 |
| }, |
| { |
| "epoch": 53.67, |
| "learning_rate": 9.266089574708318e-06, |
| "loss": 0.0949, |
| "step": 142600 |
| }, |
| { |
| "epoch": 53.71, |
| "learning_rate": 9.25856228829507e-06, |
| "loss": 0.0939, |
| "step": 142700 |
| }, |
| { |
| "epoch": 53.74, |
| "learning_rate": 9.251035001881823e-06, |
| "loss": 0.0942, |
| "step": 142800 |
| }, |
| { |
| "epoch": 53.78, |
| "learning_rate": 9.243507715468574e-06, |
| "loss": 0.0955, |
| "step": 142900 |
| }, |
| { |
| "epoch": 53.82, |
| "learning_rate": 9.235980429055327e-06, |
| "loss": 0.0935, |
| "step": 143000 |
| }, |
| { |
| "epoch": 53.86, |
| "learning_rate": 9.22845314264208e-06, |
| "loss": 0.0952, |
| "step": 143100 |
| }, |
| { |
| "epoch": 53.9, |
| "learning_rate": 9.22092585622883e-06, |
| "loss": 0.0946, |
| "step": 143200 |
| }, |
| { |
| "epoch": 53.93, |
| "learning_rate": 9.213398569815583e-06, |
| "loss": 0.0971, |
| "step": 143300 |
| }, |
| { |
| "epoch": 53.97, |
| "learning_rate": 9.205871283402335e-06, |
| "loss": 0.0961, |
| "step": 143400 |
| }, |
| { |
| "epoch": 54.0, |
| "eval_loss": 0.09483154118061066, |
| "eval_runtime": 45.4413, |
| "eval_samples_per_second": 165.048, |
| "eval_steps_per_second": 10.321, |
| "step": 143478 |
| }, |
| { |
| "epoch": 54.01, |
| "learning_rate": 9.198343996989086e-06, |
| "loss": 0.0943, |
| "step": 143500 |
| }, |
| { |
| "epoch": 54.05, |
| "learning_rate": 9.190816710575838e-06, |
| "loss": 0.097, |
| "step": 143600 |
| }, |
| { |
| "epoch": 54.08, |
| "learning_rate": 9.183289424162591e-06, |
| "loss": 0.0956, |
| "step": 143700 |
| }, |
| { |
| "epoch": 54.12, |
| "learning_rate": 9.175762137749342e-06, |
| "loss": 0.0945, |
| "step": 143800 |
| }, |
| { |
| "epoch": 54.16, |
| "learning_rate": 9.168234851336094e-06, |
| "loss": 0.0968, |
| "step": 143900 |
| }, |
| { |
| "epoch": 54.2, |
| "learning_rate": 9.160707564922847e-06, |
| "loss": 0.0955, |
| "step": 144000 |
| }, |
| { |
| "epoch": 54.23, |
| "learning_rate": 9.153180278509598e-06, |
| "loss": 0.0952, |
| "step": 144100 |
| }, |
| { |
| "epoch": 54.27, |
| "learning_rate": 9.14565299209635e-06, |
| "loss": 0.0941, |
| "step": 144200 |
| }, |
| { |
| "epoch": 54.31, |
| "learning_rate": 9.138125705683103e-06, |
| "loss": 0.0935, |
| "step": 144300 |
| }, |
| { |
| "epoch": 54.35, |
| "learning_rate": 9.130598419269853e-06, |
| "loss": 0.0949, |
| "step": 144400 |
| }, |
| { |
| "epoch": 54.38, |
| "learning_rate": 9.123071132856606e-06, |
| "loss": 0.0957, |
| "step": 144500 |
| }, |
| { |
| "epoch": 54.42, |
| "learning_rate": 9.115543846443358e-06, |
| "loss": 0.0958, |
| "step": 144600 |
| }, |
| { |
| "epoch": 54.46, |
| "learning_rate": 9.10801656003011e-06, |
| "loss": 0.0932, |
| "step": 144700 |
| }, |
| { |
| "epoch": 54.5, |
| "learning_rate": 9.100489273616862e-06, |
| "loss": 0.0945, |
| "step": 144800 |
| }, |
| { |
| "epoch": 54.54, |
| "learning_rate": 9.092961987203613e-06, |
| "loss": 0.0956, |
| "step": 144900 |
| }, |
| { |
| "epoch": 54.57, |
| "learning_rate": 9.085434700790365e-06, |
| "loss": 0.0949, |
| "step": 145000 |
| }, |
| { |
| "epoch": 54.61, |
| "learning_rate": 9.077907414377118e-06, |
| "loss": 0.0942, |
| "step": 145100 |
| }, |
| { |
| "epoch": 54.65, |
| "learning_rate": 9.070380127963868e-06, |
| "loss": 0.0949, |
| "step": 145200 |
| }, |
| { |
| "epoch": 54.69, |
| "learning_rate": 9.062852841550623e-06, |
| "loss": 0.0947, |
| "step": 145300 |
| }, |
| { |
| "epoch": 54.72, |
| "learning_rate": 9.055325555137373e-06, |
| "loss": 0.0952, |
| "step": 145400 |
| }, |
| { |
| "epoch": 54.76, |
| "learning_rate": 9.047798268724126e-06, |
| "loss": 0.096, |
| "step": 145500 |
| }, |
| { |
| "epoch": 54.8, |
| "learning_rate": 9.040270982310879e-06, |
| "loss": 0.0953, |
| "step": 145600 |
| }, |
| { |
| "epoch": 54.84, |
| "learning_rate": 9.03274369589763e-06, |
| "loss": 0.094, |
| "step": 145700 |
| }, |
| { |
| "epoch": 54.87, |
| "learning_rate": 9.025216409484382e-06, |
| "loss": 0.0936, |
| "step": 145800 |
| }, |
| { |
| "epoch": 54.91, |
| "learning_rate": 9.017689123071134e-06, |
| "loss": 0.0951, |
| "step": 145900 |
| }, |
| { |
| "epoch": 54.95, |
| "learning_rate": 9.010161836657885e-06, |
| "loss": 0.0959, |
| "step": 146000 |
| }, |
| { |
| "epoch": 54.99, |
| "learning_rate": 9.002634550244638e-06, |
| "loss": 0.0964, |
| "step": 146100 |
| }, |
| { |
| "epoch": 55.0, |
| "eval_loss": 0.09549073874950409, |
| "eval_runtime": 45.2457, |
| "eval_samples_per_second": 165.762, |
| "eval_steps_per_second": 10.366, |
| "step": 146135 |
| }, |
| { |
| "epoch": 55.02, |
| "learning_rate": 8.99510726383139e-06, |
| "loss": 0.0963, |
| "step": 146200 |
| }, |
| { |
| "epoch": 55.06, |
| "learning_rate": 8.987579977418141e-06, |
| "loss": 0.0962, |
| "step": 146300 |
| }, |
| { |
| "epoch": 55.1, |
| "learning_rate": 8.980052691004894e-06, |
| "loss": 0.0954, |
| "step": 146400 |
| }, |
| { |
| "epoch": 55.14, |
| "learning_rate": 8.972525404591646e-06, |
| "loss": 0.0934, |
| "step": 146500 |
| }, |
| { |
| "epoch": 55.18, |
| "learning_rate": 8.964998118178397e-06, |
| "loss": 0.0945, |
| "step": 146600 |
| }, |
| { |
| "epoch": 55.21, |
| "learning_rate": 8.95747083176515e-06, |
| "loss": 0.0936, |
| "step": 146700 |
| }, |
| { |
| "epoch": 55.25, |
| "learning_rate": 8.949943545351902e-06, |
| "loss": 0.095, |
| "step": 146800 |
| }, |
| { |
| "epoch": 55.29, |
| "learning_rate": 8.942416258938653e-06, |
| "loss": 0.094, |
| "step": 146900 |
| }, |
| { |
| "epoch": 55.33, |
| "learning_rate": 8.934888972525405e-06, |
| "loss": 0.0944, |
| "step": 147000 |
| }, |
| { |
| "epoch": 55.36, |
| "learning_rate": 8.927361686112158e-06, |
| "loss": 0.0947, |
| "step": 147100 |
| }, |
| { |
| "epoch": 55.4, |
| "learning_rate": 8.919834399698909e-06, |
| "loss": 0.0966, |
| "step": 147200 |
| }, |
| { |
| "epoch": 55.44, |
| "learning_rate": 8.912307113285661e-06, |
| "loss": 0.0933, |
| "step": 147300 |
| }, |
| { |
| "epoch": 55.48, |
| "learning_rate": 8.904779826872414e-06, |
| "loss": 0.0939, |
| "step": 147400 |
| }, |
| { |
| "epoch": 55.51, |
| "learning_rate": 8.897252540459164e-06, |
| "loss": 0.0953, |
| "step": 147500 |
| }, |
| { |
| "epoch": 55.55, |
| "learning_rate": 8.889725254045917e-06, |
| "loss": 0.0963, |
| "step": 147600 |
| }, |
| { |
| "epoch": 55.59, |
| "learning_rate": 8.88219796763267e-06, |
| "loss": 0.0947, |
| "step": 147700 |
| }, |
| { |
| "epoch": 55.63, |
| "learning_rate": 8.874670681219422e-06, |
| "loss": 0.0933, |
| "step": 147800 |
| }, |
| { |
| "epoch": 55.66, |
| "learning_rate": 8.867143394806173e-06, |
| "loss": 0.0951, |
| "step": 147900 |
| }, |
| { |
| "epoch": 55.7, |
| "learning_rate": 8.859616108392925e-06, |
| "loss": 0.0955, |
| "step": 148000 |
| }, |
| { |
| "epoch": 55.74, |
| "learning_rate": 8.852088821979678e-06, |
| "loss": 0.0943, |
| "step": 148100 |
| }, |
| { |
| "epoch": 55.78, |
| "learning_rate": 8.844561535566429e-06, |
| "loss": 0.0942, |
| "step": 148200 |
| }, |
| { |
| "epoch": 55.81, |
| "learning_rate": 8.837034249153181e-06, |
| "loss": 0.0962, |
| "step": 148300 |
| }, |
| { |
| "epoch": 55.85, |
| "learning_rate": 8.829506962739934e-06, |
| "loss": 0.0937, |
| "step": 148400 |
| }, |
| { |
| "epoch": 55.89, |
| "learning_rate": 8.821979676326684e-06, |
| "loss": 0.0944, |
| "step": 148500 |
| }, |
| { |
| "epoch": 55.93, |
| "learning_rate": 8.814452389913437e-06, |
| "loss": 0.0937, |
| "step": 148600 |
| }, |
| { |
| "epoch": 55.97, |
| "learning_rate": 8.80692510350019e-06, |
| "loss": 0.0934, |
| "step": 148700 |
| }, |
| { |
| "epoch": 56.0, |
| "eval_loss": 0.0948183611035347, |
| "eval_runtime": 44.9888, |
| "eval_samples_per_second": 166.708, |
| "eval_steps_per_second": 10.425, |
| "step": 148792 |
| }, |
| { |
| "epoch": 56.0, |
| "learning_rate": 8.79939781708694e-06, |
| "loss": 0.0939, |
| "step": 148800 |
| }, |
| { |
| "epoch": 56.04, |
| "learning_rate": 8.791870530673693e-06, |
| "loss": 0.0966, |
| "step": 148900 |
| }, |
| { |
| "epoch": 56.08, |
| "learning_rate": 8.784343244260445e-06, |
| "loss": 0.0951, |
| "step": 149000 |
| }, |
| { |
| "epoch": 56.12, |
| "learning_rate": 8.776815957847196e-06, |
| "loss": 0.0955, |
| "step": 149100 |
| }, |
| { |
| "epoch": 56.15, |
| "learning_rate": 8.769288671433949e-06, |
| "loss": 0.0959, |
| "step": 149200 |
| }, |
| { |
| "epoch": 56.19, |
| "learning_rate": 8.761761385020701e-06, |
| "loss": 0.0949, |
| "step": 149300 |
| }, |
| { |
| "epoch": 56.23, |
| "learning_rate": 8.754234098607452e-06, |
| "loss": 0.0938, |
| "step": 149400 |
| }, |
| { |
| "epoch": 56.27, |
| "learning_rate": 8.746706812194205e-06, |
| "loss": 0.0941, |
| "step": 149500 |
| }, |
| { |
| "epoch": 56.3, |
| "learning_rate": 8.739179525780957e-06, |
| "loss": 0.0939, |
| "step": 149600 |
| }, |
| { |
| "epoch": 56.34, |
| "learning_rate": 8.731652239367708e-06, |
| "loss": 0.0944, |
| "step": 149700 |
| }, |
| { |
| "epoch": 56.38, |
| "learning_rate": 8.72412495295446e-06, |
| "loss": 0.0949, |
| "step": 149800 |
| }, |
| { |
| "epoch": 56.42, |
| "learning_rate": 8.716597666541213e-06, |
| "loss": 0.0952, |
| "step": 149900 |
| }, |
| { |
| "epoch": 56.45, |
| "learning_rate": 8.709070380127964e-06, |
| "loss": 0.0967, |
| "step": 150000 |
| }, |
| { |
| "epoch": 56.49, |
| "learning_rate": 8.701543093714716e-06, |
| "loss": 0.0948, |
| "step": 150100 |
| }, |
| { |
| "epoch": 56.53, |
| "learning_rate": 8.694015807301469e-06, |
| "loss": 0.0942, |
| "step": 150200 |
| }, |
| { |
| "epoch": 56.57, |
| "learning_rate": 8.686488520888221e-06, |
| "loss": 0.0948, |
| "step": 150300 |
| }, |
| { |
| "epoch": 56.61, |
| "learning_rate": 8.678961234474974e-06, |
| "loss": 0.0954, |
| "step": 150400 |
| }, |
| { |
| "epoch": 56.64, |
| "learning_rate": 8.671433948061725e-06, |
| "loss": 0.0951, |
| "step": 150500 |
| }, |
| { |
| "epoch": 56.68, |
| "learning_rate": 8.663906661648477e-06, |
| "loss": 0.094, |
| "step": 150600 |
| }, |
| { |
| "epoch": 56.72, |
| "learning_rate": 8.65637937523523e-06, |
| "loss": 0.094, |
| "step": 150700 |
| }, |
| { |
| "epoch": 56.76, |
| "learning_rate": 8.64885208882198e-06, |
| "loss": 0.0947, |
| "step": 150800 |
| }, |
| { |
| "epoch": 56.79, |
| "learning_rate": 8.641324802408733e-06, |
| "loss": 0.0954, |
| "step": 150900 |
| }, |
| { |
| "epoch": 56.83, |
| "learning_rate": 8.633797515995484e-06, |
| "loss": 0.0941, |
| "step": 151000 |
| }, |
| { |
| "epoch": 56.87, |
| "learning_rate": 8.626270229582236e-06, |
| "loss": 0.0948, |
| "step": 151100 |
| }, |
| { |
| "epoch": 56.91, |
| "learning_rate": 8.618742943168989e-06, |
| "loss": 0.0934, |
| "step": 151200 |
| }, |
| { |
| "epoch": 56.94, |
| "learning_rate": 8.61121565675574e-06, |
| "loss": 0.0927, |
| "step": 151300 |
| }, |
| { |
| "epoch": 56.98, |
| "learning_rate": 8.603688370342492e-06, |
| "loss": 0.0965, |
| "step": 151400 |
| }, |
| { |
| "epoch": 57.0, |
| "eval_loss": 0.09426940232515335, |
| "eval_runtime": 45.2417, |
| "eval_samples_per_second": 165.776, |
| "eval_steps_per_second": 10.367, |
| "step": 151449 |
| }, |
| { |
| "epoch": 57.02, |
| "learning_rate": 8.596161083929245e-06, |
| "loss": 0.0941, |
| "step": 151500 |
| }, |
| { |
| "epoch": 57.06, |
| "learning_rate": 8.588633797515995e-06, |
| "loss": 0.0947, |
| "step": 151600 |
| }, |
| { |
| "epoch": 57.09, |
| "learning_rate": 8.581106511102748e-06, |
| "loss": 0.0932, |
| "step": 151700 |
| }, |
| { |
| "epoch": 57.13, |
| "learning_rate": 8.5735792246895e-06, |
| "loss": 0.0947, |
| "step": 151800 |
| }, |
| { |
| "epoch": 57.17, |
| "learning_rate": 8.566051938276251e-06, |
| "loss": 0.0954, |
| "step": 151900 |
| }, |
| { |
| "epoch": 57.21, |
| "learning_rate": 8.558524651863004e-06, |
| "loss": 0.0956, |
| "step": 152000 |
| }, |
| { |
| "epoch": 57.25, |
| "learning_rate": 8.550997365449756e-06, |
| "loss": 0.0939, |
| "step": 152100 |
| }, |
| { |
| "epoch": 57.28, |
| "learning_rate": 8.543470079036507e-06, |
| "loss": 0.0943, |
| "step": 152200 |
| }, |
| { |
| "epoch": 57.32, |
| "learning_rate": 8.53594279262326e-06, |
| "loss": 0.0926, |
| "step": 152300 |
| }, |
| { |
| "epoch": 57.36, |
| "learning_rate": 8.528415506210012e-06, |
| "loss": 0.0936, |
| "step": 152400 |
| }, |
| { |
| "epoch": 57.4, |
| "learning_rate": 8.520888219796763e-06, |
| "loss": 0.0942, |
| "step": 152500 |
| }, |
| { |
| "epoch": 57.43, |
| "learning_rate": 8.513360933383515e-06, |
| "loss": 0.0955, |
| "step": 152600 |
| }, |
| { |
| "epoch": 57.47, |
| "learning_rate": 8.505833646970268e-06, |
| "loss": 0.0955, |
| "step": 152700 |
| }, |
| { |
| "epoch": 57.51, |
| "learning_rate": 8.49830636055702e-06, |
| "loss": 0.0946, |
| "step": 152800 |
| }, |
| { |
| "epoch": 57.55, |
| "learning_rate": 8.490779074143773e-06, |
| "loss": 0.0964, |
| "step": 152900 |
| }, |
| { |
| "epoch": 57.58, |
| "learning_rate": 8.483251787730524e-06, |
| "loss": 0.094, |
| "step": 153000 |
| }, |
| { |
| "epoch": 57.62, |
| "learning_rate": 8.475724501317276e-06, |
| "loss": 0.0949, |
| "step": 153100 |
| }, |
| { |
| "epoch": 57.66, |
| "learning_rate": 8.468197214904029e-06, |
| "loss": 0.0947, |
| "step": 153200 |
| }, |
| { |
| "epoch": 57.7, |
| "learning_rate": 8.46066992849078e-06, |
| "loss": 0.0952, |
| "step": 153300 |
| }, |
| { |
| "epoch": 57.73, |
| "learning_rate": 8.453142642077532e-06, |
| "loss": 0.0939, |
| "step": 153400 |
| }, |
| { |
| "epoch": 57.77, |
| "learning_rate": 8.445615355664285e-06, |
| "loss": 0.0947, |
| "step": 153500 |
| }, |
| { |
| "epoch": 57.81, |
| "learning_rate": 8.438088069251036e-06, |
| "loss": 0.094, |
| "step": 153600 |
| }, |
| { |
| "epoch": 57.85, |
| "learning_rate": 8.430560782837788e-06, |
| "loss": 0.0922, |
| "step": 153700 |
| }, |
| { |
| "epoch": 57.88, |
| "learning_rate": 8.423033496424539e-06, |
| "loss": 0.0947, |
| "step": 153800 |
| }, |
| { |
| "epoch": 57.92, |
| "learning_rate": 8.415506210011291e-06, |
| "loss": 0.0948, |
| "step": 153900 |
| }, |
| { |
| "epoch": 57.96, |
| "learning_rate": 8.407978923598044e-06, |
| "loss": 0.0921, |
| "step": 154000 |
| }, |
| { |
| "epoch": 58.0, |
| "learning_rate": 8.400451637184795e-06, |
| "loss": 0.0966, |
| "step": 154100 |
| }, |
| { |
| "epoch": 58.0, |
| "eval_loss": 0.094062440097332, |
| "eval_runtime": 45.4411, |
| "eval_samples_per_second": 165.049, |
| "eval_steps_per_second": 10.321, |
| "step": 154106 |
| }, |
| { |
| "epoch": 58.04, |
| "learning_rate": 8.392924350771547e-06, |
| "loss": 0.0938, |
| "step": 154200 |
| }, |
| { |
| "epoch": 58.07, |
| "learning_rate": 8.3853970643583e-06, |
| "loss": 0.0938, |
| "step": 154300 |
| }, |
| { |
| "epoch": 58.11, |
| "learning_rate": 8.37786977794505e-06, |
| "loss": 0.0936, |
| "step": 154400 |
| }, |
| { |
| "epoch": 58.15, |
| "learning_rate": 8.370342491531803e-06, |
| "loss": 0.0942, |
| "step": 154500 |
| }, |
| { |
| "epoch": 58.19, |
| "learning_rate": 8.362815205118556e-06, |
| "loss": 0.0961, |
| "step": 154600 |
| }, |
| { |
| "epoch": 58.22, |
| "learning_rate": 8.355287918705306e-06, |
| "loss": 0.0945, |
| "step": 154700 |
| }, |
| { |
| "epoch": 58.26, |
| "learning_rate": 8.347760632292059e-06, |
| "loss": 0.0954, |
| "step": 154800 |
| }, |
| { |
| "epoch": 58.3, |
| "learning_rate": 8.340233345878811e-06, |
| "loss": 0.0942, |
| "step": 154900 |
| }, |
| { |
| "epoch": 58.34, |
| "learning_rate": 8.332706059465562e-06, |
| "loss": 0.095, |
| "step": 155000 |
| }, |
| { |
| "epoch": 58.37, |
| "learning_rate": 8.325178773052315e-06, |
| "loss": 0.0945, |
| "step": 155100 |
| }, |
| { |
| "epoch": 58.41, |
| "learning_rate": 8.317651486639067e-06, |
| "loss": 0.0937, |
| "step": 155200 |
| }, |
| { |
| "epoch": 58.45, |
| "learning_rate": 8.31012420022582e-06, |
| "loss": 0.0956, |
| "step": 155300 |
| }, |
| { |
| "epoch": 58.49, |
| "learning_rate": 8.302596913812572e-06, |
| "loss": 0.0926, |
| "step": 155400 |
| }, |
| { |
| "epoch": 58.52, |
| "learning_rate": 8.295069627399323e-06, |
| "loss": 0.0936, |
| "step": 155500 |
| }, |
| { |
| "epoch": 58.56, |
| "learning_rate": 8.287542340986076e-06, |
| "loss": 0.095, |
| "step": 155600 |
| }, |
| { |
| "epoch": 58.6, |
| "learning_rate": 8.280015054572828e-06, |
| "loss": 0.0933, |
| "step": 155700 |
| }, |
| { |
| "epoch": 58.64, |
| "learning_rate": 8.272487768159579e-06, |
| "loss": 0.0941, |
| "step": 155800 |
| }, |
| { |
| "epoch": 58.68, |
| "learning_rate": 8.264960481746331e-06, |
| "loss": 0.0937, |
| "step": 155900 |
| }, |
| { |
| "epoch": 58.71, |
| "learning_rate": 8.257433195333084e-06, |
| "loss": 0.0948, |
| "step": 156000 |
| }, |
| { |
| "epoch": 58.75, |
| "learning_rate": 8.249905908919835e-06, |
| "loss": 0.0947, |
| "step": 156100 |
| }, |
| { |
| "epoch": 58.79, |
| "learning_rate": 8.242378622506587e-06, |
| "loss": 0.0919, |
| "step": 156200 |
| }, |
| { |
| "epoch": 58.83, |
| "learning_rate": 8.23485133609334e-06, |
| "loss": 0.0956, |
| "step": 156300 |
| }, |
| { |
| "epoch": 58.86, |
| "learning_rate": 8.22732404968009e-06, |
| "loss": 0.0946, |
| "step": 156400 |
| }, |
| { |
| "epoch": 58.9, |
| "learning_rate": 8.219796763266843e-06, |
| "loss": 0.0934, |
| "step": 156500 |
| }, |
| { |
| "epoch": 58.94, |
| "learning_rate": 8.212269476853596e-06, |
| "loss": 0.0953, |
| "step": 156600 |
| }, |
| { |
| "epoch": 58.98, |
| "learning_rate": 8.204742190440347e-06, |
| "loss": 0.0926, |
| "step": 156700 |
| }, |
| { |
| "epoch": 59.0, |
| "eval_loss": 0.0938277319073677, |
| "eval_runtime": 45.0217, |
| "eval_samples_per_second": 166.586, |
| "eval_steps_per_second": 10.417, |
| "step": 156763 |
| }, |
| { |
| "epoch": 59.01, |
| "learning_rate": 8.197214904027099e-06, |
| "loss": 0.0939, |
| "step": 156800 |
| }, |
| { |
| "epoch": 59.05, |
| "learning_rate": 8.18968761761385e-06, |
| "loss": 0.0931, |
| "step": 156900 |
| }, |
| { |
| "epoch": 59.09, |
| "learning_rate": 8.182160331200602e-06, |
| "loss": 0.0945, |
| "step": 157000 |
| }, |
| { |
| "epoch": 59.13, |
| "learning_rate": 8.174633044787355e-06, |
| "loss": 0.0941, |
| "step": 157100 |
| }, |
| { |
| "epoch": 59.16, |
| "learning_rate": 8.167105758374106e-06, |
| "loss": 0.0937, |
| "step": 157200 |
| }, |
| { |
| "epoch": 59.2, |
| "learning_rate": 8.159578471960858e-06, |
| "loss": 0.0945, |
| "step": 157300 |
| }, |
| { |
| "epoch": 59.24, |
| "learning_rate": 8.15205118554761e-06, |
| "loss": 0.0949, |
| "step": 157400 |
| }, |
| { |
| "epoch": 59.28, |
| "learning_rate": 8.144523899134362e-06, |
| "loss": 0.0926, |
| "step": 157500 |
| }, |
| { |
| "epoch": 59.32, |
| "learning_rate": 8.136996612721114e-06, |
| "loss": 0.0943, |
| "step": 157600 |
| }, |
| { |
| "epoch": 59.35, |
| "learning_rate": 8.129469326307867e-06, |
| "loss": 0.0929, |
| "step": 157700 |
| }, |
| { |
| "epoch": 59.39, |
| "learning_rate": 8.121942039894619e-06, |
| "loss": 0.0964, |
| "step": 157800 |
| }, |
| { |
| "epoch": 59.43, |
| "learning_rate": 8.114414753481372e-06, |
| "loss": 0.0944, |
| "step": 157900 |
| }, |
| { |
| "epoch": 59.47, |
| "learning_rate": 8.106887467068122e-06, |
| "loss": 0.0943, |
| "step": 158000 |
| }, |
| { |
| "epoch": 59.5, |
| "learning_rate": 8.099360180654875e-06, |
| "loss": 0.0962, |
| "step": 158100 |
| }, |
| { |
| "epoch": 59.54, |
| "learning_rate": 8.091832894241627e-06, |
| "loss": 0.0948, |
| "step": 158200 |
| }, |
| { |
| "epoch": 59.58, |
| "learning_rate": 8.084305607828378e-06, |
| "loss": 0.095, |
| "step": 158300 |
| }, |
| { |
| "epoch": 59.62, |
| "learning_rate": 8.07677832141513e-06, |
| "loss": 0.0941, |
| "step": 158400 |
| }, |
| { |
| "epoch": 59.65, |
| "learning_rate": 8.069251035001883e-06, |
| "loss": 0.0949, |
| "step": 158500 |
| }, |
| { |
| "epoch": 59.69, |
| "learning_rate": 8.061723748588634e-06, |
| "loss": 0.0945, |
| "step": 158600 |
| }, |
| { |
| "epoch": 59.73, |
| "learning_rate": 8.054196462175387e-06, |
| "loss": 0.0946, |
| "step": 158700 |
| }, |
| { |
| "epoch": 59.77, |
| "learning_rate": 8.046669175762139e-06, |
| "loss": 0.0948, |
| "step": 158800 |
| }, |
| { |
| "epoch": 59.8, |
| "learning_rate": 8.03914188934889e-06, |
| "loss": 0.0946, |
| "step": 158900 |
| }, |
| { |
| "epoch": 59.84, |
| "learning_rate": 8.031614602935642e-06, |
| "loss": 0.0945, |
| "step": 159000 |
| }, |
| { |
| "epoch": 59.88, |
| "learning_rate": 8.024087316522395e-06, |
| "loss": 0.0944, |
| "step": 159100 |
| }, |
| { |
| "epoch": 59.92, |
| "learning_rate": 8.016560030109146e-06, |
| "loss": 0.0942, |
| "step": 159200 |
| }, |
| { |
| "epoch": 59.95, |
| "learning_rate": 8.009032743695898e-06, |
| "loss": 0.0937, |
| "step": 159300 |
| }, |
| { |
| "epoch": 59.99, |
| "learning_rate": 8.00150545728265e-06, |
| "loss": 0.0928, |
| "step": 159400 |
| }, |
| { |
| "epoch": 60.0, |
| "eval_loss": 0.09416601806879044, |
| "eval_runtime": 45.0453, |
| "eval_samples_per_second": 166.499, |
| "eval_steps_per_second": 10.412, |
| "step": 159420 |
| }, |
| { |
| "epoch": 60.03, |
| "learning_rate": 7.993978170869402e-06, |
| "loss": 0.0923, |
| "step": 159500 |
| }, |
| { |
| "epoch": 60.07, |
| "learning_rate": 7.986450884456154e-06, |
| "loss": 0.0956, |
| "step": 159600 |
| }, |
| { |
| "epoch": 60.11, |
| "learning_rate": 7.978923598042907e-06, |
| "loss": 0.0954, |
| "step": 159700 |
| }, |
| { |
| "epoch": 60.14, |
| "learning_rate": 7.971396311629657e-06, |
| "loss": 0.0946, |
| "step": 159800 |
| }, |
| { |
| "epoch": 60.18, |
| "learning_rate": 7.96386902521641e-06, |
| "loss": 0.0952, |
| "step": 159900 |
| }, |
| { |
| "epoch": 60.22, |
| "learning_rate": 7.95634173880316e-06, |
| "loss": 0.0932, |
| "step": 160000 |
| }, |
| { |
| "epoch": 60.26, |
| "learning_rate": 7.948814452389913e-06, |
| "loss": 0.0942, |
| "step": 160100 |
| }, |
| { |
| "epoch": 60.29, |
| "learning_rate": 7.941287165976666e-06, |
| "loss": 0.0923, |
| "step": 160200 |
| }, |
| { |
| "epoch": 60.33, |
| "learning_rate": 7.933759879563418e-06, |
| "loss": 0.0943, |
| "step": 160300 |
| }, |
| { |
| "epoch": 60.37, |
| "learning_rate": 7.926232593150171e-06, |
| "loss": 0.0926, |
| "step": 160400 |
| }, |
| { |
| "epoch": 60.41, |
| "learning_rate": 7.918705306736922e-06, |
| "loss": 0.0944, |
| "step": 160500 |
| }, |
| { |
| "epoch": 60.44, |
| "learning_rate": 7.911178020323674e-06, |
| "loss": 0.0951, |
| "step": 160600 |
| }, |
| { |
| "epoch": 60.48, |
| "learning_rate": 7.903650733910427e-06, |
| "loss": 0.0925, |
| "step": 160700 |
| }, |
| { |
| "epoch": 60.52, |
| "learning_rate": 7.896123447497178e-06, |
| "loss": 0.0938, |
| "step": 160800 |
| }, |
| { |
| "epoch": 60.56, |
| "learning_rate": 7.88859616108393e-06, |
| "loss": 0.0949, |
| "step": 160900 |
| }, |
| { |
| "epoch": 60.59, |
| "learning_rate": 7.881068874670683e-06, |
| "loss": 0.0924, |
| "step": 161000 |
| }, |
| { |
| "epoch": 60.63, |
| "learning_rate": 7.873541588257433e-06, |
| "loss": 0.0945, |
| "step": 161100 |
| }, |
| { |
| "epoch": 60.67, |
| "learning_rate": 7.866014301844186e-06, |
| "loss": 0.0935, |
| "step": 161200 |
| }, |
| { |
| "epoch": 60.71, |
| "learning_rate": 7.858487015430938e-06, |
| "loss": 0.095, |
| "step": 161300 |
| }, |
| { |
| "epoch": 60.75, |
| "learning_rate": 7.85095972901769e-06, |
| "loss": 0.0938, |
| "step": 161400 |
| }, |
| { |
| "epoch": 60.78, |
| "learning_rate": 7.843432442604442e-06, |
| "loss": 0.0937, |
| "step": 161500 |
| }, |
| { |
| "epoch": 60.82, |
| "learning_rate": 7.835905156191194e-06, |
| "loss": 0.0942, |
| "step": 161600 |
| }, |
| { |
| "epoch": 60.86, |
| "learning_rate": 7.828377869777945e-06, |
| "loss": 0.0955, |
| "step": 161700 |
| }, |
| { |
| "epoch": 60.9, |
| "learning_rate": 7.820850583364698e-06, |
| "loss": 0.0932, |
| "step": 161800 |
| }, |
| { |
| "epoch": 60.93, |
| "learning_rate": 7.81332329695145e-06, |
| "loss": 0.094, |
| "step": 161900 |
| }, |
| { |
| "epoch": 60.97, |
| "learning_rate": 7.805796010538201e-06, |
| "loss": 0.093, |
| "step": 162000 |
| }, |
| { |
| "epoch": 61.0, |
| "eval_loss": 0.09355577826499939, |
| "eval_runtime": 45.0615, |
| "eval_samples_per_second": 166.439, |
| "eval_steps_per_second": 10.408, |
| "step": 162077 |
| }, |
| { |
| "epoch": 61.01, |
| "learning_rate": 7.798268724124953e-06, |
| "loss": 0.0932, |
| "step": 162100 |
| }, |
| { |
| "epoch": 61.05, |
| "learning_rate": 7.790741437711706e-06, |
| "loss": 0.0942, |
| "step": 162200 |
| }, |
| { |
| "epoch": 61.08, |
| "learning_rate": 7.783214151298457e-06, |
| "loss": 0.0933, |
| "step": 162300 |
| }, |
| { |
| "epoch": 61.12, |
| "learning_rate": 7.77568686488521e-06, |
| "loss": 0.0942, |
| "step": 162400 |
| }, |
| { |
| "epoch": 61.16, |
| "learning_rate": 7.768159578471962e-06, |
| "loss": 0.0938, |
| "step": 162500 |
| }, |
| { |
| "epoch": 61.2, |
| "learning_rate": 7.760632292058713e-06, |
| "loss": 0.0948, |
| "step": 162600 |
| }, |
| { |
| "epoch": 61.23, |
| "learning_rate": 7.753105005645465e-06, |
| "loss": 0.0946, |
| "step": 162700 |
| }, |
| { |
| "epoch": 61.27, |
| "learning_rate": 7.745577719232218e-06, |
| "loss": 0.0926, |
| "step": 162800 |
| }, |
| { |
| "epoch": 61.31, |
| "learning_rate": 7.73805043281897e-06, |
| "loss": 0.0945, |
| "step": 162900 |
| }, |
| { |
| "epoch": 61.35, |
| "learning_rate": 7.730523146405721e-06, |
| "loss": 0.0923, |
| "step": 163000 |
| }, |
| { |
| "epoch": 61.39, |
| "learning_rate": 7.722995859992473e-06, |
| "loss": 0.0935, |
| "step": 163100 |
| }, |
| { |
| "epoch": 61.42, |
| "learning_rate": 7.715468573579226e-06, |
| "loss": 0.0938, |
| "step": 163200 |
| }, |
| { |
| "epoch": 61.46, |
| "learning_rate": 7.707941287165977e-06, |
| "loss": 0.0938, |
| "step": 163300 |
| }, |
| { |
| "epoch": 61.5, |
| "learning_rate": 7.70041400075273e-06, |
| "loss": 0.0929, |
| "step": 163400 |
| }, |
| { |
| "epoch": 61.54, |
| "learning_rate": 7.692886714339482e-06, |
| "loss": 0.0937, |
| "step": 163500 |
| }, |
| { |
| "epoch": 61.57, |
| "learning_rate": 7.685359427926233e-06, |
| "loss": 0.0921, |
| "step": 163600 |
| }, |
| { |
| "epoch": 61.61, |
| "learning_rate": 7.677832141512985e-06, |
| "loss": 0.0933, |
| "step": 163700 |
| }, |
| { |
| "epoch": 61.65, |
| "learning_rate": 7.670304855099738e-06, |
| "loss": 0.0929, |
| "step": 163800 |
| }, |
| { |
| "epoch": 61.69, |
| "learning_rate": 7.662777568686489e-06, |
| "loss": 0.0931, |
| "step": 163900 |
| }, |
| { |
| "epoch": 61.72, |
| "learning_rate": 7.655250282273241e-06, |
| "loss": 0.0947, |
| "step": 164000 |
| }, |
| { |
| "epoch": 61.76, |
| "learning_rate": 7.647722995859994e-06, |
| "loss": 0.0944, |
| "step": 164100 |
| }, |
| { |
| "epoch": 61.8, |
| "learning_rate": 7.640195709446744e-06, |
| "loss": 0.0929, |
| "step": 164200 |
| }, |
| { |
| "epoch": 61.84, |
| "learning_rate": 7.632668423033497e-06, |
| "loss": 0.0945, |
| "step": 164300 |
| }, |
| { |
| "epoch": 61.87, |
| "learning_rate": 7.6251411366202485e-06, |
| "loss": 0.0952, |
| "step": 164400 |
| }, |
| { |
| "epoch": 61.91, |
| "learning_rate": 7.617613850207001e-06, |
| "loss": 0.0939, |
| "step": 164500 |
| }, |
| { |
| "epoch": 61.95, |
| "learning_rate": 7.610086563793753e-06, |
| "loss": 0.0949, |
| "step": 164600 |
| }, |
| { |
| "epoch": 61.99, |
| "learning_rate": 7.602559277380504e-06, |
| "loss": 0.0939, |
| "step": 164700 |
| }, |
| { |
| "epoch": 62.0, |
| "eval_loss": 0.09392710030078888, |
| "eval_runtime": 45.1193, |
| "eval_samples_per_second": 166.226, |
| "eval_steps_per_second": 10.395, |
| "step": 164734 |
| }, |
| { |
| "epoch": 62.02, |
| "learning_rate": 7.595031990967257e-06, |
| "loss": 0.0937, |
| "step": 164800 |
| }, |
| { |
| "epoch": 62.06, |
| "learning_rate": 7.5875047045540086e-06, |
| "loss": 0.0934, |
| "step": 164900 |
| }, |
| { |
| "epoch": 62.1, |
| "learning_rate": 7.57997741814076e-06, |
| "loss": 0.094, |
| "step": 165000 |
| }, |
| { |
| "epoch": 62.14, |
| "learning_rate": 7.572450131727512e-06, |
| "loss": 0.0947, |
| "step": 165100 |
| }, |
| { |
| "epoch": 62.18, |
| "learning_rate": 7.564922845314265e-06, |
| "loss": 0.0926, |
| "step": 165200 |
| }, |
| { |
| "epoch": 62.21, |
| "learning_rate": 7.557395558901017e-06, |
| "loss": 0.0947, |
| "step": 165300 |
| }, |
| { |
| "epoch": 62.25, |
| "learning_rate": 7.5498682724877694e-06, |
| "loss": 0.0937, |
| "step": 165400 |
| }, |
| { |
| "epoch": 62.29, |
| "learning_rate": 7.542340986074521e-06, |
| "loss": 0.0914, |
| "step": 165500 |
| }, |
| { |
| "epoch": 62.33, |
| "learning_rate": 7.534813699661273e-06, |
| "loss": 0.0953, |
| "step": 165600 |
| }, |
| { |
| "epoch": 62.36, |
| "learning_rate": 7.527286413248025e-06, |
| "loss": 0.0952, |
| "step": 165700 |
| }, |
| { |
| "epoch": 62.4, |
| "learning_rate": 7.519759126834777e-06, |
| "loss": 0.0935, |
| "step": 165800 |
| }, |
| { |
| "epoch": 62.44, |
| "learning_rate": 7.512231840421529e-06, |
| "loss": 0.0947, |
| "step": 165900 |
| }, |
| { |
| "epoch": 62.48, |
| "learning_rate": 7.504704554008281e-06, |
| "loss": 0.0935, |
| "step": 166000 |
| }, |
| { |
| "epoch": 62.51, |
| "learning_rate": 7.497177267595033e-06, |
| "loss": 0.0957, |
| "step": 166100 |
| }, |
| { |
| "epoch": 62.55, |
| "learning_rate": 7.4896499811817845e-06, |
| "loss": 0.0926, |
| "step": 166200 |
| }, |
| { |
| "epoch": 62.59, |
| "learning_rate": 7.482122694768537e-06, |
| "loss": 0.0943, |
| "step": 166300 |
| }, |
| { |
| "epoch": 62.63, |
| "learning_rate": 7.474595408355289e-06, |
| "loss": 0.0938, |
| "step": 166400 |
| }, |
| { |
| "epoch": 62.66, |
| "learning_rate": 7.46706812194204e-06, |
| "loss": 0.0943, |
| "step": 166500 |
| }, |
| { |
| "epoch": 62.7, |
| "learning_rate": 7.459540835528792e-06, |
| "loss": 0.0924, |
| "step": 166600 |
| }, |
| { |
| "epoch": 62.74, |
| "learning_rate": 7.4520135491155445e-06, |
| "loss": 0.0929, |
| "step": 166700 |
| }, |
| { |
| "epoch": 62.78, |
| "learning_rate": 7.444486262702296e-06, |
| "loss": 0.0936, |
| "step": 166800 |
| }, |
| { |
| "epoch": 62.82, |
| "learning_rate": 7.436958976289048e-06, |
| "loss": 0.0931, |
| "step": 166900 |
| }, |
| { |
| "epoch": 62.85, |
| "learning_rate": 7.4294316898758e-06, |
| "loss": 0.0943, |
| "step": 167000 |
| }, |
| { |
| "epoch": 62.89, |
| "learning_rate": 7.421904403462552e-06, |
| "loss": 0.094, |
| "step": 167100 |
| }, |
| { |
| "epoch": 62.93, |
| "learning_rate": 7.414377117049304e-06, |
| "loss": 0.0931, |
| "step": 167200 |
| }, |
| { |
| "epoch": 62.97, |
| "learning_rate": 7.406849830636056e-06, |
| "loss": 0.0936, |
| "step": 167300 |
| }, |
| { |
| "epoch": 63.0, |
| "eval_loss": 0.09357059001922607, |
| "eval_runtime": 45.0624, |
| "eval_samples_per_second": 166.436, |
| "eval_steps_per_second": 10.408, |
| "step": 167391 |
| }, |
| { |
| "epoch": 63.0, |
| "learning_rate": 7.399322544222808e-06, |
| "loss": 0.0936, |
| "step": 167400 |
| }, |
| { |
| "epoch": 63.04, |
| "learning_rate": 7.3917952578095595e-06, |
| "loss": 0.094, |
| "step": 167500 |
| }, |
| { |
| "epoch": 63.08, |
| "learning_rate": 7.384267971396312e-06, |
| "loss": 0.0946, |
| "step": 167600 |
| }, |
| { |
| "epoch": 63.12, |
| "learning_rate": 7.3767406849830646e-06, |
| "loss": 0.0941, |
| "step": 167700 |
| }, |
| { |
| "epoch": 63.15, |
| "learning_rate": 7.369213398569817e-06, |
| "loss": 0.0943, |
| "step": 167800 |
| }, |
| { |
| "epoch": 63.19, |
| "learning_rate": 7.361686112156569e-06, |
| "loss": 0.0949, |
| "step": 167900 |
| }, |
| { |
| "epoch": 63.23, |
| "learning_rate": 7.35415882574332e-06, |
| "loss": 0.0945, |
| "step": 168000 |
| }, |
| { |
| "epoch": 63.27, |
| "learning_rate": 7.346631539330072e-06, |
| "loss": 0.0924, |
| "step": 168100 |
| }, |
| { |
| "epoch": 63.3, |
| "learning_rate": 7.3391042529168246e-06, |
| "loss": 0.094, |
| "step": 168200 |
| }, |
| { |
| "epoch": 63.34, |
| "learning_rate": 7.331576966503576e-06, |
| "loss": 0.0955, |
| "step": 168300 |
| }, |
| { |
| "epoch": 63.38, |
| "learning_rate": 7.324049680090328e-06, |
| "loss": 0.0937, |
| "step": 168400 |
| }, |
| { |
| "epoch": 63.42, |
| "learning_rate": 7.31652239367708e-06, |
| "loss": 0.0924, |
| "step": 168500 |
| }, |
| { |
| "epoch": 63.46, |
| "learning_rate": 7.308995107263832e-06, |
| "loss": 0.0943, |
| "step": 168600 |
| }, |
| { |
| "epoch": 63.49, |
| "learning_rate": 7.301467820850584e-06, |
| "loss": 0.094, |
| "step": 168700 |
| }, |
| { |
| "epoch": 63.53, |
| "learning_rate": 7.293940534437336e-06, |
| "loss": 0.0916, |
| "step": 168800 |
| }, |
| { |
| "epoch": 63.57, |
| "learning_rate": 7.286413248024088e-06, |
| "loss": 0.092, |
| "step": 168900 |
| }, |
| { |
| "epoch": 63.61, |
| "learning_rate": 7.27888596161084e-06, |
| "loss": 0.0951, |
| "step": 169000 |
| }, |
| { |
| "epoch": 63.64, |
| "learning_rate": 7.271358675197592e-06, |
| "loss": 0.0928, |
| "step": 169100 |
| }, |
| { |
| "epoch": 63.68, |
| "learning_rate": 7.263831388784344e-06, |
| "loss": 0.0938, |
| "step": 169200 |
| }, |
| { |
| "epoch": 63.72, |
| "learning_rate": 7.2563041023710954e-06, |
| "loss": 0.0936, |
| "step": 169300 |
| }, |
| { |
| "epoch": 63.76, |
| "learning_rate": 7.248776815957848e-06, |
| "loss": 0.0928, |
| "step": 169400 |
| }, |
| { |
| "epoch": 63.79, |
| "learning_rate": 7.2412495295446e-06, |
| "loss": 0.0944, |
| "step": 169500 |
| }, |
| { |
| "epoch": 63.83, |
| "learning_rate": 7.233722243131351e-06, |
| "loss": 0.0925, |
| "step": 169600 |
| }, |
| { |
| "epoch": 63.87, |
| "learning_rate": 7.226194956718103e-06, |
| "loss": 0.0932, |
| "step": 169700 |
| }, |
| { |
| "epoch": 63.91, |
| "learning_rate": 7.2186676703048555e-06, |
| "loss": 0.0934, |
| "step": 169800 |
| }, |
| { |
| "epoch": 63.94, |
| "learning_rate": 7.211140383891607e-06, |
| "loss": 0.0927, |
| "step": 169900 |
| }, |
| { |
| "epoch": 63.98, |
| "learning_rate": 7.203613097478359e-06, |
| "loss": 0.093, |
| "step": 170000 |
| }, |
| { |
| "epoch": 64.0, |
| "eval_loss": 0.09292689710855484, |
| "eval_runtime": 45.1577, |
| "eval_samples_per_second": 166.085, |
| "eval_steps_per_second": 10.386, |
| "step": 170048 |
| }, |
| { |
| "epoch": 64.02, |
| "learning_rate": 7.196085811065112e-06, |
| "loss": 0.0933, |
| "step": 170100 |
| }, |
| { |
| "epoch": 64.06, |
| "learning_rate": 7.188558524651864e-06, |
| "loss": 0.0938, |
| "step": 170200 |
| }, |
| { |
| "epoch": 64.09, |
| "learning_rate": 7.181031238238616e-06, |
| "loss": 0.0913, |
| "step": 170300 |
| }, |
| { |
| "epoch": 64.13, |
| "learning_rate": 7.173503951825368e-06, |
| "loss": 0.0919, |
| "step": 170400 |
| }, |
| { |
| "epoch": 64.17, |
| "learning_rate": 7.16597666541212e-06, |
| "loss": 0.0949, |
| "step": 170500 |
| }, |
| { |
| "epoch": 64.21, |
| "learning_rate": 7.158449378998872e-06, |
| "loss": 0.0938, |
| "step": 170600 |
| }, |
| { |
| "epoch": 64.25, |
| "learning_rate": 7.150922092585624e-06, |
| "loss": 0.0948, |
| "step": 170700 |
| }, |
| { |
| "epoch": 64.28, |
| "learning_rate": 7.1433948061723755e-06, |
| "loss": 0.093, |
| "step": 170800 |
| }, |
| { |
| "epoch": 64.32, |
| "learning_rate": 7.135867519759127e-06, |
| "loss": 0.0933, |
| "step": 170900 |
| }, |
| { |
| "epoch": 64.36, |
| "learning_rate": 7.12834023334588e-06, |
| "loss": 0.0915, |
| "step": 171000 |
| }, |
| { |
| "epoch": 64.4, |
| "learning_rate": 7.120812946932631e-06, |
| "loss": 0.093, |
| "step": 171100 |
| }, |
| { |
| "epoch": 64.43, |
| "learning_rate": 7.113285660519383e-06, |
| "loss": 0.0933, |
| "step": 171200 |
| }, |
| { |
| "epoch": 64.47, |
| "learning_rate": 7.1057583741061356e-06, |
| "loss": 0.0936, |
| "step": 171300 |
| }, |
| { |
| "epoch": 64.51, |
| "learning_rate": 7.098231087692887e-06, |
| "loss": 0.0935, |
| "step": 171400 |
| }, |
| { |
| "epoch": 64.55, |
| "learning_rate": 7.090703801279639e-06, |
| "loss": 0.094, |
| "step": 171500 |
| }, |
| { |
| "epoch": 64.58, |
| "learning_rate": 7.083176514866391e-06, |
| "loss": 0.0941, |
| "step": 171600 |
| }, |
| { |
| "epoch": 64.62, |
| "learning_rate": 7.075649228453143e-06, |
| "loss": 0.0946, |
| "step": 171700 |
| }, |
| { |
| "epoch": 64.66, |
| "learning_rate": 7.068121942039895e-06, |
| "loss": 0.0946, |
| "step": 171800 |
| }, |
| { |
| "epoch": 64.7, |
| "learning_rate": 7.060594655626647e-06, |
| "loss": 0.0926, |
| "step": 171900 |
| }, |
| { |
| "epoch": 64.73, |
| "learning_rate": 7.053067369213399e-06, |
| "loss": 0.0946, |
| "step": 172000 |
| }, |
| { |
| "epoch": 64.77, |
| "learning_rate": 7.045540082800151e-06, |
| "loss": 0.0937, |
| "step": 172100 |
| }, |
| { |
| "epoch": 64.81, |
| "learning_rate": 7.038012796386903e-06, |
| "loss": 0.0927, |
| "step": 172200 |
| }, |
| { |
| "epoch": 64.85, |
| "learning_rate": 7.030485509973655e-06, |
| "loss": 0.0945, |
| "step": 172300 |
| }, |
| { |
| "epoch": 64.89, |
| "learning_rate": 7.022958223560406e-06, |
| "loss": 0.0918, |
| "step": 172400 |
| }, |
| { |
| "epoch": 64.92, |
| "learning_rate": 7.015430937147158e-06, |
| "loss": 0.0923, |
| "step": 172500 |
| }, |
| { |
| "epoch": 64.96, |
| "learning_rate": 7.0079036507339114e-06, |
| "loss": 0.0926, |
| "step": 172600 |
| }, |
| { |
| "epoch": 65.0, |
| "learning_rate": 7.000376364320663e-06, |
| "loss": 0.0929, |
| "step": 172700 |
| }, |
| { |
| "epoch": 65.0, |
| "eval_loss": 0.0930134728550911, |
| "eval_runtime": 44.9287, |
| "eval_samples_per_second": 166.931, |
| "eval_steps_per_second": 10.439, |
| "step": 172705 |
| }, |
| { |
| "epoch": 65.04, |
| "learning_rate": 6.992849077907416e-06, |
| "loss": 0.0929, |
| "step": 172800 |
| }, |
| { |
| "epoch": 65.07, |
| "learning_rate": 6.985321791494167e-06, |
| "loss": 0.0932, |
| "step": 172900 |
| }, |
| { |
| "epoch": 65.11, |
| "learning_rate": 6.977794505080919e-06, |
| "loss": 0.0948, |
| "step": 173000 |
| }, |
| { |
| "epoch": 65.15, |
| "learning_rate": 6.9702672186676715e-06, |
| "loss": 0.093, |
| "step": 173100 |
| }, |
| { |
| "epoch": 65.19, |
| "learning_rate": 6.962739932254423e-06, |
| "loss": 0.0947, |
| "step": 173200 |
| }, |
| { |
| "epoch": 65.22, |
| "learning_rate": 6.955212645841175e-06, |
| "loss": 0.0925, |
| "step": 173300 |
| }, |
| { |
| "epoch": 65.26, |
| "learning_rate": 6.947685359427927e-06, |
| "loss": 0.093, |
| "step": 173400 |
| }, |
| { |
| "epoch": 65.3, |
| "learning_rate": 6.940158073014679e-06, |
| "loss": 0.0931, |
| "step": 173500 |
| }, |
| { |
| "epoch": 65.34, |
| "learning_rate": 6.932630786601431e-06, |
| "loss": 0.0944, |
| "step": 173600 |
| }, |
| { |
| "epoch": 65.37, |
| "learning_rate": 6.925103500188183e-06, |
| "loss": 0.0923, |
| "step": 173700 |
| }, |
| { |
| "epoch": 65.41, |
| "learning_rate": 6.917576213774935e-06, |
| "loss": 0.0919, |
| "step": 173800 |
| }, |
| { |
| "epoch": 65.45, |
| "learning_rate": 6.9100489273616865e-06, |
| "loss": 0.0945, |
| "step": 173900 |
| }, |
| { |
| "epoch": 65.49, |
| "learning_rate": 6.902521640948438e-06, |
| "loss": 0.0924, |
| "step": 174000 |
| }, |
| { |
| "epoch": 65.53, |
| "learning_rate": 6.894994354535191e-06, |
| "loss": 0.0941, |
| "step": 174100 |
| }, |
| { |
| "epoch": 65.56, |
| "learning_rate": 6.887467068121942e-06, |
| "loss": 0.0918, |
| "step": 174200 |
| }, |
| { |
| "epoch": 65.6, |
| "learning_rate": 6.879939781708694e-06, |
| "loss": 0.0935, |
| "step": 174300 |
| }, |
| { |
| "epoch": 65.64, |
| "learning_rate": 6.8724124952954465e-06, |
| "loss": 0.0944, |
| "step": 174400 |
| }, |
| { |
| "epoch": 65.68, |
| "learning_rate": 6.864885208882198e-06, |
| "loss": 0.0924, |
| "step": 174500 |
| }, |
| { |
| "epoch": 65.71, |
| "learning_rate": 6.85735792246895e-06, |
| "loss": 0.0927, |
| "step": 174600 |
| }, |
| { |
| "epoch": 65.75, |
| "learning_rate": 6.849830636055702e-06, |
| "loss": 0.094, |
| "step": 174700 |
| }, |
| { |
| "epoch": 65.79, |
| "learning_rate": 6.842303349642454e-06, |
| "loss": 0.0935, |
| "step": 174800 |
| }, |
| { |
| "epoch": 65.83, |
| "learning_rate": 6.834776063229206e-06, |
| "loss": 0.0927, |
| "step": 174900 |
| }, |
| { |
| "epoch": 65.86, |
| "learning_rate": 6.827248776815958e-06, |
| "loss": 0.0937, |
| "step": 175000 |
| }, |
| { |
| "epoch": 65.9, |
| "learning_rate": 6.819721490402711e-06, |
| "loss": 0.0938, |
| "step": 175100 |
| }, |
| { |
| "epoch": 65.94, |
| "learning_rate": 6.812194203989463e-06, |
| "loss": 0.0931, |
| "step": 175200 |
| }, |
| { |
| "epoch": 65.98, |
| "learning_rate": 6.804666917576215e-06, |
| "loss": 0.0917, |
| "step": 175300 |
| }, |
| { |
| "epoch": 66.0, |
| "eval_loss": 0.09251850843429565, |
| "eval_runtime": 44.9106, |
| "eval_samples_per_second": 166.998, |
| "eval_steps_per_second": 10.443, |
| "step": 175362 |
| }, |
| { |
| "epoch": 66.01, |
| "learning_rate": 6.797139631162967e-06, |
| "loss": 0.0936, |
| "step": 175400 |
| }, |
| { |
| "epoch": 66.05, |
| "learning_rate": 6.789612344749718e-06, |
| "loss": 0.0948, |
| "step": 175500 |
| }, |
| { |
| "epoch": 66.09, |
| "learning_rate": 6.782085058336471e-06, |
| "loss": 0.0945, |
| "step": 175600 |
| }, |
| { |
| "epoch": 66.13, |
| "learning_rate": 6.7745577719232224e-06, |
| "loss": 0.0937, |
| "step": 175700 |
| }, |
| { |
| "epoch": 66.16, |
| "learning_rate": 6.767030485509974e-06, |
| "loss": 0.0945, |
| "step": 175800 |
| }, |
| { |
| "epoch": 66.2, |
| "learning_rate": 6.759503199096727e-06, |
| "loss": 0.0932, |
| "step": 175900 |
| }, |
| { |
| "epoch": 66.24, |
| "learning_rate": 6.751975912683478e-06, |
| "loss": 0.0936, |
| "step": 176000 |
| }, |
| { |
| "epoch": 66.28, |
| "learning_rate": 6.74444862627023e-06, |
| "loss": 0.0933, |
| "step": 176100 |
| }, |
| { |
| "epoch": 66.32, |
| "learning_rate": 6.7369213398569825e-06, |
| "loss": 0.0926, |
| "step": 176200 |
| }, |
| { |
| "epoch": 66.35, |
| "learning_rate": 6.729394053443734e-06, |
| "loss": 0.093, |
| "step": 176300 |
| }, |
| { |
| "epoch": 66.39, |
| "learning_rate": 6.721866767030486e-06, |
| "loss": 0.0929, |
| "step": 176400 |
| }, |
| { |
| "epoch": 66.43, |
| "learning_rate": 6.714339480617238e-06, |
| "loss": 0.0934, |
| "step": 176500 |
| }, |
| { |
| "epoch": 66.47, |
| "learning_rate": 6.70681219420399e-06, |
| "loss": 0.0936, |
| "step": 176600 |
| }, |
| { |
| "epoch": 66.5, |
| "learning_rate": 6.699284907790742e-06, |
| "loss": 0.0916, |
| "step": 176700 |
| }, |
| { |
| "epoch": 66.54, |
| "learning_rate": 6.691757621377494e-06, |
| "loss": 0.0921, |
| "step": 176800 |
| }, |
| { |
| "epoch": 66.58, |
| "learning_rate": 6.684230334964246e-06, |
| "loss": 0.094, |
| "step": 176900 |
| }, |
| { |
| "epoch": 66.62, |
| "learning_rate": 6.6767030485509975e-06, |
| "loss": 0.0915, |
| "step": 177000 |
| }, |
| { |
| "epoch": 66.65, |
| "learning_rate": 6.669175762137749e-06, |
| "loss": 0.0919, |
| "step": 177100 |
| }, |
| { |
| "epoch": 66.69, |
| "learning_rate": 6.661648475724502e-06, |
| "loss": 0.0936, |
| "step": 177200 |
| }, |
| { |
| "epoch": 66.73, |
| "learning_rate": 6.654121189311253e-06, |
| "loss": 0.0927, |
| "step": 177300 |
| }, |
| { |
| "epoch": 66.77, |
| "learning_rate": 6.646593902898005e-06, |
| "loss": 0.0921, |
| "step": 177400 |
| }, |
| { |
| "epoch": 66.8, |
| "learning_rate": 6.6390666164847575e-06, |
| "loss": 0.0929, |
| "step": 177500 |
| }, |
| { |
| "epoch": 66.84, |
| "learning_rate": 6.63153933007151e-06, |
| "loss": 0.0934, |
| "step": 177600 |
| }, |
| { |
| "epoch": 66.88, |
| "learning_rate": 6.6240120436582625e-06, |
| "loss": 0.0932, |
| "step": 177700 |
| }, |
| { |
| "epoch": 66.92, |
| "learning_rate": 6.616484757245014e-06, |
| "loss": 0.0944, |
| "step": 177800 |
| }, |
| { |
| "epoch": 66.96, |
| "learning_rate": 6.608957470831766e-06, |
| "loss": 0.092, |
| "step": 177900 |
| }, |
| { |
| "epoch": 66.99, |
| "learning_rate": 6.601430184418518e-06, |
| "loss": 0.0948, |
| "step": 178000 |
| }, |
| { |
| "epoch": 67.0, |
| "eval_loss": 0.09316383302211761, |
| "eval_runtime": 44.8531, |
| "eval_samples_per_second": 167.212, |
| "eval_steps_per_second": 10.456, |
| "step": 178019 |
| }, |
| { |
| "epoch": 67.03, |
| "learning_rate": 6.59390289800527e-06, |
| "loss": 0.0931, |
| "step": 178100 |
| }, |
| { |
| "epoch": 67.07, |
| "learning_rate": 6.586375611592022e-06, |
| "loss": 0.0929, |
| "step": 178200 |
| }, |
| { |
| "epoch": 67.11, |
| "learning_rate": 6.578848325178774e-06, |
| "loss": 0.0933, |
| "step": 178300 |
| }, |
| { |
| "epoch": 67.14, |
| "learning_rate": 6.571321038765526e-06, |
| "loss": 0.0909, |
| "step": 178400 |
| }, |
| { |
| "epoch": 67.18, |
| "learning_rate": 6.5637937523522776e-06, |
| "loss": 0.093, |
| "step": 178500 |
| }, |
| { |
| "epoch": 67.22, |
| "learning_rate": 6.556266465939029e-06, |
| "loss": 0.0942, |
| "step": 178600 |
| }, |
| { |
| "epoch": 67.26, |
| "learning_rate": 6.548739179525782e-06, |
| "loss": 0.0926, |
| "step": 178700 |
| }, |
| { |
| "epoch": 67.29, |
| "learning_rate": 6.541211893112533e-06, |
| "loss": 0.0921, |
| "step": 178800 |
| }, |
| { |
| "epoch": 67.33, |
| "learning_rate": 6.533684606699285e-06, |
| "loss": 0.0932, |
| "step": 178900 |
| }, |
| { |
| "epoch": 67.37, |
| "learning_rate": 6.526157320286038e-06, |
| "loss": 0.0939, |
| "step": 179000 |
| }, |
| { |
| "epoch": 67.41, |
| "learning_rate": 6.518630033872789e-06, |
| "loss": 0.0943, |
| "step": 179100 |
| }, |
| { |
| "epoch": 67.44, |
| "learning_rate": 6.511102747459541e-06, |
| "loss": 0.0925, |
| "step": 179200 |
| }, |
| { |
| "epoch": 67.48, |
| "learning_rate": 6.5035754610462934e-06, |
| "loss": 0.092, |
| "step": 179300 |
| }, |
| { |
| "epoch": 67.52, |
| "learning_rate": 6.496048174633045e-06, |
| "loss": 0.093, |
| "step": 179400 |
| }, |
| { |
| "epoch": 67.56, |
| "learning_rate": 6.488520888219797e-06, |
| "loss": 0.0942, |
| "step": 179500 |
| }, |
| { |
| "epoch": 67.6, |
| "learning_rate": 6.480993601806549e-06, |
| "loss": 0.0933, |
| "step": 179600 |
| }, |
| { |
| "epoch": 67.63, |
| "learning_rate": 6.473466315393301e-06, |
| "loss": 0.0921, |
| "step": 179700 |
| }, |
| { |
| "epoch": 67.67, |
| "learning_rate": 6.465939028980053e-06, |
| "loss": 0.0928, |
| "step": 179800 |
| }, |
| { |
| "epoch": 67.71, |
| "learning_rate": 6.458411742566805e-06, |
| "loss": 0.0945, |
| "step": 179900 |
| }, |
| { |
| "epoch": 67.75, |
| "learning_rate": 6.450884456153557e-06, |
| "loss": 0.0925, |
| "step": 180000 |
| }, |
| { |
| "epoch": 67.78, |
| "learning_rate": 6.443357169740309e-06, |
| "loss": 0.0912, |
| "step": 180100 |
| }, |
| { |
| "epoch": 67.82, |
| "learning_rate": 6.435829883327062e-06, |
| "loss": 0.0917, |
| "step": 180200 |
| }, |
| { |
| "epoch": 67.86, |
| "learning_rate": 6.4283025969138135e-06, |
| "loss": 0.093, |
| "step": 180300 |
| }, |
| { |
| "epoch": 67.9, |
| "learning_rate": 6.420775310500565e-06, |
| "loss": 0.0928, |
| "step": 180400 |
| }, |
| { |
| "epoch": 67.93, |
| "learning_rate": 6.413248024087318e-06, |
| "loss": 0.0915, |
| "step": 180500 |
| }, |
| { |
| "epoch": 67.97, |
| "learning_rate": 6.405720737674069e-06, |
| "loss": 0.0931, |
| "step": 180600 |
| }, |
| { |
| "epoch": 68.0, |
| "eval_loss": 0.09266681969165802, |
| "eval_runtime": 44.9069, |
| "eval_samples_per_second": 167.012, |
| "eval_steps_per_second": 10.444, |
| "step": 180676 |
| }, |
| { |
| "epoch": 68.01, |
| "learning_rate": 6.398193451260821e-06, |
| "loss": 0.0939, |
| "step": 180700 |
| }, |
| { |
| "epoch": 68.05, |
| "learning_rate": 6.3906661648475735e-06, |
| "loss": 0.0933, |
| "step": 180800 |
| }, |
| { |
| "epoch": 68.08, |
| "learning_rate": 6.383138878434325e-06, |
| "loss": 0.0922, |
| "step": 180900 |
| }, |
| { |
| "epoch": 68.12, |
| "learning_rate": 6.375611592021077e-06, |
| "loss": 0.0922, |
| "step": 181000 |
| }, |
| { |
| "epoch": 68.16, |
| "learning_rate": 6.368084305607829e-06, |
| "loss": 0.0935, |
| "step": 181100 |
| }, |
| { |
| "epoch": 68.2, |
| "learning_rate": 6.360557019194581e-06, |
| "loss": 0.0938, |
| "step": 181200 |
| }, |
| { |
| "epoch": 68.23, |
| "learning_rate": 6.353029732781333e-06, |
| "loss": 0.0929, |
| "step": 181300 |
| }, |
| { |
| "epoch": 68.27, |
| "learning_rate": 6.345502446368084e-06, |
| "loss": 0.093, |
| "step": 181400 |
| }, |
| { |
| "epoch": 68.31, |
| "learning_rate": 6.337975159954837e-06, |
| "loss": 0.0932, |
| "step": 181500 |
| }, |
| { |
| "epoch": 68.35, |
| "learning_rate": 6.3304478735415885e-06, |
| "loss": 0.0914, |
| "step": 181600 |
| }, |
| { |
| "epoch": 68.39, |
| "learning_rate": 6.32292058712834e-06, |
| "loss": 0.0926, |
| "step": 181700 |
| }, |
| { |
| "epoch": 68.42, |
| "learning_rate": 6.315393300715093e-06, |
| "loss": 0.0935, |
| "step": 181800 |
| }, |
| { |
| "epoch": 68.46, |
| "learning_rate": 6.307866014301844e-06, |
| "loss": 0.0923, |
| "step": 181900 |
| }, |
| { |
| "epoch": 68.5, |
| "learning_rate": 6.300338727888596e-06, |
| "loss": 0.0921, |
| "step": 182000 |
| }, |
| { |
| "epoch": 68.54, |
| "learning_rate": 6.2928114414753486e-06, |
| "loss": 0.0929, |
| "step": 182100 |
| }, |
| { |
| "epoch": 68.57, |
| "learning_rate": 6.2852841550621e-06, |
| "loss": 0.0935, |
| "step": 182200 |
| }, |
| { |
| "epoch": 68.61, |
| "learning_rate": 6.277756868648852e-06, |
| "loss": 0.0923, |
| "step": 182300 |
| }, |
| { |
| "epoch": 68.65, |
| "learning_rate": 6.270229582235604e-06, |
| "loss": 0.093, |
| "step": 182400 |
| }, |
| { |
| "epoch": 68.69, |
| "learning_rate": 6.262702295822356e-06, |
| "loss": 0.0912, |
| "step": 182500 |
| }, |
| { |
| "epoch": 68.72, |
| "learning_rate": 6.2551750094091094e-06, |
| "loss": 0.0922, |
| "step": 182600 |
| }, |
| { |
| "epoch": 68.76, |
| "learning_rate": 6.247647722995861e-06, |
| "loss": 0.0919, |
| "step": 182700 |
| }, |
| { |
| "epoch": 68.8, |
| "learning_rate": 6.240120436582613e-06, |
| "loss": 0.0933, |
| "step": 182800 |
| }, |
| { |
| "epoch": 68.84, |
| "learning_rate": 6.2325931501693644e-06, |
| "loss": 0.0936, |
| "step": 182900 |
| }, |
| { |
| "epoch": 68.87, |
| "learning_rate": 6.225065863756117e-06, |
| "loss": 0.0935, |
| "step": 183000 |
| }, |
| { |
| "epoch": 68.91, |
| "learning_rate": 6.217538577342869e-06, |
| "loss": 0.0917, |
| "step": 183100 |
| }, |
| { |
| "epoch": 68.95, |
| "learning_rate": 6.21001129092962e-06, |
| "loss": 0.0916, |
| "step": 183200 |
| }, |
| { |
| "epoch": 68.99, |
| "learning_rate": 6.202484004516373e-06, |
| "loss": 0.0911, |
| "step": 183300 |
| }, |
| { |
| "epoch": 69.0, |
| "eval_loss": 0.092154860496521, |
| "eval_runtime": 44.9385, |
| "eval_samples_per_second": 166.895, |
| "eval_steps_per_second": 10.436, |
| "step": 183333 |
| }, |
| { |
| "epoch": 69.03, |
| "learning_rate": 6.1949567181031245e-06, |
| "loss": 0.0919, |
| "step": 183400 |
| }, |
| { |
| "epoch": 69.06, |
| "learning_rate": 6.187429431689876e-06, |
| "loss": 0.0931, |
| "step": 183500 |
| }, |
| { |
| "epoch": 69.1, |
| "learning_rate": 6.179902145276629e-06, |
| "loss": 0.0923, |
| "step": 183600 |
| }, |
| { |
| "epoch": 69.14, |
| "learning_rate": 6.17237485886338e-06, |
| "loss": 0.0927, |
| "step": 183700 |
| }, |
| { |
| "epoch": 69.18, |
| "learning_rate": 6.164847572450132e-06, |
| "loss": 0.0942, |
| "step": 183800 |
| }, |
| { |
| "epoch": 69.21, |
| "learning_rate": 6.1573202860368845e-06, |
| "loss": 0.0926, |
| "step": 183900 |
| }, |
| { |
| "epoch": 69.25, |
| "learning_rate": 6.149792999623636e-06, |
| "loss": 0.0943, |
| "step": 184000 |
| }, |
| { |
| "epoch": 69.29, |
| "learning_rate": 6.142265713210388e-06, |
| "loss": 0.0918, |
| "step": 184100 |
| }, |
| { |
| "epoch": 69.33, |
| "learning_rate": 6.13473842679714e-06, |
| "loss": 0.0933, |
| "step": 184200 |
| }, |
| { |
| "epoch": 69.36, |
| "learning_rate": 6.127211140383892e-06, |
| "loss": 0.093, |
| "step": 184300 |
| }, |
| { |
| "epoch": 69.4, |
| "learning_rate": 6.119683853970644e-06, |
| "loss": 0.0927, |
| "step": 184400 |
| }, |
| { |
| "epoch": 69.44, |
| "learning_rate": 6.112156567557395e-06, |
| "loss": 0.0925, |
| "step": 184500 |
| }, |
| { |
| "epoch": 69.48, |
| "learning_rate": 6.104629281144148e-06, |
| "loss": 0.0922, |
| "step": 184600 |
| }, |
| { |
| "epoch": 69.51, |
| "learning_rate": 6.0971019947308995e-06, |
| "loss": 0.0926, |
| "step": 184700 |
| }, |
| { |
| "epoch": 69.55, |
| "learning_rate": 6.089574708317651e-06, |
| "loss": 0.0928, |
| "step": 184800 |
| }, |
| { |
| "epoch": 69.59, |
| "learning_rate": 6.082047421904404e-06, |
| "loss": 0.092, |
| "step": 184900 |
| }, |
| { |
| "epoch": 69.63, |
| "learning_rate": 6.074520135491156e-06, |
| "loss": 0.0934, |
| "step": 185000 |
| }, |
| { |
| "epoch": 69.67, |
| "learning_rate": 6.066992849077909e-06, |
| "loss": 0.0927, |
| "step": 185100 |
| }, |
| { |
| "epoch": 69.7, |
| "learning_rate": 6.05946556266466e-06, |
| "loss": 0.0923, |
| "step": 185200 |
| }, |
| { |
| "epoch": 69.74, |
| "learning_rate": 6.051938276251412e-06, |
| "loss": 0.0933, |
| "step": 185300 |
| }, |
| { |
| "epoch": 69.78, |
| "learning_rate": 6.0444109898381646e-06, |
| "loss": 0.0914, |
| "step": 185400 |
| }, |
| { |
| "epoch": 69.82, |
| "learning_rate": 6.036883703424916e-06, |
| "loss": 0.0928, |
| "step": 185500 |
| }, |
| { |
| "epoch": 69.85, |
| "learning_rate": 6.029356417011668e-06, |
| "loss": 0.0917, |
| "step": 185600 |
| }, |
| { |
| "epoch": 69.89, |
| "learning_rate": 6.02182913059842e-06, |
| "loss": 0.0932, |
| "step": 185700 |
| }, |
| { |
| "epoch": 69.93, |
| "learning_rate": 6.014301844185172e-06, |
| "loss": 0.0917, |
| "step": 185800 |
| }, |
| { |
| "epoch": 69.97, |
| "learning_rate": 6.006774557771924e-06, |
| "loss": 0.0923, |
| "step": 185900 |
| }, |
| { |
| "epoch": 70.0, |
| "eval_loss": 0.0924314558506012, |
| "eval_runtime": 45.1886, |
| "eval_samples_per_second": 165.971, |
| "eval_steps_per_second": 10.379, |
| "step": 185990 |
| }, |
| { |
| "epoch": 70.0, |
| "learning_rate": 5.999247271358675e-06, |
| "loss": 0.0933, |
| "step": 186000 |
| }, |
| { |
| "epoch": 70.04, |
| "learning_rate": 5.991719984945428e-06, |
| "loss": 0.0918, |
| "step": 186100 |
| }, |
| { |
| "epoch": 70.08, |
| "learning_rate": 5.98419269853218e-06, |
| "loss": 0.0919, |
| "step": 186200 |
| }, |
| { |
| "epoch": 70.12, |
| "learning_rate": 5.976665412118931e-06, |
| "loss": 0.0942, |
| "step": 186300 |
| }, |
| { |
| "epoch": 70.15, |
| "learning_rate": 5.969138125705684e-06, |
| "loss": 0.092, |
| "step": 186400 |
| }, |
| { |
| "epoch": 70.19, |
| "learning_rate": 5.9616108392924354e-06, |
| "loss": 0.0923, |
| "step": 186500 |
| }, |
| { |
| "epoch": 70.23, |
| "learning_rate": 5.954083552879187e-06, |
| "loss": 0.0929, |
| "step": 186600 |
| }, |
| { |
| "epoch": 70.27, |
| "learning_rate": 5.94655626646594e-06, |
| "loss": 0.0932, |
| "step": 186700 |
| }, |
| { |
| "epoch": 70.3, |
| "learning_rate": 5.939028980052691e-06, |
| "loss": 0.0936, |
| "step": 186800 |
| }, |
| { |
| "epoch": 70.34, |
| "learning_rate": 5.931501693639443e-06, |
| "loss": 0.0931, |
| "step": 186900 |
| }, |
| { |
| "epoch": 70.38, |
| "learning_rate": 5.9239744072261955e-06, |
| "loss": 0.0919, |
| "step": 187000 |
| }, |
| { |
| "epoch": 70.42, |
| "learning_rate": 5.916447120812947e-06, |
| "loss": 0.092, |
| "step": 187100 |
| }, |
| { |
| "epoch": 70.46, |
| "learning_rate": 5.908919834399699e-06, |
| "loss": 0.0949, |
| "step": 187200 |
| }, |
| { |
| "epoch": 70.49, |
| "learning_rate": 5.901392547986451e-06, |
| "loss": 0.0931, |
| "step": 187300 |
| }, |
| { |
| "epoch": 70.53, |
| "learning_rate": 5.893865261573203e-06, |
| "loss": 0.0927, |
| "step": 187400 |
| }, |
| { |
| "epoch": 70.57, |
| "learning_rate": 5.8863379751599555e-06, |
| "loss": 0.0918, |
| "step": 187500 |
| }, |
| { |
| "epoch": 70.61, |
| "learning_rate": 5.878810688746708e-06, |
| "loss": 0.0916, |
| "step": 187600 |
| }, |
| { |
| "epoch": 70.64, |
| "learning_rate": 5.87128340233346e-06, |
| "loss": 0.0926, |
| "step": 187700 |
| }, |
| { |
| "epoch": 70.68, |
| "learning_rate": 5.863756115920211e-06, |
| "loss": 0.0913, |
| "step": 187800 |
| }, |
| { |
| "epoch": 70.72, |
| "learning_rate": 5.856228829506964e-06, |
| "loss": 0.093, |
| "step": 187900 |
| }, |
| { |
| "epoch": 70.76, |
| "learning_rate": 5.8487015430937155e-06, |
| "loss": 0.091, |
| "step": 188000 |
| }, |
| { |
| "epoch": 70.79, |
| "learning_rate": 5.841174256680467e-06, |
| "loss": 0.0941, |
| "step": 188100 |
| }, |
| { |
| "epoch": 70.83, |
| "learning_rate": 5.83364697026722e-06, |
| "loss": 0.0933, |
| "step": 188200 |
| }, |
| { |
| "epoch": 70.87, |
| "learning_rate": 5.826119683853971e-06, |
| "loss": 0.0916, |
| "step": 188300 |
| }, |
| { |
| "epoch": 70.91, |
| "learning_rate": 5.818592397440723e-06, |
| "loss": 0.0946, |
| "step": 188400 |
| }, |
| { |
| "epoch": 70.94, |
| "learning_rate": 5.8110651110274755e-06, |
| "loss": 0.0927, |
| "step": 188500 |
| }, |
| { |
| "epoch": 70.98, |
| "learning_rate": 5.803537824614227e-06, |
| "loss": 0.0923, |
| "step": 188600 |
| }, |
| { |
| "epoch": 71.0, |
| "eval_loss": 0.09234917163848877, |
| "eval_runtime": 45.0733, |
| "eval_samples_per_second": 166.395, |
| "eval_steps_per_second": 10.405, |
| "step": 188647 |
| }, |
| { |
| "epoch": 71.02, |
| "learning_rate": 5.796010538200979e-06, |
| "loss": 0.0929, |
| "step": 188700 |
| }, |
| { |
| "epoch": 71.06, |
| "learning_rate": 5.788483251787731e-06, |
| "loss": 0.0928, |
| "step": 188800 |
| }, |
| { |
| "epoch": 71.1, |
| "learning_rate": 5.780955965374483e-06, |
| "loss": 0.0925, |
| "step": 188900 |
| }, |
| { |
| "epoch": 71.13, |
| "learning_rate": 5.773428678961235e-06, |
| "loss": 0.0928, |
| "step": 189000 |
| }, |
| { |
| "epoch": 71.17, |
| "learning_rate": 5.765901392547986e-06, |
| "loss": 0.0943, |
| "step": 189100 |
| }, |
| { |
| "epoch": 71.21, |
| "learning_rate": 5.758374106134739e-06, |
| "loss": 0.092, |
| "step": 189200 |
| }, |
| { |
| "epoch": 71.25, |
| "learning_rate": 5.7508468197214906e-06, |
| "loss": 0.0928, |
| "step": 189300 |
| }, |
| { |
| "epoch": 71.28, |
| "learning_rate": 5.743319533308242e-06, |
| "loss": 0.0917, |
| "step": 189400 |
| }, |
| { |
| "epoch": 71.32, |
| "learning_rate": 5.735792246894995e-06, |
| "loss": 0.0919, |
| "step": 189500 |
| }, |
| { |
| "epoch": 71.36, |
| "learning_rate": 5.728264960481746e-06, |
| "loss": 0.0922, |
| "step": 189600 |
| }, |
| { |
| "epoch": 71.4, |
| "learning_rate": 5.720737674068498e-06, |
| "loss": 0.0931, |
| "step": 189700 |
| }, |
| { |
| "epoch": 71.43, |
| "learning_rate": 5.713210387655251e-06, |
| "loss": 0.0922, |
| "step": 189800 |
| }, |
| { |
| "epoch": 71.47, |
| "learning_rate": 5.705683101242002e-06, |
| "loss": 0.0908, |
| "step": 189900 |
| }, |
| { |
| "epoch": 71.51, |
| "learning_rate": 5.698155814828756e-06, |
| "loss": 0.0909, |
| "step": 190000 |
| }, |
| { |
| "epoch": 71.55, |
| "learning_rate": 5.690628528415507e-06, |
| "loss": 0.0922, |
| "step": 190100 |
| }, |
| { |
| "epoch": 71.58, |
| "learning_rate": 5.683101242002259e-06, |
| "loss": 0.0941, |
| "step": 190200 |
| }, |
| { |
| "epoch": 71.62, |
| "learning_rate": 5.675573955589011e-06, |
| "loss": 0.0931, |
| "step": 190300 |
| }, |
| { |
| "epoch": 71.66, |
| "learning_rate": 5.668046669175763e-06, |
| "loss": 0.092, |
| "step": 190400 |
| }, |
| { |
| "epoch": 71.7, |
| "learning_rate": 5.660519382762515e-06, |
| "loss": 0.0903, |
| "step": 190500 |
| }, |
| { |
| "epoch": 71.74, |
| "learning_rate": 5.6529920963492665e-06, |
| "loss": 0.0917, |
| "step": 190600 |
| }, |
| { |
| "epoch": 71.77, |
| "learning_rate": 5.645464809936019e-06, |
| "loss": 0.0911, |
| "step": 190700 |
| }, |
| { |
| "epoch": 71.81, |
| "learning_rate": 5.637937523522771e-06, |
| "loss": 0.0924, |
| "step": 190800 |
| }, |
| { |
| "epoch": 71.85, |
| "learning_rate": 5.630410237109522e-06, |
| "loss": 0.0922, |
| "step": 190900 |
| }, |
| { |
| "epoch": 71.89, |
| "learning_rate": 5.622882950696275e-06, |
| "loss": 0.0935, |
| "step": 191000 |
| }, |
| { |
| "epoch": 71.92, |
| "learning_rate": 5.6153556642830265e-06, |
| "loss": 0.0912, |
| "step": 191100 |
| }, |
| { |
| "epoch": 71.96, |
| "learning_rate": 5.607828377869778e-06, |
| "loss": 0.0919, |
| "step": 191200 |
| }, |
| { |
| "epoch": 72.0, |
| "learning_rate": 5.600301091456531e-06, |
| "loss": 0.0929, |
| "step": 191300 |
| }, |
| { |
| "epoch": 72.0, |
| "eval_loss": 0.09194895625114441, |
| "eval_runtime": 45.1877, |
| "eval_samples_per_second": 165.975, |
| "eval_steps_per_second": 10.379, |
| "step": 191304 |
| }, |
| { |
| "epoch": 72.04, |
| "learning_rate": 5.592773805043282e-06, |
| "loss": 0.0934, |
| "step": 191400 |
| }, |
| { |
| "epoch": 72.07, |
| "learning_rate": 5.585246518630034e-06, |
| "loss": 0.0922, |
| "step": 191500 |
| }, |
| { |
| "epoch": 72.11, |
| "learning_rate": 5.5777192322167865e-06, |
| "loss": 0.0934, |
| "step": 191600 |
| }, |
| { |
| "epoch": 72.15, |
| "learning_rate": 5.570191945803538e-06, |
| "loss": 0.0902, |
| "step": 191700 |
| }, |
| { |
| "epoch": 72.19, |
| "learning_rate": 5.56266465939029e-06, |
| "loss": 0.0921, |
| "step": 191800 |
| }, |
| { |
| "epoch": 72.22, |
| "learning_rate": 5.5551373729770415e-06, |
| "loss": 0.0922, |
| "step": 191900 |
| }, |
| { |
| "epoch": 72.26, |
| "learning_rate": 5.547610086563794e-06, |
| "loss": 0.0925, |
| "step": 192000 |
| }, |
| { |
| "epoch": 72.3, |
| "learning_rate": 5.540082800150546e-06, |
| "loss": 0.0927, |
| "step": 192100 |
| }, |
| { |
| "epoch": 72.34, |
| "learning_rate": 5.532555513737297e-06, |
| "loss": 0.0923, |
| "step": 192200 |
| }, |
| { |
| "epoch": 72.37, |
| "learning_rate": 5.52502822732405e-06, |
| "loss": 0.0924, |
| "step": 192300 |
| }, |
| { |
| "epoch": 72.41, |
| "learning_rate": 5.5175009409108015e-06, |
| "loss": 0.0919, |
| "step": 192400 |
| }, |
| { |
| "epoch": 72.45, |
| "learning_rate": 5.509973654497555e-06, |
| "loss": 0.0937, |
| "step": 192500 |
| }, |
| { |
| "epoch": 72.49, |
| "learning_rate": 5.5024463680843066e-06, |
| "loss": 0.0919, |
| "step": 192600 |
| }, |
| { |
| "epoch": 72.53, |
| "learning_rate": 5.494919081671058e-06, |
| "loss": 0.0922, |
| "step": 192700 |
| }, |
| { |
| "epoch": 72.56, |
| "learning_rate": 5.487391795257811e-06, |
| "loss": 0.0925, |
| "step": 192800 |
| }, |
| { |
| "epoch": 72.6, |
| "learning_rate": 5.479864508844562e-06, |
| "loss": 0.0919, |
| "step": 192900 |
| }, |
| { |
| "epoch": 72.64, |
| "learning_rate": 5.472337222431314e-06, |
| "loss": 0.0908, |
| "step": 193000 |
| }, |
| { |
| "epoch": 72.68, |
| "learning_rate": 5.464809936018067e-06, |
| "loss": 0.0925, |
| "step": 193100 |
| }, |
| { |
| "epoch": 72.71, |
| "learning_rate": 5.457282649604818e-06, |
| "loss": 0.0921, |
| "step": 193200 |
| }, |
| { |
| "epoch": 72.75, |
| "learning_rate": 5.44975536319157e-06, |
| "loss": 0.0909, |
| "step": 193300 |
| }, |
| { |
| "epoch": 72.79, |
| "learning_rate": 5.442228076778322e-06, |
| "loss": 0.0938, |
| "step": 193400 |
| }, |
| { |
| "epoch": 72.83, |
| "learning_rate": 5.434700790365074e-06, |
| "loss": 0.0919, |
| "step": 193500 |
| }, |
| { |
| "epoch": 72.86, |
| "learning_rate": 5.427173503951826e-06, |
| "loss": 0.0941, |
| "step": 193600 |
| }, |
| { |
| "epoch": 72.9, |
| "learning_rate": 5.4196462175385774e-06, |
| "loss": 0.0916, |
| "step": 193700 |
| }, |
| { |
| "epoch": 72.94, |
| "learning_rate": 5.41211893112533e-06, |
| "loss": 0.0922, |
| "step": 193800 |
| }, |
| { |
| "epoch": 72.98, |
| "learning_rate": 5.404591644712082e-06, |
| "loss": 0.0916, |
| "step": 193900 |
| }, |
| { |
| "epoch": 73.0, |
| "eval_loss": 0.09231603145599365, |
| "eval_runtime": 45.0139, |
| "eval_samples_per_second": 166.615, |
| "eval_steps_per_second": 10.419, |
| "step": 193961 |
| }, |
| { |
| "epoch": 73.01, |
| "learning_rate": 5.397064358298833e-06, |
| "loss": 0.093, |
| "step": 194000 |
| }, |
| { |
| "epoch": 73.05, |
| "learning_rate": 5.389537071885586e-06, |
| "loss": 0.0926, |
| "step": 194100 |
| }, |
| { |
| "epoch": 73.09, |
| "learning_rate": 5.3820097854723375e-06, |
| "loss": 0.0935, |
| "step": 194200 |
| }, |
| { |
| "epoch": 73.13, |
| "learning_rate": 5.374482499059089e-06, |
| "loss": 0.0924, |
| "step": 194300 |
| }, |
| { |
| "epoch": 73.17, |
| "learning_rate": 5.366955212645842e-06, |
| "loss": 0.0918, |
| "step": 194400 |
| }, |
| { |
| "epoch": 73.2, |
| "learning_rate": 5.359427926232593e-06, |
| "loss": 0.0929, |
| "step": 194500 |
| }, |
| { |
| "epoch": 73.24, |
| "learning_rate": 5.351900639819345e-06, |
| "loss": 0.0932, |
| "step": 194600 |
| }, |
| { |
| "epoch": 73.28, |
| "learning_rate": 5.3443733534060975e-06, |
| "loss": 0.092, |
| "step": 194700 |
| }, |
| { |
| "epoch": 73.32, |
| "learning_rate": 5.336846066992849e-06, |
| "loss": 0.0913, |
| "step": 194800 |
| }, |
| { |
| "epoch": 73.35, |
| "learning_rate": 5.329318780579601e-06, |
| "loss": 0.093, |
| "step": 194900 |
| }, |
| { |
| "epoch": 73.39, |
| "learning_rate": 5.321791494166354e-06, |
| "loss": 0.0922, |
| "step": 195000 |
| }, |
| { |
| "epoch": 73.43, |
| "learning_rate": 5.314264207753106e-06, |
| "loss": 0.0898, |
| "step": 195100 |
| }, |
| { |
| "epoch": 73.47, |
| "learning_rate": 5.3067369213398575e-06, |
| "loss": 0.0921, |
| "step": 195200 |
| }, |
| { |
| "epoch": 73.5, |
| "learning_rate": 5.29920963492661e-06, |
| "loss": 0.0927, |
| "step": 195300 |
| }, |
| { |
| "epoch": 73.54, |
| "learning_rate": 5.291682348513362e-06, |
| "loss": 0.0932, |
| "step": 195400 |
| }, |
| { |
| "epoch": 73.58, |
| "learning_rate": 5.284155062100113e-06, |
| "loss": 0.092, |
| "step": 195500 |
| }, |
| { |
| "epoch": 73.62, |
| "learning_rate": 5.276627775686866e-06, |
| "loss": 0.091, |
| "step": 195600 |
| }, |
| { |
| "epoch": 73.65, |
| "learning_rate": 5.2691004892736175e-06, |
| "loss": 0.093, |
| "step": 195700 |
| }, |
| { |
| "epoch": 73.69, |
| "learning_rate": 5.261573202860369e-06, |
| "loss": 0.0943, |
| "step": 195800 |
| }, |
| { |
| "epoch": 73.73, |
| "learning_rate": 5.254045916447122e-06, |
| "loss": 0.0913, |
| "step": 195900 |
| }, |
| { |
| "epoch": 73.77, |
| "learning_rate": 5.246518630033873e-06, |
| "loss": 0.0905, |
| "step": 196000 |
| }, |
| { |
| "epoch": 73.81, |
| "learning_rate": 5.238991343620625e-06, |
| "loss": 0.0905, |
| "step": 196100 |
| }, |
| { |
| "epoch": 73.84, |
| "learning_rate": 5.2314640572073776e-06, |
| "loss": 0.0923, |
| "step": 196200 |
| }, |
| { |
| "epoch": 73.88, |
| "learning_rate": 5.223936770794129e-06, |
| "loss": 0.0923, |
| "step": 196300 |
| }, |
| { |
| "epoch": 73.92, |
| "learning_rate": 5.216409484380881e-06, |
| "loss": 0.0927, |
| "step": 196400 |
| }, |
| { |
| "epoch": 73.96, |
| "learning_rate": 5.2088821979676326e-06, |
| "loss": 0.093, |
| "step": 196500 |
| }, |
| { |
| "epoch": 73.99, |
| "learning_rate": 5.201354911554385e-06, |
| "loss": 0.0927, |
| "step": 196600 |
| }, |
| { |
| "epoch": 74.0, |
| "eval_loss": 0.0920698270201683, |
| "eval_runtime": 45.1732, |
| "eval_samples_per_second": 166.028, |
| "eval_steps_per_second": 10.382, |
| "step": 196618 |
| }, |
| { |
| "epoch": 74.03, |
| "learning_rate": 5.193827625141137e-06, |
| "loss": 0.0911, |
| "step": 196700 |
| }, |
| { |
| "epoch": 74.07, |
| "learning_rate": 5.186300338727888e-06, |
| "loss": 0.0921, |
| "step": 196800 |
| }, |
| { |
| "epoch": 74.11, |
| "learning_rate": 5.178773052314641e-06, |
| "loss": 0.0924, |
| "step": 196900 |
| }, |
| { |
| "epoch": 74.14, |
| "learning_rate": 5.171245765901393e-06, |
| "loss": 0.0937, |
| "step": 197000 |
| }, |
| { |
| "epoch": 74.18, |
| "learning_rate": 5.163718479488144e-06, |
| "loss": 0.0927, |
| "step": 197100 |
| }, |
| { |
| "epoch": 74.22, |
| "learning_rate": 5.156191193074897e-06, |
| "loss": 0.0932, |
| "step": 197200 |
| }, |
| { |
| "epoch": 74.26, |
| "learning_rate": 5.1486639066616484e-06, |
| "loss": 0.091, |
| "step": 197300 |
| }, |
| { |
| "epoch": 74.29, |
| "learning_rate": 5.1411366202484e-06, |
| "loss": 0.0914, |
| "step": 197400 |
| }, |
| { |
| "epoch": 74.33, |
| "learning_rate": 5.1336093338351535e-06, |
| "loss": 0.0921, |
| "step": 197500 |
| }, |
| { |
| "epoch": 74.37, |
| "learning_rate": 5.126082047421905e-06, |
| "loss": 0.0932, |
| "step": 197600 |
| }, |
| { |
| "epoch": 74.41, |
| "learning_rate": 5.118554761008658e-06, |
| "loss": 0.0928, |
| "step": 197700 |
| }, |
| { |
| "epoch": 74.44, |
| "learning_rate": 5.111027474595409e-06, |
| "loss": 0.0918, |
| "step": 197800 |
| }, |
| { |
| "epoch": 74.48, |
| "learning_rate": 5.103500188182161e-06, |
| "loss": 0.0894, |
| "step": 197900 |
| }, |
| { |
| "epoch": 74.52, |
| "learning_rate": 5.095972901768913e-06, |
| "loss": 0.091, |
| "step": 198000 |
| }, |
| { |
| "epoch": 74.56, |
| "learning_rate": 5.088445615355665e-06, |
| "loss": 0.0929, |
| "step": 198100 |
| }, |
| { |
| "epoch": 74.6, |
| "learning_rate": 5.080918328942417e-06, |
| "loss": 0.0931, |
| "step": 198200 |
| }, |
| { |
| "epoch": 74.63, |
| "learning_rate": 5.0733910425291685e-06, |
| "loss": 0.092, |
| "step": 198300 |
| }, |
| { |
| "epoch": 74.67, |
| "learning_rate": 5.065863756115921e-06, |
| "loss": 0.0916, |
| "step": 198400 |
| }, |
| { |
| "epoch": 74.71, |
| "learning_rate": 5.058336469702673e-06, |
| "loss": 0.0914, |
| "step": 198500 |
| }, |
| { |
| "epoch": 74.75, |
| "learning_rate": 5.050809183289424e-06, |
| "loss": 0.0929, |
| "step": 198600 |
| }, |
| { |
| "epoch": 74.78, |
| "learning_rate": 5.043281896876177e-06, |
| "loss": 0.0913, |
| "step": 198700 |
| }, |
| { |
| "epoch": 74.82, |
| "learning_rate": 5.0357546104629285e-06, |
| "loss": 0.0918, |
| "step": 198800 |
| }, |
| { |
| "epoch": 74.86, |
| "learning_rate": 5.02822732404968e-06, |
| "loss": 0.0922, |
| "step": 198900 |
| }, |
| { |
| "epoch": 74.9, |
| "learning_rate": 5.020700037636433e-06, |
| "loss": 0.0918, |
| "step": 199000 |
| }, |
| { |
| "epoch": 74.93, |
| "learning_rate": 5.013172751223184e-06, |
| "loss": 0.0918, |
| "step": 199100 |
| }, |
| { |
| "epoch": 74.97, |
| "learning_rate": 5.005645464809936e-06, |
| "loss": 0.0907, |
| "step": 199200 |
| }, |
| { |
| "epoch": 75.0, |
| "eval_loss": 0.09217877686023712, |
| "eval_runtime": 44.7295, |
| "eval_samples_per_second": 167.675, |
| "eval_steps_per_second": 10.485, |
| "step": 199275 |
| }, |
| { |
| "epoch": 75.01, |
| "learning_rate": 4.9981181783966885e-06, |
| "loss": 0.0917, |
| "step": 199300 |
| }, |
| { |
| "epoch": 75.05, |
| "learning_rate": 4.99059089198344e-06, |
| "loss": 0.0921, |
| "step": 199400 |
| }, |
| { |
| "epoch": 75.08, |
| "learning_rate": 4.983063605570193e-06, |
| "loss": 0.0925, |
| "step": 199500 |
| }, |
| { |
| "epoch": 75.12, |
| "learning_rate": 4.975536319156944e-06, |
| "loss": 0.0928, |
| "step": 199600 |
| }, |
| { |
| "epoch": 75.16, |
| "learning_rate": 4.968009032743696e-06, |
| "loss": 0.092, |
| "step": 199700 |
| }, |
| { |
| "epoch": 75.2, |
| "learning_rate": 4.9604817463304486e-06, |
| "loss": 0.0913, |
| "step": 199800 |
| }, |
| { |
| "epoch": 75.24, |
| "learning_rate": 4.9529544599172e-06, |
| "loss": 0.0928, |
| "step": 199900 |
| }, |
| { |
| "epoch": 75.27, |
| "learning_rate": 4.945427173503952e-06, |
| "loss": 0.0917, |
| "step": 200000 |
| }, |
| { |
| "epoch": 75.31, |
| "learning_rate": 4.937899887090704e-06, |
| "loss": 0.0928, |
| "step": 200100 |
| }, |
| { |
| "epoch": 75.35, |
| "learning_rate": 4.930372600677456e-06, |
| "loss": 0.0919, |
| "step": 200200 |
| }, |
| { |
| "epoch": 75.39, |
| "learning_rate": 4.922845314264208e-06, |
| "loss": 0.0919, |
| "step": 200300 |
| }, |
| { |
| "epoch": 75.42, |
| "learning_rate": 4.91531802785096e-06, |
| "loss": 0.092, |
| "step": 200400 |
| }, |
| { |
| "epoch": 75.46, |
| "learning_rate": 4.907790741437712e-06, |
| "loss": 0.0902, |
| "step": 200500 |
| }, |
| { |
| "epoch": 75.5, |
| "learning_rate": 4.9002634550244644e-06, |
| "loss": 0.0911, |
| "step": 200600 |
| }, |
| { |
| "epoch": 75.54, |
| "learning_rate": 4.892736168611216e-06, |
| "loss": 0.093, |
| "step": 200700 |
| }, |
| { |
| "epoch": 75.57, |
| "learning_rate": 4.885208882197968e-06, |
| "loss": 0.0917, |
| "step": 200800 |
| }, |
| { |
| "epoch": 75.61, |
| "learning_rate": 4.87768159578472e-06, |
| "loss": 0.0911, |
| "step": 200900 |
| }, |
| { |
| "epoch": 75.65, |
| "learning_rate": 4.870154309371472e-06, |
| "loss": 0.0927, |
| "step": 201000 |
| }, |
| { |
| "epoch": 75.69, |
| "learning_rate": 4.862627022958224e-06, |
| "loss": 0.0917, |
| "step": 201100 |
| }, |
| { |
| "epoch": 75.72, |
| "learning_rate": 4.855099736544976e-06, |
| "loss": 0.0936, |
| "step": 201200 |
| }, |
| { |
| "epoch": 75.76, |
| "learning_rate": 4.847572450131728e-06, |
| "loss": 0.0938, |
| "step": 201300 |
| }, |
| { |
| "epoch": 75.8, |
| "learning_rate": 4.8400451637184795e-06, |
| "loss": 0.0896, |
| "step": 201400 |
| }, |
| { |
| "epoch": 75.84, |
| "learning_rate": 4.832517877305232e-06, |
| "loss": 0.0934, |
| "step": 201500 |
| }, |
| { |
| "epoch": 75.88, |
| "learning_rate": 4.824990590891984e-06, |
| "loss": 0.0902, |
| "step": 201600 |
| }, |
| { |
| "epoch": 75.91, |
| "learning_rate": 4.817463304478735e-06, |
| "loss": 0.0906, |
| "step": 201700 |
| }, |
| { |
| "epoch": 75.95, |
| "learning_rate": 4.809936018065488e-06, |
| "loss": 0.0906, |
| "step": 201800 |
| }, |
| { |
| "epoch": 75.99, |
| "learning_rate": 4.80240873165224e-06, |
| "loss": 0.0927, |
| "step": 201900 |
| }, |
| { |
| "epoch": 76.0, |
| "eval_loss": 0.09185120463371277, |
| "eval_runtime": 44.8491, |
| "eval_samples_per_second": 167.228, |
| "eval_steps_per_second": 10.457, |
| "step": 201932 |
| }, |
| { |
| "epoch": 76.03, |
| "learning_rate": 4.794881445238992e-06, |
| "loss": 0.0924, |
| "step": 202000 |
| }, |
| { |
| "epoch": 76.06, |
| "learning_rate": 4.787354158825744e-06, |
| "loss": 0.0922, |
| "step": 202100 |
| }, |
| { |
| "epoch": 76.1, |
| "learning_rate": 4.779826872412496e-06, |
| "loss": 0.0933, |
| "step": 202200 |
| }, |
| { |
| "epoch": 76.14, |
| "learning_rate": 4.772299585999248e-06, |
| "loss": 0.0922, |
| "step": 202300 |
| }, |
| { |
| "epoch": 76.18, |
| "learning_rate": 4.7647722995859995e-06, |
| "loss": 0.0916, |
| "step": 202400 |
| }, |
| { |
| "epoch": 76.21, |
| "learning_rate": 4.757245013172751e-06, |
| "loss": 0.0916, |
| "step": 202500 |
| }, |
| { |
| "epoch": 76.25, |
| "learning_rate": 4.749717726759504e-06, |
| "loss": 0.0931, |
| "step": 202600 |
| }, |
| { |
| "epoch": 76.29, |
| "learning_rate": 4.742190440346255e-06, |
| "loss": 0.0907, |
| "step": 202700 |
| }, |
| { |
| "epoch": 76.33, |
| "learning_rate": 4.734663153933007e-06, |
| "loss": 0.092, |
| "step": 202800 |
| }, |
| { |
| "epoch": 76.36, |
| "learning_rate": 4.7271358675197595e-06, |
| "loss": 0.0912, |
| "step": 202900 |
| }, |
| { |
| "epoch": 76.4, |
| "learning_rate": 4.719608581106512e-06, |
| "loss": 0.0924, |
| "step": 203000 |
| }, |
| { |
| "epoch": 76.44, |
| "learning_rate": 4.712081294693264e-06, |
| "loss": 0.0915, |
| "step": 203100 |
| }, |
| { |
| "epoch": 76.48, |
| "learning_rate": 4.704554008280015e-06, |
| "loss": 0.0908, |
| "step": 203200 |
| }, |
| { |
| "epoch": 76.51, |
| "learning_rate": 4.697026721866768e-06, |
| "loss": 0.0919, |
| "step": 203300 |
| }, |
| { |
| "epoch": 76.55, |
| "learning_rate": 4.6894994354535196e-06, |
| "loss": 0.0912, |
| "step": 203400 |
| }, |
| { |
| "epoch": 76.59, |
| "learning_rate": 4.681972149040271e-06, |
| "loss": 0.092, |
| "step": 203500 |
| }, |
| { |
| "epoch": 76.63, |
| "learning_rate": 4.674444862627024e-06, |
| "loss": 0.091, |
| "step": 203600 |
| }, |
| { |
| "epoch": 76.67, |
| "learning_rate": 4.666917576213775e-06, |
| "loss": 0.0928, |
| "step": 203700 |
| }, |
| { |
| "epoch": 76.7, |
| "learning_rate": 4.659390289800527e-06, |
| "loss": 0.0902, |
| "step": 203800 |
| }, |
| { |
| "epoch": 76.74, |
| "learning_rate": 4.651863003387279e-06, |
| "loss": 0.0921, |
| "step": 203900 |
| }, |
| { |
| "epoch": 76.78, |
| "learning_rate": 4.644335716974031e-06, |
| "loss": 0.0899, |
| "step": 204000 |
| }, |
| { |
| "epoch": 76.82, |
| "learning_rate": 4.636808430560783e-06, |
| "loss": 0.0897, |
| "step": 204100 |
| }, |
| { |
| "epoch": 76.85, |
| "learning_rate": 4.629281144147535e-06, |
| "loss": 0.0909, |
| "step": 204200 |
| }, |
| { |
| "epoch": 76.89, |
| "learning_rate": 4.621753857734287e-06, |
| "loss": 0.0933, |
| "step": 204300 |
| }, |
| { |
| "epoch": 76.93, |
| "learning_rate": 4.61422657132104e-06, |
| "loss": 0.0937, |
| "step": 204400 |
| }, |
| { |
| "epoch": 76.97, |
| "learning_rate": 4.606699284907791e-06, |
| "loss": 0.0925, |
| "step": 204500 |
| }, |
| { |
| "epoch": 77.0, |
| "eval_loss": 0.09133084863424301, |
| "eval_runtime": 45.172, |
| "eval_samples_per_second": 166.032, |
| "eval_steps_per_second": 10.383, |
| "step": 204589 |
| }, |
| { |
| "epoch": 77.0, |
| "learning_rate": 4.599171998494543e-06, |
| "loss": 0.0911, |
| "step": 204600 |
| }, |
| { |
| "epoch": 77.04, |
| "learning_rate": 4.5916447120812955e-06, |
| "loss": 0.0936, |
| "step": 204700 |
| }, |
| { |
| "epoch": 77.08, |
| "learning_rate": 4.584117425668047e-06, |
| "loss": 0.0913, |
| "step": 204800 |
| }, |
| { |
| "epoch": 77.12, |
| "learning_rate": 4.576590139254799e-06, |
| "loss": 0.0911, |
| "step": 204900 |
| }, |
| { |
| "epoch": 77.15, |
| "learning_rate": 4.569062852841551e-06, |
| "loss": 0.0921, |
| "step": 205000 |
| }, |
| { |
| "epoch": 77.19, |
| "learning_rate": 4.561535566428303e-06, |
| "loss": 0.0918, |
| "step": 205100 |
| }, |
| { |
| "epoch": 77.23, |
| "learning_rate": 4.554008280015055e-06, |
| "loss": 0.0918, |
| "step": 205200 |
| }, |
| { |
| "epoch": 77.27, |
| "learning_rate": 4.546480993601806e-06, |
| "loss": 0.0922, |
| "step": 205300 |
| }, |
| { |
| "epoch": 77.31, |
| "learning_rate": 4.538953707188559e-06, |
| "loss": 0.0918, |
| "step": 205400 |
| }, |
| { |
| "epoch": 77.34, |
| "learning_rate": 4.531426420775311e-06, |
| "loss": 0.0921, |
| "step": 205500 |
| }, |
| { |
| "epoch": 77.38, |
| "learning_rate": 4.523899134362063e-06, |
| "loss": 0.0906, |
| "step": 205600 |
| }, |
| { |
| "epoch": 77.42, |
| "learning_rate": 4.516371847948815e-06, |
| "loss": 0.0905, |
| "step": 205700 |
| }, |
| { |
| "epoch": 77.46, |
| "learning_rate": 4.508844561535567e-06, |
| "loss": 0.0913, |
| "step": 205800 |
| }, |
| { |
| "epoch": 77.49, |
| "learning_rate": 4.501317275122319e-06, |
| "loss": 0.0928, |
| "step": 205900 |
| }, |
| { |
| "epoch": 77.53, |
| "learning_rate": 4.4937899887090705e-06, |
| "loss": 0.0931, |
| "step": 206000 |
| }, |
| { |
| "epoch": 77.57, |
| "learning_rate": 4.486262702295823e-06, |
| "loss": 0.0925, |
| "step": 206100 |
| }, |
| { |
| "epoch": 77.61, |
| "learning_rate": 4.478735415882575e-06, |
| "loss": 0.0907, |
| "step": 206200 |
| }, |
| { |
| "epoch": 77.64, |
| "learning_rate": 4.471208129469326e-06, |
| "loss": 0.0901, |
| "step": 206300 |
| }, |
| { |
| "epoch": 77.68, |
| "learning_rate": 4.463680843056079e-06, |
| "loss": 0.0925, |
| "step": 206400 |
| }, |
| { |
| "epoch": 77.72, |
| "learning_rate": 4.4561535566428305e-06, |
| "loss": 0.0899, |
| "step": 206500 |
| }, |
| { |
| "epoch": 77.76, |
| "learning_rate": 4.448626270229582e-06, |
| "loss": 0.0911, |
| "step": 206600 |
| }, |
| { |
| "epoch": 77.79, |
| "learning_rate": 4.441098983816335e-06, |
| "loss": 0.0921, |
| "step": 206700 |
| }, |
| { |
| "epoch": 77.83, |
| "learning_rate": 4.433571697403086e-06, |
| "loss": 0.092, |
| "step": 206800 |
| }, |
| { |
| "epoch": 77.87, |
| "learning_rate": 4.426044410989839e-06, |
| "loss": 0.0925, |
| "step": 206900 |
| }, |
| { |
| "epoch": 77.91, |
| "learning_rate": 4.4185171245765906e-06, |
| "loss": 0.0904, |
| "step": 207000 |
| }, |
| { |
| "epoch": 77.95, |
| "learning_rate": 4.410989838163342e-06, |
| "loss": 0.0904, |
| "step": 207100 |
| }, |
| { |
| "epoch": 77.98, |
| "learning_rate": 4.403462551750095e-06, |
| "loss": 0.0921, |
| "step": 207200 |
| }, |
| { |
| "epoch": 78.0, |
| "eval_loss": 0.09170127660036087, |
| "eval_runtime": 44.7794, |
| "eval_samples_per_second": 167.488, |
| "eval_steps_per_second": 10.474, |
| "step": 207246 |
| }, |
| { |
| "epoch": 78.02, |
| "learning_rate": 4.395935265336846e-06, |
| "loss": 0.0911, |
| "step": 207300 |
| }, |
| { |
| "epoch": 78.06, |
| "learning_rate": 4.388407978923598e-06, |
| "loss": 0.0918, |
| "step": 207400 |
| }, |
| { |
| "epoch": 78.1, |
| "learning_rate": 4.380880692510351e-06, |
| "loss": 0.0918, |
| "step": 207500 |
| }, |
| { |
| "epoch": 78.13, |
| "learning_rate": 4.373353406097102e-06, |
| "loss": 0.0901, |
| "step": 207600 |
| }, |
| { |
| "epoch": 78.17, |
| "learning_rate": 4.365826119683854e-06, |
| "loss": 0.0909, |
| "step": 207700 |
| }, |
| { |
| "epoch": 78.21, |
| "learning_rate": 4.3582988332706064e-06, |
| "loss": 0.0924, |
| "step": 207800 |
| }, |
| { |
| "epoch": 78.25, |
| "learning_rate": 4.350771546857358e-06, |
| "loss": 0.093, |
| "step": 207900 |
| }, |
| { |
| "epoch": 78.28, |
| "learning_rate": 4.343244260444111e-06, |
| "loss": 0.0917, |
| "step": 208000 |
| }, |
| { |
| "epoch": 78.32, |
| "learning_rate": 4.335716974030862e-06, |
| "loss": 0.0918, |
| "step": 208100 |
| }, |
| { |
| "epoch": 78.36, |
| "learning_rate": 4.328189687617615e-06, |
| "loss": 0.0917, |
| "step": 208200 |
| }, |
| { |
| "epoch": 78.4, |
| "learning_rate": 4.3206624012043665e-06, |
| "loss": 0.0918, |
| "step": 208300 |
| }, |
| { |
| "epoch": 78.43, |
| "learning_rate": 4.313135114791118e-06, |
| "loss": 0.0936, |
| "step": 208400 |
| }, |
| { |
| "epoch": 78.47, |
| "learning_rate": 4.30560782837787e-06, |
| "loss": 0.092, |
| "step": 208500 |
| }, |
| { |
| "epoch": 78.51, |
| "learning_rate": 4.298080541964622e-06, |
| "loss": 0.0911, |
| "step": 208600 |
| }, |
| { |
| "epoch": 78.55, |
| "learning_rate": 4.290553255551374e-06, |
| "loss": 0.0909, |
| "step": 208700 |
| }, |
| { |
| "epoch": 78.58, |
| "learning_rate": 4.283025969138126e-06, |
| "loss": 0.0913, |
| "step": 208800 |
| }, |
| { |
| "epoch": 78.62, |
| "learning_rate": 4.275498682724878e-06, |
| "loss": 0.0902, |
| "step": 208900 |
| }, |
| { |
| "epoch": 78.66, |
| "learning_rate": 4.26797139631163e-06, |
| "loss": 0.0917, |
| "step": 209000 |
| }, |
| { |
| "epoch": 78.7, |
| "learning_rate": 4.2604441098983815e-06, |
| "loss": 0.092, |
| "step": 209100 |
| }, |
| { |
| "epoch": 78.74, |
| "learning_rate": 4.252916823485134e-06, |
| "loss": 0.0927, |
| "step": 209200 |
| }, |
| { |
| "epoch": 78.77, |
| "learning_rate": 4.2453895370718865e-06, |
| "loss": 0.093, |
| "step": 209300 |
| }, |
| { |
| "epoch": 78.81, |
| "learning_rate": 4.237862250658638e-06, |
| "loss": 0.0923, |
| "step": 209400 |
| }, |
| { |
| "epoch": 78.85, |
| "learning_rate": 4.23033496424539e-06, |
| "loss": 0.0911, |
| "step": 209500 |
| }, |
| { |
| "epoch": 78.89, |
| "learning_rate": 4.222807677832142e-06, |
| "loss": 0.0914, |
| "step": 209600 |
| }, |
| { |
| "epoch": 78.92, |
| "learning_rate": 4.215280391418894e-06, |
| "loss": 0.092, |
| "step": 209700 |
| }, |
| { |
| "epoch": 78.96, |
| "learning_rate": 4.207753105005646e-06, |
| "loss": 0.0919, |
| "step": 209800 |
| }, |
| { |
| "epoch": 79.0, |
| "learning_rate": 4.200225818592397e-06, |
| "loss": 0.0895, |
| "step": 209900 |
| }, |
| { |
| "epoch": 79.0, |
| "eval_loss": 0.09116315096616745, |
| "eval_runtime": 45.3163, |
| "eval_samples_per_second": 165.503, |
| "eval_steps_per_second": 10.349, |
| "step": 209903 |
| }, |
| { |
| "epoch": 79.04, |
| "learning_rate": 4.19269853217915e-06, |
| "loss": 0.0912, |
| "step": 210000 |
| }, |
| { |
| "epoch": 79.07, |
| "learning_rate": 4.1851712457659015e-06, |
| "loss": 0.0909, |
| "step": 210100 |
| }, |
| { |
| "epoch": 79.11, |
| "learning_rate": 4.177643959352653e-06, |
| "loss": 0.0911, |
| "step": 210200 |
| }, |
| { |
| "epoch": 79.15, |
| "learning_rate": 4.170116672939406e-06, |
| "loss": 0.0925, |
| "step": 210300 |
| }, |
| { |
| "epoch": 79.19, |
| "learning_rate": 4.162589386526157e-06, |
| "loss": 0.0926, |
| "step": 210400 |
| }, |
| { |
| "epoch": 79.22, |
| "learning_rate": 4.15506210011291e-06, |
| "loss": 0.0913, |
| "step": 210500 |
| }, |
| { |
| "epoch": 79.26, |
| "learning_rate": 4.1475348136996616e-06, |
| "loss": 0.0907, |
| "step": 210600 |
| }, |
| { |
| "epoch": 79.3, |
| "learning_rate": 4.140007527286414e-06, |
| "loss": 0.0912, |
| "step": 210700 |
| }, |
| { |
| "epoch": 79.34, |
| "learning_rate": 4.132480240873166e-06, |
| "loss": 0.0906, |
| "step": 210800 |
| }, |
| { |
| "epoch": 79.38, |
| "learning_rate": 4.124952954459917e-06, |
| "loss": 0.0914, |
| "step": 210900 |
| }, |
| { |
| "epoch": 79.41, |
| "learning_rate": 4.11742566804667e-06, |
| "loss": 0.0916, |
| "step": 211000 |
| }, |
| { |
| "epoch": 79.45, |
| "learning_rate": 4.109898381633422e-06, |
| "loss": 0.0909, |
| "step": 211100 |
| }, |
| { |
| "epoch": 79.49, |
| "learning_rate": 4.102371095220173e-06, |
| "loss": 0.0917, |
| "step": 211200 |
| }, |
| { |
| "epoch": 79.53, |
| "learning_rate": 4.094843808806925e-06, |
| "loss": 0.0917, |
| "step": 211300 |
| }, |
| { |
| "epoch": 79.56, |
| "learning_rate": 4.0873165223936774e-06, |
| "loss": 0.0915, |
| "step": 211400 |
| }, |
| { |
| "epoch": 79.6, |
| "learning_rate": 4.079789235980429e-06, |
| "loss": 0.091, |
| "step": 211500 |
| }, |
| { |
| "epoch": 79.64, |
| "learning_rate": 4.072261949567181e-06, |
| "loss": 0.0915, |
| "step": 211600 |
| }, |
| { |
| "epoch": 79.68, |
| "learning_rate": 4.064734663153933e-06, |
| "loss": 0.0919, |
| "step": 211700 |
| }, |
| { |
| "epoch": 79.71, |
| "learning_rate": 4.057207376740686e-06, |
| "loss": 0.0909, |
| "step": 211800 |
| }, |
| { |
| "epoch": 79.75, |
| "learning_rate": 4.0496800903274375e-06, |
| "loss": 0.0915, |
| "step": 211900 |
| }, |
| { |
| "epoch": 79.79, |
| "learning_rate": 4.042152803914189e-06, |
| "loss": 0.0904, |
| "step": 212000 |
| }, |
| { |
| "epoch": 79.83, |
| "learning_rate": 4.034625517500942e-06, |
| "loss": 0.09, |
| "step": 212100 |
| }, |
| { |
| "epoch": 79.86, |
| "learning_rate": 4.027098231087693e-06, |
| "loss": 0.0918, |
| "step": 212200 |
| }, |
| { |
| "epoch": 79.9, |
| "learning_rate": 4.019570944674445e-06, |
| "loss": 0.0912, |
| "step": 212300 |
| }, |
| { |
| "epoch": 79.94, |
| "learning_rate": 4.0120436582611975e-06, |
| "loss": 0.0903, |
| "step": 212400 |
| }, |
| { |
| "epoch": 79.98, |
| "learning_rate": 4.004516371847949e-06, |
| "loss": 0.0916, |
| "step": 212500 |
| }, |
| { |
| "epoch": 80.0, |
| "eval_loss": 0.09135947376489639, |
| "eval_runtime": 45.1657, |
| "eval_samples_per_second": 166.055, |
| "eval_steps_per_second": 10.384, |
| "step": 212560 |
| }, |
| { |
| "epoch": 80.02, |
| "learning_rate": 3.996989085434701e-06, |
| "loss": 0.0909, |
| "step": 212600 |
| }, |
| { |
| "epoch": 80.05, |
| "learning_rate": 3.989461799021453e-06, |
| "loss": 0.0912, |
| "step": 212700 |
| }, |
| { |
| "epoch": 80.09, |
| "learning_rate": 3.981934512608205e-06, |
| "loss": 0.091, |
| "step": 212800 |
| }, |
| { |
| "epoch": 80.13, |
| "learning_rate": 3.974407226194957e-06, |
| "loss": 0.0916, |
| "step": 212900 |
| }, |
| { |
| "epoch": 80.17, |
| "learning_rate": 3.966879939781709e-06, |
| "loss": 0.0918, |
| "step": 213000 |
| }, |
| { |
| "epoch": 80.2, |
| "learning_rate": 3.959352653368461e-06, |
| "loss": 0.0927, |
| "step": 213100 |
| }, |
| { |
| "epoch": 80.24, |
| "learning_rate": 3.951825366955213e-06, |
| "loss": 0.0928, |
| "step": 213200 |
| }, |
| { |
| "epoch": 80.28, |
| "learning_rate": 3.944298080541965e-06, |
| "loss": 0.0902, |
| "step": 213300 |
| }, |
| { |
| "epoch": 80.32, |
| "learning_rate": 3.936770794128717e-06, |
| "loss": 0.0909, |
| "step": 213400 |
| }, |
| { |
| "epoch": 80.35, |
| "learning_rate": 3.929243507715469e-06, |
| "loss": 0.0909, |
| "step": 213500 |
| }, |
| { |
| "epoch": 80.39, |
| "learning_rate": 3.921716221302221e-06, |
| "loss": 0.089, |
| "step": 213600 |
| }, |
| { |
| "epoch": 80.43, |
| "learning_rate": 3.9141889348889725e-06, |
| "loss": 0.0928, |
| "step": 213700 |
| }, |
| { |
| "epoch": 80.47, |
| "learning_rate": 3.906661648475725e-06, |
| "loss": 0.0895, |
| "step": 213800 |
| }, |
| { |
| "epoch": 80.5, |
| "learning_rate": 3.899134362062477e-06, |
| "loss": 0.091, |
| "step": 213900 |
| }, |
| { |
| "epoch": 80.54, |
| "learning_rate": 3.891607075649228e-06, |
| "loss": 0.0908, |
| "step": 214000 |
| }, |
| { |
| "epoch": 80.58, |
| "learning_rate": 3.884079789235981e-06, |
| "loss": 0.0921, |
| "step": 214100 |
| }, |
| { |
| "epoch": 80.62, |
| "learning_rate": 3.8765525028227326e-06, |
| "loss": 0.0904, |
| "step": 214200 |
| }, |
| { |
| "epoch": 80.65, |
| "learning_rate": 3.869025216409485e-06, |
| "loss": 0.0914, |
| "step": 214300 |
| }, |
| { |
| "epoch": 80.69, |
| "learning_rate": 3.861497929996237e-06, |
| "loss": 0.0898, |
| "step": 214400 |
| }, |
| { |
| "epoch": 80.73, |
| "learning_rate": 3.853970643582988e-06, |
| "loss": 0.0923, |
| "step": 214500 |
| }, |
| { |
| "epoch": 80.77, |
| "learning_rate": 3.846443357169741e-06, |
| "loss": 0.0934, |
| "step": 214600 |
| }, |
| { |
| "epoch": 80.81, |
| "learning_rate": 3.838916070756493e-06, |
| "loss": 0.0914, |
| "step": 214700 |
| }, |
| { |
| "epoch": 80.84, |
| "learning_rate": 3.831388784343244e-06, |
| "loss": 0.09, |
| "step": 214800 |
| }, |
| { |
| "epoch": 80.88, |
| "learning_rate": 3.823861497929997e-06, |
| "loss": 0.091, |
| "step": 214900 |
| }, |
| { |
| "epoch": 80.92, |
| "learning_rate": 3.8163342115167484e-06, |
| "loss": 0.0907, |
| "step": 215000 |
| }, |
| { |
| "epoch": 80.96, |
| "learning_rate": 3.8088069251035005e-06, |
| "loss": 0.0913, |
| "step": 215100 |
| }, |
| { |
| "epoch": 80.99, |
| "learning_rate": 3.801279638690252e-06, |
| "loss": 0.09, |
| "step": 215200 |
| }, |
| { |
| "epoch": 81.0, |
| "eval_loss": 0.09087579697370529, |
| "eval_runtime": 45.0879, |
| "eval_samples_per_second": 166.342, |
| "eval_steps_per_second": 10.402, |
| "step": 215217 |
| }, |
| { |
| "epoch": 81.03, |
| "learning_rate": 3.7937523522770043e-06, |
| "loss": 0.0912, |
| "step": 215300 |
| }, |
| { |
| "epoch": 81.07, |
| "learning_rate": 3.786225065863756e-06, |
| "loss": 0.0922, |
| "step": 215400 |
| }, |
| { |
| "epoch": 81.11, |
| "learning_rate": 3.7786977794505085e-06, |
| "loss": 0.0913, |
| "step": 215500 |
| }, |
| { |
| "epoch": 81.14, |
| "learning_rate": 3.7711704930372606e-06, |
| "loss": 0.0917, |
| "step": 215600 |
| }, |
| { |
| "epoch": 81.18, |
| "learning_rate": 3.7636432066240126e-06, |
| "loss": 0.0921, |
| "step": 215700 |
| }, |
| { |
| "epoch": 81.22, |
| "learning_rate": 3.7561159202107643e-06, |
| "loss": 0.0913, |
| "step": 215800 |
| }, |
| { |
| "epoch": 81.26, |
| "learning_rate": 3.7485886337975164e-06, |
| "loss": 0.0918, |
| "step": 215900 |
| }, |
| { |
| "epoch": 81.29, |
| "learning_rate": 3.7410613473842685e-06, |
| "loss": 0.0917, |
| "step": 216000 |
| }, |
| { |
| "epoch": 81.33, |
| "learning_rate": 3.73353406097102e-06, |
| "loss": 0.0888, |
| "step": 216100 |
| }, |
| { |
| "epoch": 81.37, |
| "learning_rate": 3.7260067745577722e-06, |
| "loss": 0.0896, |
| "step": 216200 |
| }, |
| { |
| "epoch": 81.41, |
| "learning_rate": 3.718479488144524e-06, |
| "loss": 0.0917, |
| "step": 216300 |
| }, |
| { |
| "epoch": 81.45, |
| "learning_rate": 3.710952201731276e-06, |
| "loss": 0.0918, |
| "step": 216400 |
| }, |
| { |
| "epoch": 81.48, |
| "learning_rate": 3.703424915318028e-06, |
| "loss": 0.0908, |
| "step": 216500 |
| }, |
| { |
| "epoch": 81.52, |
| "learning_rate": 3.6958976289047798e-06, |
| "loss": 0.0921, |
| "step": 216600 |
| }, |
| { |
| "epoch": 81.56, |
| "learning_rate": 3.6883703424915323e-06, |
| "loss": 0.0896, |
| "step": 216700 |
| }, |
| { |
| "epoch": 81.6, |
| "learning_rate": 3.6808430560782844e-06, |
| "loss": 0.0909, |
| "step": 216800 |
| }, |
| { |
| "epoch": 81.63, |
| "learning_rate": 3.673315769665036e-06, |
| "loss": 0.0912, |
| "step": 216900 |
| }, |
| { |
| "epoch": 81.67, |
| "learning_rate": 3.665788483251788e-06, |
| "loss": 0.0915, |
| "step": 217000 |
| }, |
| { |
| "epoch": 81.71, |
| "learning_rate": 3.65826119683854e-06, |
| "loss": 0.091, |
| "step": 217100 |
| }, |
| { |
| "epoch": 81.75, |
| "learning_rate": 3.650733910425292e-06, |
| "loss": 0.09, |
| "step": 217200 |
| }, |
| { |
| "epoch": 81.78, |
| "learning_rate": 3.643206624012044e-06, |
| "loss": 0.0912, |
| "step": 217300 |
| }, |
| { |
| "epoch": 81.82, |
| "learning_rate": 3.635679337598796e-06, |
| "loss": 0.0908, |
| "step": 217400 |
| }, |
| { |
| "epoch": 81.86, |
| "learning_rate": 3.6281520511855477e-06, |
| "loss": 0.0902, |
| "step": 217500 |
| }, |
| { |
| "epoch": 81.9, |
| "learning_rate": 3.6206247647723e-06, |
| "loss": 0.0907, |
| "step": 217600 |
| }, |
| { |
| "epoch": 81.93, |
| "learning_rate": 3.6130974783590515e-06, |
| "loss": 0.0892, |
| "step": 217700 |
| }, |
| { |
| "epoch": 81.97, |
| "learning_rate": 3.6055701919458036e-06, |
| "loss": 0.0916, |
| "step": 217800 |
| }, |
| { |
| "epoch": 82.0, |
| "eval_loss": 0.09082730859518051, |
| "eval_runtime": 45.1546, |
| "eval_samples_per_second": 166.096, |
| "eval_steps_per_second": 10.387, |
| "step": 217874 |
| }, |
| { |
| "epoch": 82.01, |
| "learning_rate": 3.598042905532556e-06, |
| "loss": 0.0915, |
| "step": 217900 |
| }, |
| { |
| "epoch": 82.05, |
| "learning_rate": 3.590515619119308e-06, |
| "loss": 0.091, |
| "step": 218000 |
| }, |
| { |
| "epoch": 82.09, |
| "learning_rate": 3.58298833270606e-06, |
| "loss": 0.0918, |
| "step": 218100 |
| }, |
| { |
| "epoch": 82.12, |
| "learning_rate": 3.575461046292812e-06, |
| "loss": 0.0911, |
| "step": 218200 |
| }, |
| { |
| "epoch": 82.16, |
| "learning_rate": 3.5679337598795636e-06, |
| "loss": 0.0907, |
| "step": 218300 |
| }, |
| { |
| "epoch": 82.2, |
| "learning_rate": 3.5604064734663157e-06, |
| "loss": 0.0912, |
| "step": 218400 |
| }, |
| { |
| "epoch": 82.24, |
| "learning_rate": 3.5528791870530678e-06, |
| "loss": 0.0915, |
| "step": 218500 |
| }, |
| { |
| "epoch": 82.27, |
| "learning_rate": 3.5453519006398194e-06, |
| "loss": 0.0909, |
| "step": 218600 |
| }, |
| { |
| "epoch": 82.31, |
| "learning_rate": 3.5378246142265715e-06, |
| "loss": 0.0915, |
| "step": 218700 |
| }, |
| { |
| "epoch": 82.35, |
| "learning_rate": 3.5302973278133236e-06, |
| "loss": 0.0895, |
| "step": 218800 |
| }, |
| { |
| "epoch": 82.39, |
| "learning_rate": 3.5227700414000753e-06, |
| "loss": 0.0909, |
| "step": 218900 |
| }, |
| { |
| "epoch": 82.42, |
| "learning_rate": 3.5152427549868274e-06, |
| "loss": 0.0922, |
| "step": 219000 |
| }, |
| { |
| "epoch": 82.46, |
| "learning_rate": 3.507715468573579e-06, |
| "loss": 0.0923, |
| "step": 219100 |
| }, |
| { |
| "epoch": 82.5, |
| "learning_rate": 3.5001881821603316e-06, |
| "loss": 0.0923, |
| "step": 219200 |
| }, |
| { |
| "epoch": 82.54, |
| "learning_rate": 3.4926608957470836e-06, |
| "loss": 0.0904, |
| "step": 219300 |
| }, |
| { |
| "epoch": 82.57, |
| "learning_rate": 3.4851336093338357e-06, |
| "loss": 0.0904, |
| "step": 219400 |
| }, |
| { |
| "epoch": 82.61, |
| "learning_rate": 3.4776063229205874e-06, |
| "loss": 0.0921, |
| "step": 219500 |
| }, |
| { |
| "epoch": 82.65, |
| "learning_rate": 3.4700790365073395e-06, |
| "loss": 0.0904, |
| "step": 219600 |
| }, |
| { |
| "epoch": 82.69, |
| "learning_rate": 3.4625517500940916e-06, |
| "loss": 0.0903, |
| "step": 219700 |
| }, |
| { |
| "epoch": 82.72, |
| "learning_rate": 3.4550244636808433e-06, |
| "loss": 0.0911, |
| "step": 219800 |
| }, |
| { |
| "epoch": 82.76, |
| "learning_rate": 3.4474971772675953e-06, |
| "loss": 0.0904, |
| "step": 219900 |
| }, |
| { |
| "epoch": 82.8, |
| "learning_rate": 3.439969890854347e-06, |
| "loss": 0.0898, |
| "step": 220000 |
| }, |
| { |
| "epoch": 82.84, |
| "learning_rate": 3.432442604441099e-06, |
| "loss": 0.0906, |
| "step": 220100 |
| }, |
| { |
| "epoch": 82.88, |
| "learning_rate": 3.424915318027851e-06, |
| "loss": 0.0885, |
| "step": 220200 |
| }, |
| { |
| "epoch": 82.91, |
| "learning_rate": 3.417388031614603e-06, |
| "loss": 0.0917, |
| "step": 220300 |
| }, |
| { |
| "epoch": 82.95, |
| "learning_rate": 3.4098607452013554e-06, |
| "loss": 0.0888, |
| "step": 220400 |
| }, |
| { |
| "epoch": 82.99, |
| "learning_rate": 3.4023334587881075e-06, |
| "loss": 0.0902, |
| "step": 220500 |
| }, |
| { |
| "epoch": 83.0, |
| "eval_loss": 0.09073475003242493, |
| "eval_runtime": 44.912, |
| "eval_samples_per_second": 166.993, |
| "eval_steps_per_second": 10.443, |
| "step": 220531 |
| }, |
| { |
| "epoch": 83.03, |
| "learning_rate": 3.394806172374859e-06, |
| "loss": 0.0911, |
| "step": 220600 |
| }, |
| { |
| "epoch": 83.06, |
| "learning_rate": 3.3872788859616112e-06, |
| "loss": 0.0922, |
| "step": 220700 |
| }, |
| { |
| "epoch": 83.1, |
| "learning_rate": 3.3797515995483633e-06, |
| "loss": 0.0911, |
| "step": 220800 |
| }, |
| { |
| "epoch": 83.14, |
| "learning_rate": 3.372224313135115e-06, |
| "loss": 0.0914, |
| "step": 220900 |
| }, |
| { |
| "epoch": 83.18, |
| "learning_rate": 3.364697026721867e-06, |
| "loss": 0.0912, |
| "step": 221000 |
| }, |
| { |
| "epoch": 83.21, |
| "learning_rate": 3.357169740308619e-06, |
| "loss": 0.091, |
| "step": 221100 |
| }, |
| { |
| "epoch": 83.25, |
| "learning_rate": 3.349642453895371e-06, |
| "loss": 0.0911, |
| "step": 221200 |
| }, |
| { |
| "epoch": 83.29, |
| "learning_rate": 3.342115167482123e-06, |
| "loss": 0.0896, |
| "step": 221300 |
| }, |
| { |
| "epoch": 83.33, |
| "learning_rate": 3.3345878810688746e-06, |
| "loss": 0.0926, |
| "step": 221400 |
| }, |
| { |
| "epoch": 83.36, |
| "learning_rate": 3.3270605946556267e-06, |
| "loss": 0.0896, |
| "step": 221500 |
| }, |
| { |
| "epoch": 83.4, |
| "learning_rate": 3.3195333082423788e-06, |
| "loss": 0.0918, |
| "step": 221600 |
| }, |
| { |
| "epoch": 83.44, |
| "learning_rate": 3.3120060218291313e-06, |
| "loss": 0.0904, |
| "step": 221700 |
| }, |
| { |
| "epoch": 83.48, |
| "learning_rate": 3.304478735415883e-06, |
| "loss": 0.0924, |
| "step": 221800 |
| }, |
| { |
| "epoch": 83.52, |
| "learning_rate": 3.296951449002635e-06, |
| "loss": 0.0898, |
| "step": 221900 |
| }, |
| { |
| "epoch": 83.55, |
| "learning_rate": 3.289424162589387e-06, |
| "loss": 0.0919, |
| "step": 222000 |
| }, |
| { |
| "epoch": 83.59, |
| "learning_rate": 3.2818968761761388e-06, |
| "loss": 0.0902, |
| "step": 222100 |
| }, |
| { |
| "epoch": 83.63, |
| "learning_rate": 3.274369589762891e-06, |
| "loss": 0.0911, |
| "step": 222200 |
| }, |
| { |
| "epoch": 83.67, |
| "learning_rate": 3.2668423033496425e-06, |
| "loss": 0.0907, |
| "step": 222300 |
| }, |
| { |
| "epoch": 83.7, |
| "learning_rate": 3.2593150169363946e-06, |
| "loss": 0.0907, |
| "step": 222400 |
| }, |
| { |
| "epoch": 83.74, |
| "learning_rate": 3.2517877305231467e-06, |
| "loss": 0.0894, |
| "step": 222500 |
| }, |
| { |
| "epoch": 83.78, |
| "learning_rate": 3.2442604441098984e-06, |
| "loss": 0.0912, |
| "step": 222600 |
| }, |
| { |
| "epoch": 83.82, |
| "learning_rate": 3.2367331576966505e-06, |
| "loss": 0.0919, |
| "step": 222700 |
| }, |
| { |
| "epoch": 83.85, |
| "learning_rate": 3.2292058712834026e-06, |
| "loss": 0.0906, |
| "step": 222800 |
| }, |
| { |
| "epoch": 83.89, |
| "learning_rate": 3.2216785848701546e-06, |
| "loss": 0.0902, |
| "step": 222900 |
| }, |
| { |
| "epoch": 83.93, |
| "learning_rate": 3.2141512984569067e-06, |
| "loss": 0.0908, |
| "step": 223000 |
| }, |
| { |
| "epoch": 83.97, |
| "learning_rate": 3.206624012043659e-06, |
| "loss": 0.0911, |
| "step": 223100 |
| }, |
| { |
| "epoch": 84.0, |
| "eval_loss": 0.09099774062633514, |
| "eval_runtime": 45.2441, |
| "eval_samples_per_second": 165.768, |
| "eval_steps_per_second": 10.366, |
| "step": 223188 |
| }, |
| { |
| "epoch": 84.0, |
| "learning_rate": 3.1990967256304105e-06, |
| "loss": 0.0915, |
| "step": 223200 |
| }, |
| { |
| "epoch": 84.04, |
| "learning_rate": 3.1915694392171626e-06, |
| "loss": 0.0904, |
| "step": 223300 |
| }, |
| { |
| "epoch": 84.08, |
| "learning_rate": 3.1840421528039147e-06, |
| "loss": 0.0894, |
| "step": 223400 |
| }, |
| { |
| "epoch": 84.12, |
| "learning_rate": 3.1765148663906663e-06, |
| "loss": 0.0901, |
| "step": 223500 |
| }, |
| { |
| "epoch": 84.16, |
| "learning_rate": 3.1689875799774184e-06, |
| "loss": 0.0901, |
| "step": 223600 |
| }, |
| { |
| "epoch": 84.19, |
| "learning_rate": 3.16146029356417e-06, |
| "loss": 0.091, |
| "step": 223700 |
| }, |
| { |
| "epoch": 84.23, |
| "learning_rate": 3.153933007150922e-06, |
| "loss": 0.0913, |
| "step": 223800 |
| }, |
| { |
| "epoch": 84.27, |
| "learning_rate": 3.1464057207376743e-06, |
| "loss": 0.091, |
| "step": 223900 |
| }, |
| { |
| "epoch": 84.31, |
| "learning_rate": 3.138878434324426e-06, |
| "loss": 0.0907, |
| "step": 224000 |
| }, |
| { |
| "epoch": 84.34, |
| "learning_rate": 3.131351147911178e-06, |
| "loss": 0.0913, |
| "step": 224100 |
| }, |
| { |
| "epoch": 84.38, |
| "learning_rate": 3.1238238614979305e-06, |
| "loss": 0.0898, |
| "step": 224200 |
| }, |
| { |
| "epoch": 84.42, |
| "learning_rate": 3.1162965750846822e-06, |
| "loss": 0.0897, |
| "step": 224300 |
| }, |
| { |
| "epoch": 84.46, |
| "learning_rate": 3.1087692886714343e-06, |
| "loss": 0.0897, |
| "step": 224400 |
| }, |
| { |
| "epoch": 84.49, |
| "learning_rate": 3.1012420022581864e-06, |
| "loss": 0.0915, |
| "step": 224500 |
| }, |
| { |
| "epoch": 84.53, |
| "learning_rate": 3.093714715844938e-06, |
| "loss": 0.0916, |
| "step": 224600 |
| }, |
| { |
| "epoch": 84.57, |
| "learning_rate": 3.08618742943169e-06, |
| "loss": 0.0932, |
| "step": 224700 |
| }, |
| { |
| "epoch": 84.61, |
| "learning_rate": 3.0786601430184422e-06, |
| "loss": 0.0909, |
| "step": 224800 |
| }, |
| { |
| "epoch": 84.64, |
| "learning_rate": 3.071132856605194e-06, |
| "loss": 0.0897, |
| "step": 224900 |
| }, |
| { |
| "epoch": 84.68, |
| "learning_rate": 3.063605570191946e-06, |
| "loss": 0.0914, |
| "step": 225000 |
| }, |
| { |
| "epoch": 84.72, |
| "learning_rate": 3.0560782837786977e-06, |
| "loss": 0.0891, |
| "step": 225100 |
| }, |
| { |
| "epoch": 84.76, |
| "learning_rate": 3.0485509973654498e-06, |
| "loss": 0.0896, |
| "step": 225200 |
| }, |
| { |
| "epoch": 84.79, |
| "learning_rate": 3.041023710952202e-06, |
| "loss": 0.0911, |
| "step": 225300 |
| }, |
| { |
| "epoch": 84.83, |
| "learning_rate": 3.0334964245389544e-06, |
| "loss": 0.0931, |
| "step": 225400 |
| }, |
| { |
| "epoch": 84.87, |
| "learning_rate": 3.025969138125706e-06, |
| "loss": 0.0922, |
| "step": 225500 |
| }, |
| { |
| "epoch": 84.91, |
| "learning_rate": 3.018441851712458e-06, |
| "loss": 0.0914, |
| "step": 225600 |
| }, |
| { |
| "epoch": 84.95, |
| "learning_rate": 3.01091456529921e-06, |
| "loss": 0.0914, |
| "step": 225700 |
| }, |
| { |
| "epoch": 84.98, |
| "learning_rate": 3.003387278885962e-06, |
| "loss": 0.091, |
| "step": 225800 |
| }, |
| { |
| "epoch": 85.0, |
| "eval_loss": 0.0903320163488388, |
| "eval_runtime": 45.2024, |
| "eval_samples_per_second": 165.921, |
| "eval_steps_per_second": 10.376, |
| "step": 225845 |
| }, |
| { |
| "epoch": 85.02, |
| "learning_rate": 2.995859992472714e-06, |
| "loss": 0.0899, |
| "step": 225900 |
| }, |
| { |
| "epoch": 85.06, |
| "learning_rate": 2.9883327060594656e-06, |
| "loss": 0.0913, |
| "step": 226000 |
| }, |
| { |
| "epoch": 85.1, |
| "learning_rate": 2.9808054196462177e-06, |
| "loss": 0.091, |
| "step": 226100 |
| }, |
| { |
| "epoch": 85.13, |
| "learning_rate": 2.97327813323297e-06, |
| "loss": 0.0931, |
| "step": 226200 |
| }, |
| { |
| "epoch": 85.17, |
| "learning_rate": 2.9657508468197215e-06, |
| "loss": 0.0925, |
| "step": 226300 |
| }, |
| { |
| "epoch": 85.21, |
| "learning_rate": 2.9582235604064736e-06, |
| "loss": 0.0903, |
| "step": 226400 |
| }, |
| { |
| "epoch": 85.25, |
| "learning_rate": 2.9506962739932257e-06, |
| "loss": 0.0908, |
| "step": 226500 |
| }, |
| { |
| "epoch": 85.28, |
| "learning_rate": 2.9431689875799777e-06, |
| "loss": 0.091, |
| "step": 226600 |
| }, |
| { |
| "epoch": 85.32, |
| "learning_rate": 2.93564170116673e-06, |
| "loss": 0.0901, |
| "step": 226700 |
| }, |
| { |
| "epoch": 85.36, |
| "learning_rate": 2.928114414753482e-06, |
| "loss": 0.0907, |
| "step": 226800 |
| }, |
| { |
| "epoch": 85.4, |
| "learning_rate": 2.9205871283402336e-06, |
| "loss": 0.0912, |
| "step": 226900 |
| }, |
| { |
| "epoch": 85.43, |
| "learning_rate": 2.9130598419269857e-06, |
| "loss": 0.0904, |
| "step": 227000 |
| }, |
| { |
| "epoch": 85.47, |
| "learning_rate": 2.9055325555137378e-06, |
| "loss": 0.0913, |
| "step": 227100 |
| }, |
| { |
| "epoch": 85.51, |
| "learning_rate": 2.8980052691004894e-06, |
| "loss": 0.0917, |
| "step": 227200 |
| }, |
| { |
| "epoch": 85.55, |
| "learning_rate": 2.8904779826872415e-06, |
| "loss": 0.0897, |
| "step": 227300 |
| }, |
| { |
| "epoch": 85.59, |
| "learning_rate": 2.882950696273993e-06, |
| "loss": 0.09, |
| "step": 227400 |
| }, |
| { |
| "epoch": 85.62, |
| "learning_rate": 2.8754234098607453e-06, |
| "loss": 0.0913, |
| "step": 227500 |
| }, |
| { |
| "epoch": 85.66, |
| "learning_rate": 2.8678961234474974e-06, |
| "loss": 0.0899, |
| "step": 227600 |
| }, |
| { |
| "epoch": 85.7, |
| "learning_rate": 2.860368837034249e-06, |
| "loss": 0.0905, |
| "step": 227700 |
| }, |
| { |
| "epoch": 85.74, |
| "learning_rate": 2.852841550621001e-06, |
| "loss": 0.0898, |
| "step": 227800 |
| }, |
| { |
| "epoch": 85.77, |
| "learning_rate": 2.8453142642077536e-06, |
| "loss": 0.09, |
| "step": 227900 |
| }, |
| { |
| "epoch": 85.81, |
| "learning_rate": 2.8377869777945053e-06, |
| "loss": 0.0908, |
| "step": 228000 |
| }, |
| { |
| "epoch": 85.85, |
| "learning_rate": 2.8302596913812574e-06, |
| "loss": 0.0906, |
| "step": 228100 |
| }, |
| { |
| "epoch": 85.89, |
| "learning_rate": 2.8227324049680095e-06, |
| "loss": 0.0904, |
| "step": 228200 |
| }, |
| { |
| "epoch": 85.92, |
| "learning_rate": 2.815205118554761e-06, |
| "loss": 0.0897, |
| "step": 228300 |
| }, |
| { |
| "epoch": 85.96, |
| "learning_rate": 2.8076778321415132e-06, |
| "loss": 0.091, |
| "step": 228400 |
| }, |
| { |
| "epoch": 86.0, |
| "learning_rate": 2.8001505457282653e-06, |
| "loss": 0.0903, |
| "step": 228500 |
| }, |
| { |
| "epoch": 86.0, |
| "eval_loss": 0.0905364602804184, |
| "eval_runtime": 45.2173, |
| "eval_samples_per_second": 165.866, |
| "eval_steps_per_second": 10.372, |
| "step": 228502 |
| }, |
| { |
| "epoch": 86.04, |
| "learning_rate": 2.792623259315017e-06, |
| "loss": 0.0907, |
| "step": 228600 |
| }, |
| { |
| "epoch": 86.07, |
| "learning_rate": 2.785095972901769e-06, |
| "loss": 0.0918, |
| "step": 228700 |
| }, |
| { |
| "epoch": 86.11, |
| "learning_rate": 2.7775686864885208e-06, |
| "loss": 0.0925, |
| "step": 228800 |
| }, |
| { |
| "epoch": 86.15, |
| "learning_rate": 2.770041400075273e-06, |
| "loss": 0.0897, |
| "step": 228900 |
| }, |
| { |
| "epoch": 86.19, |
| "learning_rate": 2.762514113662025e-06, |
| "loss": 0.0895, |
| "step": 229000 |
| }, |
| { |
| "epoch": 86.23, |
| "learning_rate": 2.7549868272487774e-06, |
| "loss": 0.088, |
| "step": 229100 |
| }, |
| { |
| "epoch": 86.26, |
| "learning_rate": 2.747459540835529e-06, |
| "loss": 0.0899, |
| "step": 229200 |
| }, |
| { |
| "epoch": 86.3, |
| "learning_rate": 2.739932254422281e-06, |
| "loss": 0.0898, |
| "step": 229300 |
| }, |
| { |
| "epoch": 86.34, |
| "learning_rate": 2.7324049680090333e-06, |
| "loss": 0.0904, |
| "step": 229400 |
| }, |
| { |
| "epoch": 86.38, |
| "learning_rate": 2.724877681595785e-06, |
| "loss": 0.0904, |
| "step": 229500 |
| }, |
| { |
| "epoch": 86.41, |
| "learning_rate": 2.717350395182537e-06, |
| "loss": 0.0906, |
| "step": 229600 |
| }, |
| { |
| "epoch": 86.45, |
| "learning_rate": 2.7098231087692887e-06, |
| "loss": 0.0899, |
| "step": 229700 |
| }, |
| { |
| "epoch": 86.49, |
| "learning_rate": 2.702295822356041e-06, |
| "loss": 0.0889, |
| "step": 229800 |
| }, |
| { |
| "epoch": 86.53, |
| "learning_rate": 2.694768535942793e-06, |
| "loss": 0.0915, |
| "step": 229900 |
| }, |
| { |
| "epoch": 86.56, |
| "learning_rate": 2.6872412495295446e-06, |
| "loss": 0.09, |
| "step": 230000 |
| }, |
| { |
| "epoch": 86.6, |
| "learning_rate": 2.6797139631162967e-06, |
| "loss": 0.0911, |
| "step": 230100 |
| }, |
| { |
| "epoch": 86.64, |
| "learning_rate": 2.6721866767030487e-06, |
| "loss": 0.0911, |
| "step": 230200 |
| }, |
| { |
| "epoch": 86.68, |
| "learning_rate": 2.6646593902898004e-06, |
| "loss": 0.0914, |
| "step": 230300 |
| }, |
| { |
| "epoch": 86.71, |
| "learning_rate": 2.657132103876553e-06, |
| "loss": 0.0904, |
| "step": 230400 |
| }, |
| { |
| "epoch": 86.75, |
| "learning_rate": 2.649604817463305e-06, |
| "loss": 0.0892, |
| "step": 230500 |
| }, |
| { |
| "epoch": 86.79, |
| "learning_rate": 2.6420775310500567e-06, |
| "loss": 0.0896, |
| "step": 230600 |
| }, |
| { |
| "epoch": 86.83, |
| "learning_rate": 2.6345502446368088e-06, |
| "loss": 0.0902, |
| "step": 230700 |
| }, |
| { |
| "epoch": 86.86, |
| "learning_rate": 2.627022958223561e-06, |
| "loss": 0.0907, |
| "step": 230800 |
| }, |
| { |
| "epoch": 86.9, |
| "learning_rate": 2.6194956718103125e-06, |
| "loss": 0.0904, |
| "step": 230900 |
| }, |
| { |
| "epoch": 86.94, |
| "learning_rate": 2.6119683853970646e-06, |
| "loss": 0.0906, |
| "step": 231000 |
| }, |
| { |
| "epoch": 86.98, |
| "learning_rate": 2.6044410989838163e-06, |
| "loss": 0.0907, |
| "step": 231100 |
| }, |
| { |
| "epoch": 87.0, |
| "eval_loss": 0.09008638560771942, |
| "eval_runtime": 45.2794, |
| "eval_samples_per_second": 165.638, |
| "eval_steps_per_second": 10.358, |
| "step": 231159 |
| }, |
| { |
| "epoch": 87.02, |
| "learning_rate": 2.5969138125705684e-06, |
| "loss": 0.0909, |
| "step": 231200 |
| }, |
| { |
| "epoch": 87.05, |
| "learning_rate": 2.5893865261573205e-06, |
| "loss": 0.0889, |
| "step": 231300 |
| }, |
| { |
| "epoch": 87.09, |
| "learning_rate": 2.581859239744072e-06, |
| "loss": 0.0905, |
| "step": 231400 |
| }, |
| { |
| "epoch": 87.13, |
| "learning_rate": 2.5743319533308242e-06, |
| "loss": 0.0887, |
| "step": 231500 |
| }, |
| { |
| "epoch": 87.17, |
| "learning_rate": 2.5668046669175767e-06, |
| "loss": 0.0914, |
| "step": 231600 |
| }, |
| { |
| "epoch": 87.2, |
| "learning_rate": 2.559277380504329e-06, |
| "loss": 0.0906, |
| "step": 231700 |
| }, |
| { |
| "epoch": 87.24, |
| "learning_rate": 2.5517500940910805e-06, |
| "loss": 0.0909, |
| "step": 231800 |
| }, |
| { |
| "epoch": 87.28, |
| "learning_rate": 2.5442228076778326e-06, |
| "loss": 0.09, |
| "step": 231900 |
| }, |
| { |
| "epoch": 87.32, |
| "learning_rate": 2.5366955212645842e-06, |
| "loss": 0.0888, |
| "step": 232000 |
| }, |
| { |
| "epoch": 87.35, |
| "learning_rate": 2.5291682348513363e-06, |
| "loss": 0.09, |
| "step": 232100 |
| }, |
| { |
| "epoch": 87.39, |
| "learning_rate": 2.5216409484380884e-06, |
| "loss": 0.0895, |
| "step": 232200 |
| }, |
| { |
| "epoch": 87.43, |
| "learning_rate": 2.51411366202484e-06, |
| "loss": 0.0907, |
| "step": 232300 |
| }, |
| { |
| "epoch": 87.47, |
| "learning_rate": 2.506586375611592e-06, |
| "loss": 0.0906, |
| "step": 232400 |
| }, |
| { |
| "epoch": 87.5, |
| "learning_rate": 2.4990590891983443e-06, |
| "loss": 0.0907, |
| "step": 232500 |
| }, |
| { |
| "epoch": 87.54, |
| "learning_rate": 2.4915318027850964e-06, |
| "loss": 0.0907, |
| "step": 232600 |
| }, |
| { |
| "epoch": 87.58, |
| "learning_rate": 2.484004516371848e-06, |
| "loss": 0.0907, |
| "step": 232700 |
| }, |
| { |
| "epoch": 87.62, |
| "learning_rate": 2.4764772299586e-06, |
| "loss": 0.0892, |
| "step": 232800 |
| }, |
| { |
| "epoch": 87.66, |
| "learning_rate": 2.468949943545352e-06, |
| "loss": 0.0908, |
| "step": 232900 |
| }, |
| { |
| "epoch": 87.69, |
| "learning_rate": 2.461422657132104e-06, |
| "loss": 0.0906, |
| "step": 233000 |
| }, |
| { |
| "epoch": 87.73, |
| "learning_rate": 2.453895370718856e-06, |
| "loss": 0.0907, |
| "step": 233100 |
| }, |
| { |
| "epoch": 87.77, |
| "learning_rate": 2.446368084305608e-06, |
| "loss": 0.0905, |
| "step": 233200 |
| }, |
| { |
| "epoch": 87.81, |
| "learning_rate": 2.43884079789236e-06, |
| "loss": 0.0913, |
| "step": 233300 |
| }, |
| { |
| "epoch": 87.84, |
| "learning_rate": 2.431313511479112e-06, |
| "loss": 0.0926, |
| "step": 233400 |
| }, |
| { |
| "epoch": 87.88, |
| "learning_rate": 2.423786225065864e-06, |
| "loss": 0.0924, |
| "step": 233500 |
| }, |
| { |
| "epoch": 87.92, |
| "learning_rate": 2.416258938652616e-06, |
| "loss": 0.0897, |
| "step": 233600 |
| }, |
| { |
| "epoch": 87.96, |
| "learning_rate": 2.4087316522393677e-06, |
| "loss": 0.0922, |
| "step": 233700 |
| }, |
| { |
| "epoch": 87.99, |
| "learning_rate": 2.40120436582612e-06, |
| "loss": 0.0908, |
| "step": 233800 |
| }, |
| { |
| "epoch": 88.0, |
| "eval_loss": 0.0906805768609047, |
| "eval_runtime": 44.9216, |
| "eval_samples_per_second": 166.958, |
| "eval_steps_per_second": 10.44, |
| "step": 233816 |
| }, |
| { |
| "epoch": 88.03, |
| "learning_rate": 2.393677079412872e-06, |
| "loss": 0.0916, |
| "step": 233900 |
| }, |
| { |
| "epoch": 88.07, |
| "learning_rate": 2.386149792999624e-06, |
| "loss": 0.0901, |
| "step": 234000 |
| }, |
| { |
| "epoch": 88.11, |
| "learning_rate": 2.3786225065863756e-06, |
| "loss": 0.0899, |
| "step": 234100 |
| }, |
| { |
| "epoch": 88.14, |
| "learning_rate": 2.3710952201731277e-06, |
| "loss": 0.0909, |
| "step": 234200 |
| }, |
| { |
| "epoch": 88.18, |
| "learning_rate": 2.3635679337598798e-06, |
| "loss": 0.0904, |
| "step": 234300 |
| }, |
| { |
| "epoch": 88.22, |
| "learning_rate": 2.356040647346632e-06, |
| "loss": 0.0914, |
| "step": 234400 |
| }, |
| { |
| "epoch": 88.26, |
| "learning_rate": 2.348513360933384e-06, |
| "loss": 0.091, |
| "step": 234500 |
| }, |
| { |
| "epoch": 88.3, |
| "learning_rate": 2.3409860745201356e-06, |
| "loss": 0.0919, |
| "step": 234600 |
| }, |
| { |
| "epoch": 88.33, |
| "learning_rate": 2.3334587881068877e-06, |
| "loss": 0.091, |
| "step": 234700 |
| }, |
| { |
| "epoch": 88.37, |
| "learning_rate": 2.3259315016936394e-06, |
| "loss": 0.0899, |
| "step": 234800 |
| }, |
| { |
| "epoch": 88.41, |
| "learning_rate": 2.3184042152803915e-06, |
| "loss": 0.0899, |
| "step": 234900 |
| }, |
| { |
| "epoch": 88.45, |
| "learning_rate": 2.3108769288671436e-06, |
| "loss": 0.0899, |
| "step": 235000 |
| }, |
| { |
| "epoch": 88.48, |
| "learning_rate": 2.3033496424538956e-06, |
| "loss": 0.0916, |
| "step": 235100 |
| }, |
| { |
| "epoch": 88.52, |
| "learning_rate": 2.2958223560406477e-06, |
| "loss": 0.0901, |
| "step": 235200 |
| }, |
| { |
| "epoch": 88.56, |
| "learning_rate": 2.2882950696273994e-06, |
| "loss": 0.0898, |
| "step": 235300 |
| }, |
| { |
| "epoch": 88.6, |
| "learning_rate": 2.2807677832141515e-06, |
| "loss": 0.0908, |
| "step": 235400 |
| }, |
| { |
| "epoch": 88.63, |
| "learning_rate": 2.273240496800903e-06, |
| "loss": 0.0891, |
| "step": 235500 |
| }, |
| { |
| "epoch": 88.67, |
| "learning_rate": 2.2657132103876557e-06, |
| "loss": 0.0909, |
| "step": 235600 |
| }, |
| { |
| "epoch": 88.71, |
| "learning_rate": 2.2581859239744073e-06, |
| "loss": 0.0889, |
| "step": 235700 |
| }, |
| { |
| "epoch": 88.75, |
| "learning_rate": 2.2506586375611594e-06, |
| "loss": 0.093, |
| "step": 235800 |
| }, |
| { |
| "epoch": 88.78, |
| "learning_rate": 2.2431313511479115e-06, |
| "loss": 0.0902, |
| "step": 235900 |
| }, |
| { |
| "epoch": 88.82, |
| "learning_rate": 2.235604064734663e-06, |
| "loss": 0.09, |
| "step": 236000 |
| }, |
| { |
| "epoch": 88.86, |
| "learning_rate": 2.2280767783214153e-06, |
| "loss": 0.0893, |
| "step": 236100 |
| }, |
| { |
| "epoch": 88.9, |
| "learning_rate": 2.2205494919081674e-06, |
| "loss": 0.0902, |
| "step": 236200 |
| }, |
| { |
| "epoch": 88.93, |
| "learning_rate": 2.2130222054949194e-06, |
| "loss": 0.0912, |
| "step": 236300 |
| }, |
| { |
| "epoch": 88.97, |
| "learning_rate": 2.205494919081671e-06, |
| "loss": 0.0911, |
| "step": 236400 |
| }, |
| { |
| "epoch": 89.0, |
| "eval_loss": 0.09018085896968842, |
| "eval_runtime": 45.1243, |
| "eval_samples_per_second": 166.207, |
| "eval_steps_per_second": 10.394, |
| "step": 236473 |
| }, |
| { |
| "epoch": 89.01, |
| "learning_rate": 2.197967632668423e-06, |
| "loss": 0.092, |
| "step": 236500 |
| }, |
| { |
| "epoch": 89.05, |
| "learning_rate": 2.1904403462551753e-06, |
| "loss": 0.0904, |
| "step": 236600 |
| }, |
| { |
| "epoch": 89.09, |
| "learning_rate": 2.182913059841927e-06, |
| "loss": 0.09, |
| "step": 236700 |
| }, |
| { |
| "epoch": 89.12, |
| "learning_rate": 2.175385773428679e-06, |
| "loss": 0.0911, |
| "step": 236800 |
| }, |
| { |
| "epoch": 89.16, |
| "learning_rate": 2.167858487015431e-06, |
| "loss": 0.0883, |
| "step": 236900 |
| }, |
| { |
| "epoch": 89.2, |
| "learning_rate": 2.1603312006021832e-06, |
| "loss": 0.0904, |
| "step": 237000 |
| }, |
| { |
| "epoch": 89.24, |
| "learning_rate": 2.152803914188935e-06, |
| "loss": 0.0912, |
| "step": 237100 |
| }, |
| { |
| "epoch": 89.27, |
| "learning_rate": 2.145276627775687e-06, |
| "loss": 0.0892, |
| "step": 237200 |
| }, |
| { |
| "epoch": 89.31, |
| "learning_rate": 2.137749341362439e-06, |
| "loss": 0.0912, |
| "step": 237300 |
| }, |
| { |
| "epoch": 89.35, |
| "learning_rate": 2.1302220549491907e-06, |
| "loss": 0.09, |
| "step": 237400 |
| }, |
| { |
| "epoch": 89.39, |
| "learning_rate": 2.1226947685359433e-06, |
| "loss": 0.0904, |
| "step": 237500 |
| }, |
| { |
| "epoch": 89.42, |
| "learning_rate": 2.115167482122695e-06, |
| "loss": 0.0913, |
| "step": 237600 |
| }, |
| { |
| "epoch": 89.46, |
| "learning_rate": 2.107640195709447e-06, |
| "loss": 0.0904, |
| "step": 237700 |
| }, |
| { |
| "epoch": 89.5, |
| "learning_rate": 2.1001129092961987e-06, |
| "loss": 0.0899, |
| "step": 237800 |
| }, |
| { |
| "epoch": 89.54, |
| "learning_rate": 2.0925856228829508e-06, |
| "loss": 0.0893, |
| "step": 237900 |
| }, |
| { |
| "epoch": 89.57, |
| "learning_rate": 2.085058336469703e-06, |
| "loss": 0.0903, |
| "step": 238000 |
| }, |
| { |
| "epoch": 89.61, |
| "learning_rate": 2.077531050056455e-06, |
| "loss": 0.09, |
| "step": 238100 |
| }, |
| { |
| "epoch": 89.65, |
| "learning_rate": 2.070003763643207e-06, |
| "loss": 0.0903, |
| "step": 238200 |
| }, |
| { |
| "epoch": 89.69, |
| "learning_rate": 2.0624764772299587e-06, |
| "loss": 0.0891, |
| "step": 238300 |
| }, |
| { |
| "epoch": 89.73, |
| "learning_rate": 2.054949190816711e-06, |
| "loss": 0.0892, |
| "step": 238400 |
| }, |
| { |
| "epoch": 89.76, |
| "learning_rate": 2.0474219044034625e-06, |
| "loss": 0.09, |
| "step": 238500 |
| }, |
| { |
| "epoch": 89.8, |
| "learning_rate": 2.0398946179902146e-06, |
| "loss": 0.0908, |
| "step": 238600 |
| }, |
| { |
| "epoch": 89.84, |
| "learning_rate": 2.0323673315769666e-06, |
| "loss": 0.0902, |
| "step": 238700 |
| }, |
| { |
| "epoch": 89.88, |
| "learning_rate": 2.0248400451637187e-06, |
| "loss": 0.0916, |
| "step": 238800 |
| }, |
| { |
| "epoch": 89.91, |
| "learning_rate": 2.017312758750471e-06, |
| "loss": 0.0891, |
| "step": 238900 |
| }, |
| { |
| "epoch": 89.95, |
| "learning_rate": 2.0097854723372225e-06, |
| "loss": 0.0898, |
| "step": 239000 |
| }, |
| { |
| "epoch": 89.99, |
| "learning_rate": 2.0022581859239746e-06, |
| "loss": 0.0905, |
| "step": 239100 |
| }, |
| { |
| "epoch": 90.0, |
| "eval_loss": 0.09060540050268173, |
| "eval_runtime": 45.1371, |
| "eval_samples_per_second": 166.16, |
| "eval_steps_per_second": 10.391, |
| "step": 239130 |
| }, |
| { |
| "epoch": 90.03, |
| "learning_rate": 1.9947308995107267e-06, |
| "loss": 0.0915, |
| "step": 239200 |
| }, |
| { |
| "epoch": 90.06, |
| "learning_rate": 1.9872036130974783e-06, |
| "loss": 0.0896, |
| "step": 239300 |
| }, |
| { |
| "epoch": 90.1, |
| "learning_rate": 1.9796763266842304e-06, |
| "loss": 0.0899, |
| "step": 239400 |
| }, |
| { |
| "epoch": 90.14, |
| "learning_rate": 1.9721490402709825e-06, |
| "loss": 0.091, |
| "step": 239500 |
| }, |
| { |
| "epoch": 90.18, |
| "learning_rate": 1.9646217538577346e-06, |
| "loss": 0.0894, |
| "step": 239600 |
| }, |
| { |
| "epoch": 90.21, |
| "learning_rate": 1.9570944674444863e-06, |
| "loss": 0.0897, |
| "step": 239700 |
| }, |
| { |
| "epoch": 90.25, |
| "learning_rate": 1.9495671810312384e-06, |
| "loss": 0.0905, |
| "step": 239800 |
| }, |
| { |
| "epoch": 90.29, |
| "learning_rate": 1.9420398946179905e-06, |
| "loss": 0.0893, |
| "step": 239900 |
| }, |
| { |
| "epoch": 90.33, |
| "learning_rate": 1.9345126082047425e-06, |
| "loss": 0.0904, |
| "step": 240000 |
| }, |
| { |
| "epoch": 90.37, |
| "learning_rate": 1.926985321791494e-06, |
| "loss": 0.0908, |
| "step": 240100 |
| }, |
| { |
| "epoch": 90.4, |
| "learning_rate": 1.9194580353782463e-06, |
| "loss": 0.0892, |
| "step": 240200 |
| }, |
| { |
| "epoch": 90.44, |
| "learning_rate": 1.9119307489649984e-06, |
| "loss": 0.0906, |
| "step": 240300 |
| }, |
| { |
| "epoch": 90.48, |
| "learning_rate": 1.9044034625517503e-06, |
| "loss": 0.0907, |
| "step": 240400 |
| }, |
| { |
| "epoch": 90.52, |
| "learning_rate": 1.8968761761385021e-06, |
| "loss": 0.0917, |
| "step": 240500 |
| }, |
| { |
| "epoch": 90.55, |
| "learning_rate": 1.8893488897252542e-06, |
| "loss": 0.0902, |
| "step": 240600 |
| }, |
| { |
| "epoch": 90.59, |
| "learning_rate": 1.8818216033120063e-06, |
| "loss": 0.0894, |
| "step": 240700 |
| }, |
| { |
| "epoch": 90.63, |
| "learning_rate": 1.8742943168987582e-06, |
| "loss": 0.0887, |
| "step": 240800 |
| }, |
| { |
| "epoch": 90.67, |
| "learning_rate": 1.86676703048551e-06, |
| "loss": 0.0913, |
| "step": 240900 |
| }, |
| { |
| "epoch": 90.7, |
| "learning_rate": 1.859239744072262e-06, |
| "loss": 0.091, |
| "step": 241000 |
| }, |
| { |
| "epoch": 90.74, |
| "learning_rate": 1.851712457659014e-06, |
| "loss": 0.0892, |
| "step": 241100 |
| }, |
| { |
| "epoch": 90.78, |
| "learning_rate": 1.8441851712457661e-06, |
| "loss": 0.0898, |
| "step": 241200 |
| }, |
| { |
| "epoch": 90.82, |
| "learning_rate": 1.836657884832518e-06, |
| "loss": 0.0888, |
| "step": 241300 |
| }, |
| { |
| "epoch": 90.85, |
| "learning_rate": 1.82913059841927e-06, |
| "loss": 0.0895, |
| "step": 241400 |
| }, |
| { |
| "epoch": 90.89, |
| "learning_rate": 1.821603312006022e-06, |
| "loss": 0.0905, |
| "step": 241500 |
| }, |
| { |
| "epoch": 90.93, |
| "learning_rate": 1.8140760255927739e-06, |
| "loss": 0.0906, |
| "step": 241600 |
| }, |
| { |
| "epoch": 90.97, |
| "learning_rate": 1.8065487391795257e-06, |
| "loss": 0.089, |
| "step": 241700 |
| }, |
| { |
| "epoch": 91.0, |
| "eval_loss": 0.0901167169213295, |
| "eval_runtime": 44.9942, |
| "eval_samples_per_second": 166.688, |
| "eval_steps_per_second": 10.424, |
| "step": 241787 |
| }, |
| { |
| "epoch": 91.0, |
| "learning_rate": 1.799021452766278e-06, |
| "loss": 0.0896, |
| "step": 241800 |
| }, |
| { |
| "epoch": 91.04, |
| "learning_rate": 1.79149416635303e-06, |
| "loss": 0.0903, |
| "step": 241900 |
| }, |
| { |
| "epoch": 91.08, |
| "learning_rate": 1.7839668799397818e-06, |
| "loss": 0.0903, |
| "step": 242000 |
| }, |
| { |
| "epoch": 91.12, |
| "learning_rate": 1.7764395935265339e-06, |
| "loss": 0.0887, |
| "step": 242100 |
| }, |
| { |
| "epoch": 91.16, |
| "learning_rate": 1.7689123071132858e-06, |
| "loss": 0.0893, |
| "step": 242200 |
| }, |
| { |
| "epoch": 91.19, |
| "learning_rate": 1.7613850207000376e-06, |
| "loss": 0.0905, |
| "step": 242300 |
| }, |
| { |
| "epoch": 91.23, |
| "learning_rate": 1.7538577342867895e-06, |
| "loss": 0.0896, |
| "step": 242400 |
| }, |
| { |
| "epoch": 91.27, |
| "learning_rate": 1.7463304478735418e-06, |
| "loss": 0.0897, |
| "step": 242500 |
| }, |
| { |
| "epoch": 91.31, |
| "learning_rate": 1.7388031614602937e-06, |
| "loss": 0.091, |
| "step": 242600 |
| }, |
| { |
| "epoch": 91.34, |
| "learning_rate": 1.7312758750470458e-06, |
| "loss": 0.0905, |
| "step": 242700 |
| }, |
| { |
| "epoch": 91.38, |
| "learning_rate": 1.7237485886337977e-06, |
| "loss": 0.0887, |
| "step": 242800 |
| }, |
| { |
| "epoch": 91.42, |
| "learning_rate": 1.7162213022205495e-06, |
| "loss": 0.0896, |
| "step": 242900 |
| }, |
| { |
| "epoch": 91.46, |
| "learning_rate": 1.7086940158073014e-06, |
| "loss": 0.0905, |
| "step": 243000 |
| }, |
| { |
| "epoch": 91.49, |
| "learning_rate": 1.7011667293940537e-06, |
| "loss": 0.0874, |
| "step": 243100 |
| }, |
| { |
| "epoch": 91.53, |
| "learning_rate": 1.6936394429808056e-06, |
| "loss": 0.0896, |
| "step": 243200 |
| }, |
| { |
| "epoch": 91.57, |
| "learning_rate": 1.6861121565675575e-06, |
| "loss": 0.0888, |
| "step": 243300 |
| }, |
| { |
| "epoch": 91.61, |
| "learning_rate": 1.6785848701543096e-06, |
| "loss": 0.0891, |
| "step": 243400 |
| }, |
| { |
| "epoch": 91.64, |
| "learning_rate": 1.6710575837410615e-06, |
| "loss": 0.0905, |
| "step": 243500 |
| }, |
| { |
| "epoch": 91.68, |
| "learning_rate": 1.6635302973278133e-06, |
| "loss": 0.0915, |
| "step": 243600 |
| }, |
| { |
| "epoch": 91.72, |
| "learning_rate": 1.6560030109145656e-06, |
| "loss": 0.0912, |
| "step": 243700 |
| }, |
| { |
| "epoch": 91.76, |
| "learning_rate": 1.6484757245013175e-06, |
| "loss": 0.0892, |
| "step": 243800 |
| }, |
| { |
| "epoch": 91.8, |
| "learning_rate": 1.6409484380880694e-06, |
| "loss": 0.089, |
| "step": 243900 |
| }, |
| { |
| "epoch": 91.83, |
| "learning_rate": 1.6334211516748213e-06, |
| "loss": 0.0894, |
| "step": 244000 |
| }, |
| { |
| "epoch": 91.87, |
| "learning_rate": 1.6258938652615734e-06, |
| "loss": 0.0902, |
| "step": 244100 |
| }, |
| { |
| "epoch": 91.91, |
| "learning_rate": 1.6183665788483252e-06, |
| "loss": 0.0898, |
| "step": 244200 |
| }, |
| { |
| "epoch": 91.95, |
| "learning_rate": 1.6108392924350773e-06, |
| "loss": 0.0897, |
| "step": 244300 |
| }, |
| { |
| "epoch": 91.98, |
| "learning_rate": 1.6033120060218294e-06, |
| "loss": 0.0908, |
| "step": 244400 |
| }, |
| { |
| "epoch": 92.0, |
| "eval_loss": 0.08964475989341736, |
| "eval_runtime": 43.9599, |
| "eval_samples_per_second": 170.61, |
| "eval_steps_per_second": 10.669, |
| "step": 244444 |
| }, |
| { |
| "epoch": 92.02, |
| "learning_rate": 1.5957847196085813e-06, |
| "loss": 0.0888, |
| "step": 244500 |
| }, |
| { |
| "epoch": 92.06, |
| "learning_rate": 1.5882574331953332e-06, |
| "loss": 0.0873, |
| "step": 244600 |
| }, |
| { |
| "epoch": 92.1, |
| "learning_rate": 1.580730146782085e-06, |
| "loss": 0.091, |
| "step": 244700 |
| }, |
| { |
| "epoch": 92.13, |
| "learning_rate": 1.5732028603688371e-06, |
| "loss": 0.0898, |
| "step": 244800 |
| }, |
| { |
| "epoch": 92.17, |
| "learning_rate": 1.565675573955589e-06, |
| "loss": 0.0895, |
| "step": 244900 |
| }, |
| { |
| "epoch": 92.21, |
| "learning_rate": 1.5581482875423411e-06, |
| "loss": 0.0898, |
| "step": 245000 |
| }, |
| { |
| "epoch": 92.25, |
| "learning_rate": 1.5506210011290932e-06, |
| "loss": 0.0912, |
| "step": 245100 |
| }, |
| { |
| "epoch": 92.28, |
| "learning_rate": 1.543093714715845e-06, |
| "loss": 0.09, |
| "step": 245200 |
| }, |
| { |
| "epoch": 92.32, |
| "learning_rate": 1.535566428302597e-06, |
| "loss": 0.0899, |
| "step": 245300 |
| }, |
| { |
| "epoch": 92.36, |
| "learning_rate": 1.5280391418893488e-06, |
| "loss": 0.0904, |
| "step": 245400 |
| }, |
| { |
| "epoch": 92.4, |
| "learning_rate": 1.520511855476101e-06, |
| "loss": 0.0907, |
| "step": 245500 |
| }, |
| { |
| "epoch": 92.44, |
| "learning_rate": 1.512984569062853e-06, |
| "loss": 0.0901, |
| "step": 245600 |
| }, |
| { |
| "epoch": 92.47, |
| "learning_rate": 1.505457282649605e-06, |
| "loss": 0.0912, |
| "step": 245700 |
| }, |
| { |
| "epoch": 92.51, |
| "learning_rate": 1.497929996236357e-06, |
| "loss": 0.0901, |
| "step": 245800 |
| }, |
| { |
| "epoch": 92.55, |
| "learning_rate": 1.4904027098231089e-06, |
| "loss": 0.091, |
| "step": 245900 |
| }, |
| { |
| "epoch": 92.59, |
| "learning_rate": 1.4828754234098607e-06, |
| "loss": 0.0893, |
| "step": 246000 |
| }, |
| { |
| "epoch": 92.62, |
| "learning_rate": 1.4753481369966128e-06, |
| "loss": 0.0899, |
| "step": 246100 |
| }, |
| { |
| "epoch": 92.66, |
| "learning_rate": 1.467820850583365e-06, |
| "loss": 0.0898, |
| "step": 246200 |
| }, |
| { |
| "epoch": 92.7, |
| "learning_rate": 1.4602935641701168e-06, |
| "loss": 0.0908, |
| "step": 246300 |
| }, |
| { |
| "epoch": 92.74, |
| "learning_rate": 1.4527662777568689e-06, |
| "loss": 0.0909, |
| "step": 246400 |
| }, |
| { |
| "epoch": 92.77, |
| "learning_rate": 1.4452389913436208e-06, |
| "loss": 0.0887, |
| "step": 246500 |
| }, |
| { |
| "epoch": 92.81, |
| "learning_rate": 1.4377117049303726e-06, |
| "loss": 0.089, |
| "step": 246600 |
| }, |
| { |
| "epoch": 92.85, |
| "learning_rate": 1.4301844185171245e-06, |
| "loss": 0.0903, |
| "step": 246700 |
| }, |
| { |
| "epoch": 92.89, |
| "learning_rate": 1.4226571321038768e-06, |
| "loss": 0.0889, |
| "step": 246800 |
| }, |
| { |
| "epoch": 92.92, |
| "learning_rate": 1.4151298456906287e-06, |
| "loss": 0.0894, |
| "step": 246900 |
| }, |
| { |
| "epoch": 92.96, |
| "learning_rate": 1.4076025592773806e-06, |
| "loss": 0.0913, |
| "step": 247000 |
| }, |
| { |
| "epoch": 93.0, |
| "learning_rate": 1.4000752728641327e-06, |
| "loss": 0.0894, |
| "step": 247100 |
| }, |
| { |
| "epoch": 93.0, |
| "eval_loss": 0.08920498192310333, |
| "eval_runtime": 43.79, |
| "eval_samples_per_second": 171.272, |
| "eval_steps_per_second": 10.71, |
| "step": 247101 |
| }, |
| { |
| "epoch": 93.04, |
| "learning_rate": 1.3925479864508845e-06, |
| "loss": 0.0895, |
| "step": 247200 |
| }, |
| { |
| "epoch": 93.07, |
| "learning_rate": 1.3850207000376364e-06, |
| "loss": 0.0895, |
| "step": 247300 |
| }, |
| { |
| "epoch": 93.11, |
| "learning_rate": 1.3774934136243887e-06, |
| "loss": 0.0912, |
| "step": 247400 |
| }, |
| { |
| "epoch": 93.15, |
| "learning_rate": 1.3699661272111406e-06, |
| "loss": 0.0905, |
| "step": 247500 |
| }, |
| { |
| "epoch": 93.19, |
| "learning_rate": 1.3624388407978925e-06, |
| "loss": 0.0893, |
| "step": 247600 |
| }, |
| { |
| "epoch": 93.23, |
| "learning_rate": 1.3549115543846444e-06, |
| "loss": 0.0889, |
| "step": 247700 |
| }, |
| { |
| "epoch": 93.26, |
| "learning_rate": 1.3473842679713964e-06, |
| "loss": 0.0902, |
| "step": 247800 |
| }, |
| { |
| "epoch": 93.3, |
| "learning_rate": 1.3398569815581483e-06, |
| "loss": 0.0891, |
| "step": 247900 |
| }, |
| { |
| "epoch": 93.34, |
| "learning_rate": 1.3323296951449002e-06, |
| "loss": 0.0896, |
| "step": 248000 |
| }, |
| { |
| "epoch": 93.38, |
| "learning_rate": 1.3248024087316525e-06, |
| "loss": 0.0899, |
| "step": 248100 |
| }, |
| { |
| "epoch": 93.41, |
| "learning_rate": 1.3172751223184044e-06, |
| "loss": 0.0898, |
| "step": 248200 |
| }, |
| { |
| "epoch": 93.45, |
| "learning_rate": 1.3097478359051563e-06, |
| "loss": 0.0884, |
| "step": 248300 |
| }, |
| { |
| "epoch": 93.49, |
| "learning_rate": 1.3022205494919081e-06, |
| "loss": 0.0902, |
| "step": 248400 |
| }, |
| { |
| "epoch": 93.53, |
| "learning_rate": 1.2946932630786602e-06, |
| "loss": 0.0923, |
| "step": 248500 |
| }, |
| { |
| "epoch": 93.56, |
| "learning_rate": 1.2871659766654121e-06, |
| "loss": 0.0896, |
| "step": 248600 |
| }, |
| { |
| "epoch": 93.6, |
| "learning_rate": 1.2796386902521644e-06, |
| "loss": 0.0904, |
| "step": 248700 |
| }, |
| { |
| "epoch": 93.64, |
| "learning_rate": 1.2721114038389163e-06, |
| "loss": 0.0911, |
| "step": 248800 |
| }, |
| { |
| "epoch": 93.68, |
| "learning_rate": 1.2645841174256682e-06, |
| "loss": 0.0897, |
| "step": 248900 |
| }, |
| { |
| "epoch": 93.71, |
| "learning_rate": 1.25705683101242e-06, |
| "loss": 0.0898, |
| "step": 249000 |
| }, |
| { |
| "epoch": 93.75, |
| "learning_rate": 1.2495295445991721e-06, |
| "loss": 0.0889, |
| "step": 249100 |
| }, |
| { |
| "epoch": 93.79, |
| "learning_rate": 1.242002258185924e-06, |
| "loss": 0.0892, |
| "step": 249200 |
| }, |
| { |
| "epoch": 93.83, |
| "learning_rate": 1.234474971772676e-06, |
| "loss": 0.0881, |
| "step": 249300 |
| }, |
| { |
| "epoch": 93.87, |
| "learning_rate": 1.226947685359428e-06, |
| "loss": 0.0904, |
| "step": 249400 |
| }, |
| { |
| "epoch": 93.9, |
| "learning_rate": 1.21942039894618e-06, |
| "loss": 0.0894, |
| "step": 249500 |
| }, |
| { |
| "epoch": 93.94, |
| "learning_rate": 1.211893112532932e-06, |
| "loss": 0.0904, |
| "step": 249600 |
| }, |
| { |
| "epoch": 93.98, |
| "learning_rate": 1.2043658261196838e-06, |
| "loss": 0.0899, |
| "step": 249700 |
| }, |
| { |
| "epoch": 94.0, |
| "eval_loss": 0.08932201564311981, |
| "eval_runtime": 43.7672, |
| "eval_samples_per_second": 171.361, |
| "eval_steps_per_second": 10.716, |
| "step": 249758 |
| }, |
| { |
| "epoch": 94.02, |
| "learning_rate": 1.196838539706436e-06, |
| "loss": 0.0896, |
| "step": 249800 |
| }, |
| { |
| "epoch": 94.05, |
| "learning_rate": 1.1893112532931878e-06, |
| "loss": 0.0895, |
| "step": 249900 |
| }, |
| { |
| "epoch": 94.09, |
| "learning_rate": 1.1817839668799399e-06, |
| "loss": 0.0899, |
| "step": 250000 |
| }, |
| { |
| "epoch": 94.13, |
| "learning_rate": 1.174256680466692e-06, |
| "loss": 0.0902, |
| "step": 250100 |
| }, |
| { |
| "epoch": 94.17, |
| "learning_rate": 1.1667293940534439e-06, |
| "loss": 0.0901, |
| "step": 250200 |
| }, |
| { |
| "epoch": 94.2, |
| "learning_rate": 1.1592021076401957e-06, |
| "loss": 0.0885, |
| "step": 250300 |
| }, |
| { |
| "epoch": 94.24, |
| "learning_rate": 1.1516748212269478e-06, |
| "loss": 0.0906, |
| "step": 250400 |
| }, |
| { |
| "epoch": 94.28, |
| "learning_rate": 1.1441475348136997e-06, |
| "loss": 0.0899, |
| "step": 250500 |
| }, |
| { |
| "epoch": 94.32, |
| "learning_rate": 1.1366202484004516e-06, |
| "loss": 0.0894, |
| "step": 250600 |
| }, |
| { |
| "epoch": 94.35, |
| "learning_rate": 1.1290929619872037e-06, |
| "loss": 0.0892, |
| "step": 250700 |
| }, |
| { |
| "epoch": 94.39, |
| "learning_rate": 1.1215656755739558e-06, |
| "loss": 0.0912, |
| "step": 250800 |
| }, |
| { |
| "epoch": 94.43, |
| "learning_rate": 1.1140383891607076e-06, |
| "loss": 0.0871, |
| "step": 250900 |
| }, |
| { |
| "epoch": 94.47, |
| "learning_rate": 1.1065111027474597e-06, |
| "loss": 0.0898, |
| "step": 251000 |
| }, |
| { |
| "epoch": 94.51, |
| "learning_rate": 1.0989838163342116e-06, |
| "loss": 0.0896, |
| "step": 251100 |
| }, |
| { |
| "epoch": 94.54, |
| "learning_rate": 1.0914565299209635e-06, |
| "loss": 0.0914, |
| "step": 251200 |
| }, |
| { |
| "epoch": 94.58, |
| "learning_rate": 1.0839292435077156e-06, |
| "loss": 0.0907, |
| "step": 251300 |
| }, |
| { |
| "epoch": 94.62, |
| "learning_rate": 1.0764019570944674e-06, |
| "loss": 0.0896, |
| "step": 251400 |
| }, |
| { |
| "epoch": 94.66, |
| "learning_rate": 1.0688746706812195e-06, |
| "loss": 0.0888, |
| "step": 251500 |
| }, |
| { |
| "epoch": 94.69, |
| "learning_rate": 1.0613473842679716e-06, |
| "loss": 0.0879, |
| "step": 251600 |
| }, |
| { |
| "epoch": 94.73, |
| "learning_rate": 1.0538200978547235e-06, |
| "loss": 0.0899, |
| "step": 251700 |
| }, |
| { |
| "epoch": 94.77, |
| "learning_rate": 1.0462928114414754e-06, |
| "loss": 0.0903, |
| "step": 251800 |
| }, |
| { |
| "epoch": 94.81, |
| "learning_rate": 1.0387655250282275e-06, |
| "loss": 0.0878, |
| "step": 251900 |
| }, |
| { |
| "epoch": 94.84, |
| "learning_rate": 1.0312382386149794e-06, |
| "loss": 0.0894, |
| "step": 252000 |
| }, |
| { |
| "epoch": 94.88, |
| "learning_rate": 1.0237109522017312e-06, |
| "loss": 0.0901, |
| "step": 252100 |
| }, |
| { |
| "epoch": 94.92, |
| "learning_rate": 1.0161836657884833e-06, |
| "loss": 0.0906, |
| "step": 252200 |
| }, |
| { |
| "epoch": 94.96, |
| "learning_rate": 1.0086563793752354e-06, |
| "loss": 0.0911, |
| "step": 252300 |
| }, |
| { |
| "epoch": 94.99, |
| "learning_rate": 1.0011290929619873e-06, |
| "loss": 0.0899, |
| "step": 252400 |
| }, |
| { |
| "epoch": 95.0, |
| "eval_loss": 0.08966313302516937, |
| "eval_runtime": 43.6014, |
| "eval_samples_per_second": 172.013, |
| "eval_steps_per_second": 10.757, |
| "step": 252415 |
| }, |
| { |
| "epoch": 95.03, |
| "learning_rate": 9.936018065487392e-07, |
| "loss": 0.0903, |
| "step": 252500 |
| }, |
| { |
| "epoch": 95.07, |
| "learning_rate": 9.860745201354913e-07, |
| "loss": 0.0907, |
| "step": 252600 |
| }, |
| { |
| "epoch": 95.11, |
| "learning_rate": 9.785472337222431e-07, |
| "loss": 0.0909, |
| "step": 252700 |
| }, |
| { |
| "epoch": 95.14, |
| "learning_rate": 9.710199473089952e-07, |
| "loss": 0.0911, |
| "step": 252800 |
| }, |
| { |
| "epoch": 95.18, |
| "learning_rate": 9.63492660895747e-07, |
| "loss": 0.0916, |
| "step": 252900 |
| }, |
| { |
| "epoch": 95.22, |
| "learning_rate": 9.559653744824992e-07, |
| "loss": 0.0908, |
| "step": 253000 |
| }, |
| { |
| "epoch": 95.26, |
| "learning_rate": 9.484380880692511e-07, |
| "loss": 0.0907, |
| "step": 253100 |
| }, |
| { |
| "epoch": 95.3, |
| "learning_rate": 9.409108016560032e-07, |
| "loss": 0.0892, |
| "step": 253200 |
| }, |
| { |
| "epoch": 95.33, |
| "learning_rate": 9.33383515242755e-07, |
| "loss": 0.091, |
| "step": 253300 |
| }, |
| { |
| "epoch": 95.37, |
| "learning_rate": 9.25856228829507e-07, |
| "loss": 0.0888, |
| "step": 253400 |
| }, |
| { |
| "epoch": 95.41, |
| "learning_rate": 9.18328942416259e-07, |
| "loss": 0.0892, |
| "step": 253500 |
| }, |
| { |
| "epoch": 95.45, |
| "learning_rate": 9.10801656003011e-07, |
| "loss": 0.09, |
| "step": 253600 |
| }, |
| { |
| "epoch": 95.48, |
| "learning_rate": 9.032743695897629e-07, |
| "loss": 0.0896, |
| "step": 253700 |
| }, |
| { |
| "epoch": 95.52, |
| "learning_rate": 8.95747083176515e-07, |
| "loss": 0.0891, |
| "step": 253800 |
| }, |
| { |
| "epoch": 95.56, |
| "learning_rate": 8.882197967632669e-07, |
| "loss": 0.0897, |
| "step": 253900 |
| }, |
| { |
| "epoch": 95.6, |
| "learning_rate": 8.806925103500188e-07, |
| "loss": 0.0893, |
| "step": 254000 |
| }, |
| { |
| "epoch": 95.63, |
| "learning_rate": 8.731652239367709e-07, |
| "loss": 0.0901, |
| "step": 254100 |
| }, |
| { |
| "epoch": 95.67, |
| "learning_rate": 8.656379375235229e-07, |
| "loss": 0.0887, |
| "step": 254200 |
| }, |
| { |
| "epoch": 95.71, |
| "learning_rate": 8.581106511102748e-07, |
| "loss": 0.0886, |
| "step": 254300 |
| }, |
| { |
| "epoch": 95.75, |
| "learning_rate": 8.505833646970269e-07, |
| "loss": 0.0907, |
| "step": 254400 |
| }, |
| { |
| "epoch": 95.78, |
| "learning_rate": 8.430560782837787e-07, |
| "loss": 0.0894, |
| "step": 254500 |
| }, |
| { |
| "epoch": 95.82, |
| "learning_rate": 8.355287918705307e-07, |
| "loss": 0.0899, |
| "step": 254600 |
| }, |
| { |
| "epoch": 95.86, |
| "learning_rate": 8.280015054572828e-07, |
| "loss": 0.0881, |
| "step": 254700 |
| }, |
| { |
| "epoch": 95.9, |
| "learning_rate": 8.204742190440347e-07, |
| "loss": 0.0902, |
| "step": 254800 |
| }, |
| { |
| "epoch": 95.94, |
| "learning_rate": 8.129469326307867e-07, |
| "loss": 0.0895, |
| "step": 254900 |
| }, |
| { |
| "epoch": 95.97, |
| "learning_rate": 8.054196462175387e-07, |
| "loss": 0.0904, |
| "step": 255000 |
| }, |
| { |
| "epoch": 96.0, |
| "eval_loss": 0.0898142084479332, |
| "eval_runtime": 43.5802, |
| "eval_samples_per_second": 172.096, |
| "eval_steps_per_second": 10.762, |
| "step": 255072 |
| }, |
| { |
| "epoch": 96.01, |
| "learning_rate": 7.978923598042906e-07, |
| "loss": 0.0897, |
| "step": 255100 |
| }, |
| { |
| "epoch": 96.05, |
| "learning_rate": 7.903650733910425e-07, |
| "loss": 0.0908, |
| "step": 255200 |
| }, |
| { |
| "epoch": 96.09, |
| "learning_rate": 7.828377869777945e-07, |
| "loss": 0.0905, |
| "step": 255300 |
| }, |
| { |
| "epoch": 96.12, |
| "learning_rate": 7.753105005645466e-07, |
| "loss": 0.0892, |
| "step": 255400 |
| }, |
| { |
| "epoch": 96.16, |
| "learning_rate": 7.677832141512985e-07, |
| "loss": 0.0891, |
| "step": 255500 |
| }, |
| { |
| "epoch": 96.2, |
| "learning_rate": 7.602559277380505e-07, |
| "loss": 0.0898, |
| "step": 255600 |
| }, |
| { |
| "epoch": 96.24, |
| "learning_rate": 7.527286413248026e-07, |
| "loss": 0.0893, |
| "step": 255700 |
| }, |
| { |
| "epoch": 96.27, |
| "learning_rate": 7.452013549115544e-07, |
| "loss": 0.0896, |
| "step": 255800 |
| }, |
| { |
| "epoch": 96.31, |
| "learning_rate": 7.376740684983064e-07, |
| "loss": 0.0897, |
| "step": 255900 |
| }, |
| { |
| "epoch": 96.35, |
| "learning_rate": 7.301467820850584e-07, |
| "loss": 0.0903, |
| "step": 256000 |
| }, |
| { |
| "epoch": 96.39, |
| "learning_rate": 7.226194956718104e-07, |
| "loss": 0.0898, |
| "step": 256100 |
| }, |
| { |
| "epoch": 96.42, |
| "learning_rate": 7.150922092585623e-07, |
| "loss": 0.0895, |
| "step": 256200 |
| }, |
| { |
| "epoch": 96.46, |
| "learning_rate": 7.075649228453143e-07, |
| "loss": 0.0902, |
| "step": 256300 |
| }, |
| { |
| "epoch": 96.5, |
| "learning_rate": 7.000376364320663e-07, |
| "loss": 0.0889, |
| "step": 256400 |
| }, |
| { |
| "epoch": 96.54, |
| "learning_rate": 6.925103500188182e-07, |
| "loss": 0.0893, |
| "step": 256500 |
| }, |
| { |
| "epoch": 96.58, |
| "learning_rate": 6.849830636055703e-07, |
| "loss": 0.0901, |
| "step": 256600 |
| }, |
| { |
| "epoch": 96.61, |
| "learning_rate": 6.774557771923222e-07, |
| "loss": 0.09, |
| "step": 256700 |
| }, |
| { |
| "epoch": 96.65, |
| "learning_rate": 6.699284907790742e-07, |
| "loss": 0.0889, |
| "step": 256800 |
| }, |
| { |
| "epoch": 96.69, |
| "learning_rate": 6.624012043658263e-07, |
| "loss": 0.0887, |
| "step": 256900 |
| }, |
| { |
| "epoch": 96.73, |
| "learning_rate": 6.548739179525781e-07, |
| "loss": 0.089, |
| "step": 257000 |
| }, |
| { |
| "epoch": 96.76, |
| "learning_rate": 6.473466315393301e-07, |
| "loss": 0.0907, |
| "step": 257100 |
| }, |
| { |
| "epoch": 96.8, |
| "learning_rate": 6.398193451260822e-07, |
| "loss": 0.0888, |
| "step": 257200 |
| }, |
| { |
| "epoch": 96.84, |
| "learning_rate": 6.322920587128341e-07, |
| "loss": 0.0882, |
| "step": 257300 |
| }, |
| { |
| "epoch": 96.88, |
| "learning_rate": 6.247647722995861e-07, |
| "loss": 0.0897, |
| "step": 257400 |
| }, |
| { |
| "epoch": 96.91, |
| "learning_rate": 6.17237485886338e-07, |
| "loss": 0.0893, |
| "step": 257500 |
| }, |
| { |
| "epoch": 96.95, |
| "learning_rate": 6.0971019947309e-07, |
| "loss": 0.0892, |
| "step": 257600 |
| }, |
| { |
| "epoch": 96.99, |
| "learning_rate": 6.021829130598419e-07, |
| "loss": 0.0906, |
| "step": 257700 |
| }, |
| { |
| "epoch": 97.0, |
| "eval_loss": 0.08935380727052689, |
| "eval_runtime": 43.4106, |
| "eval_samples_per_second": 172.769, |
| "eval_steps_per_second": 10.804, |
| "step": 257729 |
| }, |
| { |
| "epoch": 97.03, |
| "learning_rate": 5.946556266465939e-07, |
| "loss": 0.0902, |
| "step": 257800 |
| }, |
| { |
| "epoch": 97.06, |
| "learning_rate": 5.87128340233346e-07, |
| "loss": 0.0891, |
| "step": 257900 |
| }, |
| { |
| "epoch": 97.1, |
| "learning_rate": 5.796010538200979e-07, |
| "loss": 0.088, |
| "step": 258000 |
| }, |
| { |
| "epoch": 97.14, |
| "learning_rate": 5.720737674068498e-07, |
| "loss": 0.0905, |
| "step": 258100 |
| }, |
| { |
| "epoch": 97.18, |
| "learning_rate": 5.645464809936018e-07, |
| "loss": 0.088, |
| "step": 258200 |
| }, |
| { |
| "epoch": 97.21, |
| "learning_rate": 5.570191945803538e-07, |
| "loss": 0.0886, |
| "step": 258300 |
| }, |
| { |
| "epoch": 97.25, |
| "learning_rate": 5.494919081671058e-07, |
| "loss": 0.0889, |
| "step": 258400 |
| }, |
| { |
| "epoch": 97.29, |
| "learning_rate": 5.419646217538578e-07, |
| "loss": 0.0877, |
| "step": 258500 |
| }, |
| { |
| "epoch": 97.33, |
| "learning_rate": 5.344373353406098e-07, |
| "loss": 0.0904, |
| "step": 258600 |
| }, |
| { |
| "epoch": 97.37, |
| "learning_rate": 5.269100489273618e-07, |
| "loss": 0.0886, |
| "step": 258700 |
| }, |
| { |
| "epoch": 97.4, |
| "learning_rate": 5.193827625141137e-07, |
| "loss": 0.0896, |
| "step": 258800 |
| }, |
| { |
| "epoch": 97.44, |
| "learning_rate": 5.118554761008656e-07, |
| "loss": 0.0884, |
| "step": 258900 |
| }, |
| { |
| "epoch": 97.48, |
| "learning_rate": 5.043281896876177e-07, |
| "loss": 0.0896, |
| "step": 259000 |
| }, |
| { |
| "epoch": 97.52, |
| "learning_rate": 4.968009032743696e-07, |
| "loss": 0.0886, |
| "step": 259100 |
| }, |
| { |
| "epoch": 97.55, |
| "learning_rate": 4.892736168611216e-07, |
| "loss": 0.0895, |
| "step": 259200 |
| }, |
| { |
| "epoch": 97.59, |
| "learning_rate": 4.817463304478736e-07, |
| "loss": 0.0889, |
| "step": 259300 |
| }, |
| { |
| "epoch": 97.63, |
| "learning_rate": 4.7421904403462554e-07, |
| "loss": 0.0888, |
| "step": 259400 |
| }, |
| { |
| "epoch": 97.67, |
| "learning_rate": 4.666917576213775e-07, |
| "loss": 0.0897, |
| "step": 259500 |
| }, |
| { |
| "epoch": 97.7, |
| "learning_rate": 4.591644712081295e-07, |
| "loss": 0.0903, |
| "step": 259600 |
| }, |
| { |
| "epoch": 97.74, |
| "learning_rate": 4.5163718479488144e-07, |
| "loss": 0.0883, |
| "step": 259700 |
| }, |
| { |
| "epoch": 97.78, |
| "learning_rate": 4.4410989838163347e-07, |
| "loss": 0.0902, |
| "step": 259800 |
| }, |
| { |
| "epoch": 97.82, |
| "learning_rate": 4.3658261196838546e-07, |
| "loss": 0.09, |
| "step": 259900 |
| }, |
| { |
| "epoch": 97.85, |
| "learning_rate": 4.290553255551374e-07, |
| "loss": 0.0883, |
| "step": 260000 |
| }, |
| { |
| "epoch": 97.89, |
| "learning_rate": 4.2152803914188937e-07, |
| "loss": 0.0904, |
| "step": 260100 |
| }, |
| { |
| "epoch": 97.93, |
| "learning_rate": 4.140007527286414e-07, |
| "loss": 0.0891, |
| "step": 260200 |
| }, |
| { |
| "epoch": 97.97, |
| "learning_rate": 4.0647346631539334e-07, |
| "loss": 0.0892, |
| "step": 260300 |
| }, |
| { |
| "epoch": 98.0, |
| "eval_loss": 0.08942902088165283, |
| "eval_runtime": 43.3793, |
| "eval_samples_per_second": 172.893, |
| "eval_steps_per_second": 10.812, |
| "step": 260386 |
| }, |
| { |
| "epoch": 98.01, |
| "learning_rate": 3.989461799021453e-07, |
| "loss": 0.0902, |
| "step": 260400 |
| }, |
| { |
| "epoch": 98.04, |
| "learning_rate": 3.9141889348889725e-07, |
| "loss": 0.0906, |
| "step": 260500 |
| }, |
| { |
| "epoch": 98.08, |
| "learning_rate": 3.8389160707564924e-07, |
| "loss": 0.0889, |
| "step": 260600 |
| }, |
| { |
| "epoch": 98.12, |
| "learning_rate": 3.763643206624013e-07, |
| "loss": 0.0907, |
| "step": 260700 |
| }, |
| { |
| "epoch": 98.16, |
| "learning_rate": 3.688370342491532e-07, |
| "loss": 0.0879, |
| "step": 260800 |
| }, |
| { |
| "epoch": 98.19, |
| "learning_rate": 3.613097478359052e-07, |
| "loss": 0.0877, |
| "step": 260900 |
| }, |
| { |
| "epoch": 98.23, |
| "learning_rate": 3.537824614226572e-07, |
| "loss": 0.0895, |
| "step": 261000 |
| }, |
| { |
| "epoch": 98.27, |
| "learning_rate": 3.462551750094091e-07, |
| "loss": 0.0903, |
| "step": 261100 |
| }, |
| { |
| "epoch": 98.31, |
| "learning_rate": 3.387278885961611e-07, |
| "loss": 0.0897, |
| "step": 261200 |
| }, |
| { |
| "epoch": 98.34, |
| "learning_rate": 3.312006021829131e-07, |
| "loss": 0.0886, |
| "step": 261300 |
| }, |
| { |
| "epoch": 98.38, |
| "learning_rate": 3.2367331576966506e-07, |
| "loss": 0.0882, |
| "step": 261400 |
| }, |
| { |
| "epoch": 98.42, |
| "learning_rate": 3.1614602935641704e-07, |
| "loss": 0.0894, |
| "step": 261500 |
| }, |
| { |
| "epoch": 98.46, |
| "learning_rate": 3.08618742943169e-07, |
| "loss": 0.0901, |
| "step": 261600 |
| }, |
| { |
| "epoch": 98.49, |
| "learning_rate": 3.0109145652992096e-07, |
| "loss": 0.0909, |
| "step": 261700 |
| }, |
| { |
| "epoch": 98.53, |
| "learning_rate": 2.93564170116673e-07, |
| "loss": 0.0897, |
| "step": 261800 |
| }, |
| { |
| "epoch": 98.57, |
| "learning_rate": 2.860368837034249e-07, |
| "loss": 0.0907, |
| "step": 261900 |
| }, |
| { |
| "epoch": 98.61, |
| "learning_rate": 2.785095972901769e-07, |
| "loss": 0.0885, |
| "step": 262000 |
| }, |
| { |
| "epoch": 98.65, |
| "learning_rate": 2.709823108769289e-07, |
| "loss": 0.0898, |
| "step": 262100 |
| }, |
| { |
| "epoch": 98.68, |
| "learning_rate": 2.634550244636809e-07, |
| "loss": 0.0897, |
| "step": 262200 |
| }, |
| { |
| "epoch": 98.72, |
| "learning_rate": 2.559277380504328e-07, |
| "loss": 0.0887, |
| "step": 262300 |
| }, |
| { |
| "epoch": 98.76, |
| "learning_rate": 2.484004516371848e-07, |
| "loss": 0.0888, |
| "step": 262400 |
| }, |
| { |
| "epoch": 98.8, |
| "learning_rate": 2.408731652239368e-07, |
| "loss": 0.0897, |
| "step": 262500 |
| }, |
| { |
| "epoch": 98.83, |
| "learning_rate": 2.3334587881068876e-07, |
| "loss": 0.0894, |
| "step": 262600 |
| }, |
| { |
| "epoch": 98.87, |
| "learning_rate": 2.2581859239744072e-07, |
| "loss": 0.0902, |
| "step": 262700 |
| }, |
| { |
| "epoch": 98.91, |
| "learning_rate": 2.1829130598419273e-07, |
| "loss": 0.0898, |
| "step": 262800 |
| }, |
| { |
| "epoch": 98.95, |
| "learning_rate": 2.1076401957094469e-07, |
| "loss": 0.0908, |
| "step": 262900 |
| }, |
| { |
| "epoch": 98.98, |
| "learning_rate": 2.0323673315769667e-07, |
| "loss": 0.0881, |
| "step": 263000 |
| }, |
| { |
| "epoch": 99.0, |
| "eval_loss": 0.08917281776666641, |
| "eval_runtime": 43.4811, |
| "eval_samples_per_second": 172.489, |
| "eval_steps_per_second": 10.786, |
| "step": 263043 |
| }, |
| { |
| "epoch": 99.02, |
| "learning_rate": 1.9570944674444863e-07, |
| "loss": 0.0909, |
| "step": 263100 |
| }, |
| { |
| "epoch": 99.06, |
| "learning_rate": 1.8818216033120064e-07, |
| "loss": 0.0888, |
| "step": 263200 |
| }, |
| { |
| "epoch": 99.1, |
| "learning_rate": 1.806548739179526e-07, |
| "loss": 0.0897, |
| "step": 263300 |
| }, |
| { |
| "epoch": 99.13, |
| "learning_rate": 1.7312758750470455e-07, |
| "loss": 0.0897, |
| "step": 263400 |
| }, |
| { |
| "epoch": 99.17, |
| "learning_rate": 1.6560030109145656e-07, |
| "loss": 0.0882, |
| "step": 263500 |
| }, |
| { |
| "epoch": 99.21, |
| "learning_rate": 1.5807301467820852e-07, |
| "loss": 0.0889, |
| "step": 263600 |
| }, |
| { |
| "epoch": 99.25, |
| "learning_rate": 1.5054572826496048e-07, |
| "loss": 0.0908, |
| "step": 263700 |
| }, |
| { |
| "epoch": 99.28, |
| "learning_rate": 1.4301844185171246e-07, |
| "loss": 0.088, |
| "step": 263800 |
| }, |
| { |
| "epoch": 99.32, |
| "learning_rate": 1.3549115543846445e-07, |
| "loss": 0.0876, |
| "step": 263900 |
| }, |
| { |
| "epoch": 99.36, |
| "learning_rate": 1.279638690252164e-07, |
| "loss": 0.088, |
| "step": 264000 |
| }, |
| { |
| "epoch": 99.4, |
| "learning_rate": 1.204365826119684e-07, |
| "loss": 0.0892, |
| "step": 264100 |
| }, |
| { |
| "epoch": 99.44, |
| "learning_rate": 1.1290929619872036e-07, |
| "loss": 0.0895, |
| "step": 264200 |
| }, |
| { |
| "epoch": 99.47, |
| "learning_rate": 1.0538200978547234e-07, |
| "loss": 0.0899, |
| "step": 264300 |
| }, |
| { |
| "epoch": 99.51, |
| "learning_rate": 9.785472337222431e-08, |
| "loss": 0.089, |
| "step": 264400 |
| }, |
| { |
| "epoch": 99.55, |
| "learning_rate": 9.03274369589763e-08, |
| "loss": 0.0895, |
| "step": 264500 |
| }, |
| { |
| "epoch": 99.59, |
| "learning_rate": 8.280015054572828e-08, |
| "loss": 0.091, |
| "step": 264600 |
| }, |
| { |
| "epoch": 99.62, |
| "learning_rate": 7.527286413248024e-08, |
| "loss": 0.0896, |
| "step": 264700 |
| }, |
| { |
| "epoch": 99.66, |
| "learning_rate": 6.774557771923222e-08, |
| "loss": 0.0894, |
| "step": 264800 |
| }, |
| { |
| "epoch": 99.7, |
| "learning_rate": 6.02182913059842e-08, |
| "loss": 0.0898, |
| "step": 264900 |
| }, |
| { |
| "epoch": 99.74, |
| "learning_rate": 5.269100489273617e-08, |
| "loss": 0.0882, |
| "step": 265000 |
| }, |
| { |
| "epoch": 99.77, |
| "learning_rate": 4.516371847948815e-08, |
| "loss": 0.0901, |
| "step": 265100 |
| }, |
| { |
| "epoch": 99.81, |
| "learning_rate": 3.763643206624012e-08, |
| "loss": 0.0885, |
| "step": 265200 |
| }, |
| { |
| "epoch": 99.85, |
| "learning_rate": 3.01091456529921e-08, |
| "loss": 0.0901, |
| "step": 265300 |
| }, |
| { |
| "epoch": 99.89, |
| "learning_rate": 2.2581859239744074e-08, |
| "loss": 0.0897, |
| "step": 265400 |
| }, |
| { |
| "epoch": 99.92, |
| "learning_rate": 1.505457282649605e-08, |
| "loss": 0.0902, |
| "step": 265500 |
| }, |
| { |
| "epoch": 99.96, |
| "learning_rate": 7.527286413248024e-09, |
| "loss": 0.0911, |
| "step": 265600 |
| }, |
| { |
| "epoch": 100.0, |
| "learning_rate": 0.0, |
| "loss": 0.09, |
| "step": 265700 |
| }, |
| { |
| "epoch": 100.0, |
| "eval_loss": 0.08935302495956421, |
| "eval_runtime": 44.5127, |
| "eval_samples_per_second": 168.491, |
| "eval_steps_per_second": 10.536, |
| "step": 265700 |
| }, |
| { |
| "epoch": 100.0, |
| "step": 265700, |
| "total_flos": 3.31604966375424e+20, |
| "train_loss": 0.10943094944119408, |
| "train_runtime": 65782.603, |
| "train_samples_per_second": 64.607, |
| "train_steps_per_second": 4.039 |
| } |
| ], |
| "max_steps": 265700, |
| "num_train_epochs": 100, |
| "total_flos": 3.31604966375424e+20, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|