| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 10.0, | |
| "global_step": 102820, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0001, | |
| "loss": 2.9981, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 0.7217285633087158, | |
| "eval_runtime": 1.4105, | |
| "eval_samples_per_second": 708.956, | |
| "eval_steps_per_second": 22.687, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 9.95113369820172e-05, | |
| "loss": 0.3596, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "eval_loss": 0.683592677116394, | |
| "eval_runtime": 1.425, | |
| "eval_samples_per_second": 701.743, | |
| "eval_steps_per_second": 22.456, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 9.90226739640344e-05, | |
| "loss": 0.3481, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "eval_loss": 0.6578707695007324, | |
| "eval_runtime": 1.4239, | |
| "eval_samples_per_second": 702.301, | |
| "eval_steps_per_second": 22.474, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 9.85340109460516e-05, | |
| "loss": 0.3381, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "eval_loss": 0.6479542255401611, | |
| "eval_runtime": 1.42, | |
| "eval_samples_per_second": 704.242, | |
| "eval_steps_per_second": 22.536, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 9.80453479280688e-05, | |
| "loss": 0.3289, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "eval_loss": 0.6387728452682495, | |
| "eval_runtime": 1.4329, | |
| "eval_samples_per_second": 697.89, | |
| "eval_steps_per_second": 22.332, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 9.7556684910086e-05, | |
| "loss": 0.3279, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "eval_loss": 0.6298181414604187, | |
| "eval_runtime": 1.4061, | |
| "eval_samples_per_second": 711.206, | |
| "eval_steps_per_second": 22.759, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 9.706802189210322e-05, | |
| "loss": 0.3217, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "eval_loss": 0.62165367603302, | |
| "eval_runtime": 1.421, | |
| "eval_samples_per_second": 703.748, | |
| "eval_steps_per_second": 22.52, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 9.65793588741204e-05, | |
| "loss": 0.3165, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "eval_loss": 0.616775393486023, | |
| "eval_runtime": 1.4244, | |
| "eval_samples_per_second": 702.031, | |
| "eval_steps_per_second": 22.465, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 9.609069585613761e-05, | |
| "loss": 0.3188, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "eval_loss": 0.6099900007247925, | |
| "eval_runtime": 1.4195, | |
| "eval_samples_per_second": 704.489, | |
| "eval_steps_per_second": 22.544, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 9.560203283815481e-05, | |
| "loss": 0.3022, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "eval_loss": 0.6031085252761841, | |
| "eval_runtime": 1.4165, | |
| "eval_samples_per_second": 705.968, | |
| "eval_steps_per_second": 22.591, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 9.511336982017201e-05, | |
| "loss": 0.3167, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "eval_loss": 0.5986317992210388, | |
| "eval_runtime": 1.413, | |
| "eval_samples_per_second": 707.704, | |
| "eval_steps_per_second": 22.647, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 9.462470680218921e-05, | |
| "loss": 0.3123, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "eval_loss": 0.594712495803833, | |
| "eval_runtime": 1.4076, | |
| "eval_samples_per_second": 710.451, | |
| "eval_steps_per_second": 22.734, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 9.413604378420641e-05, | |
| "loss": 0.3102, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "eval_loss": 0.589926540851593, | |
| "eval_runtime": 1.4215, | |
| "eval_samples_per_second": 703.5, | |
| "eval_steps_per_second": 22.512, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 9.364738076622361e-05, | |
| "loss": 0.3029, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "eval_loss": 0.5852935910224915, | |
| "eval_runtime": 1.4145, | |
| "eval_samples_per_second": 706.961, | |
| "eval_steps_per_second": 22.623, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 9.315871774824082e-05, | |
| "loss": 0.2999, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "eval_loss": 0.5810648798942566, | |
| "eval_runtime": 1.4374, | |
| "eval_samples_per_second": 695.724, | |
| "eval_steps_per_second": 22.263, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 9.267005473025801e-05, | |
| "loss": 0.2898, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "eval_loss": 0.5774234533309937, | |
| "eval_runtime": 1.422, | |
| "eval_samples_per_second": 703.256, | |
| "eval_steps_per_second": 22.504, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 9.218139171227522e-05, | |
| "loss": 0.2924, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "eval_loss": 0.5741690993309021, | |
| "eval_runtime": 1.4369, | |
| "eval_samples_per_second": 695.963, | |
| "eval_steps_per_second": 22.271, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 9.169272869429242e-05, | |
| "loss": 0.2965, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "eval_loss": 0.5683675408363342, | |
| "eval_runtime": 1.4175, | |
| "eval_samples_per_second": 705.474, | |
| "eval_steps_per_second": 22.575, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 9.120406567630962e-05, | |
| "loss": 0.3003, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "eval_loss": 0.5660465359687805, | |
| "eval_runtime": 1.4225, | |
| "eval_samples_per_second": 703.011, | |
| "eval_steps_per_second": 22.496, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 9.071540265832682e-05, | |
| "loss": 0.2877, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "eval_loss": 0.5636941194534302, | |
| "eval_runtime": 1.4249, | |
| "eval_samples_per_second": 701.786, | |
| "eval_steps_per_second": 22.457, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 9.022673964034402e-05, | |
| "loss": 0.28, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "eval_loss": 0.5614505410194397, | |
| "eval_runtime": 1.418, | |
| "eval_samples_per_second": 705.226, | |
| "eval_steps_per_second": 22.567, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 8.973807662236122e-05, | |
| "loss": 0.2596, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "eval_loss": 0.560372531414032, | |
| "eval_runtime": 1.419, | |
| "eval_samples_per_second": 704.735, | |
| "eval_steps_per_second": 22.552, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 8.924941360437843e-05, | |
| "loss": 0.2629, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "eval_loss": 0.5570399165153503, | |
| "eval_runtime": 1.422, | |
| "eval_samples_per_second": 703.254, | |
| "eval_steps_per_second": 22.504, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 8.876075058639562e-05, | |
| "loss": 0.2588, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "eval_loss": 0.5555282831192017, | |
| "eval_runtime": 1.4115, | |
| "eval_samples_per_second": 708.456, | |
| "eval_steps_per_second": 22.671, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 8.827208756841283e-05, | |
| "loss": 0.2623, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "eval_loss": 0.5514973402023315, | |
| "eval_runtime": 1.4195, | |
| "eval_samples_per_second": 704.486, | |
| "eval_steps_per_second": 22.544, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 8.778342455043003e-05, | |
| "loss": 0.2553, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "eval_loss": 0.5486002564430237, | |
| "eval_runtime": 1.42, | |
| "eval_samples_per_second": 704.24, | |
| "eval_steps_per_second": 22.536, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 8.729476153244723e-05, | |
| "loss": 0.262, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "eval_loss": 0.5457084774971008, | |
| "eval_runtime": 1.4135, | |
| "eval_samples_per_second": 707.481, | |
| "eval_steps_per_second": 22.639, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 8.680609851446443e-05, | |
| "loss": 0.2613, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "eval_loss": 0.5417377948760986, | |
| "eval_runtime": 1.4156, | |
| "eval_samples_per_second": 706.429, | |
| "eval_steps_per_second": 22.606, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 8.631743549648163e-05, | |
| "loss": 0.2679, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "eval_loss": 0.5402753949165344, | |
| "eval_runtime": 1.4211, | |
| "eval_samples_per_second": 703.665, | |
| "eval_steps_per_second": 22.517, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 8.582877247849883e-05, | |
| "loss": 0.2537, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "eval_loss": 0.5380659699440002, | |
| "eval_runtime": 1.4234, | |
| "eval_samples_per_second": 702.521, | |
| "eval_steps_per_second": 22.481, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 8.534010946051603e-05, | |
| "loss": 0.2502, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "eval_loss": 0.532864511013031, | |
| "eval_runtime": 1.4508, | |
| "eval_samples_per_second": 689.292, | |
| "eval_steps_per_second": 22.057, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 8.485144644253323e-05, | |
| "loss": 0.2594, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "eval_loss": 0.5308486223220825, | |
| "eval_runtime": 1.4175, | |
| "eval_samples_per_second": 705.473, | |
| "eval_steps_per_second": 22.575, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 8.436278342455043e-05, | |
| "loss": 0.2495, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "eval_loss": 0.5297316312789917, | |
| "eval_runtime": 1.416, | |
| "eval_samples_per_second": 706.219, | |
| "eval_steps_per_second": 22.599, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 8.387412040656764e-05, | |
| "loss": 0.2499, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "eval_loss": 0.5281020402908325, | |
| "eval_runtime": 1.4056, | |
| "eval_samples_per_second": 711.457, | |
| "eval_steps_per_second": 22.767, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 8.338545738858483e-05, | |
| "loss": 0.2578, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "eval_loss": 0.5247856378555298, | |
| "eval_runtime": 1.4135, | |
| "eval_samples_per_second": 707.459, | |
| "eval_steps_per_second": 22.639, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 8.289679437060204e-05, | |
| "loss": 0.2497, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "eval_loss": 0.5230081677436829, | |
| "eval_runtime": 1.4438, | |
| "eval_samples_per_second": 692.611, | |
| "eval_steps_per_second": 22.164, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 8.240813135261924e-05, | |
| "loss": 0.2565, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "eval_loss": 0.5200880765914917, | |
| "eval_runtime": 1.4398, | |
| "eval_samples_per_second": 694.522, | |
| "eval_steps_per_second": 22.225, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 8.191946833463644e-05, | |
| "loss": 0.2523, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "eval_loss": 0.5170234441757202, | |
| "eval_runtime": 1.4299, | |
| "eval_samples_per_second": 699.349, | |
| "eval_steps_per_second": 22.379, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 8.143080531665364e-05, | |
| "loss": 0.2501, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "eval_loss": 0.5144360065460205, | |
| "eval_runtime": 1.4239, | |
| "eval_samples_per_second": 702.275, | |
| "eval_steps_per_second": 22.473, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 8.094214229867084e-05, | |
| "loss": 0.2488, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "eval_loss": 0.5127125382423401, | |
| "eval_runtime": 1.4244, | |
| "eval_samples_per_second": 702.027, | |
| "eval_steps_per_second": 22.465, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 8.045347928068804e-05, | |
| "loss": 0.2465, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "eval_loss": 0.511444091796875, | |
| "eval_runtime": 1.422, | |
| "eval_samples_per_second": 703.257, | |
| "eval_steps_per_second": 22.504, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 7.996481626270525e-05, | |
| "loss": 0.2282, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "eval_loss": 0.512248158454895, | |
| "eval_runtime": 1.419, | |
| "eval_samples_per_second": 704.734, | |
| "eval_steps_per_second": 22.551, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 7.947615324472244e-05, | |
| "loss": 0.2251, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "eval_loss": 0.5102840065956116, | |
| "eval_runtime": 1.414, | |
| "eval_samples_per_second": 707.209, | |
| "eval_steps_per_second": 22.631, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 7.898749022673965e-05, | |
| "loss": 0.2172, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "eval_loss": 0.5100817680358887, | |
| "eval_runtime": 1.4279, | |
| "eval_samples_per_second": 700.321, | |
| "eval_steps_per_second": 22.41, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 7.849882720875685e-05, | |
| "loss": 0.2143, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "eval_loss": 0.509198784828186, | |
| "eval_runtime": 1.4398, | |
| "eval_samples_per_second": 694.524, | |
| "eval_steps_per_second": 22.225, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 7.801016419077405e-05, | |
| "loss": 0.2215, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "eval_loss": 0.5054255127906799, | |
| "eval_runtime": 1.4289, | |
| "eval_samples_per_second": 699.834, | |
| "eval_steps_per_second": 22.395, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 7.752150117279125e-05, | |
| "loss": 0.2175, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "eval_loss": 0.5042800307273865, | |
| "eval_runtime": 1.4234, | |
| "eval_samples_per_second": 702.521, | |
| "eval_steps_per_second": 22.481, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 7.703283815480845e-05, | |
| "loss": 0.2181, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "eval_loss": 0.5006986260414124, | |
| "eval_runtime": 1.419, | |
| "eval_samples_per_second": 704.732, | |
| "eval_steps_per_second": 22.551, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 7.654417513682565e-05, | |
| "loss": 0.2229, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "eval_loss": 0.49812304973602295, | |
| "eval_runtime": 1.4369, | |
| "eval_samples_per_second": 695.962, | |
| "eval_steps_per_second": 22.271, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 7.605551211884286e-05, | |
| "loss": 0.2103, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "eval_loss": 0.49662986397743225, | |
| "eval_runtime": 1.4066, | |
| "eval_samples_per_second": 710.955, | |
| "eval_steps_per_second": 22.751, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 7.556684910086005e-05, | |
| "loss": 0.2195, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "eval_loss": 0.4949464499950409, | |
| "eval_runtime": 1.4145, | |
| "eval_samples_per_second": 706.961, | |
| "eval_steps_per_second": 22.623, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 7.507818608287726e-05, | |
| "loss": 0.2197, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "eval_loss": 0.49265730381011963, | |
| "eval_runtime": 1.4441, | |
| "eval_samples_per_second": 692.484, | |
| "eval_steps_per_second": 22.159, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 7.458952306489444e-05, | |
| "loss": 0.2163, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "eval_loss": 0.4933662414550781, | |
| "eval_runtime": 1.4196, | |
| "eval_samples_per_second": 704.41, | |
| "eval_steps_per_second": 22.541, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 7.410086004691166e-05, | |
| "loss": 0.2203, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "eval_loss": 0.4912818670272827, | |
| "eval_runtime": 1.4225, | |
| "eval_samples_per_second": 703.011, | |
| "eval_steps_per_second": 22.496, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 7.361219702892886e-05, | |
| "loss": 0.2131, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "eval_loss": 0.49019622802734375, | |
| "eval_runtime": 1.4165, | |
| "eval_samples_per_second": 705.97, | |
| "eval_steps_per_second": 22.591, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 7.312353401094606e-05, | |
| "loss": 0.2192, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "eval_loss": 0.48748642206192017, | |
| "eval_runtime": 1.419, | |
| "eval_samples_per_second": 704.732, | |
| "eval_steps_per_second": 22.551, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 7.263487099296326e-05, | |
| "loss": 0.216, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "eval_loss": 0.4867847263813019, | |
| "eval_runtime": 1.4078, | |
| "eval_samples_per_second": 710.341, | |
| "eval_steps_per_second": 22.731, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 7.214620797498047e-05, | |
| "loss": 0.2151, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "eval_loss": 0.4847819209098816, | |
| "eval_runtime": 1.4175, | |
| "eval_samples_per_second": 705.473, | |
| "eval_steps_per_second": 22.575, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 7.165754495699765e-05, | |
| "loss": 0.2134, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "eval_loss": 0.48309269547462463, | |
| "eval_runtime": 1.4264, | |
| "eval_samples_per_second": 701.053, | |
| "eval_steps_per_second": 22.434, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 7.116888193901487e-05, | |
| "loss": 0.215, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "eval_loss": 0.4808345437049866, | |
| "eval_runtime": 1.4354, | |
| "eval_samples_per_second": 696.688, | |
| "eval_steps_per_second": 22.294, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 7.068021892103205e-05, | |
| "loss": 0.2149, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "eval_loss": 0.48003917932510376, | |
| "eval_runtime": 1.4294, | |
| "eval_samples_per_second": 699.594, | |
| "eval_steps_per_second": 22.387, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 3.01, | |
| "learning_rate": 7.019155590304925e-05, | |
| "loss": 0.2081, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 3.01, | |
| "eval_loss": 0.4828941524028778, | |
| "eval_runtime": 1.4264, | |
| "eval_samples_per_second": 701.051, | |
| "eval_steps_per_second": 22.434, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 3.06, | |
| "learning_rate": 6.970289288506647e-05, | |
| "loss": 0.1851, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 3.06, | |
| "eval_loss": 0.48561614751815796, | |
| "eval_runtime": 1.4155, | |
| "eval_samples_per_second": 706.463, | |
| "eval_steps_per_second": 22.607, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 3.11, | |
| "learning_rate": 6.921422986708365e-05, | |
| "loss": 0.1888, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 3.11, | |
| "eval_loss": 0.48478779196739197, | |
| "eval_runtime": 1.4155, | |
| "eval_samples_per_second": 706.465, | |
| "eval_steps_per_second": 22.607, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 3.16, | |
| "learning_rate": 6.872556684910086e-05, | |
| "loss": 0.1916, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 3.16, | |
| "eval_loss": 0.4795476198196411, | |
| "eval_runtime": 1.4239, | |
| "eval_samples_per_second": 702.273, | |
| "eval_steps_per_second": 22.473, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 3.21, | |
| "learning_rate": 6.823690383111806e-05, | |
| "loss": 0.1932, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 3.21, | |
| "eval_loss": 0.47898271679878235, | |
| "eval_runtime": 1.42, | |
| "eval_samples_per_second": 704.241, | |
| "eval_steps_per_second": 22.536, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 3.26, | |
| "learning_rate": 6.774824081313526e-05, | |
| "loss": 0.1882, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 3.26, | |
| "eval_loss": 0.48221901059150696, | |
| "eval_runtime": 1.4234, | |
| "eval_samples_per_second": 702.521, | |
| "eval_steps_per_second": 22.481, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 3.31, | |
| "learning_rate": 6.725957779515246e-05, | |
| "loss": 0.1845, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 3.31, | |
| "eval_loss": 0.479130357503891, | |
| "eval_runtime": 1.4215, | |
| "eval_samples_per_second": 703.503, | |
| "eval_steps_per_second": 22.512, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "learning_rate": 6.677091477716966e-05, | |
| "loss": 0.1895, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "eval_loss": 0.4773789644241333, | |
| "eval_runtime": 1.4294, | |
| "eval_samples_per_second": 699.592, | |
| "eval_steps_per_second": 22.387, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "learning_rate": 6.628225175918686e-05, | |
| "loss": 0.1909, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "eval_loss": 0.4763247072696686, | |
| "eval_runtime": 1.409, | |
| "eval_samples_per_second": 709.702, | |
| "eval_steps_per_second": 22.71, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 3.45, | |
| "learning_rate": 6.579358874120408e-05, | |
| "loss": 0.1841, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 3.45, | |
| "eval_loss": 0.4759540259838104, | |
| "eval_runtime": 1.421, | |
| "eval_samples_per_second": 703.747, | |
| "eval_steps_per_second": 22.52, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "learning_rate": 6.530492572322126e-05, | |
| "loss": 0.1882, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "eval_loss": 0.4739590585231781, | |
| "eval_runtime": 1.417, | |
| "eval_samples_per_second": 705.724, | |
| "eval_steps_per_second": 22.583, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 3.55, | |
| "learning_rate": 6.481626270523847e-05, | |
| "loss": 0.1902, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 3.55, | |
| "eval_loss": 0.47059980034828186, | |
| "eval_runtime": 1.4215, | |
| "eval_samples_per_second": 703.504, | |
| "eval_steps_per_second": 22.512, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "learning_rate": 6.432759968725567e-05, | |
| "loss": 0.1924, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "eval_loss": 0.46917036175727844, | |
| "eval_runtime": 1.4388, | |
| "eval_samples_per_second": 695.005, | |
| "eval_steps_per_second": 22.24, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 3.65, | |
| "learning_rate": 6.383893666927287e-05, | |
| "loss": 0.1845, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 3.65, | |
| "eval_loss": 0.46856725215911865, | |
| "eval_runtime": 1.4135, | |
| "eval_samples_per_second": 707.458, | |
| "eval_steps_per_second": 22.639, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 3.7, | |
| "learning_rate": 6.335027365129007e-05, | |
| "loss": 0.1892, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 3.7, | |
| "eval_loss": 0.46638262271881104, | |
| "eval_runtime": 1.4632, | |
| "eval_samples_per_second": 683.445, | |
| "eval_steps_per_second": 21.87, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 3.74, | |
| "learning_rate": 6.286161063330727e-05, | |
| "loss": 0.1849, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 3.74, | |
| "eval_loss": 0.46737831830978394, | |
| "eval_runtime": 1.4319, | |
| "eval_samples_per_second": 698.377, | |
| "eval_steps_per_second": 22.348, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 3.79, | |
| "learning_rate": 6.237294761532447e-05, | |
| "loss": 0.1883, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 3.79, | |
| "eval_loss": 0.46423232555389404, | |
| "eval_runtime": 1.4249, | |
| "eval_samples_per_second": 701.787, | |
| "eval_steps_per_second": 22.457, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 3.84, | |
| "learning_rate": 6.188428459734168e-05, | |
| "loss": 0.1821, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 3.84, | |
| "eval_loss": 0.4651487171649933, | |
| "eval_runtime": 1.4135, | |
| "eval_samples_per_second": 707.454, | |
| "eval_steps_per_second": 22.639, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 3.89, | |
| "learning_rate": 6.139562157935887e-05, | |
| "loss": 0.1905, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 3.89, | |
| "eval_loss": 0.462035208940506, | |
| "eval_runtime": 1.417, | |
| "eval_samples_per_second": 705.722, | |
| "eval_steps_per_second": 22.583, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 3.94, | |
| "learning_rate": 6.090695856137608e-05, | |
| "loss": 0.185, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 3.94, | |
| "eval_loss": 0.4627071022987366, | |
| "eval_runtime": 1.4179, | |
| "eval_samples_per_second": 705.261, | |
| "eval_steps_per_second": 22.568, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 3.99, | |
| "learning_rate": 6.0418295543393276e-05, | |
| "loss": 0.19, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 3.99, | |
| "eval_loss": 0.4600967466831207, | |
| "eval_runtime": 1.422, | |
| "eval_samples_per_second": 703.255, | |
| "eval_steps_per_second": 22.504, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 4.04, | |
| "learning_rate": 5.992963252541048e-05, | |
| "loss": 0.1734, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 4.04, | |
| "eval_loss": 0.46835413575172424, | |
| "eval_runtime": 1.4105, | |
| "eval_samples_per_second": 708.959, | |
| "eval_steps_per_second": 22.687, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "learning_rate": 5.944096950742768e-05, | |
| "loss": 0.1665, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "eval_loss": 0.4674428403377533, | |
| "eval_runtime": 1.4224, | |
| "eval_samples_per_second": 703.013, | |
| "eval_steps_per_second": 22.496, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 4.13, | |
| "learning_rate": 5.895230648944489e-05, | |
| "loss": 0.1621, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 4.13, | |
| "eval_loss": 0.46939995884895325, | |
| "eval_runtime": 1.418, | |
| "eval_samples_per_second": 705.227, | |
| "eval_steps_per_second": 22.567, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 4.18, | |
| "learning_rate": 5.846364347146208e-05, | |
| "loss": 0.1633, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 4.18, | |
| "eval_loss": 0.4672936499118805, | |
| "eval_runtime": 1.4229, | |
| "eval_samples_per_second": 702.769, | |
| "eval_steps_per_second": 22.489, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 4.23, | |
| "learning_rate": 5.797498045347929e-05, | |
| "loss": 0.1612, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 4.23, | |
| "eval_loss": 0.4673324525356293, | |
| "eval_runtime": 1.42, | |
| "eval_samples_per_second": 704.241, | |
| "eval_steps_per_second": 22.536, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 4.28, | |
| "learning_rate": 5.7486317435496486e-05, | |
| "loss": 0.1644, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 4.28, | |
| "eval_loss": 0.4646117091178894, | |
| "eval_runtime": 1.4244, | |
| "eval_samples_per_second": 702.031, | |
| "eval_steps_per_second": 22.465, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 4.33, | |
| "learning_rate": 5.699765441751369e-05, | |
| "loss": 0.1655, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 4.33, | |
| "eval_loss": 0.46449655294418335, | |
| "eval_runtime": 1.4359, | |
| "eval_samples_per_second": 696.446, | |
| "eval_steps_per_second": 22.286, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 4.38, | |
| "learning_rate": 5.6508991399530885e-05, | |
| "loss": 0.1627, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 4.38, | |
| "eval_loss": 0.4624975025653839, | |
| "eval_runtime": 1.4244, | |
| "eval_samples_per_second": 702.032, | |
| "eval_steps_per_second": 22.465, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 4.43, | |
| "learning_rate": 5.6020328381548085e-05, | |
| "loss": 0.1675, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 4.43, | |
| "eval_loss": 0.46210145950317383, | |
| "eval_runtime": 1.4274, | |
| "eval_samples_per_second": 700.566, | |
| "eval_steps_per_second": 22.418, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 4.47, | |
| "learning_rate": 5.553166536356529e-05, | |
| "loss": 0.1648, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 4.47, | |
| "eval_loss": 0.4633449614048004, | |
| "eval_runtime": 1.4487, | |
| "eval_samples_per_second": 690.281, | |
| "eval_steps_per_second": 22.089, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 4.52, | |
| "learning_rate": 5.5043002345582483e-05, | |
| "loss": 0.1691, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 4.52, | |
| "eval_loss": 0.4609707295894623, | |
| "eval_runtime": 1.4403, | |
| "eval_samples_per_second": 694.283, | |
| "eval_steps_per_second": 22.217, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 4.57, | |
| "learning_rate": 5.455433932759969e-05, | |
| "loss": 0.1642, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 4.57, | |
| "eval_loss": 0.46095407009124756, | |
| "eval_runtime": 1.4319, | |
| "eval_samples_per_second": 698.378, | |
| "eval_steps_per_second": 22.348, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 4.62, | |
| "learning_rate": 5.406567630961689e-05, | |
| "loss": 0.1666, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 4.62, | |
| "eval_loss": 0.46066999435424805, | |
| "eval_runtime": 1.4264, | |
| "eval_samples_per_second": 701.053, | |
| "eval_steps_per_second": 22.434, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 4.67, | |
| "learning_rate": 5.3577013291634095e-05, | |
| "loss": 0.167, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 4.67, | |
| "eval_loss": 0.45587822794914246, | |
| "eval_runtime": 1.4344, | |
| "eval_samples_per_second": 697.169, | |
| "eval_steps_per_second": 22.309, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 4.72, | |
| "learning_rate": 5.308835027365129e-05, | |
| "loss": 0.1691, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 4.72, | |
| "eval_loss": 0.45380640029907227, | |
| "eval_runtime": 1.4344, | |
| "eval_samples_per_second": 697.166, | |
| "eval_steps_per_second": 22.309, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 4.77, | |
| "learning_rate": 5.2599687255668494e-05, | |
| "loss": 0.1674, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 4.77, | |
| "eval_loss": 0.45414891839027405, | |
| "eval_runtime": 1.4319, | |
| "eval_samples_per_second": 698.38, | |
| "eval_steps_per_second": 22.348, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 4.81, | |
| "learning_rate": 5.2111024237685694e-05, | |
| "loss": 0.1613, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 4.81, | |
| "eval_loss": 0.45553380250930786, | |
| "eval_runtime": 1.4458, | |
| "eval_samples_per_second": 691.661, | |
| "eval_steps_per_second": 22.133, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 4.86, | |
| "learning_rate": 5.16223612197029e-05, | |
| "loss": 0.1613, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 4.86, | |
| "eval_loss": 0.4527079463005066, | |
| "eval_runtime": 1.4195, | |
| "eval_samples_per_second": 704.487, | |
| "eval_steps_per_second": 22.544, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 4.91, | |
| "learning_rate": 5.113369820172009e-05, | |
| "loss": 0.1639, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 4.91, | |
| "eval_loss": 0.44933873414993286, | |
| "eval_runtime": 1.416, | |
| "eval_samples_per_second": 706.217, | |
| "eval_steps_per_second": 22.599, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 4.96, | |
| "learning_rate": 5.06450351837373e-05, | |
| "loss": 0.1685, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 4.96, | |
| "eval_loss": 0.44989633560180664, | |
| "eval_runtime": 1.4289, | |
| "eval_samples_per_second": 699.837, | |
| "eval_steps_per_second": 22.395, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 5.01, | |
| "learning_rate": 5.01563721657545e-05, | |
| "loss": 0.1629, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 5.01, | |
| "eval_loss": 0.45488646626472473, | |
| "eval_runtime": 1.4239, | |
| "eval_samples_per_second": 702.276, | |
| "eval_steps_per_second": 22.473, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 5.06, | |
| "learning_rate": 4.9667709147771705e-05, | |
| "loss": 0.1484, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 5.06, | |
| "eval_loss": 0.4580441415309906, | |
| "eval_runtime": 1.416, | |
| "eval_samples_per_second": 706.223, | |
| "eval_steps_per_second": 22.599, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 5.11, | |
| "learning_rate": 4.9179046129788904e-05, | |
| "loss": 0.1468, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 5.11, | |
| "eval_loss": 0.4577222168445587, | |
| "eval_runtime": 1.4304, | |
| "eval_samples_per_second": 699.106, | |
| "eval_steps_per_second": 22.371, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 5.15, | |
| "learning_rate": 4.86903831118061e-05, | |
| "loss": 0.147, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 5.15, | |
| "eval_loss": 0.4562654197216034, | |
| "eval_runtime": 1.4284, | |
| "eval_samples_per_second": 700.079, | |
| "eval_steps_per_second": 22.403, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 5.2, | |
| "learning_rate": 4.8201720093823296e-05, | |
| "loss": 0.1486, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 5.2, | |
| "eval_loss": 0.4565419852733612, | |
| "eval_runtime": 1.4264, | |
| "eval_samples_per_second": 701.053, | |
| "eval_steps_per_second": 22.434, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 5.25, | |
| "learning_rate": 4.77130570758405e-05, | |
| "loss": 0.1461, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 5.25, | |
| "eval_loss": 0.45437780022621155, | |
| "eval_runtime": 1.4279, | |
| "eval_samples_per_second": 700.322, | |
| "eval_steps_per_second": 22.41, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 5.3, | |
| "learning_rate": 4.72243940578577e-05, | |
| "loss": 0.1435, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 5.3, | |
| "eval_loss": 0.4550324082374573, | |
| "eval_runtime": 1.4289, | |
| "eval_samples_per_second": 699.835, | |
| "eval_steps_per_second": 22.395, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 5.35, | |
| "learning_rate": 4.67357310398749e-05, | |
| "loss": 0.1463, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 5.35, | |
| "eval_loss": 0.4553817808628082, | |
| "eval_runtime": 1.4304, | |
| "eval_samples_per_second": 699.106, | |
| "eval_steps_per_second": 22.371, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 5.4, | |
| "learning_rate": 4.62470680218921e-05, | |
| "loss": 0.1495, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 5.4, | |
| "eval_loss": 0.45491766929626465, | |
| "eval_runtime": 1.4195, | |
| "eval_samples_per_second": 704.489, | |
| "eval_steps_per_second": 22.544, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 5.45, | |
| "learning_rate": 4.575840500390931e-05, | |
| "loss": 0.143, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 5.45, | |
| "eval_loss": 0.45461103320121765, | |
| "eval_runtime": 1.4225, | |
| "eval_samples_per_second": 703.01, | |
| "eval_steps_per_second": 22.496, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 5.5, | |
| "learning_rate": 4.5269741985926506e-05, | |
| "loss": 0.1473, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 5.5, | |
| "eval_loss": 0.4515800178050995, | |
| "eval_runtime": 1.4373, | |
| "eval_samples_per_second": 695.726, | |
| "eval_steps_per_second": 22.263, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 5.54, | |
| "learning_rate": 4.4781078967943706e-05, | |
| "loss": 0.1481, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 5.54, | |
| "eval_loss": 0.45015862584114075, | |
| "eval_runtime": 1.4259, | |
| "eval_samples_per_second": 701.296, | |
| "eval_steps_per_second": 22.441, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 5.59, | |
| "learning_rate": 4.4292415949960905e-05, | |
| "loss": 0.1494, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 5.59, | |
| "eval_loss": 0.4483198821544647, | |
| "eval_runtime": 1.4359, | |
| "eval_samples_per_second": 696.444, | |
| "eval_steps_per_second": 22.286, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 5.64, | |
| "learning_rate": 4.380375293197811e-05, | |
| "loss": 0.1413, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 5.64, | |
| "eval_loss": 0.4498542249202728, | |
| "eval_runtime": 1.4473, | |
| "eval_samples_per_second": 690.949, | |
| "eval_steps_per_second": 22.11, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 5.69, | |
| "learning_rate": 4.331508991399531e-05, | |
| "loss": 0.1498, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 5.69, | |
| "eval_loss": 0.447781503200531, | |
| "eval_runtime": 1.4225, | |
| "eval_samples_per_second": 703.012, | |
| "eval_steps_per_second": 22.496, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 5.74, | |
| "learning_rate": 4.282642689601251e-05, | |
| "loss": 0.146, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 5.74, | |
| "eval_loss": 0.4458942115306854, | |
| "eval_runtime": 1.413, | |
| "eval_samples_per_second": 707.707, | |
| "eval_steps_per_second": 22.647, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 5.79, | |
| "learning_rate": 4.233776387802971e-05, | |
| "loss": 0.1455, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 5.79, | |
| "eval_loss": 0.44468608498573303, | |
| "eval_runtime": 1.4239, | |
| "eval_samples_per_second": 702.274, | |
| "eval_steps_per_second": 22.473, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 5.84, | |
| "learning_rate": 4.1849100860046916e-05, | |
| "loss": 0.1439, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 5.84, | |
| "eval_loss": 0.446806401014328, | |
| "eval_runtime": 1.4224, | |
| "eval_samples_per_second": 703.012, | |
| "eval_steps_per_second": 22.496, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 5.88, | |
| "learning_rate": 4.1360437842064116e-05, | |
| "loss": 0.1472, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 5.88, | |
| "eval_loss": 0.44735094904899597, | |
| "eval_runtime": 1.4315, | |
| "eval_samples_per_second": 698.575, | |
| "eval_steps_per_second": 22.354, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 5.93, | |
| "learning_rate": 4.0871774824081315e-05, | |
| "loss": 0.1481, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 5.93, | |
| "eval_loss": 0.4440445303916931, | |
| "eval_runtime": 1.4374, | |
| "eval_samples_per_second": 695.724, | |
| "eval_steps_per_second": 22.263, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 5.98, | |
| "learning_rate": 4.0383111806098515e-05, | |
| "loss": 0.1462, | |
| "step": 61500 | |
| }, | |
| { | |
| "epoch": 5.98, | |
| "eval_loss": 0.44478389620780945, | |
| "eval_runtime": 1.4258, | |
| "eval_samples_per_second": 701.358, | |
| "eval_steps_per_second": 22.443, | |
| "step": 61500 | |
| }, | |
| { | |
| "epoch": 6.03, | |
| "learning_rate": 3.989444878811572e-05, | |
| "loss": 0.1335, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 6.03, | |
| "eval_loss": 0.450600266456604, | |
| "eval_runtime": 1.4249, | |
| "eval_samples_per_second": 701.787, | |
| "eval_steps_per_second": 22.457, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 6.08, | |
| "learning_rate": 3.940578577013292e-05, | |
| "loss": 0.1279, | |
| "step": 62500 | |
| }, | |
| { | |
| "epoch": 6.08, | |
| "eval_loss": 0.45286017656326294, | |
| "eval_runtime": 1.4175, | |
| "eval_samples_per_second": 705.475, | |
| "eval_steps_per_second": 22.575, | |
| "step": 62500 | |
| }, | |
| { | |
| "epoch": 6.13, | |
| "learning_rate": 3.891712275215012e-05, | |
| "loss": 0.1318, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 6.13, | |
| "eval_loss": 0.4531707763671875, | |
| "eval_runtime": 1.4314, | |
| "eval_samples_per_second": 698.621, | |
| "eval_steps_per_second": 22.356, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 6.18, | |
| "learning_rate": 3.842845973416732e-05, | |
| "loss": 0.1297, | |
| "step": 63500 | |
| }, | |
| { | |
| "epoch": 6.18, | |
| "eval_loss": 0.4539336562156677, | |
| "eval_runtime": 1.422, | |
| "eval_samples_per_second": 703.255, | |
| "eval_steps_per_second": 22.504, | |
| "step": 63500 | |
| }, | |
| { | |
| "epoch": 6.22, | |
| "learning_rate": 3.7939796716184525e-05, | |
| "loss": 0.1314, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 6.22, | |
| "eval_loss": 0.4507242441177368, | |
| "eval_runtime": 1.4354, | |
| "eval_samples_per_second": 696.689, | |
| "eval_steps_per_second": 22.294, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 6.27, | |
| "learning_rate": 3.7451133698201725e-05, | |
| "loss": 0.1295, | |
| "step": 64500 | |
| }, | |
| { | |
| "epoch": 6.27, | |
| "eval_loss": 0.45251962542533875, | |
| "eval_runtime": 1.4215, | |
| "eval_samples_per_second": 703.503, | |
| "eval_steps_per_second": 22.512, | |
| "step": 64500 | |
| }, | |
| { | |
| "epoch": 6.32, | |
| "learning_rate": 3.6962470680218924e-05, | |
| "loss": 0.1311, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 6.32, | |
| "eval_loss": 0.45232245326042175, | |
| "eval_runtime": 1.4364, | |
| "eval_samples_per_second": 696.202, | |
| "eval_steps_per_second": 22.278, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 6.37, | |
| "learning_rate": 3.6473807662236124e-05, | |
| "loss": 0.1303, | |
| "step": 65500 | |
| }, | |
| { | |
| "epoch": 6.37, | |
| "eval_loss": 0.45104601979255676, | |
| "eval_runtime": 1.421, | |
| "eval_samples_per_second": 703.749, | |
| "eval_steps_per_second": 22.52, | |
| "step": 65500 | |
| }, | |
| { | |
| "epoch": 6.42, | |
| "learning_rate": 3.598514464425333e-05, | |
| "loss": 0.1289, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 6.42, | |
| "eval_loss": 0.44871556758880615, | |
| "eval_runtime": 1.4274, | |
| "eval_samples_per_second": 700.563, | |
| "eval_steps_per_second": 22.418, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 6.47, | |
| "learning_rate": 3.549648162627053e-05, | |
| "loss": 0.1375, | |
| "step": 66500 | |
| }, | |
| { | |
| "epoch": 6.47, | |
| "eval_loss": 0.4471152126789093, | |
| "eval_runtime": 1.4185, | |
| "eval_samples_per_second": 704.978, | |
| "eval_steps_per_second": 22.559, | |
| "step": 66500 | |
| }, | |
| { | |
| "epoch": 6.52, | |
| "learning_rate": 3.500781860828773e-05, | |
| "loss": 0.1295, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 6.52, | |
| "eval_loss": 0.44929370284080505, | |
| "eval_runtime": 1.414, | |
| "eval_samples_per_second": 707.208, | |
| "eval_steps_per_second": 22.631, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 6.56, | |
| "learning_rate": 3.451915559030492e-05, | |
| "loss": 0.1291, | |
| "step": 67500 | |
| }, | |
| { | |
| "epoch": 6.56, | |
| "eval_loss": 0.44738081097602844, | |
| "eval_runtime": 1.4095, | |
| "eval_samples_per_second": 709.453, | |
| "eval_steps_per_second": 22.703, | |
| "step": 67500 | |
| }, | |
| { | |
| "epoch": 6.61, | |
| "learning_rate": 3.403049257232213e-05, | |
| "loss": 0.1297, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 6.61, | |
| "eval_loss": 0.4483153820037842, | |
| "eval_runtime": 1.4314, | |
| "eval_samples_per_second": 698.622, | |
| "eval_steps_per_second": 22.356, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 6.66, | |
| "learning_rate": 3.354182955433933e-05, | |
| "loss": 0.1354, | |
| "step": 68500 | |
| }, | |
| { | |
| "epoch": 6.66, | |
| "eval_loss": 0.4452635943889618, | |
| "eval_runtime": 1.4074, | |
| "eval_samples_per_second": 710.532, | |
| "eval_steps_per_second": 22.737, | |
| "step": 68500 | |
| }, | |
| { | |
| "epoch": 6.71, | |
| "learning_rate": 3.305316653635653e-05, | |
| "loss": 0.1316, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 6.71, | |
| "eval_loss": 0.4459252953529358, | |
| "eval_runtime": 1.4149, | |
| "eval_samples_per_second": 706.773, | |
| "eval_steps_per_second": 22.617, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 6.76, | |
| "learning_rate": 3.2564503518373726e-05, | |
| "loss": 0.1303, | |
| "step": 69500 | |
| }, | |
| { | |
| "epoch": 6.76, | |
| "eval_loss": 0.4454708397388458, | |
| "eval_runtime": 1.4195, | |
| "eval_samples_per_second": 704.485, | |
| "eval_steps_per_second": 22.544, | |
| "step": 69500 | |
| }, | |
| { | |
| "epoch": 6.81, | |
| "learning_rate": 3.207584050039093e-05, | |
| "loss": 0.1352, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 6.81, | |
| "eval_loss": 0.4453655481338501, | |
| "eval_runtime": 1.4036, | |
| "eval_samples_per_second": 712.466, | |
| "eval_steps_per_second": 22.799, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 6.86, | |
| "learning_rate": 3.158717748240813e-05, | |
| "loss": 0.1278, | |
| "step": 70500 | |
| }, | |
| { | |
| "epoch": 6.86, | |
| "eval_loss": 0.44641512632369995, | |
| "eval_runtime": 1.4284, | |
| "eval_samples_per_second": 700.077, | |
| "eval_steps_per_second": 22.402, | |
| "step": 70500 | |
| }, | |
| { | |
| "epoch": 6.91, | |
| "learning_rate": 3.109851446442533e-05, | |
| "loss": 0.127, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 6.91, | |
| "eval_loss": 0.446814626455307, | |
| "eval_runtime": 1.4239, | |
| "eval_samples_per_second": 702.277, | |
| "eval_steps_per_second": 22.473, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 6.95, | |
| "learning_rate": 3.060985144644253e-05, | |
| "loss": 0.1337, | |
| "step": 71500 | |
| }, | |
| { | |
| "epoch": 6.95, | |
| "eval_loss": 0.44566431641578674, | |
| "eval_runtime": 1.421, | |
| "eval_samples_per_second": 703.749, | |
| "eval_steps_per_second": 22.52, | |
| "step": 71500 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "learning_rate": 3.0121188428459734e-05, | |
| "loss": 0.1322, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_loss": 0.4453600347042084, | |
| "eval_runtime": 1.3961, | |
| "eval_samples_per_second": 716.266, | |
| "eval_steps_per_second": 22.921, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 7.05, | |
| "learning_rate": 2.9632525410476936e-05, | |
| "loss": 0.1171, | |
| "step": 72500 | |
| }, | |
| { | |
| "epoch": 7.05, | |
| "eval_loss": 0.45082348585128784, | |
| "eval_runtime": 1.4021, | |
| "eval_samples_per_second": 713.223, | |
| "eval_steps_per_second": 22.823, | |
| "step": 72500 | |
| }, | |
| { | |
| "epoch": 7.1, | |
| "learning_rate": 2.9143862392494136e-05, | |
| "loss": 0.1201, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 7.1, | |
| "eval_loss": 0.45133039355278015, | |
| "eval_runtime": 1.4284, | |
| "eval_samples_per_second": 700.08, | |
| "eval_steps_per_second": 22.403, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 7.15, | |
| "learning_rate": 2.865519937451134e-05, | |
| "loss": 0.1119, | |
| "step": 73500 | |
| }, | |
| { | |
| "epoch": 7.15, | |
| "eval_loss": 0.4528238773345947, | |
| "eval_runtime": 1.4066, | |
| "eval_samples_per_second": 710.952, | |
| "eval_steps_per_second": 22.75, | |
| "step": 73500 | |
| }, | |
| { | |
| "epoch": 7.2, | |
| "learning_rate": 2.8166536356528538e-05, | |
| "loss": 0.1178, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 7.2, | |
| "eval_loss": 0.4517793655395508, | |
| "eval_runtime": 1.4279, | |
| "eval_samples_per_second": 700.319, | |
| "eval_steps_per_second": 22.41, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 7.25, | |
| "learning_rate": 2.767787333854574e-05, | |
| "loss": 0.1172, | |
| "step": 74500 | |
| }, | |
| { | |
| "epoch": 7.25, | |
| "eval_loss": 0.45097029209136963, | |
| "eval_runtime": 1.4105, | |
| "eval_samples_per_second": 708.955, | |
| "eval_steps_per_second": 22.687, | |
| "step": 74500 | |
| }, | |
| { | |
| "epoch": 7.29, | |
| "learning_rate": 2.718921032056294e-05, | |
| "loss": 0.1229, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 7.29, | |
| "eval_loss": 0.4481058418750763, | |
| "eval_runtime": 1.4051, | |
| "eval_samples_per_second": 711.694, | |
| "eval_steps_per_second": 22.774, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 7.34, | |
| "learning_rate": 2.6700547302580143e-05, | |
| "loss": 0.12, | |
| "step": 75500 | |
| }, | |
| { | |
| "epoch": 7.34, | |
| "eval_loss": 0.4482279419898987, | |
| "eval_runtime": 1.411, | |
| "eval_samples_per_second": 708.702, | |
| "eval_steps_per_second": 22.678, | |
| "step": 75500 | |
| }, | |
| { | |
| "epoch": 7.39, | |
| "learning_rate": 2.6211884284597343e-05, | |
| "loss": 0.1158, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 7.39, | |
| "eval_loss": 0.4505749046802521, | |
| "eval_runtime": 1.4021, | |
| "eval_samples_per_second": 713.221, | |
| "eval_steps_per_second": 22.823, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 7.44, | |
| "learning_rate": 2.5723221266614546e-05, | |
| "loss": 0.1212, | |
| "step": 76500 | |
| }, | |
| { | |
| "epoch": 7.44, | |
| "eval_loss": 0.4481782913208008, | |
| "eval_runtime": 1.4165, | |
| "eval_samples_per_second": 705.97, | |
| "eval_steps_per_second": 22.591, | |
| "step": 76500 | |
| }, | |
| { | |
| "epoch": 7.49, | |
| "learning_rate": 2.5234558248631745e-05, | |
| "loss": 0.1189, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 7.49, | |
| "eval_loss": 0.44906875491142273, | |
| "eval_runtime": 1.4185, | |
| "eval_samples_per_second": 704.978, | |
| "eval_steps_per_second": 22.559, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 7.54, | |
| "learning_rate": 2.4745895230648948e-05, | |
| "loss": 0.1225, | |
| "step": 77500 | |
| }, | |
| { | |
| "epoch": 7.54, | |
| "eval_loss": 0.4473673701286316, | |
| "eval_runtime": 1.408, | |
| "eval_samples_per_second": 710.203, | |
| "eval_steps_per_second": 22.727, | |
| "step": 77500 | |
| }, | |
| { | |
| "epoch": 7.59, | |
| "learning_rate": 2.4257232212666147e-05, | |
| "loss": 0.1206, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 7.59, | |
| "eval_loss": 0.4478332996368408, | |
| "eval_runtime": 1.4299, | |
| "eval_samples_per_second": 699.349, | |
| "eval_steps_per_second": 22.379, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 7.63, | |
| "learning_rate": 2.376856919468335e-05, | |
| "loss": 0.1205, | |
| "step": 78500 | |
| }, | |
| { | |
| "epoch": 7.63, | |
| "eval_loss": 0.4450225234031677, | |
| "eval_runtime": 1.4139, | |
| "eval_samples_per_second": 707.288, | |
| "eval_steps_per_second": 22.633, | |
| "step": 78500 | |
| }, | |
| { | |
| "epoch": 7.68, | |
| "learning_rate": 2.3279906176700546e-05, | |
| "loss": 0.1237, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 7.68, | |
| "eval_loss": 0.44548895955085754, | |
| "eval_runtime": 1.418, | |
| "eval_samples_per_second": 705.229, | |
| "eval_steps_per_second": 22.567, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 7.73, | |
| "learning_rate": 2.279124315871775e-05, | |
| "loss": 0.1211, | |
| "step": 79500 | |
| }, | |
| { | |
| "epoch": 7.73, | |
| "eval_loss": 0.4440496861934662, | |
| "eval_runtime": 1.4428, | |
| "eval_samples_per_second": 693.091, | |
| "eval_steps_per_second": 22.179, | |
| "step": 79500 | |
| }, | |
| { | |
| "epoch": 7.78, | |
| "learning_rate": 2.230258014073495e-05, | |
| "loss": 0.1167, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 7.78, | |
| "eval_loss": 0.44403979182243347, | |
| "eval_runtime": 1.415, | |
| "eval_samples_per_second": 706.709, | |
| "eval_steps_per_second": 22.615, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 7.83, | |
| "learning_rate": 2.181391712275215e-05, | |
| "loss": 0.1195, | |
| "step": 80500 | |
| }, | |
| { | |
| "epoch": 7.83, | |
| "eval_loss": 0.44386279582977295, | |
| "eval_runtime": 1.411, | |
| "eval_samples_per_second": 708.704, | |
| "eval_steps_per_second": 22.679, | |
| "step": 80500 | |
| }, | |
| { | |
| "epoch": 7.88, | |
| "learning_rate": 2.132525410476935e-05, | |
| "loss": 0.1236, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 7.88, | |
| "eval_loss": 0.4440469443798065, | |
| "eval_runtime": 1.413, | |
| "eval_samples_per_second": 707.702, | |
| "eval_steps_per_second": 22.646, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 7.93, | |
| "learning_rate": 2.0836591086786554e-05, | |
| "loss": 0.1196, | |
| "step": 81500 | |
| }, | |
| { | |
| "epoch": 7.93, | |
| "eval_loss": 0.4431215822696686, | |
| "eval_runtime": 1.4071, | |
| "eval_samples_per_second": 710.704, | |
| "eval_steps_per_second": 22.743, | |
| "step": 81500 | |
| }, | |
| { | |
| "epoch": 7.98, | |
| "learning_rate": 2.0347928068803753e-05, | |
| "loss": 0.1154, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 7.98, | |
| "eval_loss": 0.44494298100471497, | |
| "eval_runtime": 1.3991, | |
| "eval_samples_per_second": 714.74, | |
| "eval_steps_per_second": 22.872, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 8.02, | |
| "learning_rate": 1.9859265050820956e-05, | |
| "loss": 0.1146, | |
| "step": 82500 | |
| }, | |
| { | |
| "epoch": 8.02, | |
| "eval_loss": 0.44438567757606506, | |
| "eval_runtime": 1.4136, | |
| "eval_samples_per_second": 707.391, | |
| "eval_steps_per_second": 22.637, | |
| "step": 82500 | |
| }, | |
| { | |
| "epoch": 8.07, | |
| "learning_rate": 1.9370602032838155e-05, | |
| "loss": 0.111, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 8.07, | |
| "eval_loss": 0.4510573744773865, | |
| "eval_runtime": 1.4011, | |
| "eval_samples_per_second": 713.727, | |
| "eval_steps_per_second": 22.839, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 8.12, | |
| "learning_rate": 1.8881939014855358e-05, | |
| "loss": 0.1107, | |
| "step": 83500 | |
| }, | |
| { | |
| "epoch": 8.12, | |
| "eval_loss": 0.45002079010009766, | |
| "eval_runtime": 1.4195, | |
| "eval_samples_per_second": 704.487, | |
| "eval_steps_per_second": 22.544, | |
| "step": 83500 | |
| }, | |
| { | |
| "epoch": 8.17, | |
| "learning_rate": 1.8393275996872558e-05, | |
| "loss": 0.1069, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 8.17, | |
| "eval_loss": 0.4500637352466583, | |
| "eval_runtime": 1.4026, | |
| "eval_samples_per_second": 712.972, | |
| "eval_steps_per_second": 22.815, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 8.22, | |
| "learning_rate": 1.790461297888976e-05, | |
| "loss": 0.1091, | |
| "step": 84500 | |
| }, | |
| { | |
| "epoch": 8.22, | |
| "eval_loss": 0.450139582157135, | |
| "eval_runtime": 1.423, | |
| "eval_samples_per_second": 702.764, | |
| "eval_steps_per_second": 22.488, | |
| "step": 84500 | |
| }, | |
| { | |
| "epoch": 8.27, | |
| "learning_rate": 1.741594996090696e-05, | |
| "loss": 0.1107, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 8.27, | |
| "eval_loss": 0.4503149390220642, | |
| "eval_runtime": 1.407, | |
| "eval_samples_per_second": 710.707, | |
| "eval_steps_per_second": 22.743, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 8.32, | |
| "learning_rate": 1.6927286942924163e-05, | |
| "loss": 0.11, | |
| "step": 85500 | |
| }, | |
| { | |
| "epoch": 8.32, | |
| "eval_loss": 0.4499202370643616, | |
| "eval_runtime": 1.3956, | |
| "eval_samples_per_second": 716.522, | |
| "eval_steps_per_second": 22.929, | |
| "step": 85500 | |
| }, | |
| { | |
| "epoch": 8.36, | |
| "learning_rate": 1.6438623924941362e-05, | |
| "loss": 0.1084, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 8.36, | |
| "eval_loss": 0.4492938220500946, | |
| "eval_runtime": 1.4061, | |
| "eval_samples_per_second": 711.208, | |
| "eval_steps_per_second": 22.759, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 8.41, | |
| "learning_rate": 1.5949960906958562e-05, | |
| "loss": 0.1142, | |
| "step": 86500 | |
| }, | |
| { | |
| "epoch": 8.41, | |
| "eval_loss": 0.449333518743515, | |
| "eval_runtime": 1.417, | |
| "eval_samples_per_second": 705.721, | |
| "eval_steps_per_second": 22.583, | |
| "step": 86500 | |
| }, | |
| { | |
| "epoch": 8.46, | |
| "learning_rate": 1.546129788897576e-05, | |
| "loss": 0.1091, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 8.46, | |
| "eval_loss": 0.4506095051765442, | |
| "eval_runtime": 1.412, | |
| "eval_samples_per_second": 708.206, | |
| "eval_steps_per_second": 22.663, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 8.51, | |
| "learning_rate": 1.4972634870992962e-05, | |
| "loss": 0.1072, | |
| "step": 87500 | |
| }, | |
| { | |
| "epoch": 8.51, | |
| "eval_loss": 0.44918569922447205, | |
| "eval_runtime": 1.4021, | |
| "eval_samples_per_second": 713.221, | |
| "eval_steps_per_second": 22.823, | |
| "step": 87500 | |
| }, | |
| { | |
| "epoch": 8.56, | |
| "learning_rate": 1.4483971853010164e-05, | |
| "loss": 0.1128, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 8.56, | |
| "eval_loss": 0.44889140129089355, | |
| "eval_runtime": 1.414, | |
| "eval_samples_per_second": 707.21, | |
| "eval_steps_per_second": 22.631, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 8.61, | |
| "learning_rate": 1.3995308835027365e-05, | |
| "loss": 0.1123, | |
| "step": 88500 | |
| }, | |
| { | |
| "epoch": 8.61, | |
| "eval_loss": 0.44764241576194763, | |
| "eval_runtime": 1.4076, | |
| "eval_samples_per_second": 710.451, | |
| "eval_steps_per_second": 22.734, | |
| "step": 88500 | |
| }, | |
| { | |
| "epoch": 8.66, | |
| "learning_rate": 1.3506645817044566e-05, | |
| "loss": 0.1093, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 8.66, | |
| "eval_loss": 0.44862595200538635, | |
| "eval_runtime": 1.3991, | |
| "eval_samples_per_second": 714.742, | |
| "eval_steps_per_second": 22.872, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 8.7, | |
| "learning_rate": 1.3017982799061767e-05, | |
| "loss": 0.1111, | |
| "step": 89500 | |
| }, | |
| { | |
| "epoch": 8.7, | |
| "eval_loss": 0.4482482969760895, | |
| "eval_runtime": 1.3981, | |
| "eval_samples_per_second": 715.242, | |
| "eval_steps_per_second": 22.888, | |
| "step": 89500 | |
| }, | |
| { | |
| "epoch": 8.75, | |
| "learning_rate": 1.2529319781078968e-05, | |
| "loss": 0.1086, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 8.75, | |
| "eval_loss": 0.44752514362335205, | |
| "eval_runtime": 1.4036, | |
| "eval_samples_per_second": 712.46, | |
| "eval_steps_per_second": 22.799, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 8.8, | |
| "learning_rate": 1.204065676309617e-05, | |
| "loss": 0.11, | |
| "step": 90500 | |
| }, | |
| { | |
| "epoch": 8.8, | |
| "eval_loss": 0.44697925448417664, | |
| "eval_runtime": 1.401, | |
| "eval_samples_per_second": 713.773, | |
| "eval_steps_per_second": 22.841, | |
| "step": 90500 | |
| }, | |
| { | |
| "epoch": 8.85, | |
| "learning_rate": 1.155199374511337e-05, | |
| "loss": 0.1118, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 8.85, | |
| "eval_loss": 0.4479255974292755, | |
| "eval_runtime": 1.4107, | |
| "eval_samples_per_second": 708.879, | |
| "eval_steps_per_second": 22.684, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 8.9, | |
| "learning_rate": 1.1063330727130572e-05, | |
| "loss": 0.1078, | |
| "step": 91500 | |
| }, | |
| { | |
| "epoch": 8.9, | |
| "eval_loss": 0.4473107159137726, | |
| "eval_runtime": 1.4011, | |
| "eval_samples_per_second": 713.727, | |
| "eval_steps_per_second": 22.839, | |
| "step": 91500 | |
| }, | |
| { | |
| "epoch": 8.95, | |
| "learning_rate": 1.0574667709147771e-05, | |
| "loss": 0.1083, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 8.95, | |
| "eval_loss": 0.44750386476516724, | |
| "eval_runtime": 1.4056, | |
| "eval_samples_per_second": 711.457, | |
| "eval_steps_per_second": 22.767, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "learning_rate": 1.0086004691164972e-05, | |
| "loss": 0.1127, | |
| "step": 92500 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_loss": 0.4473421573638916, | |
| "eval_runtime": 1.4175, | |
| "eval_samples_per_second": 705.475, | |
| "eval_steps_per_second": 22.575, | |
| "step": 92500 | |
| }, | |
| { | |
| "epoch": 9.04, | |
| "learning_rate": 9.597341673182173e-06, | |
| "loss": 0.1034, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 9.04, | |
| "eval_loss": 0.4490604102611542, | |
| "eval_runtime": 1.4334, | |
| "eval_samples_per_second": 697.652, | |
| "eval_steps_per_second": 22.325, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 9.09, | |
| "learning_rate": 9.108678655199375e-06, | |
| "loss": 0.1022, | |
| "step": 93500 | |
| }, | |
| { | |
| "epoch": 9.09, | |
| "eval_loss": 0.45033252239227295, | |
| "eval_runtime": 1.4284, | |
| "eval_samples_per_second": 700.078, | |
| "eval_steps_per_second": 22.403, | |
| "step": 93500 | |
| }, | |
| { | |
| "epoch": 9.14, | |
| "learning_rate": 8.620015637216576e-06, | |
| "loss": 0.1048, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 9.14, | |
| "eval_loss": 0.45117974281311035, | |
| "eval_runtime": 1.412, | |
| "eval_samples_per_second": 708.205, | |
| "eval_steps_per_second": 22.663, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 9.19, | |
| "learning_rate": 8.131352619233777e-06, | |
| "loss": 0.1018, | |
| "step": 94500 | |
| }, | |
| { | |
| "epoch": 9.19, | |
| "eval_loss": 0.45118698477745056, | |
| "eval_runtime": 1.421, | |
| "eval_samples_per_second": 703.749, | |
| "eval_steps_per_second": 22.52, | |
| "step": 94500 | |
| }, | |
| { | |
| "epoch": 9.24, | |
| "learning_rate": 7.642689601250978e-06, | |
| "loss": 0.1064, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 9.24, | |
| "eval_loss": 0.4503132402896881, | |
| "eval_runtime": 1.4151, | |
| "eval_samples_per_second": 706.64, | |
| "eval_steps_per_second": 22.612, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 9.29, | |
| "learning_rate": 7.154026583268178e-06, | |
| "loss": 0.1055, | |
| "step": 95500 | |
| }, | |
| { | |
| "epoch": 9.29, | |
| "eval_loss": 0.4488275647163391, | |
| "eval_runtime": 1.417, | |
| "eval_samples_per_second": 705.722, | |
| "eval_steps_per_second": 22.583, | |
| "step": 95500 | |
| }, | |
| { | |
| "epoch": 9.34, | |
| "learning_rate": 6.665363565285379e-06, | |
| "loss": 0.1125, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 9.34, | |
| "eval_loss": 0.44890880584716797, | |
| "eval_runtime": 1.4006, | |
| "eval_samples_per_second": 713.983, | |
| "eval_steps_per_second": 22.847, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 9.39, | |
| "learning_rate": 6.1767005473025806e-06, | |
| "loss": 0.1002, | |
| "step": 96500 | |
| }, | |
| { | |
| "epoch": 9.39, | |
| "eval_loss": 0.44921454787254333, | |
| "eval_runtime": 1.4134, | |
| "eval_samples_per_second": 707.5, | |
| "eval_steps_per_second": 22.64, | |
| "step": 96500 | |
| }, | |
| { | |
| "epoch": 9.43, | |
| "learning_rate": 5.688037529319782e-06, | |
| "loss": 0.1043, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 9.43, | |
| "eval_loss": 0.4495786130428314, | |
| "eval_runtime": 1.408, | |
| "eval_samples_per_second": 710.206, | |
| "eval_steps_per_second": 22.727, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 9.48, | |
| "learning_rate": 5.199374511336982e-06, | |
| "loss": 0.102, | |
| "step": 97500 | |
| }, | |
| { | |
| "epoch": 9.48, | |
| "eval_loss": 0.44972699880599976, | |
| "eval_runtime": 1.4141, | |
| "eval_samples_per_second": 707.139, | |
| "eval_steps_per_second": 22.628, | |
| "step": 97500 | |
| }, | |
| { | |
| "epoch": 9.53, | |
| "learning_rate": 4.710711493354183e-06, | |
| "loss": 0.1059, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 9.53, | |
| "eval_loss": 0.44945281744003296, | |
| "eval_runtime": 1.4001, | |
| "eval_samples_per_second": 714.235, | |
| "eval_steps_per_second": 22.856, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 9.58, | |
| "learning_rate": 4.222048475371384e-06, | |
| "loss": 0.1012, | |
| "step": 98500 | |
| }, | |
| { | |
| "epoch": 9.58, | |
| "eval_loss": 0.44921252131462097, | |
| "eval_runtime": 1.4051, | |
| "eval_samples_per_second": 711.709, | |
| "eval_steps_per_second": 22.775, | |
| "step": 98500 | |
| }, | |
| { | |
| "epoch": 9.63, | |
| "learning_rate": 3.733385457388585e-06, | |
| "loss": 0.1066, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 9.63, | |
| "eval_loss": 0.44877171516418457, | |
| "eval_runtime": 1.42, | |
| "eval_samples_per_second": 704.241, | |
| "eval_steps_per_second": 22.536, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 9.68, | |
| "learning_rate": 3.244722439405786e-06, | |
| "loss": 0.1044, | |
| "step": 99500 | |
| }, | |
| { | |
| "epoch": 9.68, | |
| "eval_loss": 0.4489387273788452, | |
| "eval_runtime": 1.4031, | |
| "eval_samples_per_second": 712.712, | |
| "eval_steps_per_second": 22.807, | |
| "step": 99500 | |
| }, | |
| { | |
| "epoch": 9.73, | |
| "learning_rate": 2.756059421422987e-06, | |
| "loss": 0.1049, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 9.73, | |
| "eval_loss": 0.4488721191883087, | |
| "eval_runtime": 1.4041, | |
| "eval_samples_per_second": 712.211, | |
| "eval_steps_per_second": 22.791, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 9.77, | |
| "learning_rate": 2.2673964034401876e-06, | |
| "loss": 0.1038, | |
| "step": 100500 | |
| }, | |
| { | |
| "epoch": 9.77, | |
| "eval_loss": 0.44900763034820557, | |
| "eval_runtime": 1.4001, | |
| "eval_samples_per_second": 714.232, | |
| "eval_steps_per_second": 22.855, | |
| "step": 100500 | |
| }, | |
| { | |
| "epoch": 9.82, | |
| "learning_rate": 1.7787333854573888e-06, | |
| "loss": 0.1057, | |
| "step": 101000 | |
| }, | |
| { | |
| "epoch": 9.82, | |
| "eval_loss": 0.44878125190734863, | |
| "eval_runtime": 1.409, | |
| "eval_samples_per_second": 709.704, | |
| "eval_steps_per_second": 22.711, | |
| "step": 101000 | |
| }, | |
| { | |
| "epoch": 9.87, | |
| "learning_rate": 1.2900703674745897e-06, | |
| "loss": 0.1035, | |
| "step": 101500 | |
| }, | |
| { | |
| "epoch": 9.87, | |
| "eval_loss": 0.448639452457428, | |
| "eval_runtime": 1.4105, | |
| "eval_samples_per_second": 708.955, | |
| "eval_steps_per_second": 22.687, | |
| "step": 101500 | |
| }, | |
| { | |
| "epoch": 9.92, | |
| "learning_rate": 8.014073494917906e-07, | |
| "loss": 0.1058, | |
| "step": 102000 | |
| }, | |
| { | |
| "epoch": 9.92, | |
| "eval_loss": 0.4486231207847595, | |
| "eval_runtime": 1.3971, | |
| "eval_samples_per_second": 715.758, | |
| "eval_steps_per_second": 22.904, | |
| "step": 102000 | |
| }, | |
| { | |
| "epoch": 9.97, | |
| "learning_rate": 3.1274433150899144e-07, | |
| "loss": 0.1013, | |
| "step": 102500 | |
| }, | |
| { | |
| "epoch": 9.97, | |
| "eval_loss": 0.44872137904167175, | |
| "eval_runtime": 1.4115, | |
| "eval_samples_per_second": 708.454, | |
| "eval_steps_per_second": 22.671, | |
| "step": 102500 | |
| } | |
| ], | |
| "max_steps": 102820, | |
| "num_train_epochs": 10, | |
| "total_flos": 6.16302255744e+16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |