| { | |
| "best_metric": 2.4363410472869873, | |
| "best_model_checkpoint": "/home/seemdog/manchu_BERT/1002_BERT_DA_1.0/checkpoint-86000", | |
| "epoch": 9.964620917517031, | |
| "global_step": 213000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.9766081871345035e-05, | |
| "loss": 6.1581, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 5.5598931312561035, | |
| "eval_runtime": 54.891, | |
| "eval_samples_per_second": 120.767, | |
| "eval_steps_per_second": 1.895, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.953216374269006e-05, | |
| "loss": 5.3713, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "eval_loss": 4.843267440795898, | |
| "eval_runtime": 54.8945, | |
| "eval_samples_per_second": 120.759, | |
| "eval_steps_per_second": 1.895, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.9298245614035086e-05, | |
| "loss": 4.7624, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "eval_loss": 4.427705764770508, | |
| "eval_runtime": 54.9095, | |
| "eval_samples_per_second": 120.726, | |
| "eval_steps_per_second": 1.894, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.906432748538012e-05, | |
| "loss": 4.2884, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "eval_loss": 4.152446746826172, | |
| "eval_runtime": 54.9536, | |
| "eval_samples_per_second": 120.629, | |
| "eval_steps_per_second": 1.893, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.883040935672515e-05, | |
| "loss": 3.908, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "eval_loss": 3.943004608154297, | |
| "eval_runtime": 54.9769, | |
| "eval_samples_per_second": 120.578, | |
| "eval_steps_per_second": 1.892, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.859649122807018e-05, | |
| "loss": 3.6357, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "eval_loss": 3.7840378284454346, | |
| "eval_runtime": 54.9612, | |
| "eval_samples_per_second": 120.612, | |
| "eval_steps_per_second": 1.892, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.836257309941521e-05, | |
| "loss": 3.442, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "eval_loss": 3.6515119075775146, | |
| "eval_runtime": 55.0182, | |
| "eval_samples_per_second": 120.487, | |
| "eval_steps_per_second": 1.89, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.8128654970760235e-05, | |
| "loss": 3.2982, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "eval_loss": 3.5147831439971924, | |
| "eval_runtime": 54.9459, | |
| "eval_samples_per_second": 120.646, | |
| "eval_steps_per_second": 1.893, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.789473684210526e-05, | |
| "loss": 3.1681, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "eval_loss": 3.4453866481781006, | |
| "eval_runtime": 54.9741, | |
| "eval_samples_per_second": 120.584, | |
| "eval_steps_per_second": 1.892, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.7660818713450294e-05, | |
| "loss": 3.0515, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "eval_loss": 3.3482985496520996, | |
| "eval_runtime": 54.9922, | |
| "eval_samples_per_second": 120.544, | |
| "eval_steps_per_second": 1.891, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.7426900584795326e-05, | |
| "loss": 2.9408, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "eval_loss": 3.274308919906616, | |
| "eval_runtime": 55.0307, | |
| "eval_samples_per_second": 120.46, | |
| "eval_steps_per_second": 1.89, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.719298245614036e-05, | |
| "loss": 2.8601, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "eval_loss": 3.2094714641571045, | |
| "eval_runtime": 54.9444, | |
| "eval_samples_per_second": 120.649, | |
| "eval_steps_per_second": 1.893, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 4.695906432748538e-05, | |
| "loss": 2.7866, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "eval_loss": 3.1299281120300293, | |
| "eval_runtime": 54.9484, | |
| "eval_samples_per_second": 120.64, | |
| "eval_steps_per_second": 1.893, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 4.672514619883041e-05, | |
| "loss": 2.7094, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "eval_loss": 3.096022844314575, | |
| "eval_runtime": 55.155, | |
| "eval_samples_per_second": 120.189, | |
| "eval_steps_per_second": 1.886, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.649122807017544e-05, | |
| "loss": 2.6424, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "eval_loss": 3.060807228088379, | |
| "eval_runtime": 55.1935, | |
| "eval_samples_per_second": 120.105, | |
| "eval_steps_per_second": 1.884, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 4.625730994152047e-05, | |
| "loss": 2.5729, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "eval_loss": 3.0170695781707764, | |
| "eval_runtime": 55.2064, | |
| "eval_samples_per_second": 120.077, | |
| "eval_steps_per_second": 1.884, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 4.60233918128655e-05, | |
| "loss": 2.5108, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "eval_loss": 2.9729015827178955, | |
| "eval_runtime": 55.2048, | |
| "eval_samples_per_second": 120.08, | |
| "eval_steps_per_second": 1.884, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 4.5789473684210527e-05, | |
| "loss": 2.4538, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "eval_loss": 2.9392964839935303, | |
| "eval_runtime": 55.2009, | |
| "eval_samples_per_second": 120.089, | |
| "eval_steps_per_second": 1.884, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 4.555555555555556e-05, | |
| "loss": 2.3941, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "eval_loss": 2.900946617126465, | |
| "eval_runtime": 55.1868, | |
| "eval_samples_per_second": 120.119, | |
| "eval_steps_per_second": 1.885, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 4.5321637426900585e-05, | |
| "loss": 2.3341, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "eval_loss": 2.87040376663208, | |
| "eval_runtime": 55.0611, | |
| "eval_samples_per_second": 120.393, | |
| "eval_steps_per_second": 1.889, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 4.508771929824562e-05, | |
| "loss": 2.2797, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "eval_loss": 2.8554604053497314, | |
| "eval_runtime": 54.9944, | |
| "eval_samples_per_second": 120.54, | |
| "eval_steps_per_second": 1.891, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 4.485380116959065e-05, | |
| "loss": 2.2284, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "eval_loss": 2.8280177116394043, | |
| "eval_runtime": 54.9695, | |
| "eval_samples_per_second": 120.594, | |
| "eval_steps_per_second": 1.892, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 4.4619883040935676e-05, | |
| "loss": 2.1651, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "eval_loss": 2.7877776622772217, | |
| "eval_runtime": 54.9786, | |
| "eval_samples_per_second": 120.574, | |
| "eval_steps_per_second": 1.892, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 4.43859649122807e-05, | |
| "loss": 2.1267, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "eval_loss": 2.7796318531036377, | |
| "eval_runtime": 55.0112, | |
| "eval_samples_per_second": 120.503, | |
| "eval_steps_per_second": 1.891, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 4.4152046783625734e-05, | |
| "loss": 2.0887, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "eval_loss": 2.7155935764312744, | |
| "eval_runtime": 54.9846, | |
| "eval_samples_per_second": 120.561, | |
| "eval_steps_per_second": 1.891, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 4.3918128654970766e-05, | |
| "loss": 2.0477, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "eval_loss": 2.7347090244293213, | |
| "eval_runtime": 54.9797, | |
| "eval_samples_per_second": 120.572, | |
| "eval_steps_per_second": 1.892, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 4.368421052631579e-05, | |
| "loss": 2.0055, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "eval_loss": 2.7260184288024902, | |
| "eval_runtime": 54.9686, | |
| "eval_samples_per_second": 120.596, | |
| "eval_steps_per_second": 1.892, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 4.345029239766082e-05, | |
| "loss": 1.9738, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "eval_loss": 2.7053301334381104, | |
| "eval_runtime": 54.975, | |
| "eval_samples_per_second": 120.582, | |
| "eval_steps_per_second": 1.892, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 4.321637426900585e-05, | |
| "loss": 1.9336, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "eval_loss": 2.6540746688842773, | |
| "eval_runtime": 54.9866, | |
| "eval_samples_per_second": 120.557, | |
| "eval_steps_per_second": 1.891, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 4.298245614035088e-05, | |
| "loss": 1.9008, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "eval_loss": 2.6721866130828857, | |
| "eval_runtime": 54.9707, | |
| "eval_samples_per_second": 120.592, | |
| "eval_steps_per_second": 1.892, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 4.274853801169591e-05, | |
| "loss": 1.8603, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "eval_loss": 2.6387619972229004, | |
| "eval_runtime": 54.9719, | |
| "eval_samples_per_second": 120.589, | |
| "eval_steps_per_second": 1.892, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 4.251461988304094e-05, | |
| "loss": 1.8291, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "eval_loss": 2.640782594680786, | |
| "eval_runtime": 54.9643, | |
| "eval_samples_per_second": 120.606, | |
| "eval_steps_per_second": 1.892, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 4.228070175438597e-05, | |
| "loss": 1.8059, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "eval_loss": 2.614128589630127, | |
| "eval_runtime": 54.9538, | |
| "eval_samples_per_second": 120.629, | |
| "eval_steps_per_second": 1.893, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 4.204678362573099e-05, | |
| "loss": 1.7663, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "eval_loss": 2.618607997894287, | |
| "eval_runtime": 55.0051, | |
| "eval_samples_per_second": 120.516, | |
| "eval_steps_per_second": 1.891, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 4.1812865497076025e-05, | |
| "loss": 1.7322, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "eval_loss": 2.6462574005126953, | |
| "eval_runtime": 54.9802, | |
| "eval_samples_per_second": 120.571, | |
| "eval_steps_per_second": 1.892, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 4.157894736842106e-05, | |
| "loss": 1.7187, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "eval_loss": 2.5989272594451904, | |
| "eval_runtime": 54.9619, | |
| "eval_samples_per_second": 120.611, | |
| "eval_steps_per_second": 1.892, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 4.134502923976608e-05, | |
| "loss": 1.6852, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "eval_loss": 2.5719058513641357, | |
| "eval_runtime": 54.9667, | |
| "eval_samples_per_second": 120.6, | |
| "eval_steps_per_second": 1.892, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 4.111111111111111e-05, | |
| "loss": 1.6649, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "eval_loss": 2.57804012298584, | |
| "eval_runtime": 54.9675, | |
| "eval_samples_per_second": 120.598, | |
| "eval_steps_per_second": 1.892, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 4.087719298245614e-05, | |
| "loss": 1.6285, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "eval_loss": 2.5606088638305664, | |
| "eval_runtime": 55.1929, | |
| "eval_samples_per_second": 120.106, | |
| "eval_steps_per_second": 1.884, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 4.0643274853801174e-05, | |
| "loss": 1.6033, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "eval_loss": 2.570094585418701, | |
| "eval_runtime": 55.1572, | |
| "eval_samples_per_second": 120.184, | |
| "eval_steps_per_second": 1.886, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 4.04093567251462e-05, | |
| "loss": 1.5833, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "eval_loss": 2.5516393184661865, | |
| "eval_runtime": 55.1223, | |
| "eval_samples_per_second": 120.26, | |
| "eval_steps_per_second": 1.887, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 4.017543859649123e-05, | |
| "loss": 1.5701, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "eval_loss": 2.544060707092285, | |
| "eval_runtime": 54.9919, | |
| "eval_samples_per_second": 120.545, | |
| "eval_steps_per_second": 1.891, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 3.994152046783626e-05, | |
| "loss": 1.5252, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "eval_loss": 2.545295476913452, | |
| "eval_runtime": 54.9924, | |
| "eval_samples_per_second": 120.544, | |
| "eval_steps_per_second": 1.891, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 3.970760233918129e-05, | |
| "loss": 1.5019, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "eval_loss": 2.547807216644287, | |
| "eval_runtime": 55.007, | |
| "eval_samples_per_second": 120.512, | |
| "eval_steps_per_second": 1.891, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 3.9473684210526316e-05, | |
| "loss": 1.4789, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "eval_loss": 2.541635036468506, | |
| "eval_runtime": 54.9822, | |
| "eval_samples_per_second": 120.566, | |
| "eval_steps_per_second": 1.892, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 3.923976608187135e-05, | |
| "loss": 1.4611, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "eval_loss": 2.526390790939331, | |
| "eval_runtime": 54.9826, | |
| "eval_samples_per_second": 120.565, | |
| "eval_steps_per_second": 1.892, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 3.9005847953216374e-05, | |
| "loss": 1.4413, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "eval_loss": 2.5193886756896973, | |
| "eval_runtime": 54.9793, | |
| "eval_samples_per_second": 120.573, | |
| "eval_steps_per_second": 1.892, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 3.877192982456141e-05, | |
| "loss": 1.4106, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "eval_loss": 2.504810094833374, | |
| "eval_runtime": 55.0248, | |
| "eval_samples_per_second": 120.473, | |
| "eval_steps_per_second": 1.89, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 3.853801169590643e-05, | |
| "loss": 1.3928, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "eval_loss": 2.5266056060791016, | |
| "eval_runtime": 55.1287, | |
| "eval_samples_per_second": 120.246, | |
| "eval_steps_per_second": 1.886, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 3.8304093567251465e-05, | |
| "loss": 1.3857, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "eval_loss": 2.5026743412017822, | |
| "eval_runtime": 55.0968, | |
| "eval_samples_per_second": 120.315, | |
| "eval_steps_per_second": 1.888, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 3.80701754385965e-05, | |
| "loss": 1.3682, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "eval_loss": 2.5191988945007324, | |
| "eval_runtime": 55.0835, | |
| "eval_samples_per_second": 120.345, | |
| "eval_steps_per_second": 1.888, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 3.783625730994152e-05, | |
| "loss": 1.337, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "eval_loss": 2.4917993545532227, | |
| "eval_runtime": 55.1615, | |
| "eval_samples_per_second": 120.175, | |
| "eval_steps_per_second": 1.885, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 3.760233918128655e-05, | |
| "loss": 1.3314, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "eval_loss": 2.503882646560669, | |
| "eval_runtime": 55.1711, | |
| "eval_samples_per_second": 120.153, | |
| "eval_steps_per_second": 1.885, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 3.736842105263158e-05, | |
| "loss": 1.3213, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "eval_loss": 2.5335164070129395, | |
| "eval_runtime": 55.1504, | |
| "eval_samples_per_second": 120.199, | |
| "eval_steps_per_second": 1.886, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 3.713450292397661e-05, | |
| "loss": 1.2901, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "eval_loss": 2.5040109157562256, | |
| "eval_runtime": 55.1836, | |
| "eval_samples_per_second": 120.126, | |
| "eval_steps_per_second": 1.885, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 3.690058479532164e-05, | |
| "loss": 1.2927, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "eval_loss": 2.4990580081939697, | |
| "eval_runtime": 55.1982, | |
| "eval_samples_per_second": 120.095, | |
| "eval_steps_per_second": 1.884, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 3.6666666666666666e-05, | |
| "loss": 1.2631, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "eval_loss": 2.500002861022949, | |
| "eval_runtime": 55.1671, | |
| "eval_samples_per_second": 120.162, | |
| "eval_steps_per_second": 1.885, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 3.64327485380117e-05, | |
| "loss": 1.2526, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "eval_loss": 2.484260320663452, | |
| "eval_runtime": 55.0693, | |
| "eval_samples_per_second": 120.376, | |
| "eval_steps_per_second": 1.889, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 3.6198830409356724e-05, | |
| "loss": 1.2371, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "eval_loss": 2.480639696121216, | |
| "eval_runtime": 55.0676, | |
| "eval_samples_per_second": 120.379, | |
| "eval_steps_per_second": 1.889, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 3.5964912280701756e-05, | |
| "loss": 1.2194, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "eval_loss": 2.480283498764038, | |
| "eval_runtime": 54.981, | |
| "eval_samples_per_second": 120.569, | |
| "eval_steps_per_second": 1.892, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 3.573099415204679e-05, | |
| "loss": 1.2103, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "eval_loss": 2.4655823707580566, | |
| "eval_runtime": 54.9896, | |
| "eval_samples_per_second": 120.55, | |
| "eval_steps_per_second": 1.891, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 3.5497076023391815e-05, | |
| "loss": 1.1954, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "eval_loss": 2.467862367630005, | |
| "eval_runtime": 55.0349, | |
| "eval_samples_per_second": 120.451, | |
| "eval_steps_per_second": 1.89, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 3.526315789473684e-05, | |
| "loss": 1.1841, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "eval_loss": 2.4734864234924316, | |
| "eval_runtime": 55.0767, | |
| "eval_samples_per_second": 120.359, | |
| "eval_steps_per_second": 1.888, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 3.502923976608187e-05, | |
| "loss": 1.1697, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "eval_loss": 2.4691245555877686, | |
| "eval_runtime": 55.01, | |
| "eval_samples_per_second": 120.505, | |
| "eval_steps_per_second": 1.891, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "learning_rate": 3.4795321637426905e-05, | |
| "loss": 1.1488, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "eval_loss": 2.50709867477417, | |
| "eval_runtime": 55.0061, | |
| "eval_samples_per_second": 120.514, | |
| "eval_steps_per_second": 1.891, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 3.09, | |
| "learning_rate": 3.456140350877193e-05, | |
| "loss": 1.1343, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 3.09, | |
| "eval_loss": 2.464665412902832, | |
| "eval_runtime": 54.9972, | |
| "eval_samples_per_second": 120.533, | |
| "eval_steps_per_second": 1.891, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 3.13, | |
| "learning_rate": 3.432748538011696e-05, | |
| "loss": 1.1285, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 3.13, | |
| "eval_loss": 2.4716575145721436, | |
| "eval_runtime": 54.9735, | |
| "eval_samples_per_second": 120.585, | |
| "eval_steps_per_second": 1.892, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 3.18, | |
| "learning_rate": 3.409356725146199e-05, | |
| "loss": 1.1124, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 3.18, | |
| "eval_loss": 2.476966619491577, | |
| "eval_runtime": 55.0007, | |
| "eval_samples_per_second": 120.526, | |
| "eval_steps_per_second": 1.891, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 3.23, | |
| "learning_rate": 3.385964912280702e-05, | |
| "loss": 1.1097, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 3.23, | |
| "eval_loss": 2.487794876098633, | |
| "eval_runtime": 54.9919, | |
| "eval_samples_per_second": 120.545, | |
| "eval_steps_per_second": 1.891, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 3.27, | |
| "learning_rate": 3.362573099415205e-05, | |
| "loss": 1.0956, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 3.27, | |
| "eval_loss": 2.4818880558013916, | |
| "eval_runtime": 55.0269, | |
| "eval_samples_per_second": 120.468, | |
| "eval_steps_per_second": 1.89, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 3.32, | |
| "learning_rate": 3.339181286549708e-05, | |
| "loss": 1.088, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 3.32, | |
| "eval_loss": 2.4609289169311523, | |
| "eval_runtime": 54.9477, | |
| "eval_samples_per_second": 120.642, | |
| "eval_steps_per_second": 1.893, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 3.37, | |
| "learning_rate": 3.3157894736842106e-05, | |
| "loss": 1.0728, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 3.37, | |
| "eval_loss": 2.4839322566986084, | |
| "eval_runtime": 54.9672, | |
| "eval_samples_per_second": 120.599, | |
| "eval_steps_per_second": 1.892, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 3.42, | |
| "learning_rate": 3.292397660818713e-05, | |
| "loss": 1.0587, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 3.42, | |
| "eval_loss": 2.4727675914764404, | |
| "eval_runtime": 55.0507, | |
| "eval_samples_per_second": 120.416, | |
| "eval_steps_per_second": 1.889, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 3.46, | |
| "learning_rate": 3.2690058479532164e-05, | |
| "loss": 1.0534, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 3.46, | |
| "eval_loss": 2.4812207221984863, | |
| "eval_runtime": 54.9899, | |
| "eval_samples_per_second": 120.549, | |
| "eval_steps_per_second": 1.891, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 3.51, | |
| "learning_rate": 3.24561403508772e-05, | |
| "loss": 1.0455, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 3.51, | |
| "eval_loss": 2.469550609588623, | |
| "eval_runtime": 54.9765, | |
| "eval_samples_per_second": 120.579, | |
| "eval_steps_per_second": 1.892, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 3.56, | |
| "learning_rate": 3.222222222222223e-05, | |
| "loss": 1.0402, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 3.56, | |
| "eval_loss": 2.458113431930542, | |
| "eval_runtime": 54.9925, | |
| "eval_samples_per_second": 120.544, | |
| "eval_steps_per_second": 1.891, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "learning_rate": 3.198830409356725e-05, | |
| "loss": 1.0227, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "eval_loss": 2.4712133407592773, | |
| "eval_runtime": 54.9707, | |
| "eval_samples_per_second": 120.592, | |
| "eval_steps_per_second": 1.892, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 3.65, | |
| "learning_rate": 3.175438596491228e-05, | |
| "loss": 1.0172, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 3.65, | |
| "eval_loss": 2.4822046756744385, | |
| "eval_runtime": 54.9842, | |
| "eval_samples_per_second": 120.562, | |
| "eval_steps_per_second": 1.891, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 3.7, | |
| "learning_rate": 3.152046783625731e-05, | |
| "loss": 0.9947, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 3.7, | |
| "eval_loss": 2.455008029937744, | |
| "eval_runtime": 54.9636, | |
| "eval_samples_per_second": 120.607, | |
| "eval_steps_per_second": 1.892, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 3.74, | |
| "learning_rate": 3.128654970760234e-05, | |
| "loss": 0.9924, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 3.74, | |
| "eval_loss": 2.440960168838501, | |
| "eval_runtime": 54.9708, | |
| "eval_samples_per_second": 120.591, | |
| "eval_steps_per_second": 1.892, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 3.79, | |
| "learning_rate": 3.105263157894737e-05, | |
| "loss": 0.9863, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 3.79, | |
| "eval_loss": 2.454493761062622, | |
| "eval_runtime": 54.966, | |
| "eval_samples_per_second": 120.602, | |
| "eval_steps_per_second": 1.892, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 3.84, | |
| "learning_rate": 3.08187134502924e-05, | |
| "loss": 0.9793, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 3.84, | |
| "eval_loss": 2.482584238052368, | |
| "eval_runtime": 55.0651, | |
| "eval_samples_per_second": 120.385, | |
| "eval_steps_per_second": 1.889, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 3.88, | |
| "learning_rate": 3.058479532163743e-05, | |
| "loss": 0.9639, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 3.88, | |
| "eval_loss": 2.4847776889801025, | |
| "eval_runtime": 55.089, | |
| "eval_samples_per_second": 120.332, | |
| "eval_steps_per_second": 1.888, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 3.93, | |
| "learning_rate": 3.035087719298246e-05, | |
| "loss": 0.9584, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 3.93, | |
| "eval_loss": 2.4647934436798096, | |
| "eval_runtime": 55.1206, | |
| "eval_samples_per_second": 120.263, | |
| "eval_steps_per_second": 1.887, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 3.98, | |
| "learning_rate": 3.0116959064327488e-05, | |
| "loss": 0.9508, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 3.98, | |
| "eval_loss": 2.445103406906128, | |
| "eval_runtime": 55.0978, | |
| "eval_samples_per_second": 120.313, | |
| "eval_steps_per_second": 1.888, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 4.02, | |
| "learning_rate": 2.9883040935672517e-05, | |
| "loss": 0.9425, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 4.02, | |
| "eval_loss": 2.4363410472869873, | |
| "eval_runtime": 55.0773, | |
| "eval_samples_per_second": 120.358, | |
| "eval_steps_per_second": 1.888, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 4.07, | |
| "learning_rate": 2.9649122807017543e-05, | |
| "loss": 0.9301, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 4.07, | |
| "eval_loss": 2.4576821327209473, | |
| "eval_runtime": 55.0908, | |
| "eval_samples_per_second": 120.329, | |
| "eval_steps_per_second": 1.888, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 4.12, | |
| "learning_rate": 2.9415204678362572e-05, | |
| "loss": 0.922, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 4.12, | |
| "eval_loss": 2.487666130065918, | |
| "eval_runtime": 55.1028, | |
| "eval_samples_per_second": 120.302, | |
| "eval_steps_per_second": 1.887, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 4.16, | |
| "learning_rate": 2.9181286549707604e-05, | |
| "loss": 0.9102, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 4.16, | |
| "eval_loss": 2.462902784347534, | |
| "eval_runtime": 55.0955, | |
| "eval_samples_per_second": 120.318, | |
| "eval_steps_per_second": 1.888, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 4.21, | |
| "learning_rate": 2.8947368421052634e-05, | |
| "loss": 0.9081, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 4.21, | |
| "eval_loss": 2.4494595527648926, | |
| "eval_runtime": 55.0849, | |
| "eval_samples_per_second": 120.341, | |
| "eval_steps_per_second": 1.888, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 4.26, | |
| "learning_rate": 2.8713450292397666e-05, | |
| "loss": 0.8956, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 4.26, | |
| "eval_loss": 2.466681718826294, | |
| "eval_runtime": 55.0767, | |
| "eval_samples_per_second": 120.359, | |
| "eval_steps_per_second": 1.888, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 4.3, | |
| "learning_rate": 2.847953216374269e-05, | |
| "loss": 0.8932, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 4.3, | |
| "eval_loss": 2.4637372493743896, | |
| "eval_runtime": 55.0713, | |
| "eval_samples_per_second": 120.371, | |
| "eval_steps_per_second": 1.888, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 4.35, | |
| "learning_rate": 2.824561403508772e-05, | |
| "loss": 0.8845, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 4.35, | |
| "eval_loss": 2.4586174488067627, | |
| "eval_runtime": 55.0741, | |
| "eval_samples_per_second": 120.365, | |
| "eval_steps_per_second": 1.888, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "learning_rate": 2.801169590643275e-05, | |
| "loss": 0.877, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "eval_loss": 2.471717357635498, | |
| "eval_runtime": 55.0727, | |
| "eval_samples_per_second": 120.368, | |
| "eval_steps_per_second": 1.888, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 4.44, | |
| "learning_rate": 2.777777777777778e-05, | |
| "loss": 0.8713, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 4.44, | |
| "eval_loss": 2.4618284702301025, | |
| "eval_runtime": 55.0799, | |
| "eval_samples_per_second": 120.352, | |
| "eval_steps_per_second": 1.888, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 4.49, | |
| "learning_rate": 2.754385964912281e-05, | |
| "loss": 0.8768, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 4.49, | |
| "eval_loss": 2.4480040073394775, | |
| "eval_runtime": 55.1696, | |
| "eval_samples_per_second": 120.157, | |
| "eval_steps_per_second": 1.885, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 4.54, | |
| "learning_rate": 2.7309941520467834e-05, | |
| "loss": 0.8662, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 4.54, | |
| "eval_loss": 2.468902349472046, | |
| "eval_runtime": 55.1714, | |
| "eval_samples_per_second": 120.153, | |
| "eval_steps_per_second": 1.885, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 4.58, | |
| "learning_rate": 2.7076023391812866e-05, | |
| "loss": 0.8622, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 4.58, | |
| "eval_loss": 2.4613983631134033, | |
| "eval_runtime": 55.1613, | |
| "eval_samples_per_second": 120.175, | |
| "eval_steps_per_second": 1.885, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 4.63, | |
| "learning_rate": 2.6842105263157896e-05, | |
| "loss": 0.8497, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 4.63, | |
| "eval_loss": 2.488284111022949, | |
| "eval_runtime": 55.1664, | |
| "eval_samples_per_second": 120.164, | |
| "eval_steps_per_second": 1.885, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 4.68, | |
| "learning_rate": 2.6608187134502928e-05, | |
| "loss": 0.8399, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 4.68, | |
| "eval_loss": 2.486598253250122, | |
| "eval_runtime": 55.142, | |
| "eval_samples_per_second": 120.217, | |
| "eval_steps_per_second": 1.886, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 4.73, | |
| "learning_rate": 2.6374269005847957e-05, | |
| "loss": 0.8397, | |
| "step": 101000 | |
| }, | |
| { | |
| "epoch": 4.73, | |
| "eval_loss": 2.490933895111084, | |
| "eval_runtime": 55.1377, | |
| "eval_samples_per_second": 120.226, | |
| "eval_steps_per_second": 1.886, | |
| "step": 101000 | |
| }, | |
| { | |
| "epoch": 4.77, | |
| "learning_rate": 2.6140350877192983e-05, | |
| "loss": 0.8266, | |
| "step": 102000 | |
| }, | |
| { | |
| "epoch": 4.77, | |
| "eval_loss": 2.4587643146514893, | |
| "eval_runtime": 55.0944, | |
| "eval_samples_per_second": 120.321, | |
| "eval_steps_per_second": 1.888, | |
| "step": 102000 | |
| }, | |
| { | |
| "epoch": 4.82, | |
| "learning_rate": 2.5906432748538012e-05, | |
| "loss": 0.8231, | |
| "step": 103000 | |
| }, | |
| { | |
| "epoch": 4.82, | |
| "eval_loss": 2.4951488971710205, | |
| "eval_runtime": 55.155, | |
| "eval_samples_per_second": 120.189, | |
| "eval_steps_per_second": 1.886, | |
| "step": 103000 | |
| }, | |
| { | |
| "epoch": 4.87, | |
| "learning_rate": 2.567251461988304e-05, | |
| "loss": 0.8189, | |
| "step": 104000 | |
| }, | |
| { | |
| "epoch": 4.87, | |
| "eval_loss": 2.458134889602661, | |
| "eval_runtime": 55.0735, | |
| "eval_samples_per_second": 120.366, | |
| "eval_steps_per_second": 1.888, | |
| "step": 104000 | |
| }, | |
| { | |
| "epoch": 4.91, | |
| "learning_rate": 2.5438596491228074e-05, | |
| "loss": 0.8155, | |
| "step": 105000 | |
| }, | |
| { | |
| "epoch": 4.91, | |
| "eval_loss": 2.448225736618042, | |
| "eval_runtime": 55.0955, | |
| "eval_samples_per_second": 120.318, | |
| "eval_steps_per_second": 1.888, | |
| "step": 105000 | |
| }, | |
| { | |
| "epoch": 4.96, | |
| "learning_rate": 2.5204678362573103e-05, | |
| "loss": 0.8059, | |
| "step": 106000 | |
| }, | |
| { | |
| "epoch": 4.96, | |
| "eval_loss": 2.489133358001709, | |
| "eval_runtime": 55.1106, | |
| "eval_samples_per_second": 120.285, | |
| "eval_steps_per_second": 1.887, | |
| "step": 106000 | |
| }, | |
| { | |
| "epoch": 5.01, | |
| "learning_rate": 2.4970760233918132e-05, | |
| "loss": 0.8085, | |
| "step": 107000 | |
| }, | |
| { | |
| "epoch": 5.01, | |
| "eval_loss": 2.491405487060547, | |
| "eval_runtime": 55.0557, | |
| "eval_samples_per_second": 120.405, | |
| "eval_steps_per_second": 1.889, | |
| "step": 107000 | |
| }, | |
| { | |
| "epoch": 5.05, | |
| "learning_rate": 2.4736842105263158e-05, | |
| "loss": 0.7851, | |
| "step": 108000 | |
| }, | |
| { | |
| "epoch": 5.05, | |
| "eval_loss": 2.486567735671997, | |
| "eval_runtime": 55.0714, | |
| "eval_samples_per_second": 120.371, | |
| "eval_steps_per_second": 1.888, | |
| "step": 108000 | |
| }, | |
| { | |
| "epoch": 5.1, | |
| "learning_rate": 2.450292397660819e-05, | |
| "loss": 0.7827, | |
| "step": 109000 | |
| }, | |
| { | |
| "epoch": 5.1, | |
| "eval_loss": 2.480097532272339, | |
| "eval_runtime": 55.0814, | |
| "eval_samples_per_second": 120.349, | |
| "eval_steps_per_second": 1.888, | |
| "step": 109000 | |
| }, | |
| { | |
| "epoch": 5.15, | |
| "learning_rate": 2.4269005847953216e-05, | |
| "loss": 0.7813, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 5.15, | |
| "eval_loss": 2.4855968952178955, | |
| "eval_runtime": 55.078, | |
| "eval_samples_per_second": 120.357, | |
| "eval_steps_per_second": 1.888, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 5.19, | |
| "learning_rate": 2.4035087719298245e-05, | |
| "loss": 0.7829, | |
| "step": 111000 | |
| }, | |
| { | |
| "epoch": 5.19, | |
| "eval_loss": 2.462341785430908, | |
| "eval_runtime": 55.0705, | |
| "eval_samples_per_second": 120.373, | |
| "eval_steps_per_second": 1.888, | |
| "step": 111000 | |
| }, | |
| { | |
| "epoch": 5.24, | |
| "learning_rate": 2.3801169590643278e-05, | |
| "loss": 0.7724, | |
| "step": 112000 | |
| }, | |
| { | |
| "epoch": 5.24, | |
| "eval_loss": 2.478029251098633, | |
| "eval_runtime": 55.0837, | |
| "eval_samples_per_second": 120.344, | |
| "eval_steps_per_second": 1.888, | |
| "step": 112000 | |
| }, | |
| { | |
| "epoch": 5.29, | |
| "learning_rate": 2.3567251461988303e-05, | |
| "loss": 0.7646, | |
| "step": 113000 | |
| }, | |
| { | |
| "epoch": 5.29, | |
| "eval_loss": 2.4587323665618896, | |
| "eval_runtime": 55.1053, | |
| "eval_samples_per_second": 120.297, | |
| "eval_steps_per_second": 1.887, | |
| "step": 113000 | |
| }, | |
| { | |
| "epoch": 5.33, | |
| "learning_rate": 2.3333333333333336e-05, | |
| "loss": 0.7604, | |
| "step": 114000 | |
| }, | |
| { | |
| "epoch": 5.33, | |
| "eval_loss": 2.453984498977661, | |
| "eval_runtime": 55.0903, | |
| "eval_samples_per_second": 120.33, | |
| "eval_steps_per_second": 1.888, | |
| "step": 114000 | |
| }, | |
| { | |
| "epoch": 5.38, | |
| "learning_rate": 2.309941520467836e-05, | |
| "loss": 0.7518, | |
| "step": 115000 | |
| }, | |
| { | |
| "epoch": 5.38, | |
| "eval_loss": 2.488924026489258, | |
| "eval_runtime": 55.1009, | |
| "eval_samples_per_second": 120.307, | |
| "eval_steps_per_second": 1.887, | |
| "step": 115000 | |
| }, | |
| { | |
| "epoch": 5.43, | |
| "learning_rate": 2.2865497076023394e-05, | |
| "loss": 0.7515, | |
| "step": 116000 | |
| }, | |
| { | |
| "epoch": 5.43, | |
| "eval_loss": 2.4510860443115234, | |
| "eval_runtime": 55.088, | |
| "eval_samples_per_second": 120.335, | |
| "eval_steps_per_second": 1.888, | |
| "step": 116000 | |
| }, | |
| { | |
| "epoch": 5.47, | |
| "learning_rate": 2.2631578947368423e-05, | |
| "loss": 0.7511, | |
| "step": 117000 | |
| }, | |
| { | |
| "epoch": 5.47, | |
| "eval_loss": 2.468933343887329, | |
| "eval_runtime": 55.0676, | |
| "eval_samples_per_second": 120.379, | |
| "eval_steps_per_second": 1.889, | |
| "step": 117000 | |
| }, | |
| { | |
| "epoch": 5.52, | |
| "learning_rate": 2.2397660818713452e-05, | |
| "loss": 0.7424, | |
| "step": 118000 | |
| }, | |
| { | |
| "epoch": 5.52, | |
| "eval_loss": 2.4676008224487305, | |
| "eval_runtime": 55.1052, | |
| "eval_samples_per_second": 120.297, | |
| "eval_steps_per_second": 1.887, | |
| "step": 118000 | |
| }, | |
| { | |
| "epoch": 5.57, | |
| "learning_rate": 2.216374269005848e-05, | |
| "loss": 0.7327, | |
| "step": 119000 | |
| }, | |
| { | |
| "epoch": 5.57, | |
| "eval_loss": 2.482384443283081, | |
| "eval_runtime": 55.0883, | |
| "eval_samples_per_second": 120.334, | |
| "eval_steps_per_second": 1.888, | |
| "step": 119000 | |
| }, | |
| { | |
| "epoch": 5.61, | |
| "learning_rate": 2.1929824561403507e-05, | |
| "loss": 0.7349, | |
| "step": 120000 | |
| }, | |
| { | |
| "epoch": 5.61, | |
| "eval_loss": 2.450364351272583, | |
| "eval_runtime": 55.0642, | |
| "eval_samples_per_second": 120.387, | |
| "eval_steps_per_second": 1.889, | |
| "step": 120000 | |
| }, | |
| { | |
| "epoch": 5.66, | |
| "learning_rate": 2.169590643274854e-05, | |
| "loss": 0.7307, | |
| "step": 121000 | |
| }, | |
| { | |
| "epoch": 5.66, | |
| "eval_loss": 2.4753456115722656, | |
| "eval_runtime": 55.0827, | |
| "eval_samples_per_second": 120.346, | |
| "eval_steps_per_second": 1.888, | |
| "step": 121000 | |
| }, | |
| { | |
| "epoch": 5.71, | |
| "learning_rate": 2.146198830409357e-05, | |
| "loss": 0.7269, | |
| "step": 122000 | |
| }, | |
| { | |
| "epoch": 5.71, | |
| "eval_loss": 2.463690757751465, | |
| "eval_runtime": 55.087, | |
| "eval_samples_per_second": 120.337, | |
| "eval_steps_per_second": 1.888, | |
| "step": 122000 | |
| }, | |
| { | |
| "epoch": 5.75, | |
| "learning_rate": 2.1228070175438598e-05, | |
| "loss": 0.7175, | |
| "step": 123000 | |
| }, | |
| { | |
| "epoch": 5.75, | |
| "eval_loss": 2.4744393825531006, | |
| "eval_runtime": 55.0809, | |
| "eval_samples_per_second": 120.35, | |
| "eval_steps_per_second": 1.888, | |
| "step": 123000 | |
| }, | |
| { | |
| "epoch": 5.8, | |
| "learning_rate": 2.0994152046783627e-05, | |
| "loss": 0.7178, | |
| "step": 124000 | |
| }, | |
| { | |
| "epoch": 5.8, | |
| "eval_loss": 2.4851980209350586, | |
| "eval_runtime": 55.0877, | |
| "eval_samples_per_second": 120.335, | |
| "eval_steps_per_second": 1.888, | |
| "step": 124000 | |
| }, | |
| { | |
| "epoch": 5.85, | |
| "learning_rate": 2.0760233918128656e-05, | |
| "loss": 0.7048, | |
| "step": 125000 | |
| }, | |
| { | |
| "epoch": 5.85, | |
| "eval_loss": 2.5102007389068604, | |
| "eval_runtime": 55.1078, | |
| "eval_samples_per_second": 120.291, | |
| "eval_steps_per_second": 1.887, | |
| "step": 125000 | |
| }, | |
| { | |
| "epoch": 5.89, | |
| "learning_rate": 2.0526315789473685e-05, | |
| "loss": 0.7072, | |
| "step": 126000 | |
| }, | |
| { | |
| "epoch": 5.89, | |
| "eval_loss": 2.5026237964630127, | |
| "eval_runtime": 55.1176, | |
| "eval_samples_per_second": 120.27, | |
| "eval_steps_per_second": 1.887, | |
| "step": 126000 | |
| }, | |
| { | |
| "epoch": 5.94, | |
| "learning_rate": 2.0292397660818714e-05, | |
| "loss": 0.7054, | |
| "step": 127000 | |
| }, | |
| { | |
| "epoch": 5.94, | |
| "eval_loss": 2.4804298877716064, | |
| "eval_runtime": 55.0663, | |
| "eval_samples_per_second": 120.382, | |
| "eval_steps_per_second": 1.889, | |
| "step": 127000 | |
| }, | |
| { | |
| "epoch": 5.99, | |
| "learning_rate": 2.0058479532163744e-05, | |
| "loss": 0.7019, | |
| "step": 128000 | |
| }, | |
| { | |
| "epoch": 5.99, | |
| "eval_loss": 2.4398744106292725, | |
| "eval_runtime": 54.9972, | |
| "eval_samples_per_second": 120.533, | |
| "eval_steps_per_second": 1.891, | |
| "step": 128000 | |
| }, | |
| { | |
| "epoch": 6.03, | |
| "learning_rate": 1.9824561403508773e-05, | |
| "loss": 0.6942, | |
| "step": 129000 | |
| }, | |
| { | |
| "epoch": 6.03, | |
| "eval_loss": 2.4618844985961914, | |
| "eval_runtime": 55.1004, | |
| "eval_samples_per_second": 120.308, | |
| "eval_steps_per_second": 1.887, | |
| "step": 129000 | |
| }, | |
| { | |
| "epoch": 6.08, | |
| "learning_rate": 1.9590643274853802e-05, | |
| "loss": 0.6842, | |
| "step": 130000 | |
| }, | |
| { | |
| "epoch": 6.08, | |
| "eval_loss": 2.496403217315674, | |
| "eval_runtime": 55.0871, | |
| "eval_samples_per_second": 120.337, | |
| "eval_steps_per_second": 1.888, | |
| "step": 130000 | |
| }, | |
| { | |
| "epoch": 6.13, | |
| "learning_rate": 1.935672514619883e-05, | |
| "loss": 0.6859, | |
| "step": 131000 | |
| }, | |
| { | |
| "epoch": 6.13, | |
| "eval_loss": 2.483705520629883, | |
| "eval_runtime": 55.0869, | |
| "eval_samples_per_second": 120.337, | |
| "eval_steps_per_second": 1.888, | |
| "step": 131000 | |
| }, | |
| { | |
| "epoch": 6.18, | |
| "learning_rate": 1.9122807017543863e-05, | |
| "loss": 0.6742, | |
| "step": 132000 | |
| }, | |
| { | |
| "epoch": 6.18, | |
| "eval_loss": 2.489377498626709, | |
| "eval_runtime": 55.1198, | |
| "eval_samples_per_second": 120.265, | |
| "eval_steps_per_second": 1.887, | |
| "step": 132000 | |
| }, | |
| { | |
| "epoch": 6.22, | |
| "learning_rate": 1.888888888888889e-05, | |
| "loss": 0.6818, | |
| "step": 133000 | |
| }, | |
| { | |
| "epoch": 6.22, | |
| "eval_loss": 2.507904052734375, | |
| "eval_runtime": 55.1222, | |
| "eval_samples_per_second": 120.26, | |
| "eval_steps_per_second": 1.887, | |
| "step": 133000 | |
| }, | |
| { | |
| "epoch": 6.27, | |
| "learning_rate": 1.8654970760233918e-05, | |
| "loss": 0.6742, | |
| "step": 134000 | |
| }, | |
| { | |
| "epoch": 6.27, | |
| "eval_loss": 2.4935832023620605, | |
| "eval_runtime": 55.1223, | |
| "eval_samples_per_second": 120.26, | |
| "eval_steps_per_second": 1.887, | |
| "step": 134000 | |
| }, | |
| { | |
| "epoch": 6.32, | |
| "learning_rate": 1.8421052631578947e-05, | |
| "loss": 0.6756, | |
| "step": 135000 | |
| }, | |
| { | |
| "epoch": 6.32, | |
| "eval_loss": 2.512763023376465, | |
| "eval_runtime": 55.167, | |
| "eval_samples_per_second": 120.162, | |
| "eval_steps_per_second": 1.885, | |
| "step": 135000 | |
| }, | |
| { | |
| "epoch": 6.36, | |
| "learning_rate": 1.8187134502923976e-05, | |
| "loss": 0.6635, | |
| "step": 136000 | |
| }, | |
| { | |
| "epoch": 6.36, | |
| "eval_loss": 2.5170469284057617, | |
| "eval_runtime": 55.1756, | |
| "eval_samples_per_second": 120.144, | |
| "eval_steps_per_second": 1.885, | |
| "step": 136000 | |
| }, | |
| { | |
| "epoch": 6.41, | |
| "learning_rate": 1.795321637426901e-05, | |
| "loss": 0.6645, | |
| "step": 137000 | |
| }, | |
| { | |
| "epoch": 6.41, | |
| "eval_loss": 2.5008370876312256, | |
| "eval_runtime": 55.1095, | |
| "eval_samples_per_second": 120.288, | |
| "eval_steps_per_second": 1.887, | |
| "step": 137000 | |
| }, | |
| { | |
| "epoch": 6.46, | |
| "learning_rate": 1.7719298245614035e-05, | |
| "loss": 0.6617, | |
| "step": 138000 | |
| }, | |
| { | |
| "epoch": 6.46, | |
| "eval_loss": 2.503709316253662, | |
| "eval_runtime": 55.1047, | |
| "eval_samples_per_second": 120.298, | |
| "eval_steps_per_second": 1.887, | |
| "step": 138000 | |
| }, | |
| { | |
| "epoch": 6.5, | |
| "learning_rate": 1.7485380116959067e-05, | |
| "loss": 0.6574, | |
| "step": 139000 | |
| }, | |
| { | |
| "epoch": 6.5, | |
| "eval_loss": 2.4953572750091553, | |
| "eval_runtime": 55.0727, | |
| "eval_samples_per_second": 120.368, | |
| "eval_steps_per_second": 1.888, | |
| "step": 139000 | |
| }, | |
| { | |
| "epoch": 6.55, | |
| "learning_rate": 1.7251461988304093e-05, | |
| "loss": 0.6519, | |
| "step": 140000 | |
| }, | |
| { | |
| "epoch": 6.55, | |
| "eval_loss": 2.519571304321289, | |
| "eval_runtime": 55.1072, | |
| "eval_samples_per_second": 120.293, | |
| "eval_steps_per_second": 1.887, | |
| "step": 140000 | |
| }, | |
| { | |
| "epoch": 6.6, | |
| "learning_rate": 1.7017543859649125e-05, | |
| "loss": 0.6453, | |
| "step": 141000 | |
| }, | |
| { | |
| "epoch": 6.6, | |
| "eval_loss": 2.485342502593994, | |
| "eval_runtime": 55.0939, | |
| "eval_samples_per_second": 120.322, | |
| "eval_steps_per_second": 1.888, | |
| "step": 141000 | |
| }, | |
| { | |
| "epoch": 6.64, | |
| "learning_rate": 1.6783625730994155e-05, | |
| "loss": 0.6445, | |
| "step": 142000 | |
| }, | |
| { | |
| "epoch": 6.64, | |
| "eval_loss": 2.485079765319824, | |
| "eval_runtime": 55.093, | |
| "eval_samples_per_second": 120.324, | |
| "eval_steps_per_second": 1.888, | |
| "step": 142000 | |
| }, | |
| { | |
| "epoch": 6.69, | |
| "learning_rate": 1.654970760233918e-05, | |
| "loss": 0.643, | |
| "step": 143000 | |
| }, | |
| { | |
| "epoch": 6.69, | |
| "eval_loss": 2.4923973083496094, | |
| "eval_runtime": 55.1032, | |
| "eval_samples_per_second": 120.302, | |
| "eval_steps_per_second": 1.887, | |
| "step": 143000 | |
| }, | |
| { | |
| "epoch": 6.74, | |
| "learning_rate": 1.6315789473684213e-05, | |
| "loss": 0.6373, | |
| "step": 144000 | |
| }, | |
| { | |
| "epoch": 6.74, | |
| "eval_loss": 2.5037529468536377, | |
| "eval_runtime": 55.0798, | |
| "eval_samples_per_second": 120.353, | |
| "eval_steps_per_second": 1.888, | |
| "step": 144000 | |
| }, | |
| { | |
| "epoch": 6.78, | |
| "learning_rate": 1.608187134502924e-05, | |
| "loss": 0.6292, | |
| "step": 145000 | |
| }, | |
| { | |
| "epoch": 6.78, | |
| "eval_loss": 2.488449811935425, | |
| "eval_runtime": 55.097, | |
| "eval_samples_per_second": 120.315, | |
| "eval_steps_per_second": 1.888, | |
| "step": 145000 | |
| }, | |
| { | |
| "epoch": 6.83, | |
| "learning_rate": 1.584795321637427e-05, | |
| "loss": 0.6386, | |
| "step": 146000 | |
| }, | |
| { | |
| "epoch": 6.83, | |
| "eval_loss": 2.482603073120117, | |
| "eval_runtime": 55.1088, | |
| "eval_samples_per_second": 120.289, | |
| "eval_steps_per_second": 1.887, | |
| "step": 146000 | |
| }, | |
| { | |
| "epoch": 6.88, | |
| "learning_rate": 1.56140350877193e-05, | |
| "loss": 0.6357, | |
| "step": 147000 | |
| }, | |
| { | |
| "epoch": 6.88, | |
| "eval_loss": 2.482375144958496, | |
| "eval_runtime": 55.1247, | |
| "eval_samples_per_second": 120.255, | |
| "eval_steps_per_second": 1.887, | |
| "step": 147000 | |
| }, | |
| { | |
| "epoch": 6.92, | |
| "learning_rate": 1.538011695906433e-05, | |
| "loss": 0.6251, | |
| "step": 148000 | |
| }, | |
| { | |
| "epoch": 6.92, | |
| "eval_loss": 2.4937736988067627, | |
| "eval_runtime": 55.1287, | |
| "eval_samples_per_second": 120.246, | |
| "eval_steps_per_second": 1.886, | |
| "step": 148000 | |
| }, | |
| { | |
| "epoch": 6.97, | |
| "learning_rate": 1.5146198830409358e-05, | |
| "loss": 0.624, | |
| "step": 149000 | |
| }, | |
| { | |
| "epoch": 6.97, | |
| "eval_loss": 2.5023653507232666, | |
| "eval_runtime": 55.1273, | |
| "eval_samples_per_second": 120.249, | |
| "eval_steps_per_second": 1.887, | |
| "step": 149000 | |
| }, | |
| { | |
| "epoch": 7.02, | |
| "learning_rate": 1.4912280701754386e-05, | |
| "loss": 0.6238, | |
| "step": 150000 | |
| }, | |
| { | |
| "epoch": 7.02, | |
| "eval_loss": 2.520798444747925, | |
| "eval_runtime": 55.0799, | |
| "eval_samples_per_second": 120.352, | |
| "eval_steps_per_second": 1.888, | |
| "step": 150000 | |
| }, | |
| { | |
| "epoch": 7.06, | |
| "learning_rate": 1.4678362573099417e-05, | |
| "loss": 0.6165, | |
| "step": 151000 | |
| }, | |
| { | |
| "epoch": 7.06, | |
| "eval_loss": 2.5339748859405518, | |
| "eval_runtime": 55.1169, | |
| "eval_samples_per_second": 120.272, | |
| "eval_steps_per_second": 1.887, | |
| "step": 151000 | |
| }, | |
| { | |
| "epoch": 7.11, | |
| "learning_rate": 1.4444444444444444e-05, | |
| "loss": 0.6119, | |
| "step": 152000 | |
| }, | |
| { | |
| "epoch": 7.11, | |
| "eval_loss": 2.5113964080810547, | |
| "eval_runtime": 55.0891, | |
| "eval_samples_per_second": 120.332, | |
| "eval_steps_per_second": 1.888, | |
| "step": 152000 | |
| }, | |
| { | |
| "epoch": 7.16, | |
| "learning_rate": 1.4210526315789475e-05, | |
| "loss": 0.6089, | |
| "step": 153000 | |
| }, | |
| { | |
| "epoch": 7.16, | |
| "eval_loss": 2.52811861038208, | |
| "eval_runtime": 55.112, | |
| "eval_samples_per_second": 120.282, | |
| "eval_steps_per_second": 1.887, | |
| "step": 153000 | |
| }, | |
| { | |
| "epoch": 7.2, | |
| "learning_rate": 1.3976608187134504e-05, | |
| "loss": 0.6035, | |
| "step": 154000 | |
| }, | |
| { | |
| "epoch": 7.2, | |
| "eval_loss": 2.5194358825683594, | |
| "eval_runtime": 55.1145, | |
| "eval_samples_per_second": 120.277, | |
| "eval_steps_per_second": 1.887, | |
| "step": 154000 | |
| }, | |
| { | |
| "epoch": 7.25, | |
| "learning_rate": 1.3742690058479531e-05, | |
| "loss": 0.6018, | |
| "step": 155000 | |
| }, | |
| { | |
| "epoch": 7.25, | |
| "eval_loss": 2.5066628456115723, | |
| "eval_runtime": 55.0979, | |
| "eval_samples_per_second": 120.313, | |
| "eval_steps_per_second": 1.888, | |
| "step": 155000 | |
| }, | |
| { | |
| "epoch": 7.3, | |
| "learning_rate": 1.3508771929824562e-05, | |
| "loss": 0.6016, | |
| "step": 156000 | |
| }, | |
| { | |
| "epoch": 7.3, | |
| "eval_loss": 2.490973711013794, | |
| "eval_runtime": 54.9953, | |
| "eval_samples_per_second": 120.538, | |
| "eval_steps_per_second": 1.891, | |
| "step": 156000 | |
| }, | |
| { | |
| "epoch": 7.34, | |
| "learning_rate": 1.327485380116959e-05, | |
| "loss": 0.6013, | |
| "step": 157000 | |
| }, | |
| { | |
| "epoch": 7.34, | |
| "eval_loss": 2.489246368408203, | |
| "eval_runtime": 54.9888, | |
| "eval_samples_per_second": 120.552, | |
| "eval_steps_per_second": 1.891, | |
| "step": 157000 | |
| }, | |
| { | |
| "epoch": 7.39, | |
| "learning_rate": 1.304093567251462e-05, | |
| "loss": 0.5958, | |
| "step": 158000 | |
| }, | |
| { | |
| "epoch": 7.39, | |
| "eval_loss": 2.528749704360962, | |
| "eval_runtime": 54.994, | |
| "eval_samples_per_second": 120.54, | |
| "eval_steps_per_second": 1.891, | |
| "step": 158000 | |
| }, | |
| { | |
| "epoch": 7.44, | |
| "learning_rate": 1.2807017543859651e-05, | |
| "loss": 0.5925, | |
| "step": 159000 | |
| }, | |
| { | |
| "epoch": 7.44, | |
| "eval_loss": 2.528515100479126, | |
| "eval_runtime": 54.9798, | |
| "eval_samples_per_second": 120.571, | |
| "eval_steps_per_second": 1.892, | |
| "step": 159000 | |
| }, | |
| { | |
| "epoch": 7.49, | |
| "learning_rate": 1.2573099415204679e-05, | |
| "loss": 0.5908, | |
| "step": 160000 | |
| }, | |
| { | |
| "epoch": 7.49, | |
| "eval_loss": 2.510267734527588, | |
| "eval_runtime": 55.0014, | |
| "eval_samples_per_second": 120.524, | |
| "eval_steps_per_second": 1.891, | |
| "step": 160000 | |
| }, | |
| { | |
| "epoch": 7.53, | |
| "learning_rate": 1.2339181286549708e-05, | |
| "loss": 0.587, | |
| "step": 161000 | |
| }, | |
| { | |
| "epoch": 7.53, | |
| "eval_loss": 2.533625602722168, | |
| "eval_runtime": 54.9987, | |
| "eval_samples_per_second": 120.53, | |
| "eval_steps_per_second": 1.891, | |
| "step": 161000 | |
| }, | |
| { | |
| "epoch": 7.58, | |
| "learning_rate": 1.2105263157894737e-05, | |
| "loss": 0.5851, | |
| "step": 162000 | |
| }, | |
| { | |
| "epoch": 7.58, | |
| "eval_loss": 2.538762331008911, | |
| "eval_runtime": 54.9696, | |
| "eval_samples_per_second": 120.594, | |
| "eval_steps_per_second": 1.892, | |
| "step": 162000 | |
| }, | |
| { | |
| "epoch": 7.63, | |
| "learning_rate": 1.1871345029239766e-05, | |
| "loss": 0.579, | |
| "step": 163000 | |
| }, | |
| { | |
| "epoch": 7.63, | |
| "eval_loss": 2.5098183155059814, | |
| "eval_runtime": 54.9924, | |
| "eval_samples_per_second": 120.544, | |
| "eval_steps_per_second": 1.891, | |
| "step": 163000 | |
| }, | |
| { | |
| "epoch": 7.67, | |
| "learning_rate": 1.1637426900584795e-05, | |
| "loss": 0.5764, | |
| "step": 164000 | |
| }, | |
| { | |
| "epoch": 7.67, | |
| "eval_loss": 2.5329983234405518, | |
| "eval_runtime": 55.0148, | |
| "eval_samples_per_second": 120.495, | |
| "eval_steps_per_second": 1.89, | |
| "step": 164000 | |
| }, | |
| { | |
| "epoch": 7.72, | |
| "learning_rate": 1.1403508771929824e-05, | |
| "loss": 0.5781, | |
| "step": 165000 | |
| }, | |
| { | |
| "epoch": 7.72, | |
| "eval_loss": 2.512319803237915, | |
| "eval_runtime": 54.9674, | |
| "eval_samples_per_second": 120.599, | |
| "eval_steps_per_second": 1.892, | |
| "step": 165000 | |
| }, | |
| { | |
| "epoch": 7.77, | |
| "learning_rate": 1.1169590643274855e-05, | |
| "loss": 0.5758, | |
| "step": 166000 | |
| }, | |
| { | |
| "epoch": 7.77, | |
| "eval_loss": 2.5034148693084717, | |
| "eval_runtime": 54.9854, | |
| "eval_samples_per_second": 120.559, | |
| "eval_steps_per_second": 1.891, | |
| "step": 166000 | |
| }, | |
| { | |
| "epoch": 7.81, | |
| "learning_rate": 1.0935672514619884e-05, | |
| "loss": 0.5792, | |
| "step": 167000 | |
| }, | |
| { | |
| "epoch": 7.81, | |
| "eval_loss": 2.525723934173584, | |
| "eval_runtime": 55.017, | |
| "eval_samples_per_second": 120.49, | |
| "eval_steps_per_second": 1.89, | |
| "step": 167000 | |
| }, | |
| { | |
| "epoch": 7.86, | |
| "learning_rate": 1.0701754385964913e-05, | |
| "loss": 0.5745, | |
| "step": 168000 | |
| }, | |
| { | |
| "epoch": 7.86, | |
| "eval_loss": 2.526042938232422, | |
| "eval_runtime": 54.987, | |
| "eval_samples_per_second": 120.556, | |
| "eval_steps_per_second": 1.891, | |
| "step": 168000 | |
| }, | |
| { | |
| "epoch": 7.91, | |
| "learning_rate": 1.0467836257309941e-05, | |
| "loss": 0.5702, | |
| "step": 169000 | |
| }, | |
| { | |
| "epoch": 7.91, | |
| "eval_loss": 2.5171217918395996, | |
| "eval_runtime": 54.976, | |
| "eval_samples_per_second": 120.58, | |
| "eval_steps_per_second": 1.892, | |
| "step": 169000 | |
| }, | |
| { | |
| "epoch": 7.95, | |
| "learning_rate": 1.023391812865497e-05, | |
| "loss": 0.5714, | |
| "step": 170000 | |
| }, | |
| { | |
| "epoch": 7.95, | |
| "eval_loss": 2.509648323059082, | |
| "eval_runtime": 54.9828, | |
| "eval_samples_per_second": 120.565, | |
| "eval_steps_per_second": 1.892, | |
| "step": 170000 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "learning_rate": 1e-05, | |
| "loss": 0.5692, | |
| "step": 171000 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_loss": 2.4963207244873047, | |
| "eval_runtime": 54.9818, | |
| "eval_samples_per_second": 120.567, | |
| "eval_steps_per_second": 1.892, | |
| "step": 171000 | |
| }, | |
| { | |
| "epoch": 8.05, | |
| "learning_rate": 9.76608187134503e-06, | |
| "loss": 0.5541, | |
| "step": 172000 | |
| }, | |
| { | |
| "epoch": 8.05, | |
| "eval_loss": 2.5158822536468506, | |
| "eval_runtime": 54.9875, | |
| "eval_samples_per_second": 120.555, | |
| "eval_steps_per_second": 1.891, | |
| "step": 172000 | |
| }, | |
| { | |
| "epoch": 8.09, | |
| "learning_rate": 9.532163742690059e-06, | |
| "loss": 0.5609, | |
| "step": 173000 | |
| }, | |
| { | |
| "epoch": 8.09, | |
| "eval_loss": 2.52651047706604, | |
| "eval_runtime": 54.9727, | |
| "eval_samples_per_second": 120.587, | |
| "eval_steps_per_second": 1.892, | |
| "step": 173000 | |
| }, | |
| { | |
| "epoch": 8.14, | |
| "learning_rate": 9.298245614035088e-06, | |
| "loss": 0.5567, | |
| "step": 174000 | |
| }, | |
| { | |
| "epoch": 8.14, | |
| "eval_loss": 2.529944658279419, | |
| "eval_runtime": 54.9646, | |
| "eval_samples_per_second": 120.605, | |
| "eval_steps_per_second": 1.892, | |
| "step": 174000 | |
| }, | |
| { | |
| "epoch": 8.19, | |
| "learning_rate": 9.064327485380117e-06, | |
| "loss": 0.5593, | |
| "step": 175000 | |
| }, | |
| { | |
| "epoch": 8.19, | |
| "eval_loss": 2.5352935791015625, | |
| "eval_runtime": 54.9969, | |
| "eval_samples_per_second": 120.534, | |
| "eval_steps_per_second": 1.891, | |
| "step": 175000 | |
| }, | |
| { | |
| "epoch": 8.23, | |
| "learning_rate": 8.830409356725146e-06, | |
| "loss": 0.5537, | |
| "step": 176000 | |
| }, | |
| { | |
| "epoch": 8.23, | |
| "eval_loss": 2.5415403842926025, | |
| "eval_runtime": 54.9924, | |
| "eval_samples_per_second": 120.544, | |
| "eval_steps_per_second": 1.891, | |
| "step": 176000 | |
| }, | |
| { | |
| "epoch": 8.28, | |
| "learning_rate": 8.596491228070176e-06, | |
| "loss": 0.5465, | |
| "step": 177000 | |
| }, | |
| { | |
| "epoch": 8.28, | |
| "eval_loss": 2.5204358100891113, | |
| "eval_runtime": 55.0062, | |
| "eval_samples_per_second": 120.514, | |
| "eval_steps_per_second": 1.891, | |
| "step": 177000 | |
| }, | |
| { | |
| "epoch": 8.33, | |
| "learning_rate": 8.362573099415205e-06, | |
| "loss": 0.548, | |
| "step": 178000 | |
| }, | |
| { | |
| "epoch": 8.33, | |
| "eval_loss": 2.5008552074432373, | |
| "eval_runtime": 54.9691, | |
| "eval_samples_per_second": 120.595, | |
| "eval_steps_per_second": 1.892, | |
| "step": 178000 | |
| }, | |
| { | |
| "epoch": 8.37, | |
| "learning_rate": 8.128654970760234e-06, | |
| "loss": 0.5477, | |
| "step": 179000 | |
| }, | |
| { | |
| "epoch": 8.37, | |
| "eval_loss": 2.5255722999572754, | |
| "eval_runtime": 54.9912, | |
| "eval_samples_per_second": 120.547, | |
| "eval_steps_per_second": 1.891, | |
| "step": 179000 | |
| }, | |
| { | |
| "epoch": 8.42, | |
| "learning_rate": 7.894736842105263e-06, | |
| "loss": 0.5393, | |
| "step": 180000 | |
| }, | |
| { | |
| "epoch": 8.42, | |
| "eval_loss": 2.49310564994812, | |
| "eval_runtime": 54.9871, | |
| "eval_samples_per_second": 120.556, | |
| "eval_steps_per_second": 1.891, | |
| "step": 180000 | |
| }, | |
| { | |
| "epoch": 8.47, | |
| "learning_rate": 7.660818713450294e-06, | |
| "loss": 0.5441, | |
| "step": 181000 | |
| }, | |
| { | |
| "epoch": 8.47, | |
| "eval_loss": 2.5206234455108643, | |
| "eval_runtime": 54.9863, | |
| "eval_samples_per_second": 120.557, | |
| "eval_steps_per_second": 1.891, | |
| "step": 181000 | |
| }, | |
| { | |
| "epoch": 8.51, | |
| "learning_rate": 7.426900584795322e-06, | |
| "loss": 0.5419, | |
| "step": 182000 | |
| }, | |
| { | |
| "epoch": 8.51, | |
| "eval_loss": 2.511657476425171, | |
| "eval_runtime": 54.9931, | |
| "eval_samples_per_second": 120.542, | |
| "eval_steps_per_second": 1.891, | |
| "step": 182000 | |
| }, | |
| { | |
| "epoch": 8.56, | |
| "learning_rate": 7.192982456140351e-06, | |
| "loss": 0.5377, | |
| "step": 183000 | |
| }, | |
| { | |
| "epoch": 8.56, | |
| "eval_loss": 2.534726142883301, | |
| "eval_runtime": 55.0074, | |
| "eval_samples_per_second": 120.511, | |
| "eval_steps_per_second": 1.891, | |
| "step": 183000 | |
| }, | |
| { | |
| "epoch": 8.61, | |
| "learning_rate": 6.95906432748538e-06, | |
| "loss": 0.5375, | |
| "step": 184000 | |
| }, | |
| { | |
| "epoch": 8.61, | |
| "eval_loss": 2.4978044033050537, | |
| "eval_runtime": 55.0077, | |
| "eval_samples_per_second": 120.51, | |
| "eval_steps_per_second": 1.891, | |
| "step": 184000 | |
| }, | |
| { | |
| "epoch": 8.65, | |
| "learning_rate": 6.725146198830409e-06, | |
| "loss": 0.5375, | |
| "step": 185000 | |
| }, | |
| { | |
| "epoch": 8.65, | |
| "eval_loss": 2.4929347038269043, | |
| "eval_runtime": 54.9953, | |
| "eval_samples_per_second": 120.537, | |
| "eval_steps_per_second": 1.891, | |
| "step": 185000 | |
| }, | |
| { | |
| "epoch": 8.7, | |
| "learning_rate": 6.4912280701754385e-06, | |
| "loss": 0.5354, | |
| "step": 186000 | |
| }, | |
| { | |
| "epoch": 8.7, | |
| "eval_loss": 2.4908556938171387, | |
| "eval_runtime": 55.0037, | |
| "eval_samples_per_second": 120.519, | |
| "eval_steps_per_second": 1.891, | |
| "step": 186000 | |
| }, | |
| { | |
| "epoch": 8.75, | |
| "learning_rate": 6.2573099415204685e-06, | |
| "loss": 0.5318, | |
| "step": 187000 | |
| }, | |
| { | |
| "epoch": 8.75, | |
| "eval_loss": 2.531054973602295, | |
| "eval_runtime": 54.9993, | |
| "eval_samples_per_second": 120.529, | |
| "eval_steps_per_second": 1.891, | |
| "step": 187000 | |
| }, | |
| { | |
| "epoch": 8.8, | |
| "learning_rate": 6.023391812865498e-06, | |
| "loss": 0.5338, | |
| "step": 188000 | |
| }, | |
| { | |
| "epoch": 8.8, | |
| "eval_loss": 2.5138602256774902, | |
| "eval_runtime": 54.9949, | |
| "eval_samples_per_second": 120.539, | |
| "eval_steps_per_second": 1.891, | |
| "step": 188000 | |
| }, | |
| { | |
| "epoch": 8.84, | |
| "learning_rate": 5.789473684210527e-06, | |
| "loss": 0.5247, | |
| "step": 189000 | |
| }, | |
| { | |
| "epoch": 8.84, | |
| "eval_loss": 2.5182831287384033, | |
| "eval_runtime": 54.9996, | |
| "eval_samples_per_second": 120.528, | |
| "eval_steps_per_second": 1.891, | |
| "step": 189000 | |
| }, | |
| { | |
| "epoch": 8.89, | |
| "learning_rate": 5.555555555555556e-06, | |
| "loss": 0.5249, | |
| "step": 190000 | |
| }, | |
| { | |
| "epoch": 8.89, | |
| "eval_loss": 2.5073628425598145, | |
| "eval_runtime": 54.9824, | |
| "eval_samples_per_second": 120.566, | |
| "eval_steps_per_second": 1.892, | |
| "step": 190000 | |
| }, | |
| { | |
| "epoch": 8.94, | |
| "learning_rate": 5.321637426900585e-06, | |
| "loss": 0.5266, | |
| "step": 191000 | |
| }, | |
| { | |
| "epoch": 8.94, | |
| "eval_loss": 2.5005078315734863, | |
| "eval_runtime": 54.9464, | |
| "eval_samples_per_second": 120.645, | |
| "eval_steps_per_second": 1.893, | |
| "step": 191000 | |
| }, | |
| { | |
| "epoch": 8.98, | |
| "learning_rate": 5.087719298245614e-06, | |
| "loss": 0.5279, | |
| "step": 192000 | |
| }, | |
| { | |
| "epoch": 8.98, | |
| "eval_loss": 2.5144731998443604, | |
| "eval_runtime": 54.9856, | |
| "eval_samples_per_second": 120.559, | |
| "eval_steps_per_second": 1.891, | |
| "step": 192000 | |
| }, | |
| { | |
| "epoch": 9.03, | |
| "learning_rate": 4.853801169590644e-06, | |
| "loss": 0.5231, | |
| "step": 193000 | |
| }, | |
| { | |
| "epoch": 9.03, | |
| "eval_loss": 2.5163862705230713, | |
| "eval_runtime": 54.965, | |
| "eval_samples_per_second": 120.604, | |
| "eval_steps_per_second": 1.892, | |
| "step": 193000 | |
| }, | |
| { | |
| "epoch": 9.08, | |
| "learning_rate": 4.619883040935673e-06, | |
| "loss": 0.5157, | |
| "step": 194000 | |
| }, | |
| { | |
| "epoch": 9.08, | |
| "eval_loss": 2.4902589321136475, | |
| "eval_runtime": 54.9685, | |
| "eval_samples_per_second": 120.596, | |
| "eval_steps_per_second": 1.892, | |
| "step": 194000 | |
| }, | |
| { | |
| "epoch": 9.12, | |
| "learning_rate": 4.3859649122807014e-06, | |
| "loss": 0.5153, | |
| "step": 195000 | |
| }, | |
| { | |
| "epoch": 9.12, | |
| "eval_loss": 2.5248496532440186, | |
| "eval_runtime": 55.0107, | |
| "eval_samples_per_second": 120.504, | |
| "eval_steps_per_second": 1.891, | |
| "step": 195000 | |
| }, | |
| { | |
| "epoch": 9.17, | |
| "learning_rate": 4.152046783625731e-06, | |
| "loss": 0.5238, | |
| "step": 196000 | |
| }, | |
| { | |
| "epoch": 9.17, | |
| "eval_loss": 2.4956910610198975, | |
| "eval_runtime": 54.9681, | |
| "eval_samples_per_second": 120.597, | |
| "eval_steps_per_second": 1.892, | |
| "step": 196000 | |
| }, | |
| { | |
| "epoch": 9.22, | |
| "learning_rate": 3.9181286549707605e-06, | |
| "loss": 0.5229, | |
| "step": 197000 | |
| }, | |
| { | |
| "epoch": 9.22, | |
| "eval_loss": 2.509634256362915, | |
| "eval_runtime": 55.0395, | |
| "eval_samples_per_second": 120.441, | |
| "eval_steps_per_second": 1.89, | |
| "step": 197000 | |
| }, | |
| { | |
| "epoch": 9.26, | |
| "learning_rate": 3.6842105263157892e-06, | |
| "loss": 0.5099, | |
| "step": 198000 | |
| }, | |
| { | |
| "epoch": 9.26, | |
| "eval_loss": 2.505375862121582, | |
| "eval_runtime": 54.9659, | |
| "eval_samples_per_second": 120.602, | |
| "eval_steps_per_second": 1.892, | |
| "step": 198000 | |
| }, | |
| { | |
| "epoch": 9.31, | |
| "learning_rate": 3.4502923976608188e-06, | |
| "loss": 0.5164, | |
| "step": 199000 | |
| }, | |
| { | |
| "epoch": 9.31, | |
| "eval_loss": 2.512755870819092, | |
| "eval_runtime": 54.9727, | |
| "eval_samples_per_second": 120.587, | |
| "eval_steps_per_second": 1.892, | |
| "step": 199000 | |
| }, | |
| { | |
| "epoch": 9.36, | |
| "learning_rate": 3.216374269005848e-06, | |
| "loss": 0.5147, | |
| "step": 200000 | |
| }, | |
| { | |
| "epoch": 9.36, | |
| "eval_loss": 2.5104758739471436, | |
| "eval_runtime": 54.9829, | |
| "eval_samples_per_second": 120.565, | |
| "eval_steps_per_second": 1.891, | |
| "step": 200000 | |
| }, | |
| { | |
| "epoch": 9.4, | |
| "learning_rate": 2.9824561403508774e-06, | |
| "loss": 0.5092, | |
| "step": 201000 | |
| }, | |
| { | |
| "epoch": 9.4, | |
| "eval_loss": 2.5510807037353516, | |
| "eval_runtime": 54.9886, | |
| "eval_samples_per_second": 120.552, | |
| "eval_steps_per_second": 1.891, | |
| "step": 201000 | |
| }, | |
| { | |
| "epoch": 9.45, | |
| "learning_rate": 2.7485380116959066e-06, | |
| "loss": 0.5123, | |
| "step": 202000 | |
| }, | |
| { | |
| "epoch": 9.45, | |
| "eval_loss": 2.4837098121643066, | |
| "eval_runtime": 54.9612, | |
| "eval_samples_per_second": 120.612, | |
| "eval_steps_per_second": 1.892, | |
| "step": 202000 | |
| }, | |
| { | |
| "epoch": 9.5, | |
| "learning_rate": 2.5146198830409357e-06, | |
| "loss": 0.5077, | |
| "step": 203000 | |
| }, | |
| { | |
| "epoch": 9.5, | |
| "eval_loss": 2.5026121139526367, | |
| "eval_runtime": 55.0018, | |
| "eval_samples_per_second": 120.523, | |
| "eval_steps_per_second": 1.891, | |
| "step": 203000 | |
| }, | |
| { | |
| "epoch": 9.54, | |
| "learning_rate": 2.2807017543859652e-06, | |
| "loss": 0.5112, | |
| "step": 204000 | |
| }, | |
| { | |
| "epoch": 9.54, | |
| "eval_loss": 2.514636278152466, | |
| "eval_runtime": 54.9811, | |
| "eval_samples_per_second": 120.569, | |
| "eval_steps_per_second": 1.892, | |
| "step": 204000 | |
| }, | |
| { | |
| "epoch": 9.59, | |
| "learning_rate": 2.0467836257309943e-06, | |
| "loss": 0.5033, | |
| "step": 205000 | |
| }, | |
| { | |
| "epoch": 9.59, | |
| "eval_loss": 2.537416696548462, | |
| "eval_runtime": 54.983, | |
| "eval_samples_per_second": 120.565, | |
| "eval_steps_per_second": 1.891, | |
| "step": 205000 | |
| }, | |
| { | |
| "epoch": 9.64, | |
| "learning_rate": 1.8128654970760235e-06, | |
| "loss": 0.5111, | |
| "step": 206000 | |
| }, | |
| { | |
| "epoch": 9.64, | |
| "eval_loss": 2.515895366668701, | |
| "eval_runtime": 54.9923, | |
| "eval_samples_per_second": 120.544, | |
| "eval_steps_per_second": 1.891, | |
| "step": 206000 | |
| }, | |
| { | |
| "epoch": 9.68, | |
| "learning_rate": 1.5789473684210528e-06, | |
| "loss": 0.5119, | |
| "step": 207000 | |
| }, | |
| { | |
| "epoch": 9.68, | |
| "eval_loss": 2.5189149379730225, | |
| "eval_runtime": 54.9887, | |
| "eval_samples_per_second": 120.552, | |
| "eval_steps_per_second": 1.891, | |
| "step": 207000 | |
| }, | |
| { | |
| "epoch": 9.73, | |
| "learning_rate": 1.345029239766082e-06, | |
| "loss": 0.5022, | |
| "step": 208000 | |
| }, | |
| { | |
| "epoch": 9.73, | |
| "eval_loss": 2.506300926208496, | |
| "eval_runtime": 54.9799, | |
| "eval_samples_per_second": 120.571, | |
| "eval_steps_per_second": 1.892, | |
| "step": 208000 | |
| }, | |
| { | |
| "epoch": 9.78, | |
| "learning_rate": 1.1111111111111112e-06, | |
| "loss": 0.5051, | |
| "step": 209000 | |
| }, | |
| { | |
| "epoch": 9.78, | |
| "eval_loss": 2.4811651706695557, | |
| "eval_runtime": 54.958, | |
| "eval_samples_per_second": 120.619, | |
| "eval_steps_per_second": 1.892, | |
| "step": 209000 | |
| }, | |
| { | |
| "epoch": 9.82, | |
| "learning_rate": 8.771929824561404e-07, | |
| "loss": 0.5028, | |
| "step": 210000 | |
| }, | |
| { | |
| "epoch": 9.82, | |
| "eval_loss": 2.4914138317108154, | |
| "eval_runtime": 55.0024, | |
| "eval_samples_per_second": 120.522, | |
| "eval_steps_per_second": 1.891, | |
| "step": 210000 | |
| }, | |
| { | |
| "epoch": 9.87, | |
| "learning_rate": 6.432748538011697e-07, | |
| "loss": 0.5066, | |
| "step": 211000 | |
| }, | |
| { | |
| "epoch": 9.87, | |
| "eval_loss": 2.5056285858154297, | |
| "eval_runtime": 54.9649, | |
| "eval_samples_per_second": 120.604, | |
| "eval_steps_per_second": 1.892, | |
| "step": 211000 | |
| }, | |
| { | |
| "epoch": 9.92, | |
| "learning_rate": 4.093567251461989e-07, | |
| "loss": 0.5058, | |
| "step": 212000 | |
| }, | |
| { | |
| "epoch": 9.92, | |
| "eval_loss": 2.53446102142334, | |
| "eval_runtime": 54.9817, | |
| "eval_samples_per_second": 120.567, | |
| "eval_steps_per_second": 1.892, | |
| "step": 212000 | |
| }, | |
| { | |
| "epoch": 9.96, | |
| "learning_rate": 1.7543859649122808e-07, | |
| "loss": 0.507, | |
| "step": 213000 | |
| }, | |
| { | |
| "epoch": 9.96, | |
| "eval_loss": 2.507356882095337, | |
| "eval_runtime": 55.001, | |
| "eval_samples_per_second": 120.525, | |
| "eval_steps_per_second": 1.891, | |
| "step": 213000 | |
| } | |
| ], | |
| "max_steps": 213750, | |
| "num_train_epochs": 10, | |
| "total_flos": 4.4847043698061394e+18, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |