Commit ·
2f6b212
1
Parent(s): bb1f7f3
Upload trainer_log_history.jsonl with huggingface_hub
Browse files- trainer_log_history.jsonl +293 -0
trainer_log_history.jsonl
ADDED
|
@@ -0,0 +1,293 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"loss": 1.5861, "learning_rate": 2.9999999999999997e-05, "epoch": 0.01, "step": 10}
|
| 2 |
+
{"eval_loss": 1.5546112060546875, "eval_runtime": 23.1798, "eval_samples_per_second": 4.314, "eval_steps_per_second": 0.561, "epoch": 0.01, "step": 10}
|
| 3 |
+
{"loss": 1.5346, "learning_rate": 5.9999999999999995e-05, "epoch": 0.03, "step": 20}
|
| 4 |
+
{"eval_loss": 1.4962812662124634, "eval_runtime": 23.5783, "eval_samples_per_second": 4.241, "eval_steps_per_second": 0.551, "epoch": 0.03, "step": 20}
|
| 5 |
+
{"loss": 1.4226, "learning_rate": 8.999999999999999e-05, "epoch": 0.04, "step": 30}
|
| 6 |
+
{"eval_loss": 1.3235948085784912, "eval_runtime": 23.7142, "eval_samples_per_second": 4.217, "eval_steps_per_second": 0.548, "epoch": 0.04, "step": 30}
|
| 7 |
+
{"loss": 1.1968, "learning_rate": 0.00011999999999999999, "epoch": 0.06, "step": 40}
|
| 8 |
+
{"eval_loss": 1.0622987747192383, "eval_runtime": 23.6224, "eval_samples_per_second": 4.233, "eval_steps_per_second": 0.55, "epoch": 0.06, "step": 40}
|
| 9 |
+
{"loss": 1.0149, "learning_rate": 0.00015, "epoch": 0.07, "step": 50}
|
| 10 |
+
{"eval_loss": 0.9303178191184998, "eval_runtime": 23.6914, "eval_samples_per_second": 4.221, "eval_steps_per_second": 0.549, "epoch": 0.07, "step": 50}
|
| 11 |
+
{"loss": 0.8012, "learning_rate": 0.00017999999999999998, "epoch": 0.08, "step": 60}
|
| 12 |
+
{"eval_loss": 0.884680986404419, "eval_runtime": 23.5854, "eval_samples_per_second": 4.24, "eval_steps_per_second": 0.551, "epoch": 0.08, "step": 60}
|
| 13 |
+
{"loss": 0.9176, "learning_rate": 0.00020999999999999998, "epoch": 0.1, "step": 70}
|
| 14 |
+
{"eval_loss": 0.865105152130127, "eval_runtime": 23.5816, "eval_samples_per_second": 4.241, "eval_steps_per_second": 0.551, "epoch": 0.1, "step": 70}
|
| 15 |
+
{"loss": 0.8454, "learning_rate": 0.00023999999999999998, "epoch": 0.11, "step": 80}
|
| 16 |
+
{"eval_loss": 0.8409528136253357, "eval_runtime": 23.6657, "eval_samples_per_second": 4.226, "eval_steps_per_second": 0.549, "epoch": 0.11, "step": 80}
|
| 17 |
+
{"loss": 0.7406, "learning_rate": 0.00027, "epoch": 0.12, "step": 90}
|
| 18 |
+
{"eval_loss": 0.8041796088218689, "eval_runtime": 23.705, "eval_samples_per_second": 4.219, "eval_steps_per_second": 0.548, "epoch": 0.12, "step": 90}
|
| 19 |
+
{"loss": 0.8262, "learning_rate": 0.0003, "epoch": 0.14, "step": 100}
|
| 20 |
+
{"eval_loss": 0.7901164293289185, "eval_runtime": 23.5789, "eval_samples_per_second": 4.241, "eval_steps_per_second": 0.551, "epoch": 0.14, "step": 100}
|
| 21 |
+
{"loss": 0.7947, "learning_rate": 0.00029958041958041954, "epoch": 0.15, "step": 110}
|
| 22 |
+
{"eval_loss": 0.7940295338630676, "eval_runtime": 23.6534, "eval_samples_per_second": 4.228, "eval_steps_per_second": 0.55, "epoch": 0.15, "step": 110}
|
| 23 |
+
{"loss": 0.8331, "learning_rate": 0.00029916083916083915, "epoch": 0.17, "step": 120}
|
| 24 |
+
{"eval_loss": 0.7761766314506531, "eval_runtime": 23.5587, "eval_samples_per_second": 4.245, "eval_steps_per_second": 0.552, "epoch": 0.17, "step": 120}
|
| 25 |
+
{"loss": 0.7844, "learning_rate": 0.0002987412587412587, "epoch": 0.18, "step": 130}
|
| 26 |
+
{"eval_loss": 0.764004647731781, "eval_runtime": 23.6807, "eval_samples_per_second": 4.223, "eval_steps_per_second": 0.549, "epoch": 0.18, "step": 130}
|
| 27 |
+
{"loss": 0.7601, "learning_rate": 0.0002983216783216783, "epoch": 0.19, "step": 140}
|
| 28 |
+
{"eval_loss": 0.7403784990310669, "eval_runtime": 23.5213, "eval_samples_per_second": 4.251, "eval_steps_per_second": 0.553, "epoch": 0.19, "step": 140}
|
| 29 |
+
{"loss": 0.7325, "learning_rate": 0.0002979020979020979, "epoch": 0.21, "step": 150}
|
| 30 |
+
{"eval_loss": 0.7203081250190735, "eval_runtime": 23.5515, "eval_samples_per_second": 4.246, "eval_steps_per_second": 0.552, "epoch": 0.21, "step": 150}
|
| 31 |
+
{"loss": 0.7265, "learning_rate": 0.00029748251748251746, "epoch": 0.22, "step": 160}
|
| 32 |
+
{"eval_loss": 0.7069114446640015, "eval_runtime": 23.5863, "eval_samples_per_second": 4.24, "eval_steps_per_second": 0.551, "epoch": 0.22, "step": 160}
|
| 33 |
+
{"loss": 0.7468, "learning_rate": 0.000297062937062937, "epoch": 0.23, "step": 170}
|
| 34 |
+
{"eval_loss": 0.7000756859779358, "eval_runtime": 23.5962, "eval_samples_per_second": 4.238, "eval_steps_per_second": 0.551, "epoch": 0.23, "step": 170}
|
| 35 |
+
{"loss": 0.7092, "learning_rate": 0.00029664335664335664, "epoch": 0.25, "step": 180}
|
| 36 |
+
{"eval_loss": 0.6928163766860962, "eval_runtime": 23.5955, "eval_samples_per_second": 4.238, "eval_steps_per_second": 0.551, "epoch": 0.25, "step": 180}
|
| 37 |
+
{"loss": 0.7378, "learning_rate": 0.0002962237762237762, "epoch": 0.26, "step": 190}
|
| 38 |
+
{"eval_loss": 0.6906119585037231, "eval_runtime": 23.5746, "eval_samples_per_second": 4.242, "eval_steps_per_second": 0.551, "epoch": 0.26, "step": 190}
|
| 39 |
+
{"loss": 0.7679, "learning_rate": 0.00029580419580419576, "epoch": 0.28, "step": 200}
|
| 40 |
+
{"eval_loss": 0.6861391663551331, "eval_runtime": 23.6069, "eval_samples_per_second": 4.236, "eval_steps_per_second": 0.551, "epoch": 0.28, "step": 200}
|
| 41 |
+
{"loss": 0.7512, "learning_rate": 0.0002953846153846154, "epoch": 0.29, "step": 210}
|
| 42 |
+
{"eval_loss": 0.6841049194335938, "eval_runtime": 23.5512, "eval_samples_per_second": 4.246, "eval_steps_per_second": 0.552, "epoch": 0.29, "step": 210}
|
| 43 |
+
{"loss": 0.6518, "learning_rate": 0.00029496503496503494, "epoch": 0.3, "step": 220}
|
| 44 |
+
{"eval_loss": 0.6800382137298584, "eval_runtime": 23.5815, "eval_samples_per_second": 4.241, "eval_steps_per_second": 0.551, "epoch": 0.3, "step": 220}
|
| 45 |
+
{"loss": 0.7361, "learning_rate": 0.0002945454545454545, "epoch": 0.32, "step": 230}
|
| 46 |
+
{"eval_loss": 0.6794602870941162, "eval_runtime": 23.6023, "eval_samples_per_second": 4.237, "eval_steps_per_second": 0.551, "epoch": 0.32, "step": 230}
|
| 47 |
+
{"loss": 0.7661, "learning_rate": 0.0002941258741258741, "epoch": 0.33, "step": 240}
|
| 48 |
+
{"eval_loss": 0.6810237169265747, "eval_runtime": 23.55, "eval_samples_per_second": 4.246, "eval_steps_per_second": 0.552, "epoch": 0.33, "step": 240}
|
| 49 |
+
{"loss": 0.5877, "learning_rate": 0.0002937062937062937, "epoch": 0.34, "step": 250}
|
| 50 |
+
{"eval_loss": 0.6788613796234131, "eval_runtime": 23.5943, "eval_samples_per_second": 4.238, "eval_steps_per_second": 0.551, "epoch": 0.34, "step": 250}
|
| 51 |
+
{"loss": 0.655, "learning_rate": 0.00029328671328671325, "epoch": 0.36, "step": 260}
|
| 52 |
+
{"eval_loss": 0.671405553817749, "eval_runtime": 23.6778, "eval_samples_per_second": 4.223, "eval_steps_per_second": 0.549, "epoch": 0.36, "step": 260}
|
| 53 |
+
{"loss": 0.6352, "learning_rate": 0.0002928671328671328, "epoch": 0.37, "step": 270}
|
| 54 |
+
{"eval_loss": 0.6693644523620605, "eval_runtime": 23.5528, "eval_samples_per_second": 4.246, "eval_steps_per_second": 0.552, "epoch": 0.37, "step": 270}
|
| 55 |
+
{"loss": 0.6746, "learning_rate": 0.0002924475524475524, "epoch": 0.39, "step": 280}
|
| 56 |
+
{"eval_loss": 0.6700127124786377, "eval_runtime": 23.5688, "eval_samples_per_second": 4.243, "eval_steps_per_second": 0.552, "epoch": 0.39, "step": 280}
|
| 57 |
+
{"loss": 0.7004, "learning_rate": 0.000292027972027972, "epoch": 0.4, "step": 290}
|
| 58 |
+
{"eval_loss": 0.6715095639228821, "eval_runtime": 23.6593, "eval_samples_per_second": 4.227, "eval_steps_per_second": 0.549, "epoch": 0.4, "step": 290}
|
| 59 |
+
{"loss": 0.6779, "learning_rate": 0.00029160839160839155, "epoch": 0.41, "step": 300}
|
| 60 |
+
{"eval_loss": 0.6686553359031677, "eval_runtime": 23.5853, "eval_samples_per_second": 4.24, "eval_steps_per_second": 0.551, "epoch": 0.41, "step": 300}
|
| 61 |
+
{"loss": 0.621, "learning_rate": 0.00029118881118881117, "epoch": 0.43, "step": 310}
|
| 62 |
+
{"eval_loss": 0.6673153042793274, "eval_runtime": 23.5281, "eval_samples_per_second": 4.25, "eval_steps_per_second": 0.553, "epoch": 0.43, "step": 310}
|
| 63 |
+
{"loss": 0.6755, "learning_rate": 0.00029076923076923073, "epoch": 0.44, "step": 320}
|
| 64 |
+
{"eval_loss": 0.6658429503440857, "eval_runtime": 23.5721, "eval_samples_per_second": 4.242, "eval_steps_per_second": 0.552, "epoch": 0.44, "step": 320}
|
| 65 |
+
{"loss": 0.6725, "learning_rate": 0.00029034965034965035, "epoch": 0.46, "step": 330}
|
| 66 |
+
{"eval_loss": 0.6701672077178955, "eval_runtime": 23.6198, "eval_samples_per_second": 4.234, "eval_steps_per_second": 0.55, "epoch": 0.46, "step": 330}
|
| 67 |
+
{"loss": 0.7197, "learning_rate": 0.0002899300699300699, "epoch": 0.47, "step": 340}
|
| 68 |
+
{"eval_loss": 0.6637719869613647, "eval_runtime": 23.5613, "eval_samples_per_second": 4.244, "eval_steps_per_second": 0.552, "epoch": 0.47, "step": 340}
|
| 69 |
+
{"loss": 0.7827, "learning_rate": 0.0002895104895104895, "epoch": 0.48, "step": 350}
|
| 70 |
+
{"eval_loss": 0.665269672870636, "eval_runtime": 23.7009, "eval_samples_per_second": 4.219, "eval_steps_per_second": 0.549, "epoch": 0.48, "step": 350}
|
| 71 |
+
{"loss": 0.5587, "learning_rate": 0.00028909090909090904, "epoch": 0.5, "step": 360}
|
| 72 |
+
{"eval_loss": 0.6614734530448914, "eval_runtime": 23.5885, "eval_samples_per_second": 4.239, "eval_steps_per_second": 0.551, "epoch": 0.5, "step": 360}
|
| 73 |
+
{"loss": 0.6846, "learning_rate": 0.00028867132867132865, "epoch": 0.51, "step": 370}
|
| 74 |
+
{"eval_loss": 0.6604605317115784, "eval_runtime": 23.5867, "eval_samples_per_second": 4.24, "eval_steps_per_second": 0.551, "epoch": 0.51, "step": 370}
|
| 75 |
+
{"loss": 0.5939, "learning_rate": 0.0002882517482517482, "epoch": 0.52, "step": 380}
|
| 76 |
+
{"eval_loss": 0.6580032110214233, "eval_runtime": 23.6794, "eval_samples_per_second": 4.223, "eval_steps_per_second": 0.549, "epoch": 0.52, "step": 380}
|
| 77 |
+
{"loss": 0.804, "learning_rate": 0.0002878321678321678, "epoch": 0.54, "step": 390}
|
| 78 |
+
{"eval_loss": 0.6574468016624451, "eval_runtime": 23.5623, "eval_samples_per_second": 4.244, "eval_steps_per_second": 0.552, "epoch": 0.54, "step": 390}
|
| 79 |
+
{"loss": 0.6946, "learning_rate": 0.0002874125874125874, "epoch": 0.55, "step": 400}
|
| 80 |
+
{"eval_loss": 0.6552902460098267, "eval_runtime": 23.5759, "eval_samples_per_second": 4.242, "eval_steps_per_second": 0.551, "epoch": 0.55, "step": 400}
|
| 81 |
+
{"loss": 0.6129, "learning_rate": 0.00028699300699300696, "epoch": 0.57, "step": 410}
|
| 82 |
+
{"eval_loss": 0.6540884375572205, "eval_runtime": 23.6844, "eval_samples_per_second": 4.222, "eval_steps_per_second": 0.549, "epoch": 0.57, "step": 410}
|
| 83 |
+
{"loss": 0.6499, "learning_rate": 0.0002865734265734266, "epoch": 0.58, "step": 420}
|
| 84 |
+
{"eval_loss": 0.6544709205627441, "eval_runtime": 23.486, "eval_samples_per_second": 4.258, "eval_steps_per_second": 0.554, "epoch": 0.58, "step": 420}
|
| 85 |
+
{"loss": 0.6199, "learning_rate": 0.00028615384615384614, "epoch": 0.59, "step": 430}
|
| 86 |
+
{"eval_loss": 0.6521677374839783, "eval_runtime": 23.5787, "eval_samples_per_second": 4.241, "eval_steps_per_second": 0.551, "epoch": 0.59, "step": 430}
|
| 87 |
+
{"loss": 0.5753, "learning_rate": 0.0002857342657342657, "epoch": 0.61, "step": 440}
|
| 88 |
+
{"eval_loss": 0.6501143574714661, "eval_runtime": 23.5895, "eval_samples_per_second": 4.239, "eval_steps_per_second": 0.551, "epoch": 0.61, "step": 440}
|
| 89 |
+
{"loss": 0.6175, "learning_rate": 0.00028531468531468526, "epoch": 0.62, "step": 450}
|
| 90 |
+
{"eval_loss": 0.6489108204841614, "eval_runtime": 23.5249, "eval_samples_per_second": 4.251, "eval_steps_per_second": 0.553, "epoch": 0.62, "step": 450}
|
| 91 |
+
{"loss": 0.7238, "learning_rate": 0.0002848951048951049, "epoch": 0.63, "step": 460}
|
| 92 |
+
{"eval_loss": 0.6477003693580627, "eval_runtime": 23.6246, "eval_samples_per_second": 4.233, "eval_steps_per_second": 0.55, "epoch": 0.63, "step": 460}
|
| 93 |
+
{"loss": 0.7032, "learning_rate": 0.00028447552447552444, "epoch": 0.65, "step": 470}
|
| 94 |
+
{"eval_loss": 0.6449102759361267, "eval_runtime": 23.69, "eval_samples_per_second": 4.221, "eval_steps_per_second": 0.549, "epoch": 0.65, "step": 470}
|
| 95 |
+
{"loss": 0.6022, "learning_rate": 0.000284055944055944, "epoch": 0.66, "step": 480}
|
| 96 |
+
{"eval_loss": 0.6427639126777649, "eval_runtime": 23.568, "eval_samples_per_second": 4.243, "eval_steps_per_second": 0.552, "epoch": 0.66, "step": 480}
|
| 97 |
+
{"loss": 0.6425, "learning_rate": 0.0002836363636363636, "epoch": 0.68, "step": 490}
|
| 98 |
+
{"eval_loss": 0.6416438221931458, "eval_runtime": 23.5869, "eval_samples_per_second": 4.24, "eval_steps_per_second": 0.551, "epoch": 0.68, "step": 490}
|
| 99 |
+
{"loss": 0.6723, "learning_rate": 0.0002832167832167832, "epoch": 0.69, "step": 500}
|
| 100 |
+
{"eval_loss": 0.6422706842422485, "eval_runtime": 23.7408, "eval_samples_per_second": 4.212, "eval_steps_per_second": 0.548, "epoch": 0.69, "step": 500}
|
| 101 |
+
{"loss": 0.65, "learning_rate": 0.0002827972027972028, "epoch": 0.7, "step": 510}
|
| 102 |
+
{"eval_loss": 0.640707790851593, "eval_runtime": 23.6305, "eval_samples_per_second": 4.232, "eval_steps_per_second": 0.55, "epoch": 0.7, "step": 510}
|
| 103 |
+
{"loss": 0.5461, "learning_rate": 0.00028237762237762236, "epoch": 0.72, "step": 520}
|
| 104 |
+
{"eval_loss": 0.6413621306419373, "eval_runtime": 23.5561, "eval_samples_per_second": 4.245, "eval_steps_per_second": 0.552, "epoch": 0.72, "step": 520}
|
| 105 |
+
{"loss": 0.5654, "learning_rate": 0.0002819580419580419, "epoch": 0.73, "step": 530}
|
| 106 |
+
{"eval_loss": 0.6443601250648499, "eval_runtime": 23.6625, "eval_samples_per_second": 4.226, "eval_steps_per_second": 0.549, "epoch": 0.73, "step": 530}
|
| 107 |
+
{"loss": 0.685, "learning_rate": 0.0002815384615384615, "epoch": 0.74, "step": 540}
|
| 108 |
+
{"eval_loss": 0.6444653868675232, "eval_runtime": 23.6821, "eval_samples_per_second": 4.223, "eval_steps_per_second": 0.549, "epoch": 0.74, "step": 540}
|
| 109 |
+
{"loss": 0.6196, "learning_rate": 0.0002811188811188811, "epoch": 0.76, "step": 550}
|
| 110 |
+
{"eval_loss": 0.6420193314552307, "eval_runtime": 23.6122, "eval_samples_per_second": 4.235, "eval_steps_per_second": 0.551, "epoch": 0.76, "step": 550}
|
| 111 |
+
{"loss": 0.787, "learning_rate": 0.00028069930069930067, "epoch": 0.77, "step": 560}
|
| 112 |
+
{"eval_loss": 0.6415860652923584, "eval_runtime": 23.6089, "eval_samples_per_second": 4.236, "eval_steps_per_second": 0.551, "epoch": 0.77, "step": 560}
|
| 113 |
+
{"loss": 0.6576, "learning_rate": 0.00028027972027972023, "epoch": 0.79, "step": 570}
|
| 114 |
+
{"eval_loss": 0.643482506275177, "eval_runtime": 23.6156, "eval_samples_per_second": 4.234, "eval_steps_per_second": 0.55, "epoch": 0.79, "step": 570}
|
| 115 |
+
{"loss": 0.6749, "learning_rate": 0.00027986013986013985, "epoch": 0.8, "step": 580}
|
| 116 |
+
{"eval_loss": 0.6405051350593567, "eval_runtime": 23.628, "eval_samples_per_second": 4.232, "eval_steps_per_second": 0.55, "epoch": 0.8, "step": 580}
|
| 117 |
+
{"loss": 0.63, "learning_rate": 0.0002794405594405594, "epoch": 0.81, "step": 590}
|
| 118 |
+
{"eval_loss": 0.6396690011024475, "eval_runtime": 23.6168, "eval_samples_per_second": 4.234, "eval_steps_per_second": 0.55, "epoch": 0.81, "step": 590}
|
| 119 |
+
{"loss": 0.6797, "learning_rate": 0.00027902097902097903, "epoch": 0.83, "step": 600}
|
| 120 |
+
{"eval_loss": 0.6393585801124573, "eval_runtime": 23.5842, "eval_samples_per_second": 4.24, "eval_steps_per_second": 0.551, "epoch": 0.83, "step": 600}
|
| 121 |
+
{"loss": 0.6553, "learning_rate": 0.00027860139860139854, "epoch": 0.84, "step": 610}
|
| 122 |
+
{"eval_loss": 0.6381799578666687, "eval_runtime": 23.5984, "eval_samples_per_second": 4.238, "eval_steps_per_second": 0.551, "epoch": 0.84, "step": 610}
|
| 123 |
+
{"loss": 0.5907, "learning_rate": 0.00027818181818181815, "epoch": 0.86, "step": 620}
|
| 124 |
+
{"eval_loss": 0.638024091720581, "eval_runtime": 23.6565, "eval_samples_per_second": 4.227, "eval_steps_per_second": 0.55, "epoch": 0.86, "step": 620}
|
| 125 |
+
{"loss": 0.6526, "learning_rate": 0.0002777622377622377, "epoch": 0.87, "step": 630}
|
| 126 |
+
{"eval_loss": 0.6353902816772461, "eval_runtime": 23.6865, "eval_samples_per_second": 4.222, "eval_steps_per_second": 0.549, "epoch": 0.87, "step": 630}
|
| 127 |
+
{"loss": 0.6027, "learning_rate": 0.00027734265734265733, "epoch": 0.88, "step": 640}
|
| 128 |
+
{"eval_loss": 0.6318895816802979, "eval_runtime": 23.544, "eval_samples_per_second": 4.247, "eval_steps_per_second": 0.552, "epoch": 0.88, "step": 640}
|
| 129 |
+
{"loss": 0.5499, "learning_rate": 0.0002769230769230769, "epoch": 0.9, "step": 650}
|
| 130 |
+
{"eval_loss": 0.6283926963806152, "eval_runtime": 23.5591, "eval_samples_per_second": 4.245, "eval_steps_per_second": 0.552, "epoch": 0.9, "step": 650}
|
| 131 |
+
{"loss": 0.5983, "learning_rate": 0.00027650349650349646, "epoch": 0.91, "step": 660}
|
| 132 |
+
{"eval_loss": 0.6258216500282288, "eval_runtime": 23.6975, "eval_samples_per_second": 4.22, "eval_steps_per_second": 0.549, "epoch": 0.91, "step": 660}
|
| 133 |
+
{"loss": 0.6189, "learning_rate": 0.0002760839160839161, "epoch": 0.92, "step": 670}
|
| 134 |
+
{"eval_loss": 0.624355673789978, "eval_runtime": 23.5901, "eval_samples_per_second": 4.239, "eval_steps_per_second": 0.551, "epoch": 0.92, "step": 670}
|
| 135 |
+
{"loss": 0.6977, "learning_rate": 0.00027566433566433564, "epoch": 0.94, "step": 680}
|
| 136 |
+
{"eval_loss": 0.6230638027191162, "eval_runtime": 23.5414, "eval_samples_per_second": 4.248, "eval_steps_per_second": 0.552, "epoch": 0.94, "step": 680}
|
| 137 |
+
{"loss": 0.6097, "learning_rate": 0.00027524475524475525, "epoch": 0.95, "step": 690}
|
| 138 |
+
{"eval_loss": 0.6207563281059265, "eval_runtime": 23.5276, "eval_samples_per_second": 4.25, "eval_steps_per_second": 0.553, "epoch": 0.95, "step": 690}
|
| 139 |
+
{"loss": 0.5457, "learning_rate": 0.00027482517482517476, "epoch": 0.97, "step": 700}
|
| 140 |
+
{"eval_loss": 0.6207029223442078, "eval_runtime": 23.5497, "eval_samples_per_second": 4.246, "eval_steps_per_second": 0.552, "epoch": 0.97, "step": 700}
|
| 141 |
+
{"loss": 0.5926, "learning_rate": 0.0002744055944055944, "epoch": 0.98, "step": 710}
|
| 142 |
+
{"eval_loss": 0.6202435493469238, "eval_runtime": 23.685, "eval_samples_per_second": 4.222, "eval_steps_per_second": 0.549, "epoch": 0.98, "step": 710}
|
| 143 |
+
{"loss": 0.6451, "learning_rate": 0.00027398601398601394, "epoch": 0.99, "step": 720}
|
| 144 |
+
{"eval_loss": 0.6203433871269226, "eval_runtime": 23.5624, "eval_samples_per_second": 4.244, "eval_steps_per_second": 0.552, "epoch": 0.99, "step": 720}
|
| 145 |
+
{"loss": 0.5877, "learning_rate": 0.00027356643356643356, "epoch": 1.01, "step": 730}
|
| 146 |
+
{"eval_loss": 0.6191022396087646, "eval_runtime": 23.5692, "eval_samples_per_second": 4.243, "eval_steps_per_second": 0.552, "epoch": 1.01, "step": 730}
|
| 147 |
+
{"loss": 0.7144, "learning_rate": 0.0002731468531468531, "epoch": 1.02, "step": 740}
|
| 148 |
+
{"eval_loss": 0.6193973422050476, "eval_runtime": 23.712, "eval_samples_per_second": 4.217, "eval_steps_per_second": 0.548, "epoch": 1.02, "step": 740}
|
| 149 |
+
{"loss": 0.5671, "learning_rate": 0.0002727272727272727, "epoch": 1.03, "step": 750}
|
| 150 |
+
{"eval_loss": 0.6172534227371216, "eval_runtime": 23.541, "eval_samples_per_second": 4.248, "eval_steps_per_second": 0.552, "epoch": 1.03, "step": 750}
|
| 151 |
+
{"loss": 0.6423, "learning_rate": 0.0002723076923076923, "epoch": 1.05, "step": 760}
|
| 152 |
+
{"eval_loss": 0.6179550290107727, "eval_runtime": 23.5459, "eval_samples_per_second": 4.247, "eval_steps_per_second": 0.552, "epoch": 1.05, "step": 760}
|
| 153 |
+
{"loss": 0.657, "learning_rate": 0.00027188811188811186, "epoch": 1.06, "step": 770}
|
| 154 |
+
{"eval_loss": 0.6190667152404785, "eval_runtime": 23.6657, "eval_samples_per_second": 4.226, "eval_steps_per_second": 0.549, "epoch": 1.06, "step": 770}
|
| 155 |
+
{"loss": 0.6178, "learning_rate": 0.0002714685314685315, "epoch": 1.08, "step": 780}
|
| 156 |
+
{"eval_loss": 0.6176871657371521, "eval_runtime": 23.6599, "eval_samples_per_second": 4.227, "eval_steps_per_second": 0.549, "epoch": 1.08, "step": 780}
|
| 157 |
+
{"loss": 0.6659, "learning_rate": 0.000271048951048951, "epoch": 1.09, "step": 790}
|
| 158 |
+
{"eval_loss": 0.6174372434616089, "eval_runtime": 23.5345, "eval_samples_per_second": 4.249, "eval_steps_per_second": 0.552, "epoch": 1.09, "step": 790}
|
| 159 |
+
{"loss": 0.6216, "learning_rate": 0.0002706293706293706, "epoch": 1.1, "step": 800}
|
| 160 |
+
{"eval_loss": 0.6179863214492798, "eval_runtime": 23.5446, "eval_samples_per_second": 4.247, "eval_steps_per_second": 0.552, "epoch": 1.1, "step": 800}
|
| 161 |
+
{"loss": 0.5623, "learning_rate": 0.00027020979020979017, "epoch": 1.12, "step": 810}
|
| 162 |
+
{"eval_loss": 0.6160795092582703, "eval_runtime": 23.6019, "eval_samples_per_second": 4.237, "eval_steps_per_second": 0.551, "epoch": 1.12, "step": 810}
|
| 163 |
+
{"loss": 0.544, "learning_rate": 0.0002697902097902098, "epoch": 1.13, "step": 820}
|
| 164 |
+
{"eval_loss": 0.6154199838638306, "eval_runtime": 23.5405, "eval_samples_per_second": 4.248, "eval_steps_per_second": 0.552, "epoch": 1.13, "step": 820}
|
| 165 |
+
{"loss": 0.5405, "learning_rate": 0.00026937062937062935, "epoch": 1.14, "step": 830}
|
| 166 |
+
{"eval_loss": 0.6137506365776062, "eval_runtime": 23.6357, "eval_samples_per_second": 4.231, "eval_steps_per_second": 0.55, "epoch": 1.14, "step": 830}
|
| 167 |
+
{"loss": 0.5871, "learning_rate": 0.0002689510489510489, "epoch": 1.16, "step": 840}
|
| 168 |
+
{"eval_loss": 0.6168184876441956, "eval_runtime": 23.587, "eval_samples_per_second": 4.24, "eval_steps_per_second": 0.551, "epoch": 1.16, "step": 840}
|
| 169 |
+
{"loss": 0.688, "learning_rate": 0.0002685314685314685, "epoch": 1.17, "step": 850}
|
| 170 |
+
{"eval_loss": 0.6159818768501282, "eval_runtime": 23.6104, "eval_samples_per_second": 4.235, "eval_steps_per_second": 0.551, "epoch": 1.17, "step": 850}
|
| 171 |
+
{"loss": 0.6124, "learning_rate": 0.0002681118881118881, "epoch": 1.19, "step": 860}
|
| 172 |
+
{"eval_loss": 0.611853837966919, "eval_runtime": 23.5647, "eval_samples_per_second": 4.244, "eval_steps_per_second": 0.552, "epoch": 1.19, "step": 860}
|
| 173 |
+
{"loss": 0.629, "learning_rate": 0.0002676923076923077, "epoch": 1.2, "step": 870}
|
| 174 |
+
{"eval_loss": 0.6117254495620728, "eval_runtime": 23.5692, "eval_samples_per_second": 4.243, "eval_steps_per_second": 0.552, "epoch": 1.2, "step": 870}
|
| 175 |
+
{"loss": 0.5151, "learning_rate": 0.0002672727272727272, "epoch": 1.21, "step": 880}
|
| 176 |
+
{"eval_loss": 0.6104335784912109, "eval_runtime": 23.6021, "eval_samples_per_second": 4.237, "eval_steps_per_second": 0.551, "epoch": 1.21, "step": 880}
|
| 177 |
+
{"loss": 0.5627, "learning_rate": 0.00026685314685314683, "epoch": 1.23, "step": 890}
|
| 178 |
+
{"eval_loss": 0.6086432933807373, "eval_runtime": 23.6595, "eval_samples_per_second": 4.227, "eval_steps_per_second": 0.549, "epoch": 1.23, "step": 890}
|
| 179 |
+
{"loss": 0.5814, "learning_rate": 0.0002664335664335664, "epoch": 1.24, "step": 900}
|
| 180 |
+
{"eval_loss": 0.6092746257781982, "eval_runtime": 23.5251, "eval_samples_per_second": 4.251, "eval_steps_per_second": 0.553, "epoch": 1.24, "step": 900}
|
| 181 |
+
{"loss": 0.5602, "learning_rate": 0.000266013986013986, "epoch": 1.26, "step": 910}
|
| 182 |
+
{"eval_loss": 0.608718752861023, "eval_runtime": 23.5334, "eval_samples_per_second": 4.249, "eval_steps_per_second": 0.552, "epoch": 1.26, "step": 910}
|
| 183 |
+
{"loss": 0.6004, "learning_rate": 0.0002655944055944056, "epoch": 1.27, "step": 920}
|
| 184 |
+
{"eval_loss": 0.6084469556808472, "eval_runtime": 23.5616, "eval_samples_per_second": 4.244, "eval_steps_per_second": 0.552, "epoch": 1.27, "step": 920}
|
| 185 |
+
{"loss": 0.5979, "learning_rate": 0.00026517482517482514, "epoch": 1.28, "step": 930}
|
| 186 |
+
{"eval_loss": 0.6070427298545837, "eval_runtime": 23.5628, "eval_samples_per_second": 4.244, "eval_steps_per_second": 0.552, "epoch": 1.28, "step": 930}
|
| 187 |
+
{"loss": 0.6662, "learning_rate": 0.00026475524475524475, "epoch": 1.3, "step": 940}
|
| 188 |
+
{"eval_loss": 0.607434868812561, "eval_runtime": 23.6199, "eval_samples_per_second": 4.234, "eval_steps_per_second": 0.55, "epoch": 1.3, "step": 940}
|
| 189 |
+
{"loss": 0.6447, "learning_rate": 0.0002643356643356643, "epoch": 1.31, "step": 950}
|
| 190 |
+
{"eval_loss": 0.6066017746925354, "eval_runtime": 23.5508, "eval_samples_per_second": 4.246, "eval_steps_per_second": 0.552, "epoch": 1.31, "step": 950}
|
| 191 |
+
{"loss": 0.588, "learning_rate": 0.00026391608391608393, "epoch": 1.32, "step": 960}
|
| 192 |
+
{"eval_loss": 0.605922281742096, "eval_runtime": 23.5334, "eval_samples_per_second": 4.249, "eval_steps_per_second": 0.552, "epoch": 1.32, "step": 960}
|
| 193 |
+
{"loss": 0.6808, "learning_rate": 0.00026349650349650344, "epoch": 1.34, "step": 970}
|
| 194 |
+
{"eval_loss": 0.6060763597488403, "eval_runtime": 23.6283, "eval_samples_per_second": 4.232, "eval_steps_per_second": 0.55, "epoch": 1.34, "step": 970}
|
| 195 |
+
{"loss": 0.7089, "learning_rate": 0.00026307692307692306, "epoch": 1.35, "step": 980}
|
| 196 |
+
{"eval_loss": 0.6056197881698608, "eval_runtime": 23.5199, "eval_samples_per_second": 4.252, "eval_steps_per_second": 0.553, "epoch": 1.35, "step": 980}
|
| 197 |
+
{"loss": 0.6435, "learning_rate": 0.0002626573426573426, "epoch": 1.37, "step": 990}
|
| 198 |
+
{"eval_loss": 0.6043457388877869, "eval_runtime": 23.4979, "eval_samples_per_second": 4.256, "eval_steps_per_second": 0.553, "epoch": 1.37, "step": 990}
|
| 199 |
+
{"loss": 0.5691, "learning_rate": 0.00026223776223776224, "epoch": 1.38, "step": 1000}
|
| 200 |
+
{"eval_loss": 0.6017763018608093, "eval_runtime": 23.6214, "eval_samples_per_second": 4.233, "eval_steps_per_second": 0.55, "epoch": 1.38, "step": 1000}
|
| 201 |
+
{"loss": 0.4584, "learning_rate": 0.0002618181818181818, "epoch": 1.39, "step": 1010}
|
| 202 |
+
{"eval_loss": 0.6021450757980347, "eval_runtime": 23.5098, "eval_samples_per_second": 4.254, "eval_steps_per_second": 0.553, "epoch": 1.39, "step": 1010}
|
| 203 |
+
{"loss": 0.6848, "learning_rate": 0.00026139860139860136, "epoch": 1.41, "step": 1020}
|
| 204 |
+
{"eval_loss": 0.6020896434783936, "eval_runtime": 23.5, "eval_samples_per_second": 4.255, "eval_steps_per_second": 0.553, "epoch": 1.41, "step": 1020}
|
| 205 |
+
{"loss": 0.5807, "learning_rate": 0.000260979020979021, "epoch": 1.42, "step": 1030}
|
| 206 |
+
{"eval_loss": 0.6019191145896912, "eval_runtime": 23.6199, "eval_samples_per_second": 4.234, "eval_steps_per_second": 0.55, "epoch": 1.42, "step": 1030}
|
| 207 |
+
{"loss": 0.5409, "learning_rate": 0.00026055944055944054, "epoch": 1.43, "step": 1040}
|
| 208 |
+
{"eval_loss": 0.6014266014099121, "eval_runtime": 23.4902, "eval_samples_per_second": 4.257, "eval_steps_per_second": 0.553, "epoch": 1.43, "step": 1040}
|
| 209 |
+
{"loss": 0.5266, "learning_rate": 0.0002601398601398601, "epoch": 1.45, "step": 1050}
|
| 210 |
+
{"eval_loss": 0.6019847989082336, "eval_runtime": 23.5346, "eval_samples_per_second": 4.249, "eval_steps_per_second": 0.552, "epoch": 1.45, "step": 1050}
|
| 211 |
+
{"loss": 0.6526, "learning_rate": 0.00025972027972027967, "epoch": 1.46, "step": 1060}
|
| 212 |
+
{"eval_loss": 0.6054437756538391, "eval_runtime": 23.5086, "eval_samples_per_second": 4.254, "eval_steps_per_second": 0.553, "epoch": 1.46, "step": 1060}
|
| 213 |
+
{"loss": 0.6598, "learning_rate": 0.0002593006993006993, "epoch": 1.48, "step": 1070}
|
| 214 |
+
{"eval_loss": 0.6089524626731873, "eval_runtime": 23.5442, "eval_samples_per_second": 4.247, "eval_steps_per_second": 0.552, "epoch": 1.48, "step": 1070}
|
| 215 |
+
{"loss": 0.4933, "learning_rate": 0.00025888111888111885, "epoch": 1.49, "step": 1080}
|
| 216 |
+
{"eval_loss": 0.601382315158844, "eval_runtime": 23.5524, "eval_samples_per_second": 4.246, "eval_steps_per_second": 0.552, "epoch": 1.49, "step": 1080}
|
| 217 |
+
{"loss": 0.5707, "learning_rate": 0.00025846153846153846, "epoch": 1.5, "step": 1090}
|
| 218 |
+
{"eval_loss": 0.5991722345352173, "eval_runtime": 23.5666, "eval_samples_per_second": 4.243, "eval_steps_per_second": 0.552, "epoch": 1.5, "step": 1090}
|
| 219 |
+
{"loss": 0.7365, "learning_rate": 0.000258041958041958, "epoch": 1.52, "step": 1100}
|
| 220 |
+
{"eval_loss": 0.6007959246635437, "eval_runtime": 23.659, "eval_samples_per_second": 4.227, "eval_steps_per_second": 0.549, "epoch": 1.52, "step": 1100}
|
| 221 |
+
{"loss": 0.5684, "learning_rate": 0.0002576223776223776, "epoch": 1.53, "step": 1110}
|
| 222 |
+
{"eval_loss": 0.5978883504867554, "eval_runtime": 23.557, "eval_samples_per_second": 4.245, "eval_steps_per_second": 0.552, "epoch": 1.53, "step": 1110}
|
| 223 |
+
{"loss": 0.6895, "learning_rate": 0.0002572027972027972, "epoch": 1.54, "step": 1120}
|
| 224 |
+
{"eval_loss": 0.5960245728492737, "eval_runtime": 23.5344, "eval_samples_per_second": 4.249, "eval_steps_per_second": 0.552, "epoch": 1.54, "step": 1120}
|
| 225 |
+
{"loss": 0.5413, "learning_rate": 0.00025678321678321677, "epoch": 1.56, "step": 1130}
|
| 226 |
+
{"eval_loss": 0.5944367051124573, "eval_runtime": 23.6758, "eval_samples_per_second": 4.224, "eval_steps_per_second": 0.549, "epoch": 1.56, "step": 1130}
|
| 227 |
+
{"loss": 0.6234, "learning_rate": 0.00025636363636363633, "epoch": 1.57, "step": 1140}
|
| 228 |
+
{"eval_loss": 0.5943716764450073, "eval_runtime": 23.5334, "eval_samples_per_second": 4.249, "eval_steps_per_second": 0.552, "epoch": 1.57, "step": 1140}
|
| 229 |
+
{"loss": 0.4974, "learning_rate": 0.0002559440559440559, "epoch": 1.59, "step": 1150}
|
| 230 |
+
{"eval_loss": 0.5943745970726013, "eval_runtime": 23.6205, "eval_samples_per_second": 4.234, "eval_steps_per_second": 0.55, "epoch": 1.59, "step": 1150}
|
| 231 |
+
{"loss": 0.5585, "learning_rate": 0.0002555244755244755, "epoch": 1.6, "step": 1160}
|
| 232 |
+
{"eval_loss": 0.5932533740997314, "eval_runtime": 23.5191, "eval_samples_per_second": 4.252, "eval_steps_per_second": 0.553, "epoch": 1.6, "step": 1160}
|
| 233 |
+
{"loss": 0.6533, "learning_rate": 0.0002551048951048951, "epoch": 1.61, "step": 1170}
|
| 234 |
+
{"eval_loss": 0.5927255749702454, "eval_runtime": 23.4764, "eval_samples_per_second": 4.26, "eval_steps_per_second": 0.554, "epoch": 1.61, "step": 1170}
|
| 235 |
+
{"loss": 0.5602, "learning_rate": 0.0002546853146853147, "epoch": 1.63, "step": 1180}
|
| 236 |
+
{"eval_loss": 0.5937183499336243, "eval_runtime": 23.6869, "eval_samples_per_second": 4.222, "eval_steps_per_second": 0.549, "epoch": 1.63, "step": 1180}
|
| 237 |
+
{"loss": 0.658, "learning_rate": 0.00025426573426573425, "epoch": 1.64, "step": 1190}
|
| 238 |
+
{"eval_loss": 0.5941335558891296, "eval_runtime": 23.6582, "eval_samples_per_second": 4.227, "eval_steps_per_second": 0.549, "epoch": 1.64, "step": 1190}
|
| 239 |
+
{"loss": 0.5749, "learning_rate": 0.0002538461538461538, "epoch": 1.66, "step": 1200}
|
| 240 |
+
{"eval_loss": 0.5928318500518799, "eval_runtime": 23.5647, "eval_samples_per_second": 4.244, "eval_steps_per_second": 0.552, "epoch": 1.66, "step": 1200}
|
| 241 |
+
{"loss": 0.6214, "learning_rate": 0.00025342657342657343, "epoch": 1.67, "step": 1210}
|
| 242 |
+
{"eval_loss": 0.5921968221664429, "eval_runtime": 23.5253, "eval_samples_per_second": 4.251, "eval_steps_per_second": 0.553, "epoch": 1.67, "step": 1210}
|
| 243 |
+
{"loss": 0.5356, "learning_rate": 0.000253006993006993, "epoch": 1.68, "step": 1220}
|
| 244 |
+
{"eval_loss": 0.5913729667663574, "eval_runtime": 23.5345, "eval_samples_per_second": 4.249, "eval_steps_per_second": 0.552, "epoch": 1.68, "step": 1220}
|
| 245 |
+
{"loss": 0.616, "learning_rate": 0.00025258741258741256, "epoch": 1.7, "step": 1230}
|
| 246 |
+
{"eval_loss": 0.5925624370574951, "eval_runtime": 23.56, "eval_samples_per_second": 4.244, "eval_steps_per_second": 0.552, "epoch": 1.7, "step": 1230}
|
| 247 |
+
{"loss": 0.6622, "learning_rate": 0.0002521678321678321, "epoch": 1.71, "step": 1240}
|
| 248 |
+
{"eval_loss": 0.591957688331604, "eval_runtime": 23.6371, "eval_samples_per_second": 4.231, "eval_steps_per_second": 0.55, "epoch": 1.71, "step": 1240}
|
| 249 |
+
{"loss": 0.5844, "learning_rate": 0.00025174825174825174, "epoch": 1.72, "step": 1250}
|
| 250 |
+
{"eval_loss": 0.5911493897438049, "eval_runtime": 23.6677, "eval_samples_per_second": 4.225, "eval_steps_per_second": 0.549, "epoch": 1.72, "step": 1250}
|
| 251 |
+
{"loss": 0.5539, "learning_rate": 0.0002513286713286713, "epoch": 1.74, "step": 1260}
|
| 252 |
+
{"eval_loss": 0.5910014510154724, "eval_runtime": 23.552, "eval_samples_per_second": 4.246, "eval_steps_per_second": 0.552, "epoch": 1.74, "step": 1260}
|
| 253 |
+
{"loss": 0.5968, "learning_rate": 0.00025090909090909086, "epoch": 1.75, "step": 1270}
|
| 254 |
+
{"eval_loss": 0.5909925699234009, "eval_runtime": 23.5627, "eval_samples_per_second": 4.244, "eval_steps_per_second": 0.552, "epoch": 1.75, "step": 1270}
|
| 255 |
+
{"loss": 0.4834, "learning_rate": 0.0002504895104895105, "epoch": 1.77, "step": 1280}
|
| 256 |
+
{"eval_loss": 0.5910032987594604, "eval_runtime": 23.5516, "eval_samples_per_second": 4.246, "eval_steps_per_second": 0.552, "epoch": 1.77, "step": 1280}
|
| 257 |
+
{"loss": 0.6222, "learning_rate": 0.00025006993006993004, "epoch": 1.78, "step": 1290}
|
| 258 |
+
{"eval_loss": 0.5898649096488953, "eval_runtime": 23.6624, "eval_samples_per_second": 4.226, "eval_steps_per_second": 0.549, "epoch": 1.78, "step": 1290}
|
| 259 |
+
{"loss": 0.5424, "learning_rate": 0.00024965034965034966, "epoch": 1.79, "step": 1300}
|
| 260 |
+
{"eval_loss": 0.590370774269104, "eval_runtime": 23.5622, "eval_samples_per_second": 4.244, "eval_steps_per_second": 0.552, "epoch": 1.79, "step": 1300}
|
| 261 |
+
{"loss": 0.6267, "learning_rate": 0.0002492307692307692, "epoch": 1.81, "step": 1310}
|
| 262 |
+
{"eval_loss": 0.5873252749443054, "eval_runtime": 23.5328, "eval_samples_per_second": 4.249, "eval_steps_per_second": 0.552, "epoch": 1.81, "step": 1310}
|
| 263 |
+
{"loss": 0.6605, "learning_rate": 0.0002488111888111888, "epoch": 1.82, "step": 1320}
|
| 264 |
+
{"eval_loss": 0.5867363214492798, "eval_runtime": 23.6272, "eval_samples_per_second": 4.232, "eval_steps_per_second": 0.55, "epoch": 1.82, "step": 1320}
|
| 265 |
+
{"loss": 0.5647, "learning_rate": 0.00024839160839160835, "epoch": 1.83, "step": 1330}
|
| 266 |
+
{"eval_loss": 0.5863688588142395, "eval_runtime": 23.5295, "eval_samples_per_second": 4.25, "eval_steps_per_second": 0.552, "epoch": 1.83, "step": 1330}
|
| 267 |
+
{"loss": 0.5607, "learning_rate": 0.00024797202797202796, "epoch": 1.85, "step": 1340}
|
| 268 |
+
{"eval_loss": 0.5849428176879883, "eval_runtime": 23.5386, "eval_samples_per_second": 4.248, "eval_steps_per_second": 0.552, "epoch": 1.85, "step": 1340}
|
| 269 |
+
{"loss": 0.6948, "learning_rate": 0.0002475524475524475, "epoch": 1.86, "step": 1350}
|
| 270 |
+
{"eval_loss": 0.585557222366333, "eval_runtime": 23.6955, "eval_samples_per_second": 4.22, "eval_steps_per_second": 0.549, "epoch": 1.86, "step": 1350}
|
| 271 |
+
{"loss": 0.6667, "learning_rate": 0.0002471328671328671, "epoch": 1.88, "step": 1360}
|
| 272 |
+
{"eval_loss": 0.5850853323936462, "eval_runtime": 23.5655, "eval_samples_per_second": 4.243, "eval_steps_per_second": 0.552, "epoch": 1.88, "step": 1360}
|
| 273 |
+
{"loss": 0.6335, "learning_rate": 0.0002467132867132867, "epoch": 1.89, "step": 1370}
|
| 274 |
+
{"eval_loss": 0.5850026607513428, "eval_runtime": 23.5045, "eval_samples_per_second": 4.255, "eval_steps_per_second": 0.553, "epoch": 1.89, "step": 1370}
|
| 275 |
+
{"loss": 0.601, "learning_rate": 0.00024629370629370627, "epoch": 1.9, "step": 1380}
|
| 276 |
+
{"eval_loss": 0.5849950909614563, "eval_runtime": 23.6286, "eval_samples_per_second": 4.232, "eval_steps_per_second": 0.55, "epoch": 1.9, "step": 1380}
|
| 277 |
+
{"loss": 0.4668, "learning_rate": 0.0002458741258741259, "epoch": 1.92, "step": 1390}
|
| 278 |
+
{"eval_loss": 0.5844566226005554, "eval_runtime": 23.5099, "eval_samples_per_second": 4.254, "eval_steps_per_second": 0.553, "epoch": 1.92, "step": 1390}
|
| 279 |
+
{"loss": 0.5218, "learning_rate": 0.00024545454545454545, "epoch": 1.93, "step": 1400}
|
| 280 |
+
{"eval_loss": 0.583265483379364, "eval_runtime": 23.5127, "eval_samples_per_second": 4.253, "eval_steps_per_second": 0.553, "epoch": 1.93, "step": 1400}
|
| 281 |
+
{"loss": 0.5104, "learning_rate": 0.000245034965034965, "epoch": 1.94, "step": 1410}
|
| 282 |
+
{"eval_loss": 0.5836408734321594, "eval_runtime": 23.7146, "eval_samples_per_second": 4.217, "eval_steps_per_second": 0.548, "epoch": 1.94, "step": 1410}
|
| 283 |
+
{"loss": 0.7134, "learning_rate": 0.0002446153846153846, "epoch": 1.96, "step": 1420}
|
| 284 |
+
{"eval_loss": 0.5841034650802612, "eval_runtime": 23.6571, "eval_samples_per_second": 4.227, "eval_steps_per_second": 0.55, "epoch": 1.96, "step": 1420}
|
| 285 |
+
{"loss": 0.5728, "learning_rate": 0.0002441958041958042, "epoch": 1.97, "step": 1430}
|
| 286 |
+
{"eval_loss": 0.5834821462631226, "eval_runtime": 23.4792, "eval_samples_per_second": 4.259, "eval_steps_per_second": 0.554, "epoch": 1.97, "step": 1430}
|
| 287 |
+
{"loss": 0.5703, "learning_rate": 0.00024377622377622378, "epoch": 1.99, "step": 1440}
|
| 288 |
+
{"eval_loss": 0.5817570686340332, "eval_runtime": 23.5093, "eval_samples_per_second": 4.254, "eval_steps_per_second": 0.553, "epoch": 1.99, "step": 1440}
|
| 289 |
+
{"loss": 0.5527, "learning_rate": 0.00024335664335664332, "epoch": 2.0, "step": 1450}
|
| 290 |
+
{"eval_loss": 0.5805172920227051, "eval_runtime": 23.5889, "eval_samples_per_second": 4.239, "eval_steps_per_second": 0.551, "epoch": 2.0, "step": 1450}
|
| 291 |
+
{"loss": 0.6111, "learning_rate": 0.0002429370629370629, "epoch": 2.01, "step": 1460}
|
| 292 |
+
{"eval_loss": 0.57992023229599, "eval_runtime": 23.5146, "eval_samples_per_second": 4.253, "eval_steps_per_second": 0.553, "epoch": 2.01, "step": 1460}
|
| 293 |
+
{"train_runtime": 7921.6312, "train_samples_per_second": 3.661, "train_steps_per_second": 0.915, "total_flos": 3.015155661857096e+17, "train_loss": 0.6625166618660705, "epoch": 2.01, "step": 1460}
|