| { | |
| "best_metric": 0.6739130434782609, | |
| "best_model_checkpoint": "SW2-DMAE-2\\checkpoint-168", | |
| "epoch": 68.57142857142857, | |
| "eval_steps": 500, | |
| "global_step": 240, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.86, | |
| "eval_accuracy": 0.10869565217391304, | |
| "eval_loss": 1.6269055604934692, | |
| "eval_runtime": 0.2761, | |
| "eval_samples_per_second": 166.629, | |
| "eval_steps_per_second": 10.867, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.10869565217391304, | |
| "eval_loss": 1.6078405380249023, | |
| "eval_runtime": 0.2401, | |
| "eval_samples_per_second": 191.623, | |
| "eval_steps_per_second": 12.497, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "learning_rate": 1.4375e-05, | |
| "loss": 1.618, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "eval_accuracy": 0.10869565217391304, | |
| "eval_loss": 1.585246205329895, | |
| "eval_runtime": 0.3111, | |
| "eval_samples_per_second": 147.877, | |
| "eval_steps_per_second": 9.644, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.10869565217391304, | |
| "eval_loss": 1.539771556854248, | |
| "eval_runtime": 0.2925, | |
| "eval_samples_per_second": 157.267, | |
| "eval_steps_per_second": 10.257, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 4.86, | |
| "eval_accuracy": 0.10869565217391304, | |
| "eval_loss": 1.4947997331619263, | |
| "eval_runtime": 0.2826, | |
| "eval_samples_per_second": 162.793, | |
| "eval_steps_per_second": 10.617, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 5.71, | |
| "learning_rate": 1.375e-05, | |
| "loss": 1.5162, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.10869565217391304, | |
| "eval_loss": 1.434383749961853, | |
| "eval_runtime": 0.2591, | |
| "eval_samples_per_second": 177.566, | |
| "eval_steps_per_second": 11.58, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 6.86, | |
| "eval_accuracy": 0.10869565217391304, | |
| "eval_loss": 1.3878703117370605, | |
| "eval_runtime": 0.2371, | |
| "eval_samples_per_second": 194.049, | |
| "eval_steps_per_second": 12.655, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.17391304347826086, | |
| "eval_loss": 1.328822135925293, | |
| "eval_runtime": 0.2481, | |
| "eval_samples_per_second": 185.442, | |
| "eval_steps_per_second": 12.094, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 8.57, | |
| "learning_rate": 1.3125e-05, | |
| "loss": 1.3459, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 8.86, | |
| "eval_accuracy": 0.45652173913043476, | |
| "eval_loss": 1.2925546169281006, | |
| "eval_runtime": 0.2481, | |
| "eval_samples_per_second": 185.442, | |
| "eval_steps_per_second": 12.094, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.45652173913043476, | |
| "eval_loss": 1.2562181949615479, | |
| "eval_runtime": 0.2541, | |
| "eval_samples_per_second": 181.006, | |
| "eval_steps_per_second": 11.805, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 10.86, | |
| "eval_accuracy": 0.45652173913043476, | |
| "eval_loss": 1.238446593284607, | |
| "eval_runtime": 0.2411, | |
| "eval_samples_per_second": 190.828, | |
| "eval_steps_per_second": 12.445, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 11.43, | |
| "learning_rate": 1.25e-05, | |
| "loss": 1.2384, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.45652173913043476, | |
| "eval_loss": 1.2205413579940796, | |
| "eval_runtime": 0.2537, | |
| "eval_samples_per_second": 181.326, | |
| "eval_steps_per_second": 11.826, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 12.86, | |
| "eval_accuracy": 0.45652173913043476, | |
| "eval_loss": 1.2173599004745483, | |
| "eval_runtime": 0.2401, | |
| "eval_samples_per_second": 191.624, | |
| "eval_steps_per_second": 12.497, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.45652173913043476, | |
| "eval_loss": 1.2131370306015015, | |
| "eval_runtime": 0.2691, | |
| "eval_samples_per_second": 170.965, | |
| "eval_steps_per_second": 11.15, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 14.29, | |
| "learning_rate": 1.1874999999999999e-05, | |
| "loss": 1.2049, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 14.86, | |
| "eval_accuracy": 0.45652173913043476, | |
| "eval_loss": 1.2104469537734985, | |
| "eval_runtime": 0.2431, | |
| "eval_samples_per_second": 189.258, | |
| "eval_steps_per_second": 12.343, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.45652173913043476, | |
| "eval_loss": 1.208552598953247, | |
| "eval_runtime": 0.2451, | |
| "eval_samples_per_second": 187.713, | |
| "eval_steps_per_second": 12.242, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 16.86, | |
| "eval_accuracy": 0.45652173913043476, | |
| "eval_loss": 1.2076021432876587, | |
| "eval_runtime": 0.2556, | |
| "eval_samples_per_second": 179.993, | |
| "eval_steps_per_second": 11.739, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 17.14, | |
| "learning_rate": 1.125e-05, | |
| "loss": 1.1815, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.45652173913043476, | |
| "eval_loss": 1.2051774263381958, | |
| "eval_runtime": 0.2431, | |
| "eval_samples_per_second": 189.258, | |
| "eval_steps_per_second": 12.343, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 18.86, | |
| "eval_accuracy": 0.45652173913043476, | |
| "eval_loss": 1.204942226409912, | |
| "eval_runtime": 0.3966, | |
| "eval_samples_per_second": 115.986, | |
| "eval_steps_per_second": 7.564, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "learning_rate": 1.0625e-05, | |
| "loss": 1.1826, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.45652173913043476, | |
| "eval_loss": 1.2018660306930542, | |
| "eval_runtime": 0.3121, | |
| "eval_samples_per_second": 147.402, | |
| "eval_steps_per_second": 9.613, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 20.86, | |
| "eval_accuracy": 0.45652173913043476, | |
| "eval_loss": 1.1960418224334717, | |
| "eval_runtime": 0.3191, | |
| "eval_samples_per_second": 144.168, | |
| "eval_steps_per_second": 9.402, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_accuracy": 0.45652173913043476, | |
| "eval_loss": 1.1926813125610352, | |
| "eval_runtime": 0.3096, | |
| "eval_samples_per_second": 148.592, | |
| "eval_steps_per_second": 9.691, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 22.86, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 1.1647, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 22.86, | |
| "eval_accuracy": 0.45652173913043476, | |
| "eval_loss": 1.1927775144577026, | |
| "eval_runtime": 0.2491, | |
| "eval_samples_per_second": 184.697, | |
| "eval_steps_per_second": 12.045, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_accuracy": 0.45652173913043476, | |
| "eval_loss": 1.1924192905426025, | |
| "eval_runtime": 0.2991, | |
| "eval_samples_per_second": 153.812, | |
| "eval_steps_per_second": 10.031, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 24.86, | |
| "eval_accuracy": 0.45652173913043476, | |
| "eval_loss": 1.1902908086776733, | |
| "eval_runtime": 0.3005, | |
| "eval_samples_per_second": 153.097, | |
| "eval_steps_per_second": 9.985, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 25.71, | |
| "learning_rate": 9.375000000000001e-06, | |
| "loss": 1.1568, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_accuracy": 0.45652173913043476, | |
| "eval_loss": 1.1878631114959717, | |
| "eval_runtime": 0.2861, | |
| "eval_samples_per_second": 160.803, | |
| "eval_steps_per_second": 10.487, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 26.86, | |
| "eval_accuracy": 0.45652173913043476, | |
| "eval_loss": 1.1913325786590576, | |
| "eval_runtime": 0.2806, | |
| "eval_samples_per_second": 163.951, | |
| "eval_steps_per_second": 10.692, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_accuracy": 0.4782608695652174, | |
| "eval_loss": 1.204640507698059, | |
| "eval_runtime": 0.2621, | |
| "eval_samples_per_second": 175.533, | |
| "eval_steps_per_second": 11.448, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 28.57, | |
| "learning_rate": 8.750000000000001e-06, | |
| "loss": 1.1432, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 28.86, | |
| "eval_accuracy": 0.4782608695652174, | |
| "eval_loss": 1.193393588066101, | |
| "eval_runtime": 0.2361, | |
| "eval_samples_per_second": 194.871, | |
| "eval_steps_per_second": 12.709, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_accuracy": 0.4782608695652174, | |
| "eval_loss": 1.166512131690979, | |
| "eval_runtime": 0.2421, | |
| "eval_samples_per_second": 190.04, | |
| "eval_steps_per_second": 12.394, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 30.86, | |
| "eval_accuracy": 0.4782608695652174, | |
| "eval_loss": 1.1600818634033203, | |
| "eval_runtime": 0.2381, | |
| "eval_samples_per_second": 193.234, | |
| "eval_steps_per_second": 12.602, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 31.43, | |
| "learning_rate": 8.125e-06, | |
| "loss": 1.1112, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "eval_accuracy": 0.5, | |
| "eval_loss": 1.1623895168304443, | |
| "eval_runtime": 0.2801, | |
| "eval_samples_per_second": 164.249, | |
| "eval_steps_per_second": 10.712, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 32.86, | |
| "eval_accuracy": 0.5217391304347826, | |
| "eval_loss": 1.1663668155670166, | |
| "eval_runtime": 0.3126, | |
| "eval_samples_per_second": 147.162, | |
| "eval_steps_per_second": 9.597, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "eval_accuracy": 0.5, | |
| "eval_loss": 1.1692047119140625, | |
| "eval_runtime": 0.2586, | |
| "eval_samples_per_second": 177.904, | |
| "eval_steps_per_second": 11.602, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 34.29, | |
| "learning_rate": 7.5e-06, | |
| "loss": 1.1132, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 34.86, | |
| "eval_accuracy": 0.5434782608695652, | |
| "eval_loss": 1.1513336896896362, | |
| "eval_runtime": 0.2416, | |
| "eval_samples_per_second": 190.427, | |
| "eval_steps_per_second": 12.419, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "eval_accuracy": 0.5869565217391305, | |
| "eval_loss": 1.1384443044662476, | |
| "eval_runtime": 0.2461, | |
| "eval_samples_per_second": 186.95, | |
| "eval_steps_per_second": 12.192, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 36.86, | |
| "eval_accuracy": 0.6086956521739131, | |
| "eval_loss": 1.127366542816162, | |
| "eval_runtime": 0.2446, | |
| "eval_samples_per_second": 188.091, | |
| "eval_steps_per_second": 12.267, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 37.14, | |
| "learning_rate": 6.875e-06, | |
| "loss": 1.0642, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 38.0, | |
| "eval_accuracy": 0.5869565217391305, | |
| "eval_loss": 1.1442575454711914, | |
| "eval_runtime": 0.2586, | |
| "eval_samples_per_second": 177.903, | |
| "eval_steps_per_second": 11.602, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 38.86, | |
| "eval_accuracy": 0.5, | |
| "eval_loss": 1.1651057004928589, | |
| "eval_runtime": 0.2496, | |
| "eval_samples_per_second": 184.324, | |
| "eval_steps_per_second": 12.021, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "learning_rate": 6.25e-06, | |
| "loss": 1.0439, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "eval_accuracy": 0.5, | |
| "eval_loss": 1.149288296699524, | |
| "eval_runtime": 0.2441, | |
| "eval_samples_per_second": 188.478, | |
| "eval_steps_per_second": 12.292, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 40.86, | |
| "eval_accuracy": 0.5217391304347826, | |
| "eval_loss": 1.1330839395523071, | |
| "eval_runtime": 0.2811, | |
| "eval_samples_per_second": 163.664, | |
| "eval_steps_per_second": 10.674, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 42.0, | |
| "eval_accuracy": 0.5869565217391305, | |
| "eval_loss": 1.1032251119613647, | |
| "eval_runtime": 0.2521, | |
| "eval_samples_per_second": 182.499, | |
| "eval_steps_per_second": 11.902, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 42.86, | |
| "learning_rate": 5.625e-06, | |
| "loss": 1.0362, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 42.86, | |
| "eval_accuracy": 0.6304347826086957, | |
| "eval_loss": 1.0988132953643799, | |
| "eval_runtime": 0.2916, | |
| "eval_samples_per_second": 157.767, | |
| "eval_steps_per_second": 10.289, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 44.0, | |
| "eval_accuracy": 0.5869565217391305, | |
| "eval_loss": 1.1092532873153687, | |
| "eval_runtime": 0.2506, | |
| "eval_samples_per_second": 183.583, | |
| "eval_steps_per_second": 11.973, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 44.86, | |
| "eval_accuracy": 0.5869565217391305, | |
| "eval_loss": 1.1101136207580566, | |
| "eval_runtime": 0.22, | |
| "eval_samples_per_second": 209.044, | |
| "eval_steps_per_second": 13.633, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 45.71, | |
| "learning_rate": 4.9999999999999996e-06, | |
| "loss": 1.0177, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 46.0, | |
| "eval_accuracy": 0.6304347826086957, | |
| "eval_loss": 1.0903021097183228, | |
| "eval_runtime": 0.3931, | |
| "eval_samples_per_second": 117.022, | |
| "eval_steps_per_second": 7.632, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 46.86, | |
| "eval_accuracy": 0.6521739130434783, | |
| "eval_loss": 1.069130301475525, | |
| "eval_runtime": 0.4281, | |
| "eval_samples_per_second": 107.447, | |
| "eval_steps_per_second": 7.007, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 48.0, | |
| "eval_accuracy": 0.6739130434782609, | |
| "eval_loss": 1.0509947538375854, | |
| "eval_runtime": 0.2701, | |
| "eval_samples_per_second": 170.332, | |
| "eval_steps_per_second": 11.109, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 48.57, | |
| "learning_rate": 4.3750000000000005e-06, | |
| "loss": 1.0, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 48.86, | |
| "eval_accuracy": 0.6521739130434783, | |
| "eval_loss": 1.0451492071151733, | |
| "eval_runtime": 0.2466, | |
| "eval_samples_per_second": 186.563, | |
| "eval_steps_per_second": 12.167, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 50.0, | |
| "eval_accuracy": 0.6521739130434783, | |
| "eval_loss": 1.0425117015838623, | |
| "eval_runtime": 0.2591, | |
| "eval_samples_per_second": 177.566, | |
| "eval_steps_per_second": 11.58, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 50.86, | |
| "eval_accuracy": 0.6086956521739131, | |
| "eval_loss": 1.0512455701828003, | |
| "eval_runtime": 0.2671, | |
| "eval_samples_per_second": 172.246, | |
| "eval_steps_per_second": 11.233, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 51.43, | |
| "learning_rate": 3.75e-06, | |
| "loss": 0.9636, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 52.0, | |
| "eval_accuracy": 0.6304347826086957, | |
| "eval_loss": 1.044124722480774, | |
| "eval_runtime": 0.2541, | |
| "eval_samples_per_second": 181.062, | |
| "eval_steps_per_second": 11.808, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 52.86, | |
| "eval_accuracy": 0.6521739130434783, | |
| "eval_loss": 1.0401920080184937, | |
| "eval_runtime": 0.2676, | |
| "eval_samples_per_second": 171.917, | |
| "eval_steps_per_second": 11.212, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 54.0, | |
| "eval_accuracy": 0.6521739130434783, | |
| "eval_loss": 1.0161322355270386, | |
| "eval_runtime": 0.2421, | |
| "eval_samples_per_second": 190.039, | |
| "eval_steps_per_second": 12.394, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 54.29, | |
| "learning_rate": 3.125e-06, | |
| "loss": 0.9744, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 54.86, | |
| "eval_accuracy": 0.6521739130434783, | |
| "eval_loss": 1.0072776079177856, | |
| "eval_runtime": 0.3861, | |
| "eval_samples_per_second": 119.144, | |
| "eval_steps_per_second": 7.77, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 56.0, | |
| "eval_accuracy": 0.6521739130434783, | |
| "eval_loss": 1.0047576427459717, | |
| "eval_runtime": 0.2431, | |
| "eval_samples_per_second": 189.257, | |
| "eval_steps_per_second": 12.343, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 56.86, | |
| "eval_accuracy": 0.6521739130434783, | |
| "eval_loss": 0.9993048310279846, | |
| "eval_runtime": 0.2251, | |
| "eval_samples_per_second": 204.398, | |
| "eval_steps_per_second": 13.33, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 57.14, | |
| "learning_rate": 2.4999999999999998e-06, | |
| "loss": 0.9233, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 58.0, | |
| "eval_accuracy": 0.6521739130434783, | |
| "eval_loss": 0.9939430952072144, | |
| "eval_runtime": 0.2431, | |
| "eval_samples_per_second": 189.258, | |
| "eval_steps_per_second": 12.343, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 58.86, | |
| "eval_accuracy": 0.6521739130434783, | |
| "eval_loss": 0.9938895106315613, | |
| "eval_runtime": 0.2396, | |
| "eval_samples_per_second": 192.017, | |
| "eval_steps_per_second": 12.523, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 60.0, | |
| "learning_rate": 1.875e-06, | |
| "loss": 0.9452, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 60.0, | |
| "eval_accuracy": 0.6521739130434783, | |
| "eval_loss": 0.9975183010101318, | |
| "eval_runtime": 0.2521, | |
| "eval_samples_per_second": 182.499, | |
| "eval_steps_per_second": 11.902, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 60.86, | |
| "eval_accuracy": 0.6086956521739131, | |
| "eval_loss": 0.998058557510376, | |
| "eval_runtime": 0.2471, | |
| "eval_samples_per_second": 186.193, | |
| "eval_steps_per_second": 12.143, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 62.0, | |
| "eval_accuracy": 0.6086956521739131, | |
| "eval_loss": 0.9985237121582031, | |
| "eval_runtime": 0.2421, | |
| "eval_samples_per_second": 190.04, | |
| "eval_steps_per_second": 12.394, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 62.86, | |
| "learning_rate": 1.2499999999999999e-06, | |
| "loss": 0.9183, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 62.86, | |
| "eval_accuracy": 0.6086956521739131, | |
| "eval_loss": 0.9968777298927307, | |
| "eval_runtime": 0.2391, | |
| "eval_samples_per_second": 192.425, | |
| "eval_steps_per_second": 12.549, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 64.0, | |
| "eval_accuracy": 0.6304347826086957, | |
| "eval_loss": 0.99575275182724, | |
| "eval_runtime": 0.2351, | |
| "eval_samples_per_second": 195.701, | |
| "eval_steps_per_second": 12.763, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 64.86, | |
| "eval_accuracy": 0.6086956521739131, | |
| "eval_loss": 0.9928344488143921, | |
| "eval_runtime": 0.2406, | |
| "eval_samples_per_second": 191.216, | |
| "eval_steps_per_second": 12.471, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 65.71, | |
| "learning_rate": 6.249999999999999e-07, | |
| "loss": 0.9449, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 66.0, | |
| "eval_accuracy": 0.6086956521739131, | |
| "eval_loss": 0.9906012415885925, | |
| "eval_runtime": 0.2456, | |
| "eval_samples_per_second": 187.316, | |
| "eval_steps_per_second": 12.216, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 66.86, | |
| "eval_accuracy": 0.6304347826086957, | |
| "eval_loss": 0.9892796874046326, | |
| "eval_runtime": 0.2501, | |
| "eval_samples_per_second": 183.959, | |
| "eval_steps_per_second": 11.997, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 68.0, | |
| "eval_accuracy": 0.6521739130434783, | |
| "eval_loss": 0.9880576133728027, | |
| "eval_runtime": 0.2471, | |
| "eval_samples_per_second": 186.193, | |
| "eval_steps_per_second": 12.143, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 68.57, | |
| "learning_rate": 0.0, | |
| "loss": 0.9154, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 68.57, | |
| "eval_accuracy": 0.6521739130434783, | |
| "eval_loss": 0.9880411028862, | |
| "eval_runtime": 0.2566, | |
| "eval_samples_per_second": 179.291, | |
| "eval_steps_per_second": 11.693, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 68.57, | |
| "step": 240, | |
| "total_flos": 4.754181186964685e+17, | |
| "train_loss": 1.113492695490519, | |
| "train_runtime": 374.3307, | |
| "train_samples_per_second": 45.521, | |
| "train_steps_per_second": 0.641 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 240, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 80, | |
| "save_steps": 500, | |
| "total_flos": 4.754181186964685e+17, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |