| { |
| "best_metric": 0.6338008642196655, |
| "best_model_checkpoint": "./model_fine-tune/glot/mbert/urd-Arab/checkpoint-96000", |
| "epoch": 20.416843896214377, |
| "eval_steps": 500, |
| "global_step": 96000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.10633772862611654, |
| "grad_norm": 3.0985794067382812, |
| "learning_rate": 9.95e-05, |
| "loss": 1.6616, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.10633772862611654, |
| "eval_accuracy": 0.7045292975858304, |
| "eval_loss": 1.4788062572479248, |
| "eval_runtime": 184.8638, |
| "eval_samples_per_second": 148.39, |
| "eval_steps_per_second": 4.641, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.21267545725223308, |
| "grad_norm": 2.9091954231262207, |
| "learning_rate": 9.900000000000001e-05, |
| "loss": 1.4534, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.21267545725223308, |
| "eval_accuracy": 0.7242012199076614, |
| "eval_loss": 1.3632394075393677, |
| "eval_runtime": 186.0107, |
| "eval_samples_per_second": 147.475, |
| "eval_steps_per_second": 4.613, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.31901318587834965, |
| "grad_norm": 3.358151435852051, |
| "learning_rate": 9.850000000000001e-05, |
| "loss": 1.3783, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.31901318587834965, |
| "eval_accuracy": 0.7372946823579634, |
| "eval_loss": 1.2977707386016846, |
| "eval_runtime": 185.966, |
| "eval_samples_per_second": 147.511, |
| "eval_steps_per_second": 4.614, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.42535091450446616, |
| "grad_norm": 2.854158878326416, |
| "learning_rate": 9.8e-05, |
| "loss": 1.3043, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.42535091450446616, |
| "eval_accuracy": 0.7475462534858491, |
| "eval_loss": 1.2377636432647705, |
| "eval_runtime": 190.1291, |
| "eval_samples_per_second": 144.281, |
| "eval_steps_per_second": 4.513, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.5316886431305827, |
| "grad_norm": 2.8080673217773438, |
| "learning_rate": 9.75e-05, |
| "loss": 1.266, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.5316886431305827, |
| "eval_accuracy": 0.7544512379735423, |
| "eval_loss": 1.2024192810058594, |
| "eval_runtime": 187.1935, |
| "eval_samples_per_second": 146.544, |
| "eval_steps_per_second": 4.583, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.6380263717566993, |
| "grad_norm": 3.4922664165496826, |
| "learning_rate": 9.7e-05, |
| "loss": 1.2307, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.6380263717566993, |
| "eval_accuracy": 0.7603215651478341, |
| "eval_loss": 1.1685340404510498, |
| "eval_runtime": 185.8428, |
| "eval_samples_per_second": 147.609, |
| "eval_steps_per_second": 4.617, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.7443641003828159, |
| "grad_norm": 2.4908382892608643, |
| "learning_rate": 9.65e-05, |
| "loss": 1.1959, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.7443641003828159, |
| "eval_accuracy": 0.7657556526801927, |
| "eval_loss": 1.1382145881652832, |
| "eval_runtime": 186.72, |
| "eval_samples_per_second": 146.915, |
| "eval_steps_per_second": 4.595, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.8507018290089323, |
| "grad_norm": 2.883516311645508, |
| "learning_rate": 9.6e-05, |
| "loss": 1.1782, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.8507018290089323, |
| "eval_accuracy": 0.7699587383241813, |
| "eval_loss": 1.130513310432434, |
| "eval_runtime": 186.2137, |
| "eval_samples_per_second": 147.315, |
| "eval_steps_per_second": 4.608, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.9570395576350489, |
| "grad_norm": 2.5834362506866455, |
| "learning_rate": 9.55e-05, |
| "loss": 1.1451, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.9570395576350489, |
| "eval_accuracy": 0.7739563469660221, |
| "eval_loss": 1.0999319553375244, |
| "eval_runtime": 186.1476, |
| "eval_samples_per_second": 147.367, |
| "eval_steps_per_second": 4.609, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.0633772862611655, |
| "grad_norm": 2.7231101989746094, |
| "learning_rate": 9.5e-05, |
| "loss": 1.1261, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.0633772862611655, |
| "eval_accuracy": 0.7763694500850334, |
| "eval_loss": 1.0874555110931396, |
| "eval_runtime": 186.83, |
| "eval_samples_per_second": 146.829, |
| "eval_steps_per_second": 4.592, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.169715014887282, |
| "grad_norm": 3.062772512435913, |
| "learning_rate": 9.449999999999999e-05, |
| "loss": 1.1155, |
| "step": 5500 |
| }, |
| { |
| "epoch": 1.169715014887282, |
| "eval_accuracy": 0.7799436407812638, |
| "eval_loss": 1.0624312162399292, |
| "eval_runtime": 187.3404, |
| "eval_samples_per_second": 146.429, |
| "eval_steps_per_second": 4.58, |
| "step": 5500 |
| }, |
| { |
| "epoch": 1.2760527435133986, |
| "grad_norm": 2.577993631362915, |
| "learning_rate": 9.4e-05, |
| "loss": 1.0835, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.2760527435133986, |
| "eval_accuracy": 0.7829938347419719, |
| "eval_loss": 1.0484861135482788, |
| "eval_runtime": 186.5523, |
| "eval_samples_per_second": 147.047, |
| "eval_steps_per_second": 4.599, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.382390472139515, |
| "grad_norm": 2.816096544265747, |
| "learning_rate": 9.350000000000001e-05, |
| "loss": 1.0662, |
| "step": 6500 |
| }, |
| { |
| "epoch": 1.382390472139515, |
| "eval_accuracy": 0.7847991903133893, |
| "eval_loss": 1.0540517568588257, |
| "eval_runtime": 187.2662, |
| "eval_samples_per_second": 146.487, |
| "eval_steps_per_second": 4.582, |
| "step": 6500 |
| }, |
| { |
| "epoch": 1.4887282007656317, |
| "grad_norm": 2.4449338912963867, |
| "learning_rate": 9.300000000000001e-05, |
| "loss": 1.0537, |
| "step": 7000 |
| }, |
| { |
| "epoch": 1.4887282007656317, |
| "eval_accuracy": 0.7870963170316769, |
| "eval_loss": 1.0328177213668823, |
| "eval_runtime": 187.5096, |
| "eval_samples_per_second": 146.297, |
| "eval_steps_per_second": 4.576, |
| "step": 7000 |
| }, |
| { |
| "epoch": 1.5950659293917482, |
| "grad_norm": 2.4896557331085205, |
| "learning_rate": 9.250000000000001e-05, |
| "loss": 1.0489, |
| "step": 7500 |
| }, |
| { |
| "epoch": 1.5950659293917482, |
| "eval_accuracy": 0.7898641999964792, |
| "eval_loss": 1.0257970094680786, |
| "eval_runtime": 187.4659, |
| "eval_samples_per_second": 146.331, |
| "eval_steps_per_second": 4.577, |
| "step": 7500 |
| }, |
| { |
| "epoch": 1.7014036580178646, |
| "grad_norm": 4.281688690185547, |
| "learning_rate": 9.200000000000001e-05, |
| "loss": 1.0417, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.7014036580178646, |
| "eval_accuracy": 0.7918014683810514, |
| "eval_loss": 1.0114617347717285, |
| "eval_runtime": 185.6247, |
| "eval_samples_per_second": 147.782, |
| "eval_steps_per_second": 4.622, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.8077413866439813, |
| "grad_norm": 2.6903951168060303, |
| "learning_rate": 9.15e-05, |
| "loss": 1.0238, |
| "step": 8500 |
| }, |
| { |
| "epoch": 1.8077413866439813, |
| "eval_accuracy": 0.7941965557582847, |
| "eval_loss": 0.9940029978752136, |
| "eval_runtime": 185.6726, |
| "eval_samples_per_second": 147.744, |
| "eval_steps_per_second": 4.621, |
| "step": 8500 |
| }, |
| { |
| "epoch": 1.914079115270098, |
| "grad_norm": 2.6284191608428955, |
| "learning_rate": 9.1e-05, |
| "loss": 1.0261, |
| "step": 9000 |
| }, |
| { |
| "epoch": 1.914079115270098, |
| "eval_accuracy": 0.7950889770370101, |
| "eval_loss": 0.9995012283325195, |
| "eval_runtime": 187.0194, |
| "eval_samples_per_second": 146.68, |
| "eval_steps_per_second": 4.588, |
| "step": 9000 |
| }, |
| { |
| "epoch": 2.0204168438962142, |
| "grad_norm": 2.3006789684295654, |
| "learning_rate": 9.05e-05, |
| "loss": 1.0054, |
| "step": 9500 |
| }, |
| { |
| "epoch": 2.0204168438962142, |
| "eval_accuracy": 0.7962621919626219, |
| "eval_loss": 0.989319384098053, |
| "eval_runtime": 187.091, |
| "eval_samples_per_second": 146.624, |
| "eval_steps_per_second": 4.586, |
| "step": 9500 |
| }, |
| { |
| "epoch": 2.126754572522331, |
| "grad_norm": 2.3347225189208984, |
| "learning_rate": 9e-05, |
| "loss": 1.0012, |
| "step": 10000 |
| }, |
| { |
| "epoch": 2.126754572522331, |
| "eval_accuracy": 0.7998739212317131, |
| "eval_loss": 0.9665579199790955, |
| "eval_runtime": 187.267, |
| "eval_samples_per_second": 146.486, |
| "eval_steps_per_second": 4.582, |
| "step": 10000 |
| }, |
| { |
| "epoch": 2.2330923011484476, |
| "grad_norm": 2.7090580463409424, |
| "learning_rate": 8.950000000000001e-05, |
| "loss": 0.9855, |
| "step": 10500 |
| }, |
| { |
| "epoch": 2.2330923011484476, |
| "eval_accuracy": 0.8001766586026973, |
| "eval_loss": 0.9704384207725525, |
| "eval_runtime": 186.5023, |
| "eval_samples_per_second": 147.087, |
| "eval_steps_per_second": 4.6, |
| "step": 10500 |
| }, |
| { |
| "epoch": 2.339430029774564, |
| "grad_norm": 2.664764404296875, |
| "learning_rate": 8.900000000000001e-05, |
| "loss": 0.983, |
| "step": 11000 |
| }, |
| { |
| "epoch": 2.339430029774564, |
| "eval_accuracy": 0.8007109155537777, |
| "eval_loss": 0.9576543569564819, |
| "eval_runtime": 187.0054, |
| "eval_samples_per_second": 146.691, |
| "eval_steps_per_second": 4.588, |
| "step": 11000 |
| }, |
| { |
| "epoch": 2.4457677584006805, |
| "grad_norm": 2.3209338188171387, |
| "learning_rate": 8.850000000000001e-05, |
| "loss": 0.9661, |
| "step": 11500 |
| }, |
| { |
| "epoch": 2.4457677584006805, |
| "eval_accuracy": 0.8029339980865494, |
| "eval_loss": 0.9501250386238098, |
| "eval_runtime": 186.7101, |
| "eval_samples_per_second": 146.923, |
| "eval_steps_per_second": 4.595, |
| "step": 11500 |
| }, |
| { |
| "epoch": 2.552105487026797, |
| "grad_norm": 2.667185068130493, |
| "learning_rate": 8.800000000000001e-05, |
| "loss": 0.9661, |
| "step": 12000 |
| }, |
| { |
| "epoch": 2.552105487026797, |
| "eval_accuracy": 0.803158831479515, |
| "eval_loss": 0.9485698342323303, |
| "eval_runtime": 187.0511, |
| "eval_samples_per_second": 146.655, |
| "eval_steps_per_second": 4.587, |
| "step": 12000 |
| }, |
| { |
| "epoch": 2.6584432156529134, |
| "grad_norm": 2.3294034004211426, |
| "learning_rate": 8.75e-05, |
| "loss": 0.9635, |
| "step": 12500 |
| }, |
| { |
| "epoch": 2.6584432156529134, |
| "eval_accuracy": 0.8052219742658059, |
| "eval_loss": 0.936097264289856, |
| "eval_runtime": 187.1076, |
| "eval_samples_per_second": 146.611, |
| "eval_steps_per_second": 4.586, |
| "step": 12500 |
| }, |
| { |
| "epoch": 2.76478094427903, |
| "grad_norm": 2.4288761615753174, |
| "learning_rate": 8.7e-05, |
| "loss": 0.9564, |
| "step": 13000 |
| }, |
| { |
| "epoch": 2.76478094427903, |
| "eval_accuracy": 0.8063745585697996, |
| "eval_loss": 0.9337704181671143, |
| "eval_runtime": 187.0756, |
| "eval_samples_per_second": 146.636, |
| "eval_steps_per_second": 4.586, |
| "step": 13000 |
| }, |
| { |
| "epoch": 2.8711186729051468, |
| "grad_norm": 2.7012951374053955, |
| "learning_rate": 8.65e-05, |
| "loss": 0.9461, |
| "step": 13500 |
| }, |
| { |
| "epoch": 2.8711186729051468, |
| "eval_accuracy": 0.8086281139879846, |
| "eval_loss": 0.9229084253311157, |
| "eval_runtime": 186.8255, |
| "eval_samples_per_second": 146.832, |
| "eval_steps_per_second": 4.593, |
| "step": 13500 |
| }, |
| { |
| "epoch": 2.9774564015312635, |
| "grad_norm": 2.791017532348633, |
| "learning_rate": 8.6e-05, |
| "loss": 0.9397, |
| "step": 14000 |
| }, |
| { |
| "epoch": 2.9774564015312635, |
| "eval_accuracy": 0.8081856611194737, |
| "eval_loss": 0.9245437383651733, |
| "eval_runtime": 188.1945, |
| "eval_samples_per_second": 145.764, |
| "eval_steps_per_second": 4.559, |
| "step": 14000 |
| }, |
| { |
| "epoch": 3.0837941301573797, |
| "grad_norm": 2.684272050857544, |
| "learning_rate": 8.55e-05, |
| "loss": 0.9264, |
| "step": 14500 |
| }, |
| { |
| "epoch": 3.0837941301573797, |
| "eval_accuracy": 0.8095467896972031, |
| "eval_loss": 0.9195280075073242, |
| "eval_runtime": 188.4052, |
| "eval_samples_per_second": 145.601, |
| "eval_steps_per_second": 4.554, |
| "step": 14500 |
| }, |
| { |
| "epoch": 3.1901318587834964, |
| "grad_norm": 2.159348726272583, |
| "learning_rate": 8.5e-05, |
| "loss": 0.9204, |
| "step": 15000 |
| }, |
| { |
| "epoch": 3.1901318587834964, |
| "eval_accuracy": 0.8105982920504011, |
| "eval_loss": 0.9083139300346375, |
| "eval_runtime": 186.9331, |
| "eval_samples_per_second": 146.748, |
| "eval_steps_per_second": 4.59, |
| "step": 15000 |
| }, |
| { |
| "epoch": 3.296469587409613, |
| "grad_norm": 2.2706503868103027, |
| "learning_rate": 8.450000000000001e-05, |
| "loss": 0.9213, |
| "step": 15500 |
| }, |
| { |
| "epoch": 3.296469587409613, |
| "eval_accuracy": 0.8117506926014157, |
| "eval_loss": 0.9083714485168457, |
| "eval_runtime": 186.0993, |
| "eval_samples_per_second": 147.405, |
| "eval_steps_per_second": 4.61, |
| "step": 15500 |
| }, |
| { |
| "epoch": 3.4028073160357293, |
| "grad_norm": 2.560739755630493, |
| "learning_rate": 8.4e-05, |
| "loss": 0.9193, |
| "step": 16000 |
| }, |
| { |
| "epoch": 3.4028073160357293, |
| "eval_accuracy": 0.8120885199882083, |
| "eval_loss": 0.9039110541343689, |
| "eval_runtime": 187.0648, |
| "eval_samples_per_second": 146.644, |
| "eval_steps_per_second": 4.587, |
| "step": 16000 |
| }, |
| { |
| "epoch": 3.509145044661846, |
| "grad_norm": 2.2117762565612793, |
| "learning_rate": 8.35e-05, |
| "loss": 0.9127, |
| "step": 16500 |
| }, |
| { |
| "epoch": 3.509145044661846, |
| "eval_accuracy": 0.8133210445095941, |
| "eval_loss": 0.8947284817695618, |
| "eval_runtime": 187.0387, |
| "eval_samples_per_second": 146.665, |
| "eval_steps_per_second": 4.587, |
| "step": 16500 |
| }, |
| { |
| "epoch": 3.6154827732879626, |
| "grad_norm": 7.251231670379639, |
| "learning_rate": 8.3e-05, |
| "loss": 0.9069, |
| "step": 17000 |
| }, |
| { |
| "epoch": 3.6154827732879626, |
| "eval_accuracy": 0.8143866647342304, |
| "eval_loss": 0.8961222767829895, |
| "eval_runtime": 186.0212, |
| "eval_samples_per_second": 147.467, |
| "eval_steps_per_second": 4.612, |
| "step": 17000 |
| }, |
| { |
| "epoch": 3.7218205019140793, |
| "grad_norm": 2.4916841983795166, |
| "learning_rate": 8.25e-05, |
| "loss": 0.9011, |
| "step": 17500 |
| }, |
| { |
| "epoch": 3.7218205019140793, |
| "eval_accuracy": 0.8143561971265061, |
| "eval_loss": 0.8977431058883667, |
| "eval_runtime": 186.782, |
| "eval_samples_per_second": 146.866, |
| "eval_steps_per_second": 4.594, |
| "step": 17500 |
| }, |
| { |
| "epoch": 3.8281582305401956, |
| "grad_norm": 2.5640275478363037, |
| "learning_rate": 8.2e-05, |
| "loss": 0.9044, |
| "step": 18000 |
| }, |
| { |
| "epoch": 3.8281582305401956, |
| "eval_accuracy": 0.8150782896033226, |
| "eval_loss": 0.8876448273658752, |
| "eval_runtime": 187.6582, |
| "eval_samples_per_second": 146.181, |
| "eval_steps_per_second": 4.572, |
| "step": 18000 |
| }, |
| { |
| "epoch": 3.9344959591663122, |
| "grad_norm": 2.1754488945007324, |
| "learning_rate": 8.15e-05, |
| "loss": 0.8889, |
| "step": 18500 |
| }, |
| { |
| "epoch": 3.9344959591663122, |
| "eval_accuracy": 0.8168180011691912, |
| "eval_loss": 0.8829460740089417, |
| "eval_runtime": 187.8254, |
| "eval_samples_per_second": 146.051, |
| "eval_steps_per_second": 4.568, |
| "step": 18500 |
| }, |
| { |
| "epoch": 4.0408336877924285, |
| "grad_norm": 2.2828450202941895, |
| "learning_rate": 8.1e-05, |
| "loss": 0.8781, |
| "step": 19000 |
| }, |
| { |
| "epoch": 4.0408336877924285, |
| "eval_accuracy": 0.8172543715113816, |
| "eval_loss": 0.881291925907135, |
| "eval_runtime": 186.9833, |
| "eval_samples_per_second": 146.708, |
| "eval_steps_per_second": 4.589, |
| "step": 19000 |
| }, |
| { |
| "epoch": 4.147171416418545, |
| "grad_norm": 2.2914931774139404, |
| "learning_rate": 8.05e-05, |
| "loss": 0.8776, |
| "step": 19500 |
| }, |
| { |
| "epoch": 4.147171416418545, |
| "eval_accuracy": 0.8177812507969122, |
| "eval_loss": 0.8755871057510376, |
| "eval_runtime": 187.0401, |
| "eval_samples_per_second": 146.664, |
| "eval_steps_per_second": 4.587, |
| "step": 19500 |
| }, |
| { |
| "epoch": 4.253509145044662, |
| "grad_norm": 2.8364596366882324, |
| "learning_rate": 8e-05, |
| "loss": 0.8751, |
| "step": 20000 |
| }, |
| { |
| "epoch": 4.253509145044662, |
| "eval_accuracy": 0.818812246001166, |
| "eval_loss": 0.8739203214645386, |
| "eval_runtime": 187.8224, |
| "eval_samples_per_second": 146.053, |
| "eval_steps_per_second": 4.568, |
| "step": 20000 |
| }, |
| { |
| "epoch": 4.3598468736707785, |
| "grad_norm": 2.4278838634490967, |
| "learning_rate": 7.950000000000001e-05, |
| "loss": 0.8799, |
| "step": 20500 |
| }, |
| { |
| "epoch": 4.3598468736707785, |
| "eval_accuracy": 0.8195426064288864, |
| "eval_loss": 0.87138432264328, |
| "eval_runtime": 188.1286, |
| "eval_samples_per_second": 145.815, |
| "eval_steps_per_second": 4.561, |
| "step": 20500 |
| }, |
| { |
| "epoch": 4.466184602296895, |
| "grad_norm": 2.5234789848327637, |
| "learning_rate": 7.900000000000001e-05, |
| "loss": 0.8731, |
| "step": 21000 |
| }, |
| { |
| "epoch": 4.466184602296895, |
| "eval_accuracy": 0.8210525102479217, |
| "eval_loss": 0.8586520552635193, |
| "eval_runtime": 187.1495, |
| "eval_samples_per_second": 146.578, |
| "eval_steps_per_second": 4.585, |
| "step": 21000 |
| }, |
| { |
| "epoch": 4.572522330923012, |
| "grad_norm": 2.47117018699646, |
| "learning_rate": 7.850000000000001e-05, |
| "loss": 0.8625, |
| "step": 21500 |
| }, |
| { |
| "epoch": 4.572522330923012, |
| "eval_accuracy": 0.8216630966191832, |
| "eval_loss": 0.8592851758003235, |
| "eval_runtime": 186.2152, |
| "eval_samples_per_second": 147.313, |
| "eval_steps_per_second": 4.608, |
| "step": 21500 |
| }, |
| { |
| "epoch": 4.678860059549128, |
| "grad_norm": 2.6229896545410156, |
| "learning_rate": 7.800000000000001e-05, |
| "loss": 0.8632, |
| "step": 22000 |
| }, |
| { |
| "epoch": 4.678860059549128, |
| "eval_accuracy": 0.8215383999990303, |
| "eval_loss": 0.8571019768714905, |
| "eval_runtime": 188.0905, |
| "eval_samples_per_second": 145.845, |
| "eval_steps_per_second": 4.562, |
| "step": 22000 |
| }, |
| { |
| "epoch": 4.785197788175244, |
| "grad_norm": 2.1132686138153076, |
| "learning_rate": 7.75e-05, |
| "loss": 0.8638, |
| "step": 22500 |
| }, |
| { |
| "epoch": 4.785197788175244, |
| "eval_accuracy": 0.8227068815093186, |
| "eval_loss": 0.8464910387992859, |
| "eval_runtime": 188.695, |
| "eval_samples_per_second": 145.377, |
| "eval_steps_per_second": 4.547, |
| "step": 22500 |
| }, |
| { |
| "epoch": 4.891535516801361, |
| "grad_norm": 2.4239864349365234, |
| "learning_rate": 7.7e-05, |
| "loss": 0.8538, |
| "step": 23000 |
| }, |
| { |
| "epoch": 4.891535516801361, |
| "eval_accuracy": 0.8232777279942184, |
| "eval_loss": 0.8469775915145874, |
| "eval_runtime": 186.3063, |
| "eval_samples_per_second": 147.241, |
| "eval_steps_per_second": 4.605, |
| "step": 23000 |
| }, |
| { |
| "epoch": 4.997873245427478, |
| "grad_norm": 2.2336645126342773, |
| "learning_rate": 7.65e-05, |
| "loss": 0.8541, |
| "step": 23500 |
| }, |
| { |
| "epoch": 4.997873245427478, |
| "eval_accuracy": 0.8230626489512016, |
| "eval_loss": 0.8494424819946289, |
| "eval_runtime": 187.1494, |
| "eval_samples_per_second": 146.578, |
| "eval_steps_per_second": 4.585, |
| "step": 23500 |
| }, |
| { |
| "epoch": 5.104210974053594, |
| "grad_norm": 2.3965516090393066, |
| "learning_rate": 7.6e-05, |
| "loss": 0.841, |
| "step": 24000 |
| }, |
| { |
| "epoch": 5.104210974053594, |
| "eval_accuracy": 0.8237575480873727, |
| "eval_loss": 0.8488872051239014, |
| "eval_runtime": 187.1345, |
| "eval_samples_per_second": 146.59, |
| "eval_steps_per_second": 4.585, |
| "step": 24000 |
| }, |
| { |
| "epoch": 5.210548702679711, |
| "grad_norm": 2.575192451477051, |
| "learning_rate": 7.55e-05, |
| "loss": 0.8402, |
| "step": 24500 |
| }, |
| { |
| "epoch": 5.210548702679711, |
| "eval_accuracy": 0.8252009630693954, |
| "eval_loss": 0.8397406339645386, |
| "eval_runtime": 187.5948, |
| "eval_samples_per_second": 146.23, |
| "eval_steps_per_second": 4.574, |
| "step": 24500 |
| }, |
| { |
| "epoch": 5.316886431305828, |
| "grad_norm": 2.234575033187866, |
| "learning_rate": 7.500000000000001e-05, |
| "loss": 0.8363, |
| "step": 25000 |
| }, |
| { |
| "epoch": 5.316886431305828, |
| "eval_accuracy": 0.8249671018137493, |
| "eval_loss": 0.8448637127876282, |
| "eval_runtime": 186.3423, |
| "eval_samples_per_second": 147.213, |
| "eval_steps_per_second": 4.604, |
| "step": 25000 |
| }, |
| { |
| "epoch": 5.4232241599319435, |
| "grad_norm": 2.2523510456085205, |
| "learning_rate": 7.450000000000001e-05, |
| "loss": 0.8336, |
| "step": 25500 |
| }, |
| { |
| "epoch": 5.4232241599319435, |
| "eval_accuracy": 0.8261139098550124, |
| "eval_loss": 0.8395271897315979, |
| "eval_runtime": 187.766, |
| "eval_samples_per_second": 146.097, |
| "eval_steps_per_second": 4.57, |
| "step": 25500 |
| }, |
| { |
| "epoch": 5.52956188855806, |
| "grad_norm": 2.5487735271453857, |
| "learning_rate": 7.4e-05, |
| "loss": 0.8369, |
| "step": 26000 |
| }, |
| { |
| "epoch": 5.52956188855806, |
| "eval_accuracy": 0.826535426004618, |
| "eval_loss": 0.8330492377281189, |
| "eval_runtime": 189.1247, |
| "eval_samples_per_second": 145.047, |
| "eval_steps_per_second": 4.537, |
| "step": 26000 |
| }, |
| { |
| "epoch": 5.635899617184177, |
| "grad_norm": 2.080281972885132, |
| "learning_rate": 7.35e-05, |
| "loss": 0.8365, |
| "step": 26500 |
| }, |
| { |
| "epoch": 5.635899617184177, |
| "eval_accuracy": 0.8272166865191455, |
| "eval_loss": 0.8262215852737427, |
| "eval_runtime": 187.6646, |
| "eval_samples_per_second": 146.176, |
| "eval_steps_per_second": 4.572, |
| "step": 26500 |
| }, |
| { |
| "epoch": 5.7422373458102935, |
| "grad_norm": 2.453874111175537, |
| "learning_rate": 7.3e-05, |
| "loss": 0.8221, |
| "step": 27000 |
| }, |
| { |
| "epoch": 5.7422373458102935, |
| "eval_accuracy": 0.8277079258525418, |
| "eval_loss": 0.8285869359970093, |
| "eval_runtime": 188.2189, |
| "eval_samples_per_second": 145.745, |
| "eval_steps_per_second": 4.559, |
| "step": 27000 |
| }, |
| { |
| "epoch": 5.84857507443641, |
| "grad_norm": 2.587031841278076, |
| "learning_rate": 7.25e-05, |
| "loss": 0.8235, |
| "step": 27500 |
| }, |
| { |
| "epoch": 5.84857507443641, |
| "eval_accuracy": 0.828009316952864, |
| "eval_loss": 0.8248396515846252, |
| "eval_runtime": 186.6625, |
| "eval_samples_per_second": 146.96, |
| "eval_steps_per_second": 4.597, |
| "step": 27500 |
| }, |
| { |
| "epoch": 5.954912803062527, |
| "grad_norm": 2.36771297454834, |
| "learning_rate": 7.2e-05, |
| "loss": 0.8269, |
| "step": 28000 |
| }, |
| { |
| "epoch": 5.954912803062527, |
| "eval_accuracy": 0.8290436079701959, |
| "eval_loss": 0.8144590854644775, |
| "eval_runtime": 187.3225, |
| "eval_samples_per_second": 146.443, |
| "eval_steps_per_second": 4.58, |
| "step": 28000 |
| }, |
| { |
| "epoch": 6.061250531688643, |
| "grad_norm": 2.362884044647217, |
| "learning_rate": 7.15e-05, |
| "loss": 0.8147, |
| "step": 28500 |
| }, |
| { |
| "epoch": 6.061250531688643, |
| "eval_accuracy": 0.829482515450688, |
| "eval_loss": 0.8172268271446228, |
| "eval_runtime": 185.8934, |
| "eval_samples_per_second": 147.568, |
| "eval_steps_per_second": 4.616, |
| "step": 28500 |
| }, |
| { |
| "epoch": 6.167588260314759, |
| "grad_norm": 2.7787272930145264, |
| "learning_rate": 7.1e-05, |
| "loss": 0.809, |
| "step": 29000 |
| }, |
| { |
| "epoch": 6.167588260314759, |
| "eval_accuracy": 0.8289893847861752, |
| "eval_loss": 0.8245209455490112, |
| "eval_runtime": 185.671, |
| "eval_samples_per_second": 147.745, |
| "eval_steps_per_second": 4.621, |
| "step": 29000 |
| }, |
| { |
| "epoch": 6.273925988940876, |
| "grad_norm": 2.2524912357330322, |
| "learning_rate": 7.05e-05, |
| "loss": 0.8136, |
| "step": 29500 |
| }, |
| { |
| "epoch": 6.273925988940876, |
| "eval_accuracy": 0.8299085027558315, |
| "eval_loss": 0.8161247372627258, |
| "eval_runtime": 187.178, |
| "eval_samples_per_second": 146.556, |
| "eval_steps_per_second": 4.584, |
| "step": 29500 |
| }, |
| { |
| "epoch": 6.380263717566993, |
| "grad_norm": 2.0518226623535156, |
| "learning_rate": 7e-05, |
| "loss": 0.8097, |
| "step": 30000 |
| }, |
| { |
| "epoch": 6.380263717566993, |
| "eval_accuracy": 0.8302947709868315, |
| "eval_loss": 0.8094373941421509, |
| "eval_runtime": 188.6751, |
| "eval_samples_per_second": 145.393, |
| "eval_steps_per_second": 4.548, |
| "step": 30000 |
| }, |
| { |
| "epoch": 6.486601446193109, |
| "grad_norm": 2.2898824214935303, |
| "learning_rate": 6.95e-05, |
| "loss": 0.8027, |
| "step": 30500 |
| }, |
| { |
| "epoch": 6.486601446193109, |
| "eval_accuracy": 0.831080552836758, |
| "eval_loss": 0.8069068789482117, |
| "eval_runtime": 187.7392, |
| "eval_samples_per_second": 146.118, |
| "eval_steps_per_second": 4.57, |
| "step": 30500 |
| }, |
| { |
| "epoch": 6.592939174819226, |
| "grad_norm": 2.191114902496338, |
| "learning_rate": 6.9e-05, |
| "loss": 0.8034, |
| "step": 31000 |
| }, |
| { |
| "epoch": 6.592939174819226, |
| "eval_accuracy": 0.8322840004200974, |
| "eval_loss": 0.7996180057525635, |
| "eval_runtime": 186.9585, |
| "eval_samples_per_second": 146.728, |
| "eval_steps_per_second": 4.589, |
| "step": 31000 |
| }, |
| { |
| "epoch": 6.699276903445343, |
| "grad_norm": 2.166645050048828, |
| "learning_rate": 6.850000000000001e-05, |
| "loss": 0.8014, |
| "step": 31500 |
| }, |
| { |
| "epoch": 6.699276903445343, |
| "eval_accuracy": 0.8322805736104725, |
| "eval_loss": 0.8084205389022827, |
| "eval_runtime": 187.1325, |
| "eval_samples_per_second": 146.591, |
| "eval_steps_per_second": 4.585, |
| "step": 31500 |
| }, |
| { |
| "epoch": 6.805614632071459, |
| "grad_norm": 2.5388972759246826, |
| "learning_rate": 6.800000000000001e-05, |
| "loss": 0.7925, |
| "step": 32000 |
| }, |
| { |
| "epoch": 6.805614632071459, |
| "eval_accuracy": 0.8326493216513361, |
| "eval_loss": 0.7997660040855408, |
| "eval_runtime": 186.3753, |
| "eval_samples_per_second": 147.187, |
| "eval_steps_per_second": 4.604, |
| "step": 32000 |
| }, |
| { |
| "epoch": 6.911952360697575, |
| "grad_norm": 2.3604350090026855, |
| "learning_rate": 6.750000000000001e-05, |
| "loss": 0.8069, |
| "step": 32500 |
| }, |
| { |
| "epoch": 6.911952360697575, |
| "eval_accuracy": 0.8327787324739044, |
| "eval_loss": 0.7998002767562866, |
| "eval_runtime": 186.0892, |
| "eval_samples_per_second": 147.413, |
| "eval_steps_per_second": 4.611, |
| "step": 32500 |
| }, |
| { |
| "epoch": 7.018290089323692, |
| "grad_norm": 2.3000738620758057, |
| "learning_rate": 6.7e-05, |
| "loss": 0.7916, |
| "step": 33000 |
| }, |
| { |
| "epoch": 7.018290089323692, |
| "eval_accuracy": 0.8333142335688971, |
| "eval_loss": 0.7988072037696838, |
| "eval_runtime": 187.1327, |
| "eval_samples_per_second": 146.591, |
| "eval_steps_per_second": 4.585, |
| "step": 33000 |
| }, |
| { |
| "epoch": 7.124627817949809, |
| "grad_norm": 2.031378746032715, |
| "learning_rate": 6.65e-05, |
| "loss": 0.7796, |
| "step": 33500 |
| }, |
| { |
| "epoch": 7.124627817949809, |
| "eval_accuracy": 0.834044136797238, |
| "eval_loss": 0.7932332754135132, |
| "eval_runtime": 186.7728, |
| "eval_samples_per_second": 146.874, |
| "eval_steps_per_second": 4.594, |
| "step": 33500 |
| }, |
| { |
| "epoch": 7.230965546575925, |
| "grad_norm": 2.151765823364258, |
| "learning_rate": 6.6e-05, |
| "loss": 0.783, |
| "step": 34000 |
| }, |
| { |
| "epoch": 7.230965546575925, |
| "eval_accuracy": 0.833573114410175, |
| "eval_loss": 0.7935149073600769, |
| "eval_runtime": 188.5605, |
| "eval_samples_per_second": 145.481, |
| "eval_steps_per_second": 4.55, |
| "step": 34000 |
| }, |
| { |
| "epoch": 7.337303275202042, |
| "grad_norm": 2.1417930126190186, |
| "learning_rate": 6.55e-05, |
| "loss": 0.785, |
| "step": 34500 |
| }, |
| { |
| "epoch": 7.337303275202042, |
| "eval_accuracy": 0.8342695371268045, |
| "eval_loss": 0.7929127216339111, |
| "eval_runtime": 188.5148, |
| "eval_samples_per_second": 145.516, |
| "eval_steps_per_second": 4.551, |
| "step": 34500 |
| }, |
| { |
| "epoch": 7.443641003828159, |
| "grad_norm": 1.9593024253845215, |
| "learning_rate": 6.500000000000001e-05, |
| "loss": 0.7849, |
| "step": 35000 |
| }, |
| { |
| "epoch": 7.443641003828159, |
| "eval_accuracy": 0.8352518766512985, |
| "eval_loss": 0.7853254675865173, |
| "eval_runtime": 187.2799, |
| "eval_samples_per_second": 146.476, |
| "eval_steps_per_second": 4.581, |
| "step": 35000 |
| }, |
| { |
| "epoch": 7.549978732454274, |
| "grad_norm": 2.447838544845581, |
| "learning_rate": 6.450000000000001e-05, |
| "loss": 0.7791, |
| "step": 35500 |
| }, |
| { |
| "epoch": 7.549978732454274, |
| "eval_accuracy": 0.8354121212121212, |
| "eval_loss": 0.7829086184501648, |
| "eval_runtime": 187.3697, |
| "eval_samples_per_second": 146.406, |
| "eval_steps_per_second": 4.579, |
| "step": 35500 |
| }, |
| { |
| "epoch": 7.656316461080391, |
| "grad_norm": 2.3611719608306885, |
| "learning_rate": 6.400000000000001e-05, |
| "loss": 0.772, |
| "step": 36000 |
| }, |
| { |
| "epoch": 7.656316461080391, |
| "eval_accuracy": 0.8359586422824412, |
| "eval_loss": 0.7856774926185608, |
| "eval_runtime": 187.3584, |
| "eval_samples_per_second": 146.415, |
| "eval_steps_per_second": 4.579, |
| "step": 36000 |
| }, |
| { |
| "epoch": 7.762654189706508, |
| "grad_norm": 2.6035830974578857, |
| "learning_rate": 6.35e-05, |
| "loss": 0.7725, |
| "step": 36500 |
| }, |
| { |
| "epoch": 7.762654189706508, |
| "eval_accuracy": 0.8363098633948102, |
| "eval_loss": 0.7833205461502075, |
| "eval_runtime": 187.0804, |
| "eval_samples_per_second": 146.632, |
| "eval_steps_per_second": 4.586, |
| "step": 36500 |
| }, |
| { |
| "epoch": 7.8689919183326245, |
| "grad_norm": 2.1747405529022217, |
| "learning_rate": 6.3e-05, |
| "loss": 0.7726, |
| "step": 37000 |
| }, |
| { |
| "epoch": 7.8689919183326245, |
| "eval_accuracy": 0.8363886951914692, |
| "eval_loss": 0.7823408842086792, |
| "eval_runtime": 187.2496, |
| "eval_samples_per_second": 146.5, |
| "eval_steps_per_second": 4.582, |
| "step": 37000 |
| }, |
| { |
| "epoch": 7.975329646958741, |
| "grad_norm": 2.4442336559295654, |
| "learning_rate": 6.25e-05, |
| "loss": 0.7706, |
| "step": 37500 |
| }, |
| { |
| "epoch": 7.975329646958741, |
| "eval_accuracy": 0.8373462373755904, |
| "eval_loss": 0.7759196758270264, |
| "eval_runtime": 187.0689, |
| "eval_samples_per_second": 146.641, |
| "eval_steps_per_second": 4.587, |
| "step": 37500 |
| }, |
| { |
| "epoch": 8.081667375584857, |
| "grad_norm": 2.2232158184051514, |
| "learning_rate": 6.2e-05, |
| "loss": 0.7681, |
| "step": 38000 |
| }, |
| { |
| "epoch": 8.081667375584857, |
| "eval_accuracy": 0.8376378883351716, |
| "eval_loss": 0.773087203502655, |
| "eval_runtime": 185.8545, |
| "eval_samples_per_second": 147.599, |
| "eval_steps_per_second": 4.617, |
| "step": 38000 |
| }, |
| { |
| "epoch": 8.188005104210974, |
| "grad_norm": 2.019041061401367, |
| "learning_rate": 6.15e-05, |
| "loss": 0.7598, |
| "step": 38500 |
| }, |
| { |
| "epoch": 8.188005104210974, |
| "eval_accuracy": 0.8383952690813153, |
| "eval_loss": 0.7797868847846985, |
| "eval_runtime": 187.2699, |
| "eval_samples_per_second": 146.484, |
| "eval_steps_per_second": 4.582, |
| "step": 38500 |
| }, |
| { |
| "epoch": 8.29434283283709, |
| "grad_norm": 2.744246482849121, |
| "learning_rate": 6.1e-05, |
| "loss": 0.7585, |
| "step": 39000 |
| }, |
| { |
| "epoch": 8.29434283283709, |
| "eval_accuracy": 0.8387710202737253, |
| "eval_loss": 0.77022385597229, |
| "eval_runtime": 185.74, |
| "eval_samples_per_second": 147.69, |
| "eval_steps_per_second": 4.619, |
| "step": 39000 |
| }, |
| { |
| "epoch": 8.400680561463208, |
| "grad_norm": 2.3689775466918945, |
| "learning_rate": 6.05e-05, |
| "loss": 0.7567, |
| "step": 39500 |
| }, |
| { |
| "epoch": 8.400680561463208, |
| "eval_accuracy": 0.8392269122978646, |
| "eval_loss": 0.7701305747032166, |
| "eval_runtime": 187.3057, |
| "eval_samples_per_second": 146.456, |
| "eval_steps_per_second": 4.581, |
| "step": 39500 |
| }, |
| { |
| "epoch": 8.507018290089324, |
| "grad_norm": 2.43866229057312, |
| "learning_rate": 6e-05, |
| "loss": 0.7549, |
| "step": 40000 |
| }, |
| { |
| "epoch": 8.507018290089324, |
| "eval_accuracy": 0.8386237512526904, |
| "eval_loss": 0.7731093168258667, |
| "eval_runtime": 186.3871, |
| "eval_samples_per_second": 147.178, |
| "eval_steps_per_second": 4.603, |
| "step": 40000 |
| }, |
| { |
| "epoch": 8.61335601871544, |
| "grad_norm": 2.1197094917297363, |
| "learning_rate": 5.95e-05, |
| "loss": 0.7566, |
| "step": 40500 |
| }, |
| { |
| "epoch": 8.61335601871544, |
| "eval_accuracy": 0.8395869011774794, |
| "eval_loss": 0.7649192214012146, |
| "eval_runtime": 187.8414, |
| "eval_samples_per_second": 146.038, |
| "eval_steps_per_second": 4.568, |
| "step": 40500 |
| }, |
| { |
| "epoch": 8.719693747341557, |
| "grad_norm": 2.322613000869751, |
| "learning_rate": 5.9e-05, |
| "loss": 0.7556, |
| "step": 41000 |
| }, |
| { |
| "epoch": 8.719693747341557, |
| "eval_accuracy": 0.8396322982752535, |
| "eval_loss": 0.7655606865882874, |
| "eval_runtime": 187.4692, |
| "eval_samples_per_second": 146.328, |
| "eval_steps_per_second": 4.577, |
| "step": 41000 |
| }, |
| { |
| "epoch": 8.826031475967673, |
| "grad_norm": 2.060520887374878, |
| "learning_rate": 5.85e-05, |
| "loss": 0.7477, |
| "step": 41500 |
| }, |
| { |
| "epoch": 8.826031475967673, |
| "eval_accuracy": 0.8404079064518989, |
| "eval_loss": 0.7558547258377075, |
| "eval_runtime": 187.201, |
| "eval_samples_per_second": 146.538, |
| "eval_steps_per_second": 4.583, |
| "step": 41500 |
| }, |
| { |
| "epoch": 8.93236920459379, |
| "grad_norm": 2.261857509613037, |
| "learning_rate": 5.8e-05, |
| "loss": 0.7438, |
| "step": 42000 |
| }, |
| { |
| "epoch": 8.93236920459379, |
| "eval_accuracy": 0.84021015485733, |
| "eval_loss": 0.7601897120475769, |
| "eval_runtime": 187.2973, |
| "eval_samples_per_second": 146.462, |
| "eval_steps_per_second": 4.581, |
| "step": 42000 |
| }, |
| { |
| "epoch": 9.038706933219906, |
| "grad_norm": 2.352423906326294, |
| "learning_rate": 5.7499999999999995e-05, |
| "loss": 0.7454, |
| "step": 42500 |
| }, |
| { |
| "epoch": 9.038706933219906, |
| "eval_accuracy": 0.8411417281077362, |
| "eval_loss": 0.7569445371627808, |
| "eval_runtime": 188.8431, |
| "eval_samples_per_second": 145.263, |
| "eval_steps_per_second": 4.543, |
| "step": 42500 |
| }, |
| { |
| "epoch": 9.145044661846024, |
| "grad_norm": 2.5105371475219727, |
| "learning_rate": 5.6999999999999996e-05, |
| "loss": 0.7441, |
| "step": 43000 |
| }, |
| { |
| "epoch": 9.145044661846024, |
| "eval_accuracy": 0.8420837788872061, |
| "eval_loss": 0.751946210861206, |
| "eval_runtime": 187.964, |
| "eval_samples_per_second": 145.943, |
| "eval_steps_per_second": 4.565, |
| "step": 43000 |
| }, |
| { |
| "epoch": 9.25138239047214, |
| "grad_norm": 2.747666835784912, |
| "learning_rate": 5.65e-05, |
| "loss": 0.7391, |
| "step": 43500 |
| }, |
| { |
| "epoch": 9.25138239047214, |
| "eval_accuracy": 0.8416609664144786, |
| "eval_loss": 0.7541568279266357, |
| "eval_runtime": 187.0727, |
| "eval_samples_per_second": 146.638, |
| "eval_steps_per_second": 4.586, |
| "step": 43500 |
| }, |
| { |
| "epoch": 9.357720119098255, |
| "grad_norm": 2.229844093322754, |
| "learning_rate": 5.6000000000000006e-05, |
| "loss": 0.7345, |
| "step": 44000 |
| }, |
| { |
| "epoch": 9.357720119098255, |
| "eval_accuracy": 0.8417848056902848, |
| "eval_loss": 0.758493185043335, |
| "eval_runtime": 187.0332, |
| "eval_samples_per_second": 146.669, |
| "eval_steps_per_second": 4.587, |
| "step": 44000 |
| }, |
| { |
| "epoch": 9.464057847724373, |
| "grad_norm": 2.2251367568969727, |
| "learning_rate": 5.550000000000001e-05, |
| "loss": 0.7381, |
| "step": 44500 |
| }, |
| { |
| "epoch": 9.464057847724373, |
| "eval_accuracy": 0.8423865712157858, |
| "eval_loss": 0.7545250654220581, |
| "eval_runtime": 185.7507, |
| "eval_samples_per_second": 147.682, |
| "eval_steps_per_second": 4.619, |
| "step": 44500 |
| }, |
| { |
| "epoch": 9.570395576350489, |
| "grad_norm": 2.3922371864318848, |
| "learning_rate": 5.500000000000001e-05, |
| "loss": 0.7385, |
| "step": 45000 |
| }, |
| { |
| "epoch": 9.570395576350489, |
| "eval_accuracy": 0.8431280219033379, |
| "eval_loss": 0.7468847632408142, |
| "eval_runtime": 187.1862, |
| "eval_samples_per_second": 146.549, |
| "eval_steps_per_second": 4.584, |
| "step": 45000 |
| }, |
| { |
| "epoch": 9.676733304976606, |
| "grad_norm": 2.024217367172241, |
| "learning_rate": 5.45e-05, |
| "loss": 0.7319, |
| "step": 45500 |
| }, |
| { |
| "epoch": 9.676733304976606, |
| "eval_accuracy": 0.8432375296998541, |
| "eval_loss": 0.7428926229476929, |
| "eval_runtime": 185.6653, |
| "eval_samples_per_second": 147.75, |
| "eval_steps_per_second": 4.621, |
| "step": 45500 |
| }, |
| { |
| "epoch": 9.783071033602722, |
| "grad_norm": 2.946664571762085, |
| "learning_rate": 5.4000000000000005e-05, |
| "loss": 0.7312, |
| "step": 46000 |
| }, |
| { |
| "epoch": 9.783071033602722, |
| "eval_accuracy": 0.8432175410960274, |
| "eval_loss": 0.7447838187217712, |
| "eval_runtime": 187.318, |
| "eval_samples_per_second": 146.446, |
| "eval_steps_per_second": 4.58, |
| "step": 46000 |
| }, |
| { |
| "epoch": 9.88940876222884, |
| "grad_norm": 2.362518787384033, |
| "learning_rate": 5.3500000000000006e-05, |
| "loss": 0.7264, |
| "step": 46500 |
| }, |
| { |
| "epoch": 9.88940876222884, |
| "eval_accuracy": 0.8436135597948867, |
| "eval_loss": 0.7432146668434143, |
| "eval_runtime": 189.1721, |
| "eval_samples_per_second": 145.011, |
| "eval_steps_per_second": 4.536, |
| "step": 46500 |
| }, |
| { |
| "epoch": 9.995746490854955, |
| "grad_norm": 2.3549137115478516, |
| "learning_rate": 5.300000000000001e-05, |
| "loss": 0.73, |
| "step": 47000 |
| }, |
| { |
| "epoch": 9.995746490854955, |
| "eval_accuracy": 0.8444893924139688, |
| "eval_loss": 0.7410290837287903, |
| "eval_runtime": 189.0361, |
| "eval_samples_per_second": 145.115, |
| "eval_steps_per_second": 4.539, |
| "step": 47000 |
| }, |
| { |
| "epoch": 10.102084219481071, |
| "grad_norm": 2.3257205486297607, |
| "learning_rate": 5.25e-05, |
| "loss": 0.7208, |
| "step": 47500 |
| }, |
| { |
| "epoch": 10.102084219481071, |
| "eval_accuracy": 0.8442721638196152, |
| "eval_loss": 0.7432807087898254, |
| "eval_runtime": 187.0905, |
| "eval_samples_per_second": 146.624, |
| "eval_steps_per_second": 4.586, |
| "step": 47500 |
| }, |
| { |
| "epoch": 10.208421948107189, |
| "grad_norm": 2.081179618835449, |
| "learning_rate": 5.2000000000000004e-05, |
| "loss": 0.7184, |
| "step": 48000 |
| }, |
| { |
| "epoch": 10.208421948107189, |
| "eval_accuracy": 0.8441397724142315, |
| "eval_loss": 0.7414455413818359, |
| "eval_runtime": 187.245, |
| "eval_samples_per_second": 146.503, |
| "eval_steps_per_second": 4.582, |
| "step": 48000 |
| }, |
| { |
| "epoch": 10.314759676733305, |
| "grad_norm": 2.1536877155303955, |
| "learning_rate": 5.1500000000000005e-05, |
| "loss": 0.7197, |
| "step": 48500 |
| }, |
| { |
| "epoch": 10.314759676733305, |
| "eval_accuracy": 0.8453421523524357, |
| "eval_loss": 0.7430285215377808, |
| "eval_runtime": 186.8496, |
| "eval_samples_per_second": 146.813, |
| "eval_steps_per_second": 4.592, |
| "step": 48500 |
| }, |
| { |
| "epoch": 10.421097405359422, |
| "grad_norm": 2.0332043170928955, |
| "learning_rate": 5.1000000000000006e-05, |
| "loss": 0.7216, |
| "step": 49000 |
| }, |
| { |
| "epoch": 10.421097405359422, |
| "eval_accuracy": 0.8457369996973606, |
| "eval_loss": 0.735456645488739, |
| "eval_runtime": 186.981, |
| "eval_samples_per_second": 146.71, |
| "eval_steps_per_second": 4.589, |
| "step": 49000 |
| }, |
| { |
| "epoch": 10.527435133985538, |
| "grad_norm": 2.408484935760498, |
| "learning_rate": 5.05e-05, |
| "loss": 0.7136, |
| "step": 49500 |
| }, |
| { |
| "epoch": 10.527435133985538, |
| "eval_accuracy": 0.844874850108529, |
| "eval_loss": 0.7428346872329712, |
| "eval_runtime": 187.2438, |
| "eval_samples_per_second": 146.504, |
| "eval_steps_per_second": 4.582, |
| "step": 49500 |
| }, |
| { |
| "epoch": 10.633772862611655, |
| "grad_norm": 2.188323497772217, |
| "learning_rate": 5e-05, |
| "loss": 0.7181, |
| "step": 50000 |
| }, |
| { |
| "epoch": 10.633772862611655, |
| "eval_accuracy": 0.8461618686867156, |
| "eval_loss": 0.7299309372901917, |
| "eval_runtime": 186.3046, |
| "eval_samples_per_second": 147.243, |
| "eval_steps_per_second": 4.605, |
| "step": 50000 |
| }, |
| { |
| "epoch": 10.740110591237771, |
| "grad_norm": 2.0275685787200928, |
| "learning_rate": 4.9500000000000004e-05, |
| "loss": 0.7146, |
| "step": 50500 |
| }, |
| { |
| "epoch": 10.740110591237771, |
| "eval_accuracy": 0.8468640226233667, |
| "eval_loss": 0.7294827103614807, |
| "eval_runtime": 188.3539, |
| "eval_samples_per_second": 145.641, |
| "eval_steps_per_second": 4.555, |
| "step": 50500 |
| }, |
| { |
| "epoch": 10.846448319863887, |
| "grad_norm": 2.075991630554199, |
| "learning_rate": 4.9e-05, |
| "loss": 0.7155, |
| "step": 51000 |
| }, |
| { |
| "epoch": 10.846448319863887, |
| "eval_accuracy": 0.8462509637157972, |
| "eval_loss": 0.7348110675811768, |
| "eval_runtime": 187.3432, |
| "eval_samples_per_second": 146.426, |
| "eval_steps_per_second": 4.58, |
| "step": 51000 |
| }, |
| { |
| "epoch": 10.952786048490005, |
| "grad_norm": 2.083899736404419, |
| "learning_rate": 4.85e-05, |
| "loss": 0.709, |
| "step": 51500 |
| }, |
| { |
| "epoch": 10.952786048490005, |
| "eval_accuracy": 0.8467475927498435, |
| "eval_loss": 0.7246316075325012, |
| "eval_runtime": 186.2473, |
| "eval_samples_per_second": 147.288, |
| "eval_steps_per_second": 4.607, |
| "step": 51500 |
| }, |
| { |
| "epoch": 11.05912377711612, |
| "grad_norm": 2.051508903503418, |
| "learning_rate": 4.8e-05, |
| "loss": 0.709, |
| "step": 52000 |
| }, |
| { |
| "epoch": 11.05912377711612, |
| "eval_accuracy": 0.8473245959585851, |
| "eval_loss": 0.727922797203064, |
| "eval_runtime": 186.0664, |
| "eval_samples_per_second": 147.431, |
| "eval_steps_per_second": 4.611, |
| "step": 52000 |
| }, |
| { |
| "epoch": 11.165461505742238, |
| "grad_norm": 1.8798682689666748, |
| "learning_rate": 4.75e-05, |
| "loss": 0.7038, |
| "step": 52500 |
| }, |
| { |
| "epoch": 11.165461505742238, |
| "eval_accuracy": 0.8470500338964946, |
| "eval_loss": 0.7311117649078369, |
| "eval_runtime": 185.7398, |
| "eval_samples_per_second": 147.69, |
| "eval_steps_per_second": 4.619, |
| "step": 52500 |
| }, |
| { |
| "epoch": 11.271799234368354, |
| "grad_norm": 2.2613861560821533, |
| "learning_rate": 4.7e-05, |
| "loss": 0.7072, |
| "step": 53000 |
| }, |
| { |
| "epoch": 11.271799234368354, |
| "eval_accuracy": 0.8469547536409501, |
| "eval_loss": 0.7293407917022705, |
| "eval_runtime": 186.6537, |
| "eval_samples_per_second": 146.967, |
| "eval_steps_per_second": 4.597, |
| "step": 53000 |
| }, |
| { |
| "epoch": 11.378136962994471, |
| "grad_norm": 2.284278631210327, |
| "learning_rate": 4.6500000000000005e-05, |
| "loss": 0.6982, |
| "step": 53500 |
| }, |
| { |
| "epoch": 11.378136962994471, |
| "eval_accuracy": 0.8488894363564872, |
| "eval_loss": 0.7241687774658203, |
| "eval_runtime": 187.2053, |
| "eval_samples_per_second": 146.534, |
| "eval_steps_per_second": 4.583, |
| "step": 53500 |
| }, |
| { |
| "epoch": 11.484474691620587, |
| "grad_norm": 2.0332393646240234, |
| "learning_rate": 4.600000000000001e-05, |
| "loss": 0.6979, |
| "step": 54000 |
| }, |
| { |
| "epoch": 11.484474691620587, |
| "eval_accuracy": 0.8488223472254323, |
| "eval_loss": 0.7191620469093323, |
| "eval_runtime": 186.9506, |
| "eval_samples_per_second": 146.734, |
| "eval_steps_per_second": 4.589, |
| "step": 54000 |
| }, |
| { |
| "epoch": 11.590812420246703, |
| "grad_norm": 2.3902156352996826, |
| "learning_rate": 4.55e-05, |
| "loss": 0.7061, |
| "step": 54500 |
| }, |
| { |
| "epoch": 11.590812420246703, |
| "eval_accuracy": 0.8484331299628768, |
| "eval_loss": 0.7225244641304016, |
| "eval_runtime": 188.9334, |
| "eval_samples_per_second": 145.194, |
| "eval_steps_per_second": 4.541, |
| "step": 54500 |
| }, |
| { |
| "epoch": 11.69715014887282, |
| "grad_norm": 2.1105005741119385, |
| "learning_rate": 4.5e-05, |
| "loss": 0.6908, |
| "step": 55000 |
| }, |
| { |
| "epoch": 11.69715014887282, |
| "eval_accuracy": 0.8491300232964473, |
| "eval_loss": 0.7186225652694702, |
| "eval_runtime": 187.6216, |
| "eval_samples_per_second": 146.209, |
| "eval_steps_per_second": 4.573, |
| "step": 55000 |
| }, |
| { |
| "epoch": 11.803487877498936, |
| "grad_norm": 2.3430845737457275, |
| "learning_rate": 4.4500000000000004e-05, |
| "loss": 0.6982, |
| "step": 55500 |
| }, |
| { |
| "epoch": 11.803487877498936, |
| "eval_accuracy": 0.848985207075478, |
| "eval_loss": 0.7230582237243652, |
| "eval_runtime": 187.7234, |
| "eval_samples_per_second": 146.13, |
| "eval_steps_per_second": 4.571, |
| "step": 55500 |
| }, |
| { |
| "epoch": 11.909825606125054, |
| "grad_norm": 2.3165669441223145, |
| "learning_rate": 4.4000000000000006e-05, |
| "loss": 0.6968, |
| "step": 56000 |
| }, |
| { |
| "epoch": 11.909825606125054, |
| "eval_accuracy": 0.8496422899025508, |
| "eval_loss": 0.7111759185791016, |
| "eval_runtime": 187.4207, |
| "eval_samples_per_second": 146.366, |
| "eval_steps_per_second": 4.578, |
| "step": 56000 |
| }, |
| { |
| "epoch": 12.01616333475117, |
| "grad_norm": 2.1263792514801025, |
| "learning_rate": 4.35e-05, |
| "loss": 0.6899, |
| "step": 56500 |
| }, |
| { |
| "epoch": 12.01616333475117, |
| "eval_accuracy": 0.849960832248035, |
| "eval_loss": 0.7166005373001099, |
| "eval_runtime": 187.5327, |
| "eval_samples_per_second": 146.278, |
| "eval_steps_per_second": 4.575, |
| "step": 56500 |
| }, |
| { |
| "epoch": 12.122501063377285, |
| "grad_norm": 1.994381070137024, |
| "learning_rate": 4.3e-05, |
| "loss": 0.6861, |
| "step": 57000 |
| }, |
| { |
| "epoch": 12.122501063377285, |
| "eval_accuracy": 0.8500555930888853, |
| "eval_loss": 0.7198529839515686, |
| "eval_runtime": 186.319, |
| "eval_samples_per_second": 147.231, |
| "eval_steps_per_second": 4.605, |
| "step": 57000 |
| }, |
| { |
| "epoch": 12.228838792003403, |
| "grad_norm": 2.1838319301605225, |
| "learning_rate": 4.25e-05, |
| "loss": 0.6904, |
| "step": 57500 |
| }, |
| { |
| "epoch": 12.228838792003403, |
| "eval_accuracy": 0.850506019072164, |
| "eval_loss": 0.7138729095458984, |
| "eval_runtime": 186.3075, |
| "eval_samples_per_second": 147.24, |
| "eval_steps_per_second": 4.605, |
| "step": 57500 |
| }, |
| { |
| "epoch": 12.335176520629519, |
| "grad_norm": 2.116994857788086, |
| "learning_rate": 4.2e-05, |
| "loss": 0.6865, |
| "step": 58000 |
| }, |
| { |
| "epoch": 12.335176520629519, |
| "eval_accuracy": 0.8506285997887376, |
| "eval_loss": 0.7130934596061707, |
| "eval_runtime": 187.4216, |
| "eval_samples_per_second": 146.365, |
| "eval_steps_per_second": 4.578, |
| "step": 58000 |
| }, |
| { |
| "epoch": 12.441514249255636, |
| "grad_norm": 2.377434015274048, |
| "learning_rate": 4.15e-05, |
| "loss": 0.6812, |
| "step": 58500 |
| }, |
| { |
| "epoch": 12.441514249255636, |
| "eval_accuracy": 0.8510783043990745, |
| "eval_loss": 0.7050937414169312, |
| "eval_runtime": 189.3457, |
| "eval_samples_per_second": 144.878, |
| "eval_steps_per_second": 4.531, |
| "step": 58500 |
| }, |
| { |
| "epoch": 12.547851977881752, |
| "grad_norm": 2.320155143737793, |
| "learning_rate": 4.1e-05, |
| "loss": 0.6907, |
| "step": 59000 |
| }, |
| { |
| "epoch": 12.547851977881752, |
| "eval_accuracy": 0.8511123081850741, |
| "eval_loss": 0.7095110416412354, |
| "eval_runtime": 187.9801, |
| "eval_samples_per_second": 145.93, |
| "eval_steps_per_second": 4.564, |
| "step": 59000 |
| }, |
| { |
| "epoch": 12.65418970650787, |
| "grad_norm": 2.1185412406921387, |
| "learning_rate": 4.05e-05, |
| "loss": 0.6849, |
| "step": 59500 |
| }, |
| { |
| "epoch": 12.65418970650787, |
| "eval_accuracy": 0.8515246474410098, |
| "eval_loss": 0.7070408463478088, |
| "eval_runtime": 187.5124, |
| "eval_samples_per_second": 146.294, |
| "eval_steps_per_second": 4.576, |
| "step": 59500 |
| }, |
| { |
| "epoch": 12.760527435133985, |
| "grad_norm": 2.21907639503479, |
| "learning_rate": 4e-05, |
| "loss": 0.6789, |
| "step": 60000 |
| }, |
| { |
| "epoch": 12.760527435133985, |
| "eval_accuracy": 0.8517749754931363, |
| "eval_loss": 0.7055286765098572, |
| "eval_runtime": 188.9775, |
| "eval_samples_per_second": 145.16, |
| "eval_steps_per_second": 4.54, |
| "step": 60000 |
| }, |
| { |
| "epoch": 12.866865163760101, |
| "grad_norm": 2.1278369426727295, |
| "learning_rate": 3.9500000000000005e-05, |
| "loss": 0.6767, |
| "step": 60500 |
| }, |
| { |
| "epoch": 12.866865163760101, |
| "eval_accuracy": 0.8519451108850803, |
| "eval_loss": 0.706390917301178, |
| "eval_runtime": 188.7478, |
| "eval_samples_per_second": 145.337, |
| "eval_steps_per_second": 4.546, |
| "step": 60500 |
| }, |
| { |
| "epoch": 12.973202892386219, |
| "grad_norm": 2.2294068336486816, |
| "learning_rate": 3.9000000000000006e-05, |
| "loss": 0.6828, |
| "step": 61000 |
| }, |
| { |
| "epoch": 12.973202892386219, |
| "eval_accuracy": 0.8526284277549754, |
| "eval_loss": 0.7011674046516418, |
| "eval_runtime": 188.2328, |
| "eval_samples_per_second": 145.734, |
| "eval_steps_per_second": 4.558, |
| "step": 61000 |
| }, |
| { |
| "epoch": 13.079540621012335, |
| "grad_norm": 2.4725828170776367, |
| "learning_rate": 3.85e-05, |
| "loss": 0.6706, |
| "step": 61500 |
| }, |
| { |
| "epoch": 13.079540621012335, |
| "eval_accuracy": 0.8527708579766078, |
| "eval_loss": 0.7007238268852234, |
| "eval_runtime": 188.5594, |
| "eval_samples_per_second": 145.482, |
| "eval_steps_per_second": 4.55, |
| "step": 61500 |
| }, |
| { |
| "epoch": 13.185878349638452, |
| "grad_norm": 2.164578676223755, |
| "learning_rate": 3.8e-05, |
| "loss": 0.6713, |
| "step": 62000 |
| }, |
| { |
| "epoch": 13.185878349638452, |
| "eval_accuracy": 0.8523022405661091, |
| "eval_loss": 0.7039057612419128, |
| "eval_runtime": 189.6905, |
| "eval_samples_per_second": 144.615, |
| "eval_steps_per_second": 4.523, |
| "step": 62000 |
| }, |
| { |
| "epoch": 13.292216078264568, |
| "grad_norm": 2.413243532180786, |
| "learning_rate": 3.7500000000000003e-05, |
| "loss": 0.6688, |
| "step": 62500 |
| }, |
| { |
| "epoch": 13.292216078264568, |
| "eval_accuracy": 0.8524316826829147, |
| "eval_loss": 0.7022396922111511, |
| "eval_runtime": 187.5826, |
| "eval_samples_per_second": 146.24, |
| "eval_steps_per_second": 4.574, |
| "step": 62500 |
| }, |
| { |
| "epoch": 13.398553806890686, |
| "grad_norm": 2.042698383331299, |
| "learning_rate": 3.7e-05, |
| "loss": 0.6768, |
| "step": 63000 |
| }, |
| { |
| "epoch": 13.398553806890686, |
| "eval_accuracy": 0.8535561550741191, |
| "eval_loss": 0.6964840888977051, |
| "eval_runtime": 187.9201, |
| "eval_samples_per_second": 145.977, |
| "eval_steps_per_second": 4.566, |
| "step": 63000 |
| }, |
| { |
| "epoch": 13.504891535516801, |
| "grad_norm": 2.041522741317749, |
| "learning_rate": 3.65e-05, |
| "loss": 0.6645, |
| "step": 63500 |
| }, |
| { |
| "epoch": 13.504891535516801, |
| "eval_accuracy": 0.853386487539177, |
| "eval_loss": 0.7000829577445984, |
| "eval_runtime": 187.7215, |
| "eval_samples_per_second": 146.131, |
| "eval_steps_per_second": 4.571, |
| "step": 63500 |
| }, |
| { |
| "epoch": 13.611229264142917, |
| "grad_norm": 1.9478411674499512, |
| "learning_rate": 3.6e-05, |
| "loss": 0.6652, |
| "step": 64000 |
| }, |
| { |
| "epoch": 13.611229264142917, |
| "eval_accuracy": 0.8545154193036631, |
| "eval_loss": 0.6918612718582153, |
| "eval_runtime": 186.6017, |
| "eval_samples_per_second": 147.008, |
| "eval_steps_per_second": 4.598, |
| "step": 64000 |
| }, |
| { |
| "epoch": 13.717566992769035, |
| "grad_norm": 2.164696216583252, |
| "learning_rate": 3.55e-05, |
| "loss": 0.669, |
| "step": 64500 |
| }, |
| { |
| "epoch": 13.717566992769035, |
| "eval_accuracy": 0.8544395509174251, |
| "eval_loss": 0.6917126774787903, |
| "eval_runtime": 187.4287, |
| "eval_samples_per_second": 146.36, |
| "eval_steps_per_second": 4.578, |
| "step": 64500 |
| }, |
| { |
| "epoch": 13.82390472139515, |
| "grad_norm": 2.247715711593628, |
| "learning_rate": 3.5e-05, |
| "loss": 0.6683, |
| "step": 65000 |
| }, |
| { |
| "epoch": 13.82390472139515, |
| "eval_accuracy": 0.8542704238784817, |
| "eval_loss": 0.6937560439109802, |
| "eval_runtime": 188.8636, |
| "eval_samples_per_second": 145.248, |
| "eval_steps_per_second": 4.543, |
| "step": 65000 |
| }, |
| { |
| "epoch": 13.930242450021268, |
| "grad_norm": 2.264913558959961, |
| "learning_rate": 3.45e-05, |
| "loss": 0.6655, |
| "step": 65500 |
| }, |
| { |
| "epoch": 13.930242450021268, |
| "eval_accuracy": 0.8545225605898715, |
| "eval_loss": 0.6955862045288086, |
| "eval_runtime": 187.5492, |
| "eval_samples_per_second": 146.266, |
| "eval_steps_per_second": 4.575, |
| "step": 65500 |
| }, |
| { |
| "epoch": 14.036580178647384, |
| "grad_norm": 2.3346023559570312, |
| "learning_rate": 3.4000000000000007e-05, |
| "loss": 0.6587, |
| "step": 66000 |
| }, |
| { |
| "epoch": 14.036580178647384, |
| "eval_accuracy": 0.8548906265850504, |
| "eval_loss": 0.6948685646057129, |
| "eval_runtime": 187.7667, |
| "eval_samples_per_second": 146.096, |
| "eval_steps_per_second": 4.57, |
| "step": 66000 |
| }, |
| { |
| "epoch": 14.142917907273501, |
| "grad_norm": 2.054490089416504, |
| "learning_rate": 3.35e-05, |
| "loss": 0.6562, |
| "step": 66500 |
| }, |
| { |
| "epoch": 14.142917907273501, |
| "eval_accuracy": 0.8547657424014736, |
| "eval_loss": 0.6918261647224426, |
| "eval_runtime": 188.2884, |
| "eval_samples_per_second": 145.691, |
| "eval_steps_per_second": 4.557, |
| "step": 66500 |
| }, |
| { |
| "epoch": 14.249255635899617, |
| "grad_norm": 2.101318359375, |
| "learning_rate": 3.3e-05, |
| "loss": 0.6524, |
| "step": 67000 |
| }, |
| { |
| "epoch": 14.249255635899617, |
| "eval_accuracy": 0.8545756667919997, |
| "eval_loss": 0.6881661415100098, |
| "eval_runtime": 188.3077, |
| "eval_samples_per_second": 145.676, |
| "eval_steps_per_second": 4.556, |
| "step": 67000 |
| }, |
| { |
| "epoch": 14.355593364525733, |
| "grad_norm": 2.3317761421203613, |
| "learning_rate": 3.2500000000000004e-05, |
| "loss": 0.653, |
| "step": 67500 |
| }, |
| { |
| "epoch": 14.355593364525733, |
| "eval_accuracy": 0.8556463396940239, |
| "eval_loss": 0.6863633394241333, |
| "eval_runtime": 187.6402, |
| "eval_samples_per_second": 146.195, |
| "eval_steps_per_second": 4.573, |
| "step": 67500 |
| }, |
| { |
| "epoch": 14.46193109315185, |
| "grad_norm": 2.2883026599884033, |
| "learning_rate": 3.2000000000000005e-05, |
| "loss": 0.6592, |
| "step": 68000 |
| }, |
| { |
| "epoch": 14.46193109315185, |
| "eval_accuracy": 0.8556758477466689, |
| "eval_loss": 0.6878767609596252, |
| "eval_runtime": 187.2987, |
| "eval_samples_per_second": 146.461, |
| "eval_steps_per_second": 4.581, |
| "step": 68000 |
| }, |
| { |
| "epoch": 14.568268821777966, |
| "grad_norm": 2.058140993118286, |
| "learning_rate": 3.15e-05, |
| "loss": 0.654, |
| "step": 68500 |
| }, |
| { |
| "epoch": 14.568268821777966, |
| "eval_accuracy": 0.8560685191587963, |
| "eval_loss": 0.6820354461669922, |
| "eval_runtime": 187.489, |
| "eval_samples_per_second": 146.313, |
| "eval_steps_per_second": 4.576, |
| "step": 68500 |
| }, |
| { |
| "epoch": 14.674606550404084, |
| "grad_norm": 2.3248109817504883, |
| "learning_rate": 3.1e-05, |
| "loss": 0.6535, |
| "step": 69000 |
| }, |
| { |
| "epoch": 14.674606550404084, |
| "eval_accuracy": 0.8565050750014839, |
| "eval_loss": 0.6821831464767456, |
| "eval_runtime": 187.5661, |
| "eval_samples_per_second": 146.252, |
| "eval_steps_per_second": 4.574, |
| "step": 69000 |
| }, |
| { |
| "epoch": 14.7809442790302, |
| "grad_norm": 2.507769823074341, |
| "learning_rate": 3.05e-05, |
| "loss": 0.6572, |
| "step": 69500 |
| }, |
| { |
| "epoch": 14.7809442790302, |
| "eval_accuracy": 0.8568994154448439, |
| "eval_loss": 0.6748529672622681, |
| "eval_runtime": 187.4579, |
| "eval_samples_per_second": 146.337, |
| "eval_steps_per_second": 4.577, |
| "step": 69500 |
| }, |
| { |
| "epoch": 14.887282007656317, |
| "grad_norm": 1.7414127588272095, |
| "learning_rate": 3e-05, |
| "loss": 0.653, |
| "step": 70000 |
| }, |
| { |
| "epoch": 14.887282007656317, |
| "eval_accuracy": 0.8568193158641805, |
| "eval_loss": 0.6834676861763, |
| "eval_runtime": 187.2966, |
| "eval_samples_per_second": 146.463, |
| "eval_steps_per_second": 4.581, |
| "step": 70000 |
| }, |
| { |
| "epoch": 14.993619736282433, |
| "grad_norm": 2.1479337215423584, |
| "learning_rate": 2.95e-05, |
| "loss": 0.6476, |
| "step": 70500 |
| }, |
| { |
| "epoch": 14.993619736282433, |
| "eval_accuracy": 0.8570982400326026, |
| "eval_loss": 0.6762750148773193, |
| "eval_runtime": 186.0961, |
| "eval_samples_per_second": 147.408, |
| "eval_steps_per_second": 4.611, |
| "step": 70500 |
| }, |
| { |
| "epoch": 15.099957464908549, |
| "grad_norm": 2.4283432960510254, |
| "learning_rate": 2.9e-05, |
| "loss": 0.6454, |
| "step": 71000 |
| }, |
| { |
| "epoch": 15.099957464908549, |
| "eval_accuracy": 0.8570932978007869, |
| "eval_loss": 0.6805168986320496, |
| "eval_runtime": 187.4834, |
| "eval_samples_per_second": 146.317, |
| "eval_steps_per_second": 4.576, |
| "step": 71000 |
| }, |
| { |
| "epoch": 15.206295193534666, |
| "grad_norm": 2.3493471145629883, |
| "learning_rate": 2.8499999999999998e-05, |
| "loss": 0.6492, |
| "step": 71500 |
| }, |
| { |
| "epoch": 15.206295193534666, |
| "eval_accuracy": 0.857002769504863, |
| "eval_loss": 0.6780962944030762, |
| "eval_runtime": 187.5133, |
| "eval_samples_per_second": 146.294, |
| "eval_steps_per_second": 4.576, |
| "step": 71500 |
| }, |
| { |
| "epoch": 15.312632922160782, |
| "grad_norm": 2.315495729446411, |
| "learning_rate": 2.8000000000000003e-05, |
| "loss": 0.6417, |
| "step": 72000 |
| }, |
| { |
| "epoch": 15.312632922160782, |
| "eval_accuracy": 0.8574317167307508, |
| "eval_loss": 0.67529296875, |
| "eval_runtime": 187.9011, |
| "eval_samples_per_second": 145.992, |
| "eval_steps_per_second": 4.566, |
| "step": 72000 |
| }, |
| { |
| "epoch": 15.4189706507869, |
| "grad_norm": 2.3548035621643066, |
| "learning_rate": 2.7500000000000004e-05, |
| "loss": 0.6411, |
| "step": 72500 |
| }, |
| { |
| "epoch": 15.4189706507869, |
| "eval_accuracy": 0.8574519944946007, |
| "eval_loss": 0.6762493252754211, |
| "eval_runtime": 187.6451, |
| "eval_samples_per_second": 146.191, |
| "eval_steps_per_second": 4.572, |
| "step": 72500 |
| }, |
| { |
| "epoch": 15.525308379413016, |
| "grad_norm": 2.073268175125122, |
| "learning_rate": 2.7000000000000002e-05, |
| "loss": 0.6427, |
| "step": 73000 |
| }, |
| { |
| "epoch": 15.525308379413016, |
| "eval_accuracy": 0.8576201590411181, |
| "eval_loss": 0.6761339902877808, |
| "eval_runtime": 189.4074, |
| "eval_samples_per_second": 144.831, |
| "eval_steps_per_second": 4.53, |
| "step": 73000 |
| }, |
| { |
| "epoch": 15.631646108039131, |
| "grad_norm": 1.8768184185028076, |
| "learning_rate": 2.6500000000000004e-05, |
| "loss": 0.6427, |
| "step": 73500 |
| }, |
| { |
| "epoch": 15.631646108039131, |
| "eval_accuracy": 0.8576710592022652, |
| "eval_loss": 0.6767549514770508, |
| "eval_runtime": 189.8952, |
| "eval_samples_per_second": 144.459, |
| "eval_steps_per_second": 4.518, |
| "step": 73500 |
| }, |
| { |
| "epoch": 15.737983836665249, |
| "grad_norm": 2.476339817047119, |
| "learning_rate": 2.6000000000000002e-05, |
| "loss": 0.6418, |
| "step": 74000 |
| }, |
| { |
| "epoch": 15.737983836665249, |
| "eval_accuracy": 0.8583988090867217, |
| "eval_loss": 0.6754936575889587, |
| "eval_runtime": 187.0962, |
| "eval_samples_per_second": 146.62, |
| "eval_steps_per_second": 4.586, |
| "step": 74000 |
| }, |
| { |
| "epoch": 15.844321565291365, |
| "grad_norm": 2.2399730682373047, |
| "learning_rate": 2.5500000000000003e-05, |
| "loss": 0.6391, |
| "step": 74500 |
| }, |
| { |
| "epoch": 15.844321565291365, |
| "eval_accuracy": 0.8587047439506456, |
| "eval_loss": 0.6762666702270508, |
| "eval_runtime": 187.0028, |
| "eval_samples_per_second": 146.693, |
| "eval_steps_per_second": 4.588, |
| "step": 74500 |
| }, |
| { |
| "epoch": 15.950659293917482, |
| "grad_norm": 2.1147332191467285, |
| "learning_rate": 2.5e-05, |
| "loss": 0.6324, |
| "step": 75000 |
| }, |
| { |
| "epoch": 15.950659293917482, |
| "eval_accuracy": 0.8583270548777524, |
| "eval_loss": 0.6746546626091003, |
| "eval_runtime": 186.0971, |
| "eval_samples_per_second": 147.407, |
| "eval_steps_per_second": 4.61, |
| "step": 75000 |
| }, |
| { |
| "epoch": 16.056997022543598, |
| "grad_norm": 1.9580026865005493, |
| "learning_rate": 2.45e-05, |
| "loss": 0.635, |
| "step": 75500 |
| }, |
| { |
| "epoch": 16.056997022543598, |
| "eval_accuracy": 0.8590129341873303, |
| "eval_loss": 0.665653645992279, |
| "eval_runtime": 186.3662, |
| "eval_samples_per_second": 147.194, |
| "eval_steps_per_second": 4.604, |
| "step": 75500 |
| }, |
| { |
| "epoch": 16.163334751169714, |
| "grad_norm": 1.9090304374694824, |
| "learning_rate": 2.4e-05, |
| "loss": 0.6301, |
| "step": 76000 |
| }, |
| { |
| "epoch": 16.163334751169714, |
| "eval_accuracy": 0.8595268739500587, |
| "eval_loss": 0.6707313060760498, |
| "eval_runtime": 186.5814, |
| "eval_samples_per_second": 147.024, |
| "eval_steps_per_second": 4.599, |
| "step": 76000 |
| }, |
| { |
| "epoch": 16.269672479795833, |
| "grad_norm": 2.125847101211548, |
| "learning_rate": 2.35e-05, |
| "loss": 0.6325, |
| "step": 76500 |
| }, |
| { |
| "epoch": 16.269672479795833, |
| "eval_accuracy": 0.8596205331865414, |
| "eval_loss": 0.6737939715385437, |
| "eval_runtime": 186.3349, |
| "eval_samples_per_second": 147.219, |
| "eval_steps_per_second": 4.605, |
| "step": 76500 |
| }, |
| { |
| "epoch": 16.37601020842195, |
| "grad_norm": 2.348851203918457, |
| "learning_rate": 2.3000000000000003e-05, |
| "loss": 0.6278, |
| "step": 77000 |
| }, |
| { |
| "epoch": 16.37601020842195, |
| "eval_accuracy": 0.8598069827368299, |
| "eval_loss": 0.6713533401489258, |
| "eval_runtime": 187.2971, |
| "eval_samples_per_second": 146.462, |
| "eval_steps_per_second": 4.581, |
| "step": 77000 |
| }, |
| { |
| "epoch": 16.482347937048065, |
| "grad_norm": 1.935435175895691, |
| "learning_rate": 2.25e-05, |
| "loss": 0.6333, |
| "step": 77500 |
| }, |
| { |
| "epoch": 16.482347937048065, |
| "eval_accuracy": 0.8600093652609475, |
| "eval_loss": 0.6650431752204895, |
| "eval_runtime": 187.5594, |
| "eval_samples_per_second": 146.258, |
| "eval_steps_per_second": 4.575, |
| "step": 77500 |
| }, |
| { |
| "epoch": 16.58868566567418, |
| "grad_norm": 2.2629425525665283, |
| "learning_rate": 2.2000000000000003e-05, |
| "loss": 0.6296, |
| "step": 78000 |
| }, |
| { |
| "epoch": 16.58868566567418, |
| "eval_accuracy": 0.8603086354661712, |
| "eval_loss": 0.6635463237762451, |
| "eval_runtime": 186.2259, |
| "eval_samples_per_second": 147.305, |
| "eval_steps_per_second": 4.607, |
| "step": 78000 |
| }, |
| { |
| "epoch": 16.695023394300296, |
| "grad_norm": 2.1555726528167725, |
| "learning_rate": 2.15e-05, |
| "loss": 0.6278, |
| "step": 78500 |
| }, |
| { |
| "epoch": 16.695023394300296, |
| "eval_accuracy": 0.8604913225558962, |
| "eval_loss": 0.6623604893684387, |
| "eval_runtime": 187.7075, |
| "eval_samples_per_second": 146.142, |
| "eval_steps_per_second": 4.571, |
| "step": 78500 |
| }, |
| { |
| "epoch": 16.801361122926416, |
| "grad_norm": 2.1904544830322266, |
| "learning_rate": 2.1e-05, |
| "loss": 0.6264, |
| "step": 79000 |
| }, |
| { |
| "epoch": 16.801361122926416, |
| "eval_accuracy": 0.8604346510079303, |
| "eval_loss": 0.6690334677696228, |
| "eval_runtime": 187.4243, |
| "eval_samples_per_second": 146.363, |
| "eval_steps_per_second": 4.578, |
| "step": 79000 |
| }, |
| { |
| "epoch": 16.90769885155253, |
| "grad_norm": 2.208678722381592, |
| "learning_rate": 2.05e-05, |
| "loss": 0.6303, |
| "step": 79500 |
| }, |
| { |
| "epoch": 16.90769885155253, |
| "eval_accuracy": 0.8602657766644671, |
| "eval_loss": 0.663872480392456, |
| "eval_runtime": 187.6784, |
| "eval_samples_per_second": 146.165, |
| "eval_steps_per_second": 4.572, |
| "step": 79500 |
| }, |
| { |
| "epoch": 17.014036580178647, |
| "grad_norm": 2.0841898918151855, |
| "learning_rate": 2e-05, |
| "loss": 0.6261, |
| "step": 80000 |
| }, |
| { |
| "epoch": 17.014036580178647, |
| "eval_accuracy": 0.860494846911361, |
| "eval_loss": 0.6641469597816467, |
| "eval_runtime": 186.8732, |
| "eval_samples_per_second": 146.795, |
| "eval_steps_per_second": 4.591, |
| "step": 80000 |
| }, |
| { |
| "epoch": 17.120374308804763, |
| "grad_norm": 1.9734201431274414, |
| "learning_rate": 1.9500000000000003e-05, |
| "loss": 0.6188, |
| "step": 80500 |
| }, |
| { |
| "epoch": 17.120374308804763, |
| "eval_accuracy": 0.8611235949597102, |
| "eval_loss": 0.6597367525100708, |
| "eval_runtime": 187.1259, |
| "eval_samples_per_second": 146.596, |
| "eval_steps_per_second": 4.585, |
| "step": 80500 |
| }, |
| { |
| "epoch": 17.22671203743088, |
| "grad_norm": 2.002861976623535, |
| "learning_rate": 1.9e-05, |
| "loss": 0.6169, |
| "step": 81000 |
| }, |
| { |
| "epoch": 17.22671203743088, |
| "eval_accuracy": 0.8612186249545289, |
| "eval_loss": 0.656814694404602, |
| "eval_runtime": 186.6713, |
| "eval_samples_per_second": 146.953, |
| "eval_steps_per_second": 4.596, |
| "step": 81000 |
| }, |
| { |
| "epoch": 17.333049766056998, |
| "grad_norm": 2.489762783050537, |
| "learning_rate": 1.85e-05, |
| "loss": 0.6224, |
| "step": 81500 |
| }, |
| { |
| "epoch": 17.333049766056998, |
| "eval_accuracy": 0.8613080153687351, |
| "eval_loss": 0.6605936884880066, |
| "eval_runtime": 188.8518, |
| "eval_samples_per_second": 145.257, |
| "eval_steps_per_second": 4.543, |
| "step": 81500 |
| }, |
| { |
| "epoch": 17.439387494683114, |
| "grad_norm": 2.0980629920959473, |
| "learning_rate": 1.8e-05, |
| "loss": 0.6199, |
| "step": 82000 |
| }, |
| { |
| "epoch": 17.439387494683114, |
| "eval_accuracy": 0.861816008556555, |
| "eval_loss": 0.6584839820861816, |
| "eval_runtime": 186.4347, |
| "eval_samples_per_second": 147.14, |
| "eval_steps_per_second": 4.602, |
| "step": 82000 |
| }, |
| { |
| "epoch": 17.54572522330923, |
| "grad_norm": 1.9317203760147095, |
| "learning_rate": 1.75e-05, |
| "loss": 0.6241, |
| "step": 82500 |
| }, |
| { |
| "epoch": 17.54572522330923, |
| "eval_accuracy": 0.8614589561697034, |
| "eval_loss": 0.661562442779541, |
| "eval_runtime": 187.7317, |
| "eval_samples_per_second": 146.123, |
| "eval_steps_per_second": 4.57, |
| "step": 82500 |
| }, |
| { |
| "epoch": 17.652062951935346, |
| "grad_norm": 2.1747238636016846, |
| "learning_rate": 1.7000000000000003e-05, |
| "loss": 0.6154, |
| "step": 83000 |
| }, |
| { |
| "epoch": 17.652062951935346, |
| "eval_accuracy": 0.8619722616948544, |
| "eval_loss": 0.6551214456558228, |
| "eval_runtime": 187.7435, |
| "eval_samples_per_second": 146.114, |
| "eval_steps_per_second": 4.57, |
| "step": 83000 |
| }, |
| { |
| "epoch": 17.758400680561465, |
| "grad_norm": 2.34098482131958, |
| "learning_rate": 1.65e-05, |
| "loss": 0.6167, |
| "step": 83500 |
| }, |
| { |
| "epoch": 17.758400680561465, |
| "eval_accuracy": 0.8625058751823201, |
| "eval_loss": 0.6509849429130554, |
| "eval_runtime": 186.5789, |
| "eval_samples_per_second": 147.026, |
| "eval_steps_per_second": 4.599, |
| "step": 83500 |
| }, |
| { |
| "epoch": 17.86473840918758, |
| "grad_norm": 2.2351200580596924, |
| "learning_rate": 1.6000000000000003e-05, |
| "loss": 0.6162, |
| "step": 84000 |
| }, |
| { |
| "epoch": 17.86473840918758, |
| "eval_accuracy": 0.8624151082072831, |
| "eval_loss": 0.6552938222885132, |
| "eval_runtime": 186.7383, |
| "eval_samples_per_second": 146.901, |
| "eval_steps_per_second": 4.595, |
| "step": 84000 |
| }, |
| { |
| "epoch": 17.971076137813697, |
| "grad_norm": 2.1403043270111084, |
| "learning_rate": 1.55e-05, |
| "loss": 0.6127, |
| "step": 84500 |
| }, |
| { |
| "epoch": 17.971076137813697, |
| "eval_accuracy": 0.8623448436360815, |
| "eval_loss": 0.6527832746505737, |
| "eval_runtime": 186.8175, |
| "eval_samples_per_second": 146.838, |
| "eval_steps_per_second": 4.593, |
| "step": 84500 |
| }, |
| { |
| "epoch": 18.077413866439812, |
| "grad_norm": 1.9795105457305908, |
| "learning_rate": 1.5e-05, |
| "loss": 0.61, |
| "step": 85000 |
| }, |
| { |
| "epoch": 18.077413866439812, |
| "eval_accuracy": 0.8622515357619651, |
| "eval_loss": 0.6563568115234375, |
| "eval_runtime": 188.8808, |
| "eval_samples_per_second": 145.234, |
| "eval_steps_per_second": 4.543, |
| "step": 85000 |
| }, |
| { |
| "epoch": 18.183751595065928, |
| "grad_norm": 2.548271656036377, |
| "learning_rate": 1.45e-05, |
| "loss": 0.6115, |
| "step": 85500 |
| }, |
| { |
| "epoch": 18.183751595065928, |
| "eval_accuracy": 0.862900921721882, |
| "eval_loss": 0.6507585644721985, |
| "eval_runtime": 187.5686, |
| "eval_samples_per_second": 146.251, |
| "eval_steps_per_second": 4.574, |
| "step": 85500 |
| }, |
| { |
| "epoch": 18.290089323692047, |
| "grad_norm": 2.2638394832611084, |
| "learning_rate": 1.4000000000000001e-05, |
| "loss": 0.602, |
| "step": 86000 |
| }, |
| { |
| "epoch": 18.290089323692047, |
| "eval_accuracy": 0.8630698221693879, |
| "eval_loss": 0.6584257483482361, |
| "eval_runtime": 186.8872, |
| "eval_samples_per_second": 146.784, |
| "eval_steps_per_second": 4.591, |
| "step": 86000 |
| }, |
| { |
| "epoch": 18.396427052318163, |
| "grad_norm": 1.9320064783096313, |
| "learning_rate": 1.3500000000000001e-05, |
| "loss": 0.6134, |
| "step": 86500 |
| }, |
| { |
| "epoch": 18.396427052318163, |
| "eval_accuracy": 0.8632312148222042, |
| "eval_loss": 0.6499433517456055, |
| "eval_runtime": 187.7637, |
| "eval_samples_per_second": 146.098, |
| "eval_steps_per_second": 4.57, |
| "step": 86500 |
| }, |
| { |
| "epoch": 18.50276478094428, |
| "grad_norm": 1.891508936882019, |
| "learning_rate": 1.3000000000000001e-05, |
| "loss": 0.6112, |
| "step": 87000 |
| }, |
| { |
| "epoch": 18.50276478094428, |
| "eval_accuracy": 0.8630286860815074, |
| "eval_loss": 0.6519419550895691, |
| "eval_runtime": 187.4682, |
| "eval_samples_per_second": 146.329, |
| "eval_steps_per_second": 4.577, |
| "step": 87000 |
| }, |
| { |
| "epoch": 18.609102509570395, |
| "grad_norm": 2.208890438079834, |
| "learning_rate": 1.25e-05, |
| "loss": 0.6109, |
| "step": 87500 |
| }, |
| { |
| "epoch": 18.609102509570395, |
| "eval_accuracy": 0.8636812324617463, |
| "eval_loss": 0.6446605324745178, |
| "eval_runtime": 188.845, |
| "eval_samples_per_second": 145.262, |
| "eval_steps_per_second": 4.543, |
| "step": 87500 |
| }, |
| { |
| "epoch": 18.71544023819651, |
| "grad_norm": 2.313291549682617, |
| "learning_rate": 1.2e-05, |
| "loss": 0.6018, |
| "step": 88000 |
| }, |
| { |
| "epoch": 18.71544023819651, |
| "eval_accuracy": 0.86378315200226, |
| "eval_loss": 0.6436861753463745, |
| "eval_runtime": 186.7793, |
| "eval_samples_per_second": 146.869, |
| "eval_steps_per_second": 4.594, |
| "step": 88000 |
| }, |
| { |
| "epoch": 18.82177796682263, |
| "grad_norm": 1.9980500936508179, |
| "learning_rate": 1.1500000000000002e-05, |
| "loss": 0.6081, |
| "step": 88500 |
| }, |
| { |
| "epoch": 18.82177796682263, |
| "eval_accuracy": 0.8636463710418437, |
| "eval_loss": 0.6478908061981201, |
| "eval_runtime": 189.7409, |
| "eval_samples_per_second": 144.576, |
| "eval_steps_per_second": 4.522, |
| "step": 88500 |
| }, |
| { |
| "epoch": 18.928115695448746, |
| "grad_norm": 2.0310556888580322, |
| "learning_rate": 1.1000000000000001e-05, |
| "loss": 0.6033, |
| "step": 89000 |
| }, |
| { |
| "epoch": 18.928115695448746, |
| "eval_accuracy": 0.8635644696497025, |
| "eval_loss": 0.6475953459739685, |
| "eval_runtime": 187.8946, |
| "eval_samples_per_second": 145.997, |
| "eval_steps_per_second": 4.566, |
| "step": 89000 |
| }, |
| { |
| "epoch": 19.03445342407486, |
| "grad_norm": 2.399073839187622, |
| "learning_rate": 1.05e-05, |
| "loss": 0.6035, |
| "step": 89500 |
| }, |
| { |
| "epoch": 19.03445342407486, |
| "eval_accuracy": 0.8637125413143368, |
| "eval_loss": 0.6477306485176086, |
| "eval_runtime": 188.0038, |
| "eval_samples_per_second": 145.912, |
| "eval_steps_per_second": 4.564, |
| "step": 89500 |
| }, |
| { |
| "epoch": 19.140791152700977, |
| "grad_norm": 2.218600034713745, |
| "learning_rate": 1e-05, |
| "loss": 0.5977, |
| "step": 90000 |
| }, |
| { |
| "epoch": 19.140791152700977, |
| "eval_accuracy": 0.8640764513496372, |
| "eval_loss": 0.6459035873413086, |
| "eval_runtime": 186.7125, |
| "eval_samples_per_second": 146.921, |
| "eval_steps_per_second": 4.595, |
| "step": 90000 |
| }, |
| { |
| "epoch": 19.247128881327093, |
| "grad_norm": 2.1308956146240234, |
| "learning_rate": 9.5e-06, |
| "loss": 0.6092, |
| "step": 90500 |
| }, |
| { |
| "epoch": 19.247128881327093, |
| "eval_accuracy": 0.8635675775343433, |
| "eval_loss": 0.6506515145301819, |
| "eval_runtime": 186.9565, |
| "eval_samples_per_second": 146.729, |
| "eval_steps_per_second": 4.589, |
| "step": 90500 |
| }, |
| { |
| "epoch": 19.353466609953212, |
| "grad_norm": 2.1165056228637695, |
| "learning_rate": 9e-06, |
| "loss": 0.594, |
| "step": 91000 |
| }, |
| { |
| "epoch": 19.353466609953212, |
| "eval_accuracy": 0.8645061649822221, |
| "eval_loss": 0.6416978240013123, |
| "eval_runtime": 187.0244, |
| "eval_samples_per_second": 146.676, |
| "eval_steps_per_second": 4.588, |
| "step": 91000 |
| }, |
| { |
| "epoch": 19.45980433857933, |
| "grad_norm": 2.277674436569214, |
| "learning_rate": 8.500000000000002e-06, |
| "loss": 0.601, |
| "step": 91500 |
| }, |
| { |
| "epoch": 19.45980433857933, |
| "eval_accuracy": 0.8641424971302022, |
| "eval_loss": 0.6443900465965271, |
| "eval_runtime": 186.9341, |
| "eval_samples_per_second": 146.747, |
| "eval_steps_per_second": 4.59, |
| "step": 91500 |
| }, |
| { |
| "epoch": 19.566142067205444, |
| "grad_norm": 2.283182144165039, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 0.6008, |
| "step": 92000 |
| }, |
| { |
| "epoch": 19.566142067205444, |
| "eval_accuracy": 0.8650054540372305, |
| "eval_loss": 0.6387457847595215, |
| "eval_runtime": 188.6398, |
| "eval_samples_per_second": 145.42, |
| "eval_steps_per_second": 4.548, |
| "step": 92000 |
| }, |
| { |
| "epoch": 19.67247979583156, |
| "grad_norm": 1.9207966327667236, |
| "learning_rate": 7.5e-06, |
| "loss": 0.5995, |
| "step": 92500 |
| }, |
| { |
| "epoch": 19.67247979583156, |
| "eval_accuracy": 0.8643799885041582, |
| "eval_loss": 0.6439911127090454, |
| "eval_runtime": 186.8593, |
| "eval_samples_per_second": 146.806, |
| "eval_steps_per_second": 4.592, |
| "step": 92500 |
| }, |
| { |
| "epoch": 19.77881752445768, |
| "grad_norm": 2.332043409347534, |
| "learning_rate": 7.000000000000001e-06, |
| "loss": 0.5945, |
| "step": 93000 |
| }, |
| { |
| "epoch": 19.77881752445768, |
| "eval_accuracy": 0.8648918324061133, |
| "eval_loss": 0.6363367438316345, |
| "eval_runtime": 187.6222, |
| "eval_samples_per_second": 146.209, |
| "eval_steps_per_second": 4.573, |
| "step": 93000 |
| }, |
| { |
| "epoch": 19.885155253083795, |
| "grad_norm": 2.0394229888916016, |
| "learning_rate": 6.5000000000000004e-06, |
| "loss": 0.5985, |
| "step": 93500 |
| }, |
| { |
| "epoch": 19.885155253083795, |
| "eval_accuracy": 0.8647885102924586, |
| "eval_loss": 0.6401004791259766, |
| "eval_runtime": 187.4927, |
| "eval_samples_per_second": 146.31, |
| "eval_steps_per_second": 4.576, |
| "step": 93500 |
| }, |
| { |
| "epoch": 19.99149298170991, |
| "grad_norm": 2.3234775066375732, |
| "learning_rate": 6e-06, |
| "loss": 0.5945, |
| "step": 94000 |
| }, |
| { |
| "epoch": 19.99149298170991, |
| "eval_accuracy": 0.8650117508813161, |
| "eval_loss": 0.6405218839645386, |
| "eval_runtime": 188.3987, |
| "eval_samples_per_second": 145.606, |
| "eval_steps_per_second": 4.554, |
| "step": 94000 |
| }, |
| { |
| "epoch": 20.097830710336027, |
| "grad_norm": 1.8002312183380127, |
| "learning_rate": 5.500000000000001e-06, |
| "loss": 0.5943, |
| "step": 94500 |
| }, |
| { |
| "epoch": 20.097830710336027, |
| "eval_accuracy": 0.8649030683652459, |
| "eval_loss": 0.6379217505455017, |
| "eval_runtime": 186.858, |
| "eval_samples_per_second": 146.807, |
| "eval_steps_per_second": 4.592, |
| "step": 94500 |
| }, |
| { |
| "epoch": 20.204168438962142, |
| "grad_norm": 2.004221200942993, |
| "learning_rate": 5e-06, |
| "loss": 0.5874, |
| "step": 95000 |
| }, |
| { |
| "epoch": 20.204168438962142, |
| "eval_accuracy": 0.8656674134373837, |
| "eval_loss": 0.6364595293998718, |
| "eval_runtime": 186.2416, |
| "eval_samples_per_second": 147.293, |
| "eval_steps_per_second": 4.607, |
| "step": 95000 |
| }, |
| { |
| "epoch": 20.31050616758826, |
| "grad_norm": 2.2905805110931396, |
| "learning_rate": 4.5e-06, |
| "loss": 0.5919, |
| "step": 95500 |
| }, |
| { |
| "epoch": 20.31050616758826, |
| "eval_accuracy": 0.8654995569474285, |
| "eval_loss": 0.6359232664108276, |
| "eval_runtime": 188.7313, |
| "eval_samples_per_second": 145.349, |
| "eval_steps_per_second": 4.546, |
| "step": 95500 |
| }, |
| { |
| "epoch": 20.416843896214377, |
| "grad_norm": 2.1926088333129883, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 0.5959, |
| "step": 96000 |
| }, |
| { |
| "epoch": 20.416843896214377, |
| "eval_accuracy": 0.8659267527708855, |
| "eval_loss": 0.6338008642196655, |
| "eval_runtime": 187.0107, |
| "eval_samples_per_second": 146.687, |
| "eval_steps_per_second": 4.588, |
| "step": 96000 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 100000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 22, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 8.094053129886106e+17, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|