| { | |
| "best_metric": 0.5836298932384342, | |
| "best_model_checkpoint": "wav2vec2-5Class-Validation-Mobil/checkpoint-773", | |
| "epoch": 276.9230769230769, | |
| "eval_steps": 500, | |
| "global_step": 900, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.92, | |
| "eval_accuracy": 0.3202846975088968, | |
| "eval_loss": 1.602386713027954, | |
| "eval_runtime": 4.3468, | |
| "eval_samples_per_second": 64.645, | |
| "eval_steps_per_second": 0.69, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "eval_accuracy": 0.3167259786476868, | |
| "eval_loss": 1.6022303104400635, | |
| "eval_runtime": 3.573, | |
| "eval_samples_per_second": 78.645, | |
| "eval_steps_per_second": 0.84, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "eval_accuracy": 0.3167259786476868, | |
| "eval_loss": 1.601974368095398, | |
| "eval_runtime": 4.6151, | |
| "eval_samples_per_second": 60.887, | |
| "eval_steps_per_second": 0.65, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.3167259786476868, | |
| "eval_loss": 1.6014597415924072, | |
| "eval_runtime": 5.3659, | |
| "eval_samples_per_second": 52.368, | |
| "eval_steps_per_second": 0.559, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 4.92, | |
| "eval_accuracy": 0.3167259786476868, | |
| "eval_loss": 1.6009386777877808, | |
| "eval_runtime": 3.4504, | |
| "eval_samples_per_second": 81.439, | |
| "eval_steps_per_second": 0.869, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 5.85, | |
| "eval_accuracy": 0.31316725978647686, | |
| "eval_loss": 1.6003268957138062, | |
| "eval_runtime": 4.2937, | |
| "eval_samples_per_second": 65.445, | |
| "eval_steps_per_second": 0.699, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 6.77, | |
| "eval_accuracy": 0.30604982206405695, | |
| "eval_loss": 1.5995941162109375, | |
| "eval_runtime": 3.7057, | |
| "eval_samples_per_second": 75.828, | |
| "eval_steps_per_second": 0.81, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.298932384341637, | |
| "eval_loss": 1.5984183549880981, | |
| "eval_runtime": 4.6458, | |
| "eval_samples_per_second": 60.484, | |
| "eval_steps_per_second": 0.646, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 8.92, | |
| "eval_accuracy": 0.2918149466192171, | |
| "eval_loss": 1.5974235534667969, | |
| "eval_runtime": 5.0303, | |
| "eval_samples_per_second": 55.861, | |
| "eval_steps_per_second": 0.596, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 9.85, | |
| "eval_accuracy": 0.27402135231316727, | |
| "eval_loss": 1.596360445022583, | |
| "eval_runtime": 3.3268, | |
| "eval_samples_per_second": 84.465, | |
| "eval_steps_per_second": 0.902, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 10.77, | |
| "eval_accuracy": 0.2597864768683274, | |
| "eval_loss": 1.5951836109161377, | |
| "eval_runtime": 3.1882, | |
| "eval_samples_per_second": 88.138, | |
| "eval_steps_per_second": 0.941, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.26334519572953735, | |
| "eval_loss": 1.593432903289795, | |
| "eval_runtime": 4.2078, | |
| "eval_samples_per_second": 66.78, | |
| "eval_steps_per_second": 0.713, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 12.92, | |
| "eval_accuracy": 0.27402135231316727, | |
| "eval_loss": 1.5920255184173584, | |
| "eval_runtime": 4.9074, | |
| "eval_samples_per_second": 57.261, | |
| "eval_steps_per_second": 0.611, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 13.85, | |
| "eval_accuracy": 0.298932384341637, | |
| "eval_loss": 1.5904992818832397, | |
| "eval_runtime": 5.4737, | |
| "eval_samples_per_second": 51.336, | |
| "eval_steps_per_second": 0.548, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 14.77, | |
| "eval_accuracy": 0.298932384341637, | |
| "eval_loss": 1.5889027118682861, | |
| "eval_runtime": 5.4844, | |
| "eval_samples_per_second": 51.236, | |
| "eval_steps_per_second": 0.547, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.2846975088967972, | |
| "eval_loss": 1.5867795944213867, | |
| "eval_runtime": 4.8027, | |
| "eval_samples_per_second": 58.508, | |
| "eval_steps_per_second": 0.625, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 16.92, | |
| "eval_accuracy": 0.2846975088967972, | |
| "eval_loss": 1.5850844383239746, | |
| "eval_runtime": 4.5938, | |
| "eval_samples_per_second": 61.169, | |
| "eval_steps_per_second": 0.653, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 17.85, | |
| "eval_accuracy": 0.2846975088967972, | |
| "eval_loss": 1.5833449363708496, | |
| "eval_runtime": 3.4722, | |
| "eval_samples_per_second": 80.929, | |
| "eval_steps_per_second": 0.864, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 18.77, | |
| "eval_accuracy": 0.26334519572953735, | |
| "eval_loss": 1.58156418800354, | |
| "eval_runtime": 3.9515, | |
| "eval_samples_per_second": 71.112, | |
| "eval_steps_per_second": 0.759, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.24555160142348753, | |
| "eval_loss": 1.579047441482544, | |
| "eval_runtime": 4.2125, | |
| "eval_samples_per_second": 66.707, | |
| "eval_steps_per_second": 0.712, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 20.92, | |
| "eval_accuracy": 0.24199288256227758, | |
| "eval_loss": 1.576985478401184, | |
| "eval_runtime": 4.6275, | |
| "eval_samples_per_second": 60.724, | |
| "eval_steps_per_second": 0.648, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 21.85, | |
| "eval_accuracy": 0.23487544483985764, | |
| "eval_loss": 1.574812650680542, | |
| "eval_runtime": 4.9061, | |
| "eval_samples_per_second": 57.275, | |
| "eval_steps_per_second": 0.611, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 22.77, | |
| "eval_accuracy": 0.2313167259786477, | |
| "eval_loss": 1.5727591514587402, | |
| "eval_runtime": 5.6003, | |
| "eval_samples_per_second": 50.176, | |
| "eval_steps_per_second": 0.536, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_accuracy": 0.2277580071174377, | |
| "eval_loss": 1.5699430704116821, | |
| "eval_runtime": 4.5057, | |
| "eval_samples_per_second": 62.365, | |
| "eval_steps_per_second": 0.666, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 24.92, | |
| "eval_accuracy": 0.2313167259786477, | |
| "eval_loss": 1.567823052406311, | |
| "eval_runtime": 4.5731, | |
| "eval_samples_per_second": 61.446, | |
| "eval_steps_per_second": 0.656, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 25.85, | |
| "eval_accuracy": 0.2313167259786477, | |
| "eval_loss": 1.5657496452331543, | |
| "eval_runtime": 4.3556, | |
| "eval_samples_per_second": 64.515, | |
| "eval_steps_per_second": 0.689, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 26.77, | |
| "eval_accuracy": 0.2313167259786477, | |
| "eval_loss": 1.5637929439544678, | |
| "eval_runtime": 5.9441, | |
| "eval_samples_per_second": 47.274, | |
| "eval_steps_per_second": 0.505, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_accuracy": 0.2313167259786477, | |
| "eval_loss": 1.5613017082214355, | |
| "eval_runtime": 4.5762, | |
| "eval_samples_per_second": 61.404, | |
| "eval_steps_per_second": 0.656, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 28.92, | |
| "eval_accuracy": 0.2313167259786477, | |
| "eval_loss": 1.5597190856933594, | |
| "eval_runtime": 4.1813, | |
| "eval_samples_per_second": 67.204, | |
| "eval_steps_per_second": 0.717, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 29.85, | |
| "eval_accuracy": 0.2313167259786477, | |
| "eval_loss": 1.5587605237960815, | |
| "eval_runtime": 4.6749, | |
| "eval_samples_per_second": 60.108, | |
| "eval_steps_per_second": 0.642, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 30.77, | |
| "grad_norm": 66708.1953125, | |
| "learning_rate": 2.962962962962963e-05, | |
| "loss": 1.561, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 30.77, | |
| "eval_accuracy": 0.2313167259786477, | |
| "eval_loss": 1.5586402416229248, | |
| "eval_runtime": 5.2059, | |
| "eval_samples_per_second": 53.977, | |
| "eval_steps_per_second": 0.576, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "eval_accuracy": 0.2313167259786477, | |
| "eval_loss": 1.5596789121627808, | |
| "eval_runtime": 4.428, | |
| "eval_samples_per_second": 63.46, | |
| "eval_steps_per_second": 0.678, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 32.92, | |
| "eval_accuracy": 0.2313167259786477, | |
| "eval_loss": 1.5619100332260132, | |
| "eval_runtime": 3.3009, | |
| "eval_samples_per_second": 85.128, | |
| "eval_steps_per_second": 0.909, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 33.85, | |
| "eval_accuracy": 0.2313167259786477, | |
| "eval_loss": 1.5660569667816162, | |
| "eval_runtime": 3.371, | |
| "eval_samples_per_second": 83.357, | |
| "eval_steps_per_second": 0.89, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 34.77, | |
| "eval_accuracy": 0.2313167259786477, | |
| "eval_loss": 1.5720349550247192, | |
| "eval_runtime": 3.9013, | |
| "eval_samples_per_second": 72.028, | |
| "eval_steps_per_second": 0.769, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "eval_accuracy": 0.2313167259786477, | |
| "eval_loss": 1.5833308696746826, | |
| "eval_runtime": 4.7161, | |
| "eval_samples_per_second": 59.583, | |
| "eval_steps_per_second": 0.636, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 36.92, | |
| "eval_accuracy": 0.2313167259786477, | |
| "eval_loss": 1.5957212448120117, | |
| "eval_runtime": 4.1977, | |
| "eval_samples_per_second": 66.942, | |
| "eval_steps_per_second": 0.715, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 37.85, | |
| "eval_accuracy": 0.2313167259786477, | |
| "eval_loss": 1.6119521856307983, | |
| "eval_runtime": 3.034, | |
| "eval_samples_per_second": 92.618, | |
| "eval_steps_per_second": 0.989, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 38.77, | |
| "eval_accuracy": 0.2313167259786477, | |
| "eval_loss": 1.631814956665039, | |
| "eval_runtime": 3.0252, | |
| "eval_samples_per_second": 92.887, | |
| "eval_steps_per_second": 0.992, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "eval_accuracy": 0.2313167259786477, | |
| "eval_loss": 1.663757085800171, | |
| "eval_runtime": 3.243, | |
| "eval_samples_per_second": 86.648, | |
| "eval_steps_per_second": 0.925, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 40.92, | |
| "eval_accuracy": 0.2313167259786477, | |
| "eval_loss": 1.6904593706130981, | |
| "eval_runtime": 3.1943, | |
| "eval_samples_per_second": 87.97, | |
| "eval_steps_per_second": 0.939, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 41.85, | |
| "eval_accuracy": 0.2313167259786477, | |
| "eval_loss": 1.7196571826934814, | |
| "eval_runtime": 3.4764, | |
| "eval_samples_per_second": 80.832, | |
| "eval_steps_per_second": 0.863, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 42.77, | |
| "eval_accuracy": 0.2313167259786477, | |
| "eval_loss": 1.750288724899292, | |
| "eval_runtime": 3.415, | |
| "eval_samples_per_second": 82.283, | |
| "eval_steps_per_second": 0.878, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 44.0, | |
| "eval_accuracy": 0.2313167259786477, | |
| "eval_loss": 1.7802847623825073, | |
| "eval_runtime": 3.0779, | |
| "eval_samples_per_second": 91.295, | |
| "eval_steps_per_second": 0.975, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 44.92, | |
| "eval_accuracy": 0.2313167259786477, | |
| "eval_loss": 1.7917312383651733, | |
| "eval_runtime": 3.6229, | |
| "eval_samples_per_second": 77.562, | |
| "eval_steps_per_second": 0.828, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 45.85, | |
| "eval_accuracy": 0.2313167259786477, | |
| "eval_loss": 1.7919948101043701, | |
| "eval_runtime": 3.2733, | |
| "eval_samples_per_second": 85.845, | |
| "eval_steps_per_second": 0.916, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 46.77, | |
| "eval_accuracy": 0.2313167259786477, | |
| "eval_loss": 1.7869282960891724, | |
| "eval_runtime": 3.1081, | |
| "eval_samples_per_second": 90.408, | |
| "eval_steps_per_second": 0.965, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 48.0, | |
| "eval_accuracy": 0.2597864768683274, | |
| "eval_loss": 1.7699986696243286, | |
| "eval_runtime": 3.2526, | |
| "eval_samples_per_second": 86.392, | |
| "eval_steps_per_second": 0.922, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 48.92, | |
| "eval_accuracy": 0.27402135231316727, | |
| "eval_loss": 1.7525370121002197, | |
| "eval_runtime": 2.789, | |
| "eval_samples_per_second": 100.754, | |
| "eval_steps_per_second": 1.076, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 49.85, | |
| "eval_accuracy": 0.2775800711743772, | |
| "eval_loss": 1.7406829595565796, | |
| "eval_runtime": 3.5203, | |
| "eval_samples_per_second": 79.822, | |
| "eval_steps_per_second": 0.852, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 50.77, | |
| "eval_accuracy": 0.2918149466192171, | |
| "eval_loss": 1.7306878566741943, | |
| "eval_runtime": 3.4092, | |
| "eval_samples_per_second": 82.424, | |
| "eval_steps_per_second": 0.88, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 52.0, | |
| "eval_accuracy": 0.3096085409252669, | |
| "eval_loss": 1.7241473197937012, | |
| "eval_runtime": 3.4771, | |
| "eval_samples_per_second": 80.815, | |
| "eval_steps_per_second": 0.863, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 52.92, | |
| "eval_accuracy": 0.3167259786476868, | |
| "eval_loss": 1.7242671251296997, | |
| "eval_runtime": 3.338, | |
| "eval_samples_per_second": 84.182, | |
| "eval_steps_per_second": 0.899, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 53.85, | |
| "eval_accuracy": 0.3167259786476868, | |
| "eval_loss": 1.7253814935684204, | |
| "eval_runtime": 3.037, | |
| "eval_samples_per_second": 92.524, | |
| "eval_steps_per_second": 0.988, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 54.77, | |
| "eval_accuracy": 0.3238434163701068, | |
| "eval_loss": 1.7232733964920044, | |
| "eval_runtime": 3.3453, | |
| "eval_samples_per_second": 84.0, | |
| "eval_steps_per_second": 0.897, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 56.0, | |
| "eval_accuracy": 0.3238434163701068, | |
| "eval_loss": 1.7224737405776978, | |
| "eval_runtime": 4.1856, | |
| "eval_samples_per_second": 67.135, | |
| "eval_steps_per_second": 0.717, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 56.92, | |
| "eval_accuracy": 0.3274021352313167, | |
| "eval_loss": 1.7187089920043945, | |
| "eval_runtime": 4.0825, | |
| "eval_samples_per_second": 68.831, | |
| "eval_steps_per_second": 0.735, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 57.85, | |
| "eval_accuracy": 0.3274021352313167, | |
| "eval_loss": 1.7172435522079468, | |
| "eval_runtime": 4.3988, | |
| "eval_samples_per_second": 63.881, | |
| "eval_steps_per_second": 0.682, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 58.77, | |
| "eval_accuracy": 0.33451957295373663, | |
| "eval_loss": 1.7145518064498901, | |
| "eval_runtime": 3.5886, | |
| "eval_samples_per_second": 78.303, | |
| "eval_steps_per_second": 0.836, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 60.0, | |
| "eval_accuracy": 0.3487544483985765, | |
| "eval_loss": 1.711957573890686, | |
| "eval_runtime": 3.0988, | |
| "eval_samples_per_second": 90.681, | |
| "eval_steps_per_second": 0.968, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 60.92, | |
| "eval_accuracy": 0.35587188612099646, | |
| "eval_loss": 1.7048858404159546, | |
| "eval_runtime": 3.3244, | |
| "eval_samples_per_second": 84.526, | |
| "eval_steps_per_second": 0.902, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 61.54, | |
| "grad_norm": 26972.24609375, | |
| "learning_rate": 2.5925925925925925e-05, | |
| "loss": 1.3094, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 61.85, | |
| "eval_accuracy": 0.3594306049822064, | |
| "eval_loss": 1.702221155166626, | |
| "eval_runtime": 2.9103, | |
| "eval_samples_per_second": 96.553, | |
| "eval_steps_per_second": 1.031, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 62.77, | |
| "eval_accuracy": 0.3736654804270463, | |
| "eval_loss": 1.6912201642990112, | |
| "eval_runtime": 3.4935, | |
| "eval_samples_per_second": 80.435, | |
| "eval_steps_per_second": 0.859, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 64.0, | |
| "eval_accuracy": 0.37722419928825623, | |
| "eval_loss": 1.6797984838485718, | |
| "eval_runtime": 3.0757, | |
| "eval_samples_per_second": 91.361, | |
| "eval_steps_per_second": 0.975, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 64.92, | |
| "eval_accuracy": 0.3807829181494662, | |
| "eval_loss": 1.6687328815460205, | |
| "eval_runtime": 3.281, | |
| "eval_samples_per_second": 85.645, | |
| "eval_steps_per_second": 0.914, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 65.85, | |
| "eval_accuracy": 0.38434163701067614, | |
| "eval_loss": 1.6568727493286133, | |
| "eval_runtime": 3.0158, | |
| "eval_samples_per_second": 93.174, | |
| "eval_steps_per_second": 0.995, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 66.77, | |
| "eval_accuracy": 0.3914590747330961, | |
| "eval_loss": 1.642698049545288, | |
| "eval_runtime": 2.9377, | |
| "eval_samples_per_second": 95.654, | |
| "eval_steps_per_second": 1.021, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 68.0, | |
| "eval_accuracy": 0.3914590747330961, | |
| "eval_loss": 1.6301021575927734, | |
| "eval_runtime": 2.9188, | |
| "eval_samples_per_second": 96.272, | |
| "eval_steps_per_second": 1.028, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 68.92, | |
| "eval_accuracy": 0.39501779359430605, | |
| "eval_loss": 1.6217372417449951, | |
| "eval_runtime": 3.1297, | |
| "eval_samples_per_second": 89.784, | |
| "eval_steps_per_second": 0.959, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 69.85, | |
| "eval_accuracy": 0.39501779359430605, | |
| "eval_loss": 1.6203086376190186, | |
| "eval_runtime": 3.3261, | |
| "eval_samples_per_second": 84.482, | |
| "eval_steps_per_second": 0.902, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 70.77, | |
| "eval_accuracy": 0.39501779359430605, | |
| "eval_loss": 1.6257439851760864, | |
| "eval_runtime": 3.1941, | |
| "eval_samples_per_second": 87.974, | |
| "eval_steps_per_second": 0.939, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 72.0, | |
| "eval_accuracy": 0.40213523131672596, | |
| "eval_loss": 1.6192444562911987, | |
| "eval_runtime": 2.8716, | |
| "eval_samples_per_second": 97.855, | |
| "eval_steps_per_second": 1.045, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 72.92, | |
| "eval_accuracy": 0.4092526690391459, | |
| "eval_loss": 1.6044347286224365, | |
| "eval_runtime": 3.3231, | |
| "eval_samples_per_second": 84.559, | |
| "eval_steps_per_second": 0.903, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 73.85, | |
| "eval_accuracy": 0.4306049822064057, | |
| "eval_loss": 1.5868154764175415, | |
| "eval_runtime": 3.0078, | |
| "eval_samples_per_second": 93.422, | |
| "eval_steps_per_second": 0.997, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 74.77, | |
| "eval_accuracy": 0.4377224199288256, | |
| "eval_loss": 1.5786783695220947, | |
| "eval_runtime": 3.1108, | |
| "eval_samples_per_second": 90.332, | |
| "eval_steps_per_second": 0.964, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 76.0, | |
| "eval_accuracy": 0.43416370106761565, | |
| "eval_loss": 1.5762073993682861, | |
| "eval_runtime": 4.8033, | |
| "eval_samples_per_second": 58.501, | |
| "eval_steps_per_second": 0.625, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 76.92, | |
| "eval_accuracy": 0.4377224199288256, | |
| "eval_loss": 1.5717052221298218, | |
| "eval_runtime": 4.9388, | |
| "eval_samples_per_second": 56.896, | |
| "eval_steps_per_second": 0.607, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 77.85, | |
| "eval_accuracy": 0.43416370106761565, | |
| "eval_loss": 1.5673516988754272, | |
| "eval_runtime": 3.5439, | |
| "eval_samples_per_second": 79.29, | |
| "eval_steps_per_second": 0.847, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 78.77, | |
| "eval_accuracy": 0.42704626334519574, | |
| "eval_loss": 1.5683715343475342, | |
| "eval_runtime": 2.9479, | |
| "eval_samples_per_second": 95.323, | |
| "eval_steps_per_second": 1.018, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 80.0, | |
| "eval_accuracy": 0.42704626334519574, | |
| "eval_loss": 1.5619009733200073, | |
| "eval_runtime": 3.2494, | |
| "eval_samples_per_second": 86.478, | |
| "eval_steps_per_second": 0.923, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 80.92, | |
| "eval_accuracy": 0.4306049822064057, | |
| "eval_loss": 1.5554527044296265, | |
| "eval_runtime": 3.0649, | |
| "eval_samples_per_second": 91.683, | |
| "eval_steps_per_second": 0.979, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 81.85, | |
| "eval_accuracy": 0.43416370106761565, | |
| "eval_loss": 1.550489068031311, | |
| "eval_runtime": 3.1587, | |
| "eval_samples_per_second": 88.96, | |
| "eval_steps_per_second": 0.95, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 82.77, | |
| "eval_accuracy": 0.4412811387900356, | |
| "eval_loss": 1.5385645627975464, | |
| "eval_runtime": 3.1715, | |
| "eval_samples_per_second": 88.601, | |
| "eval_steps_per_second": 0.946, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 84.0, | |
| "eval_accuracy": 0.4377224199288256, | |
| "eval_loss": 1.536201000213623, | |
| "eval_runtime": 3.2602, | |
| "eval_samples_per_second": 86.191, | |
| "eval_steps_per_second": 0.92, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 84.92, | |
| "eval_accuracy": 0.43416370106761565, | |
| "eval_loss": 1.5410619974136353, | |
| "eval_runtime": 2.9845, | |
| "eval_samples_per_second": 94.153, | |
| "eval_steps_per_second": 1.005, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 85.85, | |
| "eval_accuracy": 0.43416370106761565, | |
| "eval_loss": 1.5452691316604614, | |
| "eval_runtime": 3.4013, | |
| "eval_samples_per_second": 82.616, | |
| "eval_steps_per_second": 0.882, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 86.77, | |
| "eval_accuracy": 0.42704626334519574, | |
| "eval_loss": 1.5611252784729004, | |
| "eval_runtime": 2.9135, | |
| "eval_samples_per_second": 96.447, | |
| "eval_steps_per_second": 1.03, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 88.0, | |
| "eval_accuracy": 0.4199288256227758, | |
| "eval_loss": 1.5766078233718872, | |
| "eval_runtime": 2.8634, | |
| "eval_samples_per_second": 98.135, | |
| "eval_steps_per_second": 1.048, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 88.92, | |
| "eval_accuracy": 0.4199288256227758, | |
| "eval_loss": 1.5781065225601196, | |
| "eval_runtime": 3.1014, | |
| "eval_samples_per_second": 90.606, | |
| "eval_steps_per_second": 0.967, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 89.85, | |
| "eval_accuracy": 0.4234875444839858, | |
| "eval_loss": 1.5674538612365723, | |
| "eval_runtime": 3.5418, | |
| "eval_samples_per_second": 79.339, | |
| "eval_steps_per_second": 0.847, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 90.77, | |
| "eval_accuracy": 0.42704626334519574, | |
| "eval_loss": 1.558840036392212, | |
| "eval_runtime": 4.5717, | |
| "eval_samples_per_second": 61.464, | |
| "eval_steps_per_second": 0.656, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 92.0, | |
| "eval_accuracy": 0.42704626334519574, | |
| "eval_loss": 1.5495978593826294, | |
| "eval_runtime": 2.971, | |
| "eval_samples_per_second": 94.581, | |
| "eval_steps_per_second": 1.01, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 92.31, | |
| "grad_norm": 27984.919921875, | |
| "learning_rate": 2.222222222222222e-05, | |
| "loss": 1.0538, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 92.92, | |
| "eval_accuracy": 0.42704626334519574, | |
| "eval_loss": 1.5492929220199585, | |
| "eval_runtime": 3.229, | |
| "eval_samples_per_second": 87.023, | |
| "eval_steps_per_second": 0.929, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 93.85, | |
| "eval_accuracy": 0.4234875444839858, | |
| "eval_loss": 1.5539740324020386, | |
| "eval_runtime": 2.993, | |
| "eval_samples_per_second": 93.886, | |
| "eval_steps_per_second": 1.002, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 94.77, | |
| "eval_accuracy": 0.41637010676156583, | |
| "eval_loss": 1.5620365142822266, | |
| "eval_runtime": 3.5102, | |
| "eval_samples_per_second": 80.052, | |
| "eval_steps_per_second": 0.855, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 96.0, | |
| "eval_accuracy": 0.41637010676156583, | |
| "eval_loss": 1.564751148223877, | |
| "eval_runtime": 3.7132, | |
| "eval_samples_per_second": 75.677, | |
| "eval_steps_per_second": 0.808, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 96.92, | |
| "eval_accuracy": 0.41637010676156583, | |
| "eval_loss": 1.561686396598816, | |
| "eval_runtime": 4.9316, | |
| "eval_samples_per_second": 56.98, | |
| "eval_steps_per_second": 0.608, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 97.85, | |
| "eval_accuracy": 0.4234875444839858, | |
| "eval_loss": 1.5461145639419556, | |
| "eval_runtime": 3.1512, | |
| "eval_samples_per_second": 89.173, | |
| "eval_steps_per_second": 0.952, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 98.77, | |
| "eval_accuracy": 0.4306049822064057, | |
| "eval_loss": 1.5348182916641235, | |
| "eval_runtime": 4.3294, | |
| "eval_samples_per_second": 64.906, | |
| "eval_steps_per_second": 0.693, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 100.0, | |
| "eval_accuracy": 0.4306049822064057, | |
| "eval_loss": 1.5345805883407593, | |
| "eval_runtime": 3.3762, | |
| "eval_samples_per_second": 83.23, | |
| "eval_steps_per_second": 0.889, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 100.92, | |
| "eval_accuracy": 0.41637010676156583, | |
| "eval_loss": 1.5465843677520752, | |
| "eval_runtime": 3.8288, | |
| "eval_samples_per_second": 73.391, | |
| "eval_steps_per_second": 0.784, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 101.85, | |
| "eval_accuracy": 0.4128113879003559, | |
| "eval_loss": 1.5547189712524414, | |
| "eval_runtime": 4.3332, | |
| "eval_samples_per_second": 64.848, | |
| "eval_steps_per_second": 0.692, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 102.77, | |
| "eval_accuracy": 0.4128113879003559, | |
| "eval_loss": 1.5559605360031128, | |
| "eval_runtime": 3.2588, | |
| "eval_samples_per_second": 86.229, | |
| "eval_steps_per_second": 0.921, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 104.0, | |
| "eval_accuracy": 0.4306049822064057, | |
| "eval_loss": 1.5315039157867432, | |
| "eval_runtime": 4.5744, | |
| "eval_samples_per_second": 61.429, | |
| "eval_steps_per_second": 0.656, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 104.92, | |
| "eval_accuracy": 0.44483985765124556, | |
| "eval_loss": 1.5124022960662842, | |
| "eval_runtime": 3.3067, | |
| "eval_samples_per_second": 84.979, | |
| "eval_steps_per_second": 0.907, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 105.85, | |
| "eval_accuracy": 0.44483985765124556, | |
| "eval_loss": 1.5044087171554565, | |
| "eval_runtime": 3.9949, | |
| "eval_samples_per_second": 70.341, | |
| "eval_steps_per_second": 0.751, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 106.77, | |
| "eval_accuracy": 0.4483985765124555, | |
| "eval_loss": 1.5010027885437012, | |
| "eval_runtime": 3.5698, | |
| "eval_samples_per_second": 78.716, | |
| "eval_steps_per_second": 0.84, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 108.0, | |
| "eval_accuracy": 0.44483985765124556, | |
| "eval_loss": 1.5004721879959106, | |
| "eval_runtime": 2.9807, | |
| "eval_samples_per_second": 94.273, | |
| "eval_steps_per_second": 1.006, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 108.92, | |
| "eval_accuracy": 0.44483985765124556, | |
| "eval_loss": 1.499153971672058, | |
| "eval_runtime": 2.8868, | |
| "eval_samples_per_second": 97.339, | |
| "eval_steps_per_second": 1.039, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 109.85, | |
| "eval_accuracy": 0.4483985765124555, | |
| "eval_loss": 1.4993938207626343, | |
| "eval_runtime": 3.2052, | |
| "eval_samples_per_second": 87.67, | |
| "eval_steps_per_second": 0.936, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 110.77, | |
| "eval_accuracy": 0.45195729537366547, | |
| "eval_loss": 1.4987653493881226, | |
| "eval_runtime": 3.3473, | |
| "eval_samples_per_second": 83.949, | |
| "eval_steps_per_second": 0.896, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 112.0, | |
| "eval_accuracy": 0.46619217081850534, | |
| "eval_loss": 1.5004514455795288, | |
| "eval_runtime": 2.8714, | |
| "eval_samples_per_second": 97.862, | |
| "eval_steps_per_second": 1.045, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 112.92, | |
| "eval_accuracy": 0.47330960854092524, | |
| "eval_loss": 1.5010361671447754, | |
| "eval_runtime": 3.6886, | |
| "eval_samples_per_second": 76.182, | |
| "eval_steps_per_second": 0.813, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 113.85, | |
| "eval_accuracy": 0.4697508896797153, | |
| "eval_loss": 1.4968541860580444, | |
| "eval_runtime": 3.5621, | |
| "eval_samples_per_second": 78.886, | |
| "eval_steps_per_second": 0.842, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 114.77, | |
| "eval_accuracy": 0.47330960854092524, | |
| "eval_loss": 1.4775702953338623, | |
| "eval_runtime": 4.3842, | |
| "eval_samples_per_second": 64.093, | |
| "eval_steps_per_second": 0.684, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 116.0, | |
| "eval_accuracy": 0.47686832740213525, | |
| "eval_loss": 1.4527899026870728, | |
| "eval_runtime": 4.7808, | |
| "eval_samples_per_second": 58.777, | |
| "eval_steps_per_second": 0.628, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 116.92, | |
| "eval_accuracy": 0.49466192170818507, | |
| "eval_loss": 1.4394866228103638, | |
| "eval_runtime": 5.0753, | |
| "eval_samples_per_second": 55.366, | |
| "eval_steps_per_second": 0.591, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 117.85, | |
| "eval_accuracy": 0.498220640569395, | |
| "eval_loss": 1.4310173988342285, | |
| "eval_runtime": 4.758, | |
| "eval_samples_per_second": 59.058, | |
| "eval_steps_per_second": 0.631, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 118.77, | |
| "eval_accuracy": 0.49466192170818507, | |
| "eval_loss": 1.4314603805541992, | |
| "eval_runtime": 3.9673, | |
| "eval_samples_per_second": 70.829, | |
| "eval_steps_per_second": 0.756, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 120.0, | |
| "eval_accuracy": 0.49466192170818507, | |
| "eval_loss": 1.4388599395751953, | |
| "eval_runtime": 4.1069, | |
| "eval_samples_per_second": 68.422, | |
| "eval_steps_per_second": 0.73, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 120.92, | |
| "eval_accuracy": 0.498220640569395, | |
| "eval_loss": 1.4374699592590332, | |
| "eval_runtime": 5.1154, | |
| "eval_samples_per_second": 54.933, | |
| "eval_steps_per_second": 0.586, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 121.85, | |
| "eval_accuracy": 0.498220640569395, | |
| "eval_loss": 1.4381343126296997, | |
| "eval_runtime": 4.1133, | |
| "eval_samples_per_second": 68.315, | |
| "eval_steps_per_second": 0.729, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 122.77, | |
| "eval_accuracy": 0.498220640569395, | |
| "eval_loss": 1.4246776103973389, | |
| "eval_runtime": 3.9833, | |
| "eval_samples_per_second": 70.544, | |
| "eval_steps_per_second": 0.753, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 123.08, | |
| "grad_norm": 31388.482421875, | |
| "learning_rate": 1.8518518518518518e-05, | |
| "loss": 0.8509, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 124.0, | |
| "eval_accuracy": 0.498220640569395, | |
| "eval_loss": 1.4195659160614014, | |
| "eval_runtime": 4.1654, | |
| "eval_samples_per_second": 67.461, | |
| "eval_steps_per_second": 0.72, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 124.92, | |
| "eval_accuracy": 0.505338078291815, | |
| "eval_loss": 1.4178649187088013, | |
| "eval_runtime": 5.0869, | |
| "eval_samples_per_second": 55.239, | |
| "eval_steps_per_second": 0.59, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 125.85, | |
| "eval_accuracy": 0.505338078291815, | |
| "eval_loss": 1.40910804271698, | |
| "eval_runtime": 4.5242, | |
| "eval_samples_per_second": 62.11, | |
| "eval_steps_per_second": 0.663, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 126.77, | |
| "eval_accuracy": 0.505338078291815, | |
| "eval_loss": 1.3957635164260864, | |
| "eval_runtime": 4.5377, | |
| "eval_samples_per_second": 61.926, | |
| "eval_steps_per_second": 0.661, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 128.0, | |
| "eval_accuracy": 0.5088967971530249, | |
| "eval_loss": 1.3736003637313843, | |
| "eval_runtime": 3.6994, | |
| "eval_samples_per_second": 75.958, | |
| "eval_steps_per_second": 0.811, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 128.92, | |
| "eval_accuracy": 0.5088967971530249, | |
| "eval_loss": 1.3661431074142456, | |
| "eval_runtime": 4.0248, | |
| "eval_samples_per_second": 69.817, | |
| "eval_steps_per_second": 0.745, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 129.85, | |
| "eval_accuracy": 0.5124555160142349, | |
| "eval_loss": 1.369443416595459, | |
| "eval_runtime": 4.9876, | |
| "eval_samples_per_second": 56.34, | |
| "eval_steps_per_second": 0.601, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 130.77, | |
| "eval_accuracy": 0.5124555160142349, | |
| "eval_loss": 1.3807623386383057, | |
| "eval_runtime": 3.5494, | |
| "eval_samples_per_second": 79.169, | |
| "eval_steps_per_second": 0.845, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 132.0, | |
| "eval_accuracy": 0.5124555160142349, | |
| "eval_loss": 1.3818711042404175, | |
| "eval_runtime": 3.9503, | |
| "eval_samples_per_second": 71.134, | |
| "eval_steps_per_second": 0.759, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 132.92, | |
| "eval_accuracy": 0.5124555160142349, | |
| "eval_loss": 1.3859163522720337, | |
| "eval_runtime": 4.2041, | |
| "eval_samples_per_second": 66.84, | |
| "eval_steps_per_second": 0.714, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 133.85, | |
| "eval_accuracy": 0.5231316725978647, | |
| "eval_loss": 1.378004789352417, | |
| "eval_runtime": 3.8384, | |
| "eval_samples_per_second": 73.208, | |
| "eval_steps_per_second": 0.782, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 134.77, | |
| "eval_accuracy": 0.5231316725978647, | |
| "eval_loss": 1.3696413040161133, | |
| "eval_runtime": 4.6334, | |
| "eval_samples_per_second": 60.646, | |
| "eval_steps_per_second": 0.647, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 136.0, | |
| "eval_accuracy": 0.5302491103202847, | |
| "eval_loss": 1.3564013242721558, | |
| "eval_runtime": 4.002, | |
| "eval_samples_per_second": 70.215, | |
| "eval_steps_per_second": 0.75, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 136.92, | |
| "eval_accuracy": 0.5338078291814946, | |
| "eval_loss": 1.3421210050582886, | |
| "eval_runtime": 4.0161, | |
| "eval_samples_per_second": 69.968, | |
| "eval_steps_per_second": 0.747, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 137.85, | |
| "eval_accuracy": 0.5373665480427047, | |
| "eval_loss": 1.325627326965332, | |
| "eval_runtime": 4.156, | |
| "eval_samples_per_second": 67.613, | |
| "eval_steps_per_second": 0.722, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 138.77, | |
| "eval_accuracy": 0.5373665480427047, | |
| "eval_loss": 1.3274290561676025, | |
| "eval_runtime": 3.9911, | |
| "eval_samples_per_second": 70.407, | |
| "eval_steps_per_second": 0.752, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 140.0, | |
| "eval_accuracy": 0.5409252669039146, | |
| "eval_loss": 1.3401566743850708, | |
| "eval_runtime": 4.4088, | |
| "eval_samples_per_second": 63.736, | |
| "eval_steps_per_second": 0.68, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 140.92, | |
| "eval_accuracy": 0.5409252669039146, | |
| "eval_loss": 1.351689338684082, | |
| "eval_runtime": 4.4409, | |
| "eval_samples_per_second": 63.276, | |
| "eval_steps_per_second": 0.676, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 141.85, | |
| "eval_accuracy": 0.5409252669039146, | |
| "eval_loss": 1.3585495948791504, | |
| "eval_runtime": 3.7955, | |
| "eval_samples_per_second": 74.035, | |
| "eval_steps_per_second": 0.79, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 142.77, | |
| "eval_accuracy": 0.5373665480427047, | |
| "eval_loss": 1.3592112064361572, | |
| "eval_runtime": 3.3552, | |
| "eval_samples_per_second": 83.75, | |
| "eval_steps_per_second": 0.894, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 144.0, | |
| "eval_accuracy": 0.5480427046263345, | |
| "eval_loss": 1.3329293727874756, | |
| "eval_runtime": 5.3044, | |
| "eval_samples_per_second": 52.975, | |
| "eval_steps_per_second": 0.566, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 144.92, | |
| "eval_accuracy": 0.5480427046263345, | |
| "eval_loss": 1.312560796737671, | |
| "eval_runtime": 4.319, | |
| "eval_samples_per_second": 65.061, | |
| "eval_steps_per_second": 0.695, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 145.85, | |
| "eval_accuracy": 0.5444839857651246, | |
| "eval_loss": 1.3075566291809082, | |
| "eval_runtime": 3.9528, | |
| "eval_samples_per_second": 71.09, | |
| "eval_steps_per_second": 0.759, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 146.77, | |
| "eval_accuracy": 0.5480427046263345, | |
| "eval_loss": 1.3146412372589111, | |
| "eval_runtime": 4.3249, | |
| "eval_samples_per_second": 64.973, | |
| "eval_steps_per_second": 0.694, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 148.0, | |
| "eval_accuracy": 0.5444839857651246, | |
| "eval_loss": 1.3345069885253906, | |
| "eval_runtime": 3.9127, | |
| "eval_samples_per_second": 71.817, | |
| "eval_steps_per_second": 0.767, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 148.92, | |
| "eval_accuracy": 0.5444839857651246, | |
| "eval_loss": 1.3408929109573364, | |
| "eval_runtime": 4.1463, | |
| "eval_samples_per_second": 67.771, | |
| "eval_steps_per_second": 0.724, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 149.85, | |
| "eval_accuracy": 0.5444839857651246, | |
| "eval_loss": 1.3374032974243164, | |
| "eval_runtime": 4.2345, | |
| "eval_samples_per_second": 66.359, | |
| "eval_steps_per_second": 0.708, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 150.77, | |
| "eval_accuracy": 0.5480427046263345, | |
| "eval_loss": 1.3227189779281616, | |
| "eval_runtime": 4.3006, | |
| "eval_samples_per_second": 65.339, | |
| "eval_steps_per_second": 0.698, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 152.0, | |
| "eval_accuracy": 0.5444839857651246, | |
| "eval_loss": 1.3200651407241821, | |
| "eval_runtime": 4.4216, | |
| "eval_samples_per_second": 63.551, | |
| "eval_steps_per_second": 0.678, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 152.92, | |
| "eval_accuracy": 0.5444839857651246, | |
| "eval_loss": 1.3174102306365967, | |
| "eval_runtime": 4.4898, | |
| "eval_samples_per_second": 62.586, | |
| "eval_steps_per_second": 0.668, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 153.85, | |
| "grad_norm": 24984.7734375, | |
| "learning_rate": 1.4814814814814815e-05, | |
| "loss": 0.7118, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 153.85, | |
| "eval_accuracy": 0.5444839857651246, | |
| "eval_loss": 1.3073471784591675, | |
| "eval_runtime": 4.2385, | |
| "eval_samples_per_second": 66.297, | |
| "eval_steps_per_second": 0.708, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 154.77, | |
| "eval_accuracy": 0.5551601423487544, | |
| "eval_loss": 1.2983657121658325, | |
| "eval_runtime": 3.4569, | |
| "eval_samples_per_second": 81.286, | |
| "eval_steps_per_second": 0.868, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 156.0, | |
| "eval_accuracy": 0.5516014234875445, | |
| "eval_loss": 1.2974605560302734, | |
| "eval_runtime": 4.3467, | |
| "eval_samples_per_second": 64.647, | |
| "eval_steps_per_second": 0.69, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 156.92, | |
| "eval_accuracy": 0.5516014234875445, | |
| "eval_loss": 1.3027478456497192, | |
| "eval_runtime": 4.5106, | |
| "eval_samples_per_second": 62.297, | |
| "eval_steps_per_second": 0.665, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 157.85, | |
| "eval_accuracy": 0.5480427046263345, | |
| "eval_loss": 1.3088507652282715, | |
| "eval_runtime": 4.2508, | |
| "eval_samples_per_second": 66.105, | |
| "eval_steps_per_second": 0.706, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 158.77, | |
| "eval_accuracy": 0.5480427046263345, | |
| "eval_loss": 1.3138750791549683, | |
| "eval_runtime": 4.4205, | |
| "eval_samples_per_second": 63.567, | |
| "eval_steps_per_second": 0.679, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 160.0, | |
| "eval_accuracy": 0.5551601423487544, | |
| "eval_loss": 1.3067928552627563, | |
| "eval_runtime": 4.2488, | |
| "eval_samples_per_second": 66.136, | |
| "eval_steps_per_second": 0.706, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 160.92, | |
| "eval_accuracy": 0.5551601423487544, | |
| "eval_loss": 1.3011025190353394, | |
| "eval_runtime": 4.3025, | |
| "eval_samples_per_second": 65.31, | |
| "eval_steps_per_second": 0.697, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 161.85, | |
| "eval_accuracy": 0.5551601423487544, | |
| "eval_loss": 1.2957364320755005, | |
| "eval_runtime": 4.3812, | |
| "eval_samples_per_second": 64.137, | |
| "eval_steps_per_second": 0.685, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 162.77, | |
| "eval_accuracy": 0.5551601423487544, | |
| "eval_loss": 1.296021819114685, | |
| "eval_runtime": 4.6921, | |
| "eval_samples_per_second": 59.887, | |
| "eval_steps_per_second": 0.639, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 164.0, | |
| "eval_accuracy": 0.5516014234875445, | |
| "eval_loss": 1.3158953189849854, | |
| "eval_runtime": 4.7452, | |
| "eval_samples_per_second": 59.218, | |
| "eval_steps_per_second": 0.632, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 164.92, | |
| "eval_accuracy": 0.5516014234875445, | |
| "eval_loss": 1.3257168531417847, | |
| "eval_runtime": 4.4128, | |
| "eval_samples_per_second": 63.678, | |
| "eval_steps_per_second": 0.68, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 165.85, | |
| "eval_accuracy": 0.5516014234875445, | |
| "eval_loss": 1.3312301635742188, | |
| "eval_runtime": 3.6447, | |
| "eval_samples_per_second": 77.099, | |
| "eval_steps_per_second": 0.823, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 166.77, | |
| "eval_accuracy": 0.5516014234875445, | |
| "eval_loss": 1.322218418121338, | |
| "eval_runtime": 4.2773, | |
| "eval_samples_per_second": 65.695, | |
| "eval_steps_per_second": 0.701, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 168.0, | |
| "eval_accuracy": 0.5551601423487544, | |
| "eval_loss": 1.298622488975525, | |
| "eval_runtime": 4.5789, | |
| "eval_samples_per_second": 61.369, | |
| "eval_steps_per_second": 0.655, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 168.92, | |
| "eval_accuracy": 0.5587188612099644, | |
| "eval_loss": 1.289797306060791, | |
| "eval_runtime": 4.5328, | |
| "eval_samples_per_second": 61.993, | |
| "eval_steps_per_second": 0.662, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 169.85, | |
| "eval_accuracy": 0.5551601423487544, | |
| "eval_loss": 1.2937852144241333, | |
| "eval_runtime": 3.7509, | |
| "eval_samples_per_second": 74.915, | |
| "eval_steps_per_second": 0.8, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 170.77, | |
| "eval_accuracy": 0.5551601423487544, | |
| "eval_loss": 1.290231704711914, | |
| "eval_runtime": 4.1153, | |
| "eval_samples_per_second": 68.282, | |
| "eval_steps_per_second": 0.729, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 172.0, | |
| "eval_accuracy": 0.5658362989323843, | |
| "eval_loss": 1.287913203239441, | |
| "eval_runtime": 4.7912, | |
| "eval_samples_per_second": 58.649, | |
| "eval_steps_per_second": 0.626, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 172.92, | |
| "eval_accuracy": 0.5658362989323843, | |
| "eval_loss": 1.283803939819336, | |
| "eval_runtime": 4.5456, | |
| "eval_samples_per_second": 61.818, | |
| "eval_steps_per_second": 0.66, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 173.85, | |
| "eval_accuracy": 0.5658362989323843, | |
| "eval_loss": 1.2811965942382812, | |
| "eval_runtime": 4.4869, | |
| "eval_samples_per_second": 62.627, | |
| "eval_steps_per_second": 0.669, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 174.77, | |
| "eval_accuracy": 0.5658362989323843, | |
| "eval_loss": 1.2863661050796509, | |
| "eval_runtime": 4.2715, | |
| "eval_samples_per_second": 65.785, | |
| "eval_steps_per_second": 0.702, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 176.0, | |
| "eval_accuracy": 0.5551601423487544, | |
| "eval_loss": 1.2934131622314453, | |
| "eval_runtime": 4.643, | |
| "eval_samples_per_second": 60.522, | |
| "eval_steps_per_second": 0.646, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 176.92, | |
| "eval_accuracy": 0.5587188612099644, | |
| "eval_loss": 1.2940202951431274, | |
| "eval_runtime": 4.2681, | |
| "eval_samples_per_second": 65.837, | |
| "eval_steps_per_second": 0.703, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 177.85, | |
| "eval_accuracy": 0.5587188612099644, | |
| "eval_loss": 1.298832654953003, | |
| "eval_runtime": 4.2991, | |
| "eval_samples_per_second": 65.363, | |
| "eval_steps_per_second": 0.698, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 178.77, | |
| "eval_accuracy": 0.5622775800711743, | |
| "eval_loss": 1.295286774635315, | |
| "eval_runtime": 4.1989, | |
| "eval_samples_per_second": 66.922, | |
| "eval_steps_per_second": 0.714, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 180.0, | |
| "eval_accuracy": 0.5587188612099644, | |
| "eval_loss": 1.2971975803375244, | |
| "eval_runtime": 4.7188, | |
| "eval_samples_per_second": 59.549, | |
| "eval_steps_per_second": 0.636, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 180.92, | |
| "eval_accuracy": 0.5658362989323843, | |
| "eval_loss": 1.2936004400253296, | |
| "eval_runtime": 4.813, | |
| "eval_samples_per_second": 58.383, | |
| "eval_steps_per_second": 0.623, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 181.85, | |
| "eval_accuracy": 0.5658362989323843, | |
| "eval_loss": 1.2928047180175781, | |
| "eval_runtime": 4.1735, | |
| "eval_samples_per_second": 67.33, | |
| "eval_steps_per_second": 0.719, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 182.77, | |
| "eval_accuracy": 0.5658362989323843, | |
| "eval_loss": 1.291295051574707, | |
| "eval_runtime": 4.4694, | |
| "eval_samples_per_second": 62.872, | |
| "eval_steps_per_second": 0.671, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 184.0, | |
| "eval_accuracy": 0.5658362989323843, | |
| "eval_loss": 1.2824889421463013, | |
| "eval_runtime": 4.0765, | |
| "eval_samples_per_second": 68.932, | |
| "eval_steps_per_second": 0.736, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 184.62, | |
| "grad_norm": 29892.71484375, | |
| "learning_rate": 1.111111111111111e-05, | |
| "loss": 0.6473, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 184.92, | |
| "eval_accuracy": 0.5693950177935944, | |
| "eval_loss": 1.2735832929611206, | |
| "eval_runtime": 4.6704, | |
| "eval_samples_per_second": 60.166, | |
| "eval_steps_per_second": 0.642, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 185.85, | |
| "eval_accuracy": 0.5693950177935944, | |
| "eval_loss": 1.2714898586273193, | |
| "eval_runtime": 4.6432, | |
| "eval_samples_per_second": 60.519, | |
| "eval_steps_per_second": 0.646, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 186.77, | |
| "eval_accuracy": 0.5693950177935944, | |
| "eval_loss": 1.2703534364700317, | |
| "eval_runtime": 4.1853, | |
| "eval_samples_per_second": 67.139, | |
| "eval_steps_per_second": 0.717, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 188.0, | |
| "eval_accuracy": 0.5693950177935944, | |
| "eval_loss": 1.2716755867004395, | |
| "eval_runtime": 4.1775, | |
| "eval_samples_per_second": 67.265, | |
| "eval_steps_per_second": 0.718, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 188.92, | |
| "eval_accuracy": 0.5658362989323843, | |
| "eval_loss": 1.2724348306655884, | |
| "eval_runtime": 4.9312, | |
| "eval_samples_per_second": 56.984, | |
| "eval_steps_per_second": 0.608, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 189.85, | |
| "eval_accuracy": 0.5658362989323843, | |
| "eval_loss": 1.2763242721557617, | |
| "eval_runtime": 3.5712, | |
| "eval_samples_per_second": 78.685, | |
| "eval_steps_per_second": 0.84, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 190.77, | |
| "eval_accuracy": 0.5658362989323843, | |
| "eval_loss": 1.2811599969863892, | |
| "eval_runtime": 4.2324, | |
| "eval_samples_per_second": 66.393, | |
| "eval_steps_per_second": 0.709, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 192.0, | |
| "eval_accuracy": 0.5658362989323843, | |
| "eval_loss": 1.2791301012039185, | |
| "eval_runtime": 4.4625, | |
| "eval_samples_per_second": 62.97, | |
| "eval_steps_per_second": 0.672, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 192.92, | |
| "eval_accuracy": 0.5693950177935944, | |
| "eval_loss": 1.2697654962539673, | |
| "eval_runtime": 4.2766, | |
| "eval_samples_per_second": 65.707, | |
| "eval_steps_per_second": 0.701, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 193.85, | |
| "eval_accuracy": 0.5693950177935944, | |
| "eval_loss": 1.269476294517517, | |
| "eval_runtime": 4.2862, | |
| "eval_samples_per_second": 65.56, | |
| "eval_steps_per_second": 0.7, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 194.77, | |
| "eval_accuracy": 0.5693950177935944, | |
| "eval_loss": 1.2703962326049805, | |
| "eval_runtime": 4.2135, | |
| "eval_samples_per_second": 66.69, | |
| "eval_steps_per_second": 0.712, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 196.0, | |
| "eval_accuracy": 0.5658362989323843, | |
| "eval_loss": 1.2736749649047852, | |
| "eval_runtime": 4.1666, | |
| "eval_samples_per_second": 67.441, | |
| "eval_steps_per_second": 0.72, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 196.92, | |
| "eval_accuracy": 0.5658362989323843, | |
| "eval_loss": 1.2782082557678223, | |
| "eval_runtime": 4.3682, | |
| "eval_samples_per_second": 64.329, | |
| "eval_steps_per_second": 0.687, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 197.85, | |
| "eval_accuracy": 0.5622775800711743, | |
| "eval_loss": 1.2813825607299805, | |
| "eval_runtime": 5.6488, | |
| "eval_samples_per_second": 49.745, | |
| "eval_steps_per_second": 0.531, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 198.77, | |
| "eval_accuracy": 0.5622775800711743, | |
| "eval_loss": 1.2819089889526367, | |
| "eval_runtime": 5.1916, | |
| "eval_samples_per_second": 54.126, | |
| "eval_steps_per_second": 0.578, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 200.0, | |
| "eval_accuracy": 0.5658362989323843, | |
| "eval_loss": 1.274595022201538, | |
| "eval_runtime": 4.4378, | |
| "eval_samples_per_second": 63.32, | |
| "eval_steps_per_second": 0.676, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 200.92, | |
| "eval_accuracy": 0.5658362989323843, | |
| "eval_loss": 1.2694467306137085, | |
| "eval_runtime": 4.797, | |
| "eval_samples_per_second": 58.579, | |
| "eval_steps_per_second": 0.625, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 201.85, | |
| "eval_accuracy": 0.5765124555160143, | |
| "eval_loss": 1.262547254562378, | |
| "eval_runtime": 4.6991, | |
| "eval_samples_per_second": 59.798, | |
| "eval_steps_per_second": 0.638, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 202.77, | |
| "eval_accuracy": 0.5800711743772242, | |
| "eval_loss": 1.2575123310089111, | |
| "eval_runtime": 4.9663, | |
| "eval_samples_per_second": 56.582, | |
| "eval_steps_per_second": 0.604, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 204.0, | |
| "eval_accuracy": 0.5800711743772242, | |
| "eval_loss": 1.2548755407333374, | |
| "eval_runtime": 5.2012, | |
| "eval_samples_per_second": 54.026, | |
| "eval_steps_per_second": 0.577, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 204.92, | |
| "eval_accuracy": 0.5729537366548043, | |
| "eval_loss": 1.2623133659362793, | |
| "eval_runtime": 4.5347, | |
| "eval_samples_per_second": 61.967, | |
| "eval_steps_per_second": 0.662, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 205.85, | |
| "eval_accuracy": 0.5658362989323843, | |
| "eval_loss": 1.2665455341339111, | |
| "eval_runtime": 3.1603, | |
| "eval_samples_per_second": 88.917, | |
| "eval_steps_per_second": 0.949, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 206.77, | |
| "eval_accuracy": 0.5658362989323843, | |
| "eval_loss": 1.2684026956558228, | |
| "eval_runtime": 4.2009, | |
| "eval_samples_per_second": 66.89, | |
| "eval_steps_per_second": 0.714, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 208.0, | |
| "eval_accuracy": 0.5622775800711743, | |
| "eval_loss": 1.277047038078308, | |
| "eval_runtime": 4.3489, | |
| "eval_samples_per_second": 64.613, | |
| "eval_steps_per_second": 0.69, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 208.92, | |
| "eval_accuracy": 0.5622775800711743, | |
| "eval_loss": 1.2807551622390747, | |
| "eval_runtime": 3.8563, | |
| "eval_samples_per_second": 72.867, | |
| "eval_steps_per_second": 0.778, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 209.85, | |
| "eval_accuracy": 0.5729537366548043, | |
| "eval_loss": 1.2761532068252563, | |
| "eval_runtime": 4.7161, | |
| "eval_samples_per_second": 59.583, | |
| "eval_steps_per_second": 0.636, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 210.77, | |
| "eval_accuracy": 0.5729537366548043, | |
| "eval_loss": 1.2759194374084473, | |
| "eval_runtime": 5.0376, | |
| "eval_samples_per_second": 55.781, | |
| "eval_steps_per_second": 0.596, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 212.0, | |
| "eval_accuracy": 0.5729537366548043, | |
| "eval_loss": 1.2752187252044678, | |
| "eval_runtime": 4.5842, | |
| "eval_samples_per_second": 61.297, | |
| "eval_steps_per_second": 0.654, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 212.92, | |
| "eval_accuracy": 0.5729537366548043, | |
| "eval_loss": 1.275394082069397, | |
| "eval_runtime": 4.2209, | |
| "eval_samples_per_second": 66.573, | |
| "eval_steps_per_second": 0.711, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 213.85, | |
| "eval_accuracy": 0.5765124555160143, | |
| "eval_loss": 1.272161602973938, | |
| "eval_runtime": 4.7348, | |
| "eval_samples_per_second": 59.347, | |
| "eval_steps_per_second": 0.634, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 214.77, | |
| "eval_accuracy": 0.5765124555160143, | |
| "eval_loss": 1.273858904838562, | |
| "eval_runtime": 4.0254, | |
| "eval_samples_per_second": 69.808, | |
| "eval_steps_per_second": 0.745, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 215.38, | |
| "grad_norm": 28098.056640625, | |
| "learning_rate": 7.4074074074074075e-06, | |
| "loss": 0.613, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 216.0, | |
| "eval_accuracy": 0.5765124555160143, | |
| "eval_loss": 1.2782981395721436, | |
| "eval_runtime": 4.6723, | |
| "eval_samples_per_second": 60.142, | |
| "eval_steps_per_second": 0.642, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 216.92, | |
| "eval_accuracy": 0.5765124555160143, | |
| "eval_loss": 1.2774933576583862, | |
| "eval_runtime": 4.576, | |
| "eval_samples_per_second": 61.407, | |
| "eval_steps_per_second": 0.656, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 217.85, | |
| "eval_accuracy": 0.5765124555160143, | |
| "eval_loss": 1.2740654945373535, | |
| "eval_runtime": 4.8253, | |
| "eval_samples_per_second": 58.234, | |
| "eval_steps_per_second": 0.622, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 218.77, | |
| "eval_accuracy": 0.5765124555160143, | |
| "eval_loss": 1.2705509662628174, | |
| "eval_runtime": 4.386, | |
| "eval_samples_per_second": 64.067, | |
| "eval_steps_per_second": 0.684, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 220.0, | |
| "eval_accuracy": 0.5765124555160143, | |
| "eval_loss": 1.2627956867218018, | |
| "eval_runtime": 4.2817, | |
| "eval_samples_per_second": 65.628, | |
| "eval_steps_per_second": 0.701, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 220.92, | |
| "eval_accuracy": 0.5800711743772242, | |
| "eval_loss": 1.2580970525741577, | |
| "eval_runtime": 3.9386, | |
| "eval_samples_per_second": 71.344, | |
| "eval_steps_per_second": 0.762, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 221.85, | |
| "eval_accuracy": 0.5765124555160143, | |
| "eval_loss": 1.2567566633224487, | |
| "eval_runtime": 4.3353, | |
| "eval_samples_per_second": 64.817, | |
| "eval_steps_per_second": 0.692, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 222.77, | |
| "eval_accuracy": 0.5729537366548043, | |
| "eval_loss": 1.2558982372283936, | |
| "eval_runtime": 3.7135, | |
| "eval_samples_per_second": 75.67, | |
| "eval_steps_per_second": 0.808, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 224.0, | |
| "eval_accuracy": 0.5765124555160143, | |
| "eval_loss": 1.2502700090408325, | |
| "eval_runtime": 5.0636, | |
| "eval_samples_per_second": 55.494, | |
| "eval_steps_per_second": 0.592, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 224.92, | |
| "eval_accuracy": 0.5765124555160143, | |
| "eval_loss": 1.2497973442077637, | |
| "eval_runtime": 4.7669, | |
| "eval_samples_per_second": 58.948, | |
| "eval_steps_per_second": 0.629, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 225.85, | |
| "eval_accuracy": 0.5765124555160143, | |
| "eval_loss": 1.2500195503234863, | |
| "eval_runtime": 3.9522, | |
| "eval_samples_per_second": 71.099, | |
| "eval_steps_per_second": 0.759, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 226.77, | |
| "eval_accuracy": 0.5765124555160143, | |
| "eval_loss": 1.2490234375, | |
| "eval_runtime": 4.1869, | |
| "eval_samples_per_second": 67.114, | |
| "eval_steps_per_second": 0.717, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 228.0, | |
| "eval_accuracy": 0.5765124555160143, | |
| "eval_loss": 1.2531741857528687, | |
| "eval_runtime": 3.9865, | |
| "eval_samples_per_second": 70.489, | |
| "eval_steps_per_second": 0.753, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 228.92, | |
| "eval_accuracy": 0.5765124555160143, | |
| "eval_loss": 1.2572293281555176, | |
| "eval_runtime": 5.2241, | |
| "eval_samples_per_second": 53.789, | |
| "eval_steps_per_second": 0.574, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 229.85, | |
| "eval_accuracy": 0.5765124555160143, | |
| "eval_loss": 1.2598803043365479, | |
| "eval_runtime": 4.1402, | |
| "eval_samples_per_second": 67.87, | |
| "eval_steps_per_second": 0.725, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 230.77, | |
| "eval_accuracy": 0.5729537366548043, | |
| "eval_loss": 1.2600898742675781, | |
| "eval_runtime": 3.9785, | |
| "eval_samples_per_second": 70.63, | |
| "eval_steps_per_second": 0.754, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 232.0, | |
| "eval_accuracy": 0.5729537366548043, | |
| "eval_loss": 1.2625129222869873, | |
| "eval_runtime": 4.1458, | |
| "eval_samples_per_second": 67.779, | |
| "eval_steps_per_second": 0.724, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 232.92, | |
| "eval_accuracy": 0.5765124555160143, | |
| "eval_loss": 1.2635974884033203, | |
| "eval_runtime": 4.5032, | |
| "eval_samples_per_second": 62.401, | |
| "eval_steps_per_second": 0.666, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 233.85, | |
| "eval_accuracy": 0.5765124555160143, | |
| "eval_loss": 1.2629433870315552, | |
| "eval_runtime": 4.1399, | |
| "eval_samples_per_second": 67.876, | |
| "eval_steps_per_second": 0.725, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 234.77, | |
| "eval_accuracy": 0.5765124555160143, | |
| "eval_loss": 1.2600425481796265, | |
| "eval_runtime": 4.2407, | |
| "eval_samples_per_second": 66.263, | |
| "eval_steps_per_second": 0.707, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 236.0, | |
| "eval_accuracy": 0.5800711743772242, | |
| "eval_loss": 1.2558783292770386, | |
| "eval_runtime": 4.1208, | |
| "eval_samples_per_second": 68.19, | |
| "eval_steps_per_second": 0.728, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 236.92, | |
| "eval_accuracy": 0.5800711743772242, | |
| "eval_loss": 1.2534478902816772, | |
| "eval_runtime": 3.8139, | |
| "eval_samples_per_second": 73.678, | |
| "eval_steps_per_second": 0.787, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 237.85, | |
| "eval_accuracy": 0.5836298932384342, | |
| "eval_loss": 1.2513927221298218, | |
| "eval_runtime": 4.6813, | |
| "eval_samples_per_second": 60.026, | |
| "eval_steps_per_second": 0.641, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 238.77, | |
| "eval_accuracy": 0.5836298932384342, | |
| "eval_loss": 1.2508091926574707, | |
| "eval_runtime": 4.2671, | |
| "eval_samples_per_second": 65.852, | |
| "eval_steps_per_second": 0.703, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 240.0, | |
| "eval_accuracy": 0.5836298932384342, | |
| "eval_loss": 1.2487518787384033, | |
| "eval_runtime": 3.7642, | |
| "eval_samples_per_second": 74.651, | |
| "eval_steps_per_second": 0.797, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 240.92, | |
| "eval_accuracy": 0.5836298932384342, | |
| "eval_loss": 1.2483351230621338, | |
| "eval_runtime": 4.8941, | |
| "eval_samples_per_second": 57.416, | |
| "eval_steps_per_second": 0.613, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 241.85, | |
| "eval_accuracy": 0.5836298932384342, | |
| "eval_loss": 1.2500139474868774, | |
| "eval_runtime": 4.274, | |
| "eval_samples_per_second": 65.746, | |
| "eval_steps_per_second": 0.702, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 242.77, | |
| "eval_accuracy": 0.5800711743772242, | |
| "eval_loss": 1.2503968477249146, | |
| "eval_runtime": 4.4982, | |
| "eval_samples_per_second": 62.469, | |
| "eval_steps_per_second": 0.667, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 244.0, | |
| "eval_accuracy": 0.5800711743772242, | |
| "eval_loss": 1.2521419525146484, | |
| "eval_runtime": 4.0413, | |
| "eval_samples_per_second": 69.532, | |
| "eval_steps_per_second": 0.742, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 244.92, | |
| "eval_accuracy": 0.5800711743772242, | |
| "eval_loss": 1.2532862424850464, | |
| "eval_runtime": 4.1262, | |
| "eval_samples_per_second": 68.101, | |
| "eval_steps_per_second": 0.727, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 245.85, | |
| "eval_accuracy": 0.5800711743772242, | |
| "eval_loss": 1.251287817955017, | |
| "eval_runtime": 3.9321, | |
| "eval_samples_per_second": 71.463, | |
| "eval_steps_per_second": 0.763, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 246.15, | |
| "grad_norm": 63046.29296875, | |
| "learning_rate": 3.7037037037037037e-06, | |
| "loss": 0.5946, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 246.77, | |
| "eval_accuracy": 0.5800711743772242, | |
| "eval_loss": 1.2513457536697388, | |
| "eval_runtime": 4.1155, | |
| "eval_samples_per_second": 68.279, | |
| "eval_steps_per_second": 0.729, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 248.0, | |
| "eval_accuracy": 0.5800711743772242, | |
| "eval_loss": 1.2507133483886719, | |
| "eval_runtime": 4.3807, | |
| "eval_samples_per_second": 64.145, | |
| "eval_steps_per_second": 0.685, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 248.92, | |
| "eval_accuracy": 0.5836298932384342, | |
| "eval_loss": 1.2491704225540161, | |
| "eval_runtime": 4.0611, | |
| "eval_samples_per_second": 69.193, | |
| "eval_steps_per_second": 0.739, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 249.85, | |
| "eval_accuracy": 0.5800711743772242, | |
| "eval_loss": 1.2499818801879883, | |
| "eval_runtime": 3.9673, | |
| "eval_samples_per_second": 70.828, | |
| "eval_steps_per_second": 0.756, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 250.77, | |
| "eval_accuracy": 0.5800711743772242, | |
| "eval_loss": 1.2505466938018799, | |
| "eval_runtime": 4.5211, | |
| "eval_samples_per_second": 62.153, | |
| "eval_steps_per_second": 0.664, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 252.0, | |
| "eval_accuracy": 0.5800711743772242, | |
| "eval_loss": 1.2519145011901855, | |
| "eval_runtime": 5.2859, | |
| "eval_samples_per_second": 53.16, | |
| "eval_steps_per_second": 0.568, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 252.92, | |
| "eval_accuracy": 0.5800711743772242, | |
| "eval_loss": 1.253113865852356, | |
| "eval_runtime": 4.0658, | |
| "eval_samples_per_second": 69.113, | |
| "eval_steps_per_second": 0.738, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 253.85, | |
| "eval_accuracy": 0.5800711743772242, | |
| "eval_loss": 1.2538248300552368, | |
| "eval_runtime": 4.1084, | |
| "eval_samples_per_second": 68.396, | |
| "eval_steps_per_second": 0.73, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 254.77, | |
| "eval_accuracy": 0.5800711743772242, | |
| "eval_loss": 1.2532281875610352, | |
| "eval_runtime": 4.0615, | |
| "eval_samples_per_second": 69.186, | |
| "eval_steps_per_second": 0.739, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 256.0, | |
| "eval_accuracy": 0.5800711743772242, | |
| "eval_loss": 1.2527676820755005, | |
| "eval_runtime": 4.6892, | |
| "eval_samples_per_second": 59.925, | |
| "eval_steps_per_second": 0.64, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 256.92, | |
| "eval_accuracy": 0.5800711743772242, | |
| "eval_loss": 1.252835988998413, | |
| "eval_runtime": 3.7759, | |
| "eval_samples_per_second": 74.42, | |
| "eval_steps_per_second": 0.795, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 257.85, | |
| "eval_accuracy": 0.5836298932384342, | |
| "eval_loss": 1.2521347999572754, | |
| "eval_runtime": 3.5788, | |
| "eval_samples_per_second": 78.519, | |
| "eval_steps_per_second": 0.838, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 258.77, | |
| "eval_accuracy": 0.5836298932384342, | |
| "eval_loss": 1.252551555633545, | |
| "eval_runtime": 3.9885, | |
| "eval_samples_per_second": 70.452, | |
| "eval_steps_per_second": 0.752, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 260.0, | |
| "eval_accuracy": 0.5836298932384342, | |
| "eval_loss": 1.2527978420257568, | |
| "eval_runtime": 3.7855, | |
| "eval_samples_per_second": 74.231, | |
| "eval_steps_per_second": 0.792, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 260.92, | |
| "eval_accuracy": 0.5836298932384342, | |
| "eval_loss": 1.2529038190841675, | |
| "eval_runtime": 3.8445, | |
| "eval_samples_per_second": 73.091, | |
| "eval_steps_per_second": 0.78, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 261.85, | |
| "eval_accuracy": 0.5836298932384342, | |
| "eval_loss": 1.2528387308120728, | |
| "eval_runtime": 3.7891, | |
| "eval_samples_per_second": 74.16, | |
| "eval_steps_per_second": 0.792, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 262.77, | |
| "eval_accuracy": 0.5836298932384342, | |
| "eval_loss": 1.2516640424728394, | |
| "eval_runtime": 4.2868, | |
| "eval_samples_per_second": 65.55, | |
| "eval_steps_per_second": 0.7, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 264.0, | |
| "eval_accuracy": 0.5836298932384342, | |
| "eval_loss": 1.251232385635376, | |
| "eval_runtime": 3.7886, | |
| "eval_samples_per_second": 74.169, | |
| "eval_steps_per_second": 0.792, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 264.92, | |
| "eval_accuracy": 0.5836298932384342, | |
| "eval_loss": 1.251160979270935, | |
| "eval_runtime": 4.4131, | |
| "eval_samples_per_second": 63.674, | |
| "eval_steps_per_second": 0.68, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 265.85, | |
| "eval_accuracy": 0.5836298932384342, | |
| "eval_loss": 1.2503511905670166, | |
| "eval_runtime": 3.8431, | |
| "eval_samples_per_second": 73.118, | |
| "eval_steps_per_second": 0.781, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 266.77, | |
| "eval_accuracy": 0.5836298932384342, | |
| "eval_loss": 1.2499034404754639, | |
| "eval_runtime": 3.7109, | |
| "eval_samples_per_second": 75.723, | |
| "eval_steps_per_second": 0.808, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 268.0, | |
| "eval_accuracy": 0.5836298932384342, | |
| "eval_loss": 1.2496285438537598, | |
| "eval_runtime": 4.0777, | |
| "eval_samples_per_second": 68.912, | |
| "eval_steps_per_second": 0.736, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 268.92, | |
| "eval_accuracy": 0.5836298932384342, | |
| "eval_loss": 1.2497419118881226, | |
| "eval_runtime": 4.4854, | |
| "eval_samples_per_second": 62.648, | |
| "eval_steps_per_second": 0.669, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 269.85, | |
| "eval_accuracy": 0.5836298932384342, | |
| "eval_loss": 1.2500321865081787, | |
| "eval_runtime": 5.2487, | |
| "eval_samples_per_second": 53.537, | |
| "eval_steps_per_second": 0.572, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 270.77, | |
| "eval_accuracy": 0.5836298932384342, | |
| "eval_loss": 1.250011682510376, | |
| "eval_runtime": 4.3951, | |
| "eval_samples_per_second": 63.935, | |
| "eval_steps_per_second": 0.683, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 272.0, | |
| "eval_accuracy": 0.5836298932384342, | |
| "eval_loss": 1.2498865127563477, | |
| "eval_runtime": 4.1755, | |
| "eval_samples_per_second": 67.297, | |
| "eval_steps_per_second": 0.718, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 272.92, | |
| "eval_accuracy": 0.5836298932384342, | |
| "eval_loss": 1.2500803470611572, | |
| "eval_runtime": 4.6562, | |
| "eval_samples_per_second": 60.349, | |
| "eval_steps_per_second": 0.644, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 273.85, | |
| "eval_accuracy": 0.5836298932384342, | |
| "eval_loss": 1.2503583431243896, | |
| "eval_runtime": 4.5464, | |
| "eval_samples_per_second": 61.807, | |
| "eval_steps_per_second": 0.66, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 274.77, | |
| "eval_accuracy": 0.5836298932384342, | |
| "eval_loss": 1.2506159543991089, | |
| "eval_runtime": 3.9624, | |
| "eval_samples_per_second": 70.917, | |
| "eval_steps_per_second": 0.757, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 276.0, | |
| "eval_accuracy": 0.5836298932384342, | |
| "eval_loss": 1.2505924701690674, | |
| "eval_runtime": 4.0033, | |
| "eval_samples_per_second": 70.192, | |
| "eval_steps_per_second": 0.749, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 276.92, | |
| "grad_norm": 30700.47265625, | |
| "learning_rate": 0.0, | |
| "loss": 0.588, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 276.92, | |
| "eval_accuracy": 0.5836298932384342, | |
| "eval_loss": 1.2505559921264648, | |
| "eval_runtime": 4.7936, | |
| "eval_samples_per_second": 58.62, | |
| "eval_steps_per_second": 0.626, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 276.92, | |
| "step": 900, | |
| "total_flos": 3.755576946691584e+18, | |
| "train_loss": 0.8810926691691081, | |
| "train_runtime": 3759.0782, | |
| "train_samples_per_second": 123.541, | |
| "train_steps_per_second": 0.239 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 900, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 300, | |
| "save_steps": 500, | |
| "total_flos": 3.755576946691584e+18, | |
| "train_batch_size": 128, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |