{ "best_metric": 0.8978955572876072, "best_model_checkpoint": "./results/finetunes/20250205-121158__microsoft_Phi-3.5-mini-instruct__ft/checkpoint-1792", "epoch": 0.13208520675167687, "eval_steps": 16, "global_step": 1792, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0001474165253924965, "grad_norm": 112.13977813720703, "learning_rate": 0.00012128399488167067, "loss": 2.0334, "step": 2 }, { "epoch": 0.000294833050784993, "grad_norm": 47.18525695800781, "learning_rate": 0.00012128399457682722, "loss": 0.4295, "step": 4 }, { "epoch": 0.0004422495761774895, "grad_norm": 46.89369583129883, "learning_rate": 0.0001212839940687548, "loss": 1.793, "step": 6 }, { "epoch": 0.000589666101569986, "grad_norm": 90.68251037597656, "learning_rate": 0.00012128399335745342, "loss": 1.582, "step": 8 }, { "epoch": 0.0007370826269624825, "grad_norm": 10.48133373260498, "learning_rate": 0.00012128399244292309, "loss": 1.152, "step": 10 }, { "epoch": 0.000884499152354979, "grad_norm": 57.58028030395508, "learning_rate": 0.00012128399132516379, "loss": 0.8417, "step": 12 }, { "epoch": 0.0010319156777474755, "grad_norm": 24.7613468170166, "learning_rate": 0.00012128399000417552, "loss": 0.6337, "step": 14 }, { "epoch": 0.001179332203139972, "grad_norm": 5.995689868927002, "learning_rate": 0.00012128398847995831, "loss": 0.29, "step": 16 }, { "epoch": 0.001179332203139972, "eval_1_ratio_diff": -0.12081060015588468, "eval_accuracy": 0.6360093530787218, "eval_f1": 0.5856255545696539, "eval_loss": 0.7121835350990295, "eval_precision": 0.6790123456790124, "eval_recall": 0.514820592823713, "eval_runtime": 1440.0319, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 16 }, { "epoch": 0.0013267487285324685, "grad_norm": 20.756057739257812, "learning_rate": 0.00012128398675251216, "loss": 0.4541, "step": 18 }, { "epoch": 0.001474165253924965, "grad_norm": 49.25767135620117, "learning_rate": 0.00012128398482183706, "loss": 1.1751, "step": 20 }, { "epoch": 0.0016215817793174615, "grad_norm": 10.73904037475586, "learning_rate": 0.00012128398268793303, "loss": 0.2334, "step": 22 }, { "epoch": 0.001768998304709958, "grad_norm": 3.5153348445892334, "learning_rate": 0.00012128398035080009, "loss": 0.8965, "step": 24 }, { "epoch": 0.0019164148301024544, "grad_norm": 117.84137725830078, "learning_rate": 0.0001212839778104382, "loss": 2.9108, "step": 26 }, { "epoch": 0.002063831355494951, "grad_norm": 108.86376190185547, "learning_rate": 0.00012128397506684742, "loss": 2.1317, "step": 28 }, { "epoch": 0.0022112478808874476, "grad_norm": 19.305322647094727, "learning_rate": 0.00012128397212002774, "loss": 0.2653, "step": 30 }, { "epoch": 0.002358664406279944, "grad_norm": 46.865966796875, "learning_rate": 0.00012128396896997918, "loss": 2.2461, "step": 32 }, { "epoch": 0.002358664406279944, "eval_1_ratio_diff": -0.49961028838659394, "eval_accuracy": 0.5003897116134061, "eval_f1": 0.0, "eval_loss": 1.7971160411834717, "eval_precision": 0.0, "eval_recall": 0.0, "eval_runtime": 1438.1269, "eval_samples_per_second": 0.892, "eval_steps_per_second": 0.446, "step": 32 }, { "epoch": 0.0025060809316724405, "grad_norm": 55.90218734741211, "learning_rate": 0.00012128396561670172, "loss": 1.0773, "step": 34 }, { "epoch": 0.002653497457064937, "grad_norm": 8.257821083068848, "learning_rate": 0.0001212839620601954, "loss": 0.7481, "step": 36 }, { "epoch": 0.0028009139824574335, "grad_norm": 11.776910781860352, "learning_rate": 0.00012128395830046022, "loss": 0.0906, "step": 38 }, { "epoch": 0.00294833050784993, "grad_norm": 115.57841491699219, "learning_rate": 0.00012128395433749618, "loss": 3.0851, "step": 40 }, { "epoch": 0.0030957470332424264, "grad_norm": 5.130585193634033, "learning_rate": 0.00012128395017130333, "loss": 0.9399, "step": 42 }, { "epoch": 0.003243163558634923, "grad_norm": 43.877689361572266, "learning_rate": 0.00012128394580188166, "loss": 0.9284, "step": 44 }, { "epoch": 0.0033905800840274194, "grad_norm": 48.76664733886719, "learning_rate": 0.00012128394122923118, "loss": 0.5431, "step": 46 }, { "epoch": 0.003537996609419916, "grad_norm": 33.9229736328125, "learning_rate": 0.00012128393645335193, "loss": 0.6688, "step": 48 }, { "epoch": 0.003537996609419916, "eval_1_ratio_diff": -0.09353078721745911, "eval_accuracy": 0.764614185502728, "eval_f1": 0.7401032702237521, "eval_loss": 0.49912577867507935, "eval_precision": 0.8253358925143954, "eval_recall": 0.6708268330733229, "eval_runtime": 1439.1521, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 48 }, { "epoch": 0.0036854131348124123, "grad_norm": 20.783430099487305, "learning_rate": 0.00012128393147424389, "loss": 0.7502, "step": 50 }, { "epoch": 0.003832829660204909, "grad_norm": 28.81708335876465, "learning_rate": 0.0001212839262919071, "loss": 0.8271, "step": 52 }, { "epoch": 0.003980246185597405, "grad_norm": 58.47079086303711, "learning_rate": 0.00012128392090634156, "loss": 1.0213, "step": 54 }, { "epoch": 0.004127662710989902, "grad_norm": 107.4663314819336, "learning_rate": 0.00012128391531754733, "loss": 1.6449, "step": 56 }, { "epoch": 0.004275079236382398, "grad_norm": 21.926761627197266, "learning_rate": 0.00012128390952552436, "loss": 1.5282, "step": 58 }, { "epoch": 0.004422495761774895, "grad_norm": 108.13206481933594, "learning_rate": 0.00012128390353027275, "loss": 1.2688, "step": 60 }, { "epoch": 0.004569912287167391, "grad_norm": 85.27387237548828, "learning_rate": 0.00012128389733179246, "loss": 1.4725, "step": 62 }, { "epoch": 0.004717328812559888, "grad_norm": 3.8993313312530518, "learning_rate": 0.00012128389093008353, "loss": 0.1737, "step": 64 }, { "epoch": 0.004717328812559888, "eval_1_ratio_diff": 0.05378020265003891, "eval_accuracy": 0.7809820732657833, "eval_f1": 0.7920059215396003, "eval_loss": 0.4972352981567383, "eval_precision": 0.7535211267605634, "eval_recall": 0.8346333853354134, "eval_runtime": 1439.2432, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 64 }, { "epoch": 0.004864745337952384, "grad_norm": 1.8932548761367798, "learning_rate": 0.00012128388432514599, "loss": 0.1574, "step": 66 }, { "epoch": 0.005012161863344881, "grad_norm": 31.92827606201172, "learning_rate": 0.00012128387751697984, "loss": 0.2239, "step": 68 }, { "epoch": 0.005159578388737377, "grad_norm": 57.11052703857422, "learning_rate": 0.00012128387050558512, "loss": 1.4278, "step": 70 }, { "epoch": 0.005306994914129874, "grad_norm": 0.45575767755508423, "learning_rate": 0.00012128386329096184, "loss": 2.7855, "step": 72 }, { "epoch": 0.00545441143952237, "grad_norm": 71.60086059570312, "learning_rate": 0.00012128385587311005, "loss": 1.4066, "step": 74 }, { "epoch": 0.005601827964914867, "grad_norm": 0.1034606546163559, "learning_rate": 0.00012128384825202977, "loss": 2.1198, "step": 76 }, { "epoch": 0.005749244490307364, "grad_norm": 0.3067642152309418, "learning_rate": 0.00012128384042772098, "loss": 0.0126, "step": 78 }, { "epoch": 0.00589666101569986, "grad_norm": 63.32870101928711, "learning_rate": 0.00012128383240018376, "loss": 1.4007, "step": 80 }, { "epoch": 0.00589666101569986, "eval_1_ratio_diff": 0.04130943102104445, "eval_accuracy": 0.7653936087295401, "eval_f1": 0.7745318352059926, "eval_loss": 1.208424687385559, "eval_precision": 0.7449567723342939, "eval_recall": 0.8065522620904836, "eval_runtime": 1438.9869, "eval_samples_per_second": 0.892, "eval_steps_per_second": 0.446, "step": 80 }, { "epoch": 0.006044077541092357, "grad_norm": 0.002626498695462942, "learning_rate": 0.00012128382416941812, "loss": 0.003, "step": 82 }, { "epoch": 0.006191494066484853, "grad_norm": 78.83605194091797, "learning_rate": 0.00012128381573542408, "loss": 1.7103, "step": 84 }, { "epoch": 0.00633891059187735, "grad_norm": 0.04237201437354088, "learning_rate": 0.00012128380709820168, "loss": 0.0184, "step": 86 }, { "epoch": 0.006486327117269846, "grad_norm": 57.11608123779297, "learning_rate": 0.00012128379825775094, "loss": 0.3886, "step": 88 }, { "epoch": 0.006633743642662343, "grad_norm": 71.66314697265625, "learning_rate": 0.00012128378921407189, "loss": 1.0122, "step": 90 }, { "epoch": 0.006781160168054839, "grad_norm": 60.63711166381836, "learning_rate": 0.00012128377996716456, "loss": 2.2072, "step": 92 }, { "epoch": 0.006928576693447336, "grad_norm": 64.88410186767578, "learning_rate": 0.00012128377051702896, "loss": 1.7641, "step": 94 }, { "epoch": 0.007075993218839832, "grad_norm": 15.290694236755371, "learning_rate": 0.00012128376086366519, "loss": 0.2084, "step": 96 }, { "epoch": 0.007075993218839832, "eval_1_ratio_diff": -0.07794232268121593, "eval_accuracy": 0.764614185502728, "eval_f1": 0.7445008460236887, "eval_loss": 0.6278901100158691, "eval_precision": 0.8133086876155268, "eval_recall": 0.6864274570982839, "eval_runtime": 1439.7986, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 96 }, { "epoch": 0.007223409744232329, "grad_norm": 4.278674125671387, "learning_rate": 0.00012128375100707322, "loss": 0.0205, "step": 98 }, { "epoch": 0.007370826269624825, "grad_norm": 12.730552673339844, "learning_rate": 0.00012128374094725308, "loss": 0.0596, "step": 100 }, { "epoch": 0.007518242795017322, "grad_norm": 0.03387758880853653, "learning_rate": 0.00012128373068420486, "loss": 1.1734, "step": 102 }, { "epoch": 0.007665659320409818, "grad_norm": 0.002689527813345194, "learning_rate": 0.00012128372021792852, "loss": 0.016, "step": 104 }, { "epoch": 0.007813075845802315, "grad_norm": 46.29806900024414, "learning_rate": 0.00012128370954842415, "loss": 3.8453, "step": 106 }, { "epoch": 0.00796049237119481, "grad_norm": 65.56766510009766, "learning_rate": 0.00012128369867569178, "loss": 3.0592, "step": 108 }, { "epoch": 0.008107908896587307, "grad_norm": 67.830322265625, "learning_rate": 0.00012128368759973141, "loss": 1.5232, "step": 110 }, { "epoch": 0.008255325421979804, "grad_norm": 1.828292965888977, "learning_rate": 0.00012128367632054312, "loss": 0.899, "step": 112 }, { "epoch": 0.008255325421979804, "eval_1_ratio_diff": -0.24707716289945442, "eval_accuracy": 0.6952455183164459, "eval_f1": 0.5948186528497409, "eval_loss": 1.2687604427337646, "eval_precision": 0.8858024691358025, "eval_recall": 0.44773790951638065, "eval_runtime": 1440.6646, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 112 }, { "epoch": 0.0084027419473723, "grad_norm": 2.445478916168213, "learning_rate": 0.00012128366483812693, "loss": 1.3983, "step": 114 }, { "epoch": 0.008550158472764796, "grad_norm": 0.8839952349662781, "learning_rate": 0.00012128365315248287, "loss": 2.515, "step": 116 }, { "epoch": 0.008697574998157294, "grad_norm": 20.67784881591797, "learning_rate": 0.000121283641263611, "loss": 1.5722, "step": 118 }, { "epoch": 0.00884499152354979, "grad_norm": 1.1078622341156006, "learning_rate": 0.00012128362917151136, "loss": 0.0058, "step": 120 }, { "epoch": 0.008992408048942286, "grad_norm": 52.540367126464844, "learning_rate": 0.00012128361687618396, "loss": 2.8601, "step": 122 }, { "epoch": 0.009139824574334782, "grad_norm": 40.01364517211914, "learning_rate": 0.00012128360437762885, "loss": 0.6845, "step": 124 }, { "epoch": 0.00928724109972728, "grad_norm": 4.011626243591309, "learning_rate": 0.00012128359167584609, "loss": 0.6806, "step": 126 }, { "epoch": 0.009434657625119776, "grad_norm": 12.99624252319336, "learning_rate": 0.00012128357877083573, "loss": 0.8965, "step": 128 }, { "epoch": 0.009434657625119776, "eval_1_ratio_diff": 0.33982852689010135, "eval_accuracy": 0.6492595479345284, "eval_f1": 0.7380675203725262, "eval_loss": 0.9785400629043579, "eval_precision": 0.5886722376973074, "eval_recall": 0.9890795631825273, "eval_runtime": 1440.0679, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 128 }, { "epoch": 0.009582074150512272, "grad_norm": 34.23851013183594, "learning_rate": 0.00012128356566259777, "loss": 0.3434, "step": 130 }, { "epoch": 0.009729490675904768, "grad_norm": 66.7353286743164, "learning_rate": 0.0001212835523511323, "loss": 0.475, "step": 132 }, { "epoch": 0.009876907201297266, "grad_norm": 56.82964324951172, "learning_rate": 0.00012128353883643935, "loss": 0.7709, "step": 134 }, { "epoch": 0.010024323726689762, "grad_norm": 34.38500213623047, "learning_rate": 0.00012128352511851894, "loss": 0.7302, "step": 136 }, { "epoch": 0.010171740252082258, "grad_norm": 106.88589477539062, "learning_rate": 0.00012128351119737116, "loss": 1.332, "step": 138 }, { "epoch": 0.010319156777474754, "grad_norm": 85.7337875366211, "learning_rate": 0.00012128349707299602, "loss": 1.6342, "step": 140 }, { "epoch": 0.010466573302867252, "grad_norm": 4.05411958694458, "learning_rate": 0.00012128348274539358, "loss": 0.0673, "step": 142 }, { "epoch": 0.010613989828259748, "grad_norm": 2.334378719329834, "learning_rate": 0.0001212834682145639, "loss": 0.0332, "step": 144 }, { "epoch": 0.010613989828259748, "eval_1_ratio_diff": -0.2346063912704599, "eval_accuracy": 0.7014809041309431, "eval_f1": 0.6095820591233435, "eval_loss": 1.218570351600647, "eval_precision": 0.8794117647058823, "eval_recall": 0.4664586583463339, "eval_runtime": 1440.6194, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 144 }, { "epoch": 0.010761406353652244, "grad_norm": 1.3649911880493164, "learning_rate": 0.00012128345348050701, "loss": 0.985, "step": 146 }, { "epoch": 0.01090882287904474, "grad_norm": 6.569690227508545, "learning_rate": 0.00012128343854322297, "loss": 0.0316, "step": 148 }, { "epoch": 0.011056239404437238, "grad_norm": 50.96843719482422, "learning_rate": 0.00012128342340271183, "loss": 2.5112, "step": 150 }, { "epoch": 0.011203655929829734, "grad_norm": 46.42570877075195, "learning_rate": 0.00012128340805897364, "loss": 2.5907, "step": 152 }, { "epoch": 0.01135107245522223, "grad_norm": 35.919315338134766, "learning_rate": 0.00012128339251200845, "loss": 0.5731, "step": 154 }, { "epoch": 0.011498488980614728, "grad_norm": 0.33857831358909607, "learning_rate": 0.0001212833767618163, "loss": 0.0029, "step": 156 }, { "epoch": 0.011645905506007224, "grad_norm": 0.6119909286499023, "learning_rate": 0.00012128336080839724, "loss": 0.0036, "step": 158 }, { "epoch": 0.01179332203139972, "grad_norm": 34.078514099121094, "learning_rate": 0.00012128334465175136, "loss": 3.0454, "step": 160 }, { "epoch": 0.01179332203139972, "eval_1_ratio_diff": -0.05222135619641466, "eval_accuracy": 0.8106001558846454, "eval_f1": 0.8, "eval_loss": 0.9759823083877563, "eval_precision": 0.8466898954703833, "eval_recall": 0.7581903276131046, "eval_runtime": 1440.5068, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 160 }, { "epoch": 0.011940738556792216, "grad_norm": 0.10960781574249268, "learning_rate": 0.0001212833282918787, "loss": 0.0036, "step": 162 }, { "epoch": 0.012088155082184714, "grad_norm": 0.12220565974712372, "learning_rate": 0.0001212833117287793, "loss": 0.0025, "step": 164 }, { "epoch": 0.01223557160757721, "grad_norm": 127.77825164794922, "learning_rate": 0.00012128329496245321, "loss": 2.7251, "step": 166 }, { "epoch": 0.012382988132969706, "grad_norm": 65.698486328125, "learning_rate": 0.0001212832779929005, "loss": 0.4867, "step": 168 }, { "epoch": 0.012530404658362202, "grad_norm": 37.85614013671875, "learning_rate": 0.00012128326082012124, "loss": 0.2097, "step": 170 }, { "epoch": 0.0126778211837547, "grad_norm": 12.939319610595703, "learning_rate": 0.00012128324344411546, "loss": 1.4561, "step": 172 }, { "epoch": 0.012825237709147196, "grad_norm": 81.24678039550781, "learning_rate": 0.00012128322586488326, "loss": 1.1304, "step": 174 }, { "epoch": 0.012972654234539692, "grad_norm": 58.61750030517578, "learning_rate": 0.00012128320808242463, "loss": 0.9005, "step": 176 }, { "epoch": 0.012972654234539692, "eval_1_ratio_diff": 0.2533125487139517, "eval_accuracy": 0.7186282151208107, "eval_f1": 0.7753578095830741, "eval_loss": 0.8996144533157349, "eval_precision": 0.644927536231884, "eval_recall": 0.9719188767550702, "eval_runtime": 1439.76, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 176 }, { "epoch": 0.013120070759932188, "grad_norm": 60.69062805175781, "learning_rate": 0.00012128319009673968, "loss": 1.4957, "step": 178 }, { "epoch": 0.013267487285324685, "grad_norm": 6.7324652671813965, "learning_rate": 0.00012128317190782848, "loss": 0.2882, "step": 180 }, { "epoch": 0.013414903810717181, "grad_norm": 0.18422821164131165, "learning_rate": 0.00012128315351569106, "loss": 0.5841, "step": 182 }, { "epoch": 0.013562320336109678, "grad_norm": 106.35135650634766, "learning_rate": 0.00012128313492032748, "loss": 1.3522, "step": 184 }, { "epoch": 0.013709736861502174, "grad_norm": 35.63379669189453, "learning_rate": 0.00012128311612173782, "loss": 1.237, "step": 186 }, { "epoch": 0.013857153386894671, "grad_norm": 83.5736312866211, "learning_rate": 0.00012128309711992214, "loss": 1.3351, "step": 188 }, { "epoch": 0.014004569912287167, "grad_norm": 97.8160400390625, "learning_rate": 0.0001212830779148805, "loss": 1.6019, "step": 190 }, { "epoch": 0.014151986437679663, "grad_norm": 2.5867555141448975, "learning_rate": 0.00012128305850661298, "loss": 0.0897, "step": 192 }, { "epoch": 0.014151986437679663, "eval_1_ratio_diff": 0.24863600935307872, "eval_accuracy": 0.7295401402961809, "eval_f1": 0.783260462211118, "eval_loss": 1.138918161392212, "eval_precision": 0.653125, "eval_recall": 0.9781591263650546, "eval_runtime": 1440.7407, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 192 }, { "epoch": 0.014299402963072161, "grad_norm": 71.02184295654297, "learning_rate": 0.00012128303889511963, "loss": 1.3708, "step": 194 }, { "epoch": 0.014446819488464657, "grad_norm": 0.5830493569374084, "learning_rate": 0.0001212830190804005, "loss": 3.0855, "step": 196 }, { "epoch": 0.014594236013857153, "grad_norm": 63.9030876159668, "learning_rate": 0.00012128299906245568, "loss": 1.6675, "step": 198 }, { "epoch": 0.01474165253924965, "grad_norm": 0.18025726079940796, "learning_rate": 0.00012128297884128523, "loss": 0.1379, "step": 200 }, { "epoch": 0.014889069064642147, "grad_norm": 0.8397954702377319, "learning_rate": 0.00012128295841688921, "loss": 1.528, "step": 202 }, { "epoch": 0.015036485590034643, "grad_norm": 78.28919219970703, "learning_rate": 0.0001212829377892677, "loss": 1.2677, "step": 204 }, { "epoch": 0.01518390211542714, "grad_norm": 5.996486186981201, "learning_rate": 0.00012128291695842078, "loss": 1.205, "step": 206 }, { "epoch": 0.015331318640819635, "grad_norm": 1.2115447521209717, "learning_rate": 0.0001212828959243485, "loss": 0.0076, "step": 208 }, { "epoch": 0.015331318640819635, "eval_1_ratio_diff": 0.03351519875292286, "eval_accuracy": 0.8402182385035074, "eval_f1": 0.8452830188679246, "eval_loss": 0.5696436166763306, "eval_precision": 0.8187134502923976, "eval_recall": 0.8736349453978159, "eval_runtime": 1440.7431, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 208 }, { "epoch": 0.015478735166212133, "grad_norm": 41.47733688354492, "learning_rate": 0.00012128287468705092, "loss": 1.0424, "step": 210 }, { "epoch": 0.01562615169160463, "grad_norm": 13.133481979370117, "learning_rate": 0.00012128285324652816, "loss": 0.0602, "step": 212 }, { "epoch": 0.015773568216997127, "grad_norm": 14.336326599121094, "learning_rate": 0.00012128283160278022, "loss": 0.0887, "step": 214 }, { "epoch": 0.01592098474238962, "grad_norm": 2.6840479373931885, "learning_rate": 0.00012128280975580723, "loss": 0.0105, "step": 216 }, { "epoch": 0.01606840126778212, "grad_norm": 0.026224393397569656, "learning_rate": 0.00012128278770560924, "loss": 0.0006, "step": 218 }, { "epoch": 0.016215817793174613, "grad_norm": 0.0356808602809906, "learning_rate": 0.00012128276545218633, "loss": 1.6274, "step": 220 }, { "epoch": 0.01636323431856711, "grad_norm": 0.03703249245882034, "learning_rate": 0.00012128274299553858, "loss": 1.6564, "step": 222 }, { "epoch": 0.01651065084395961, "grad_norm": 0.23091621696949005, "learning_rate": 0.00012128272033566606, "loss": 0.0017, "step": 224 }, { "epoch": 0.01651065084395961, "eval_1_ratio_diff": 0.10210444271239283, "eval_accuracy": 0.8106001558846454, "eval_f1": 0.8280254777070064, "eval_loss": 1.4256943464279175, "eval_precision": 0.7577720207253886, "eval_recall": 0.9126365054602185, "eval_runtime": 1440.6468, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 224 }, { "epoch": 0.016658067369352103, "grad_norm": 0.2899627983570099, "learning_rate": 0.00012128269747256883, "loss": 0.0048, "step": 226 }, { "epoch": 0.0168054838947446, "grad_norm": 138.98680114746094, "learning_rate": 0.00012128267440624699, "loss": 7.0607, "step": 228 }, { "epoch": 0.0169529004201371, "grad_norm": 64.21833801269531, "learning_rate": 0.0001212826511367006, "loss": 2.4323, "step": 230 }, { "epoch": 0.017100316945529593, "grad_norm": 69.21852111816406, "learning_rate": 0.00012128262766392974, "loss": 3.8941, "step": 232 }, { "epoch": 0.01724773347092209, "grad_norm": 0.6788825988769531, "learning_rate": 0.00012128260398793452, "loss": 0.0033, "step": 234 }, { "epoch": 0.01739514999631459, "grad_norm": 0.5503783822059631, "learning_rate": 0.000121282580108715, "loss": 0.0089, "step": 236 }, { "epoch": 0.017542566521707083, "grad_norm": 1.4736528396606445, "learning_rate": 0.00012128255602627122, "loss": 0.6923, "step": 238 }, { "epoch": 0.01768998304709958, "grad_norm": 0.052145253866910934, "learning_rate": 0.0001212825317406033, "loss": 0.003, "step": 240 }, { "epoch": 0.01768998304709958, "eval_1_ratio_diff": 0.05689789555728764, "eval_accuracy": 0.8667186282151208, "eval_f1": 0.8738007380073801, "eval_loss": 0.5649486184120178, "eval_precision": 0.8291316526610645, "eval_recall": 0.9235569422776911, "eval_runtime": 1440.858, "eval_samples_per_second": 0.89, "eval_steps_per_second": 0.446, "step": 240 }, { "epoch": 0.017837399572492075, "grad_norm": 33.907466888427734, "learning_rate": 0.00012128250725171133, "loss": 1.0754, "step": 242 }, { "epoch": 0.017984816097884573, "grad_norm": 1.5523881912231445, "learning_rate": 0.00012128248255959539, "loss": 2.2872, "step": 244 }, { "epoch": 0.01813223262327707, "grad_norm": 0.45814594626426697, "learning_rate": 0.00012128245766425553, "loss": 0.0082, "step": 246 }, { "epoch": 0.018279649148669565, "grad_norm": 63.94032669067383, "learning_rate": 0.00012128243256569185, "loss": 1.7641, "step": 248 }, { "epoch": 0.018427065674062063, "grad_norm": 0.17571286857128143, "learning_rate": 0.00012128240726390445, "loss": 0.0017, "step": 250 }, { "epoch": 0.01857448219945456, "grad_norm": 0.08677598834037781, "learning_rate": 0.0001212823817588934, "loss": 2.0446, "step": 252 }, { "epoch": 0.018721898724847055, "grad_norm": 0.06298824399709702, "learning_rate": 0.00012128235605065879, "loss": 0.0031, "step": 254 }, { "epoch": 0.018869315250239552, "grad_norm": 0.04490824043750763, "learning_rate": 0.00012128233013920071, "loss": 0.0016, "step": 256 }, { "epoch": 0.018869315250239552, "eval_1_ratio_diff": 0.26032735775526106, "eval_accuracy": 0.7272018706157444, "eval_f1": 0.7834158415841584, "eval_loss": 1.7306467294692993, "eval_precision": 0.6492307692307693, "eval_recall": 0.9875195007800313, "eval_runtime": 1441.1243, "eval_samples_per_second": 0.89, "eval_steps_per_second": 0.445, "step": 256 }, { "epoch": 0.019016731775632047, "grad_norm": 64.88382720947266, "learning_rate": 0.00012128230402451925, "loss": 1.4818, "step": 258 }, { "epoch": 0.019164148301024544, "grad_norm": 0.04304850101470947, "learning_rate": 0.00012128227770661447, "loss": 0.0006, "step": 260 }, { "epoch": 0.019311564826417042, "grad_norm": 99.55477142333984, "learning_rate": 0.00012128225118548648, "loss": 1.3041, "step": 262 }, { "epoch": 0.019458981351809537, "grad_norm": 64.24674987792969, "learning_rate": 0.00012128222446113537, "loss": 3.4221, "step": 264 }, { "epoch": 0.019606397877202034, "grad_norm": 1.130561351776123, "learning_rate": 0.00012128219753356123, "loss": 0.0047, "step": 266 }, { "epoch": 0.019753814402594532, "grad_norm": 60.320674896240234, "learning_rate": 0.00012128217040276413, "loss": 0.7215, "step": 268 }, { "epoch": 0.019901230927987026, "grad_norm": 56.348636627197266, "learning_rate": 0.0001212821430687442, "loss": 3.0486, "step": 270 }, { "epoch": 0.020048647453379524, "grad_norm": 4.682687759399414, "learning_rate": 0.0001212821155315015, "loss": 0.0195, "step": 272 }, { "epoch": 0.020048647453379524, "eval_1_ratio_diff": -0.07170693686671864, "eval_accuracy": 0.8035853468433359, "eval_f1": 0.788235294117647, "eval_loss": 0.7957486510276794, "eval_precision": 0.8542805100182149, "eval_recall": 0.7316692667706708, "eval_runtime": 1438.4097, "eval_samples_per_second": 0.892, "eval_steps_per_second": 0.446, "step": 272 }, { "epoch": 0.020196063978772022, "grad_norm": 0.11813419312238693, "learning_rate": 0.00012128208779103613, "loss": 0.1104, "step": 274 }, { "epoch": 0.020343480504164516, "grad_norm": 61.332427978515625, "learning_rate": 0.0001212820598473482, "loss": 0.8622, "step": 276 }, { "epoch": 0.020490897029557014, "grad_norm": 9.628612518310547, "learning_rate": 0.00012128203170043776, "loss": 0.0682, "step": 278 }, { "epoch": 0.02063831355494951, "grad_norm": 59.6220703125, "learning_rate": 0.00012128200335030495, "loss": 0.7833, "step": 280 }, { "epoch": 0.020785730080342006, "grad_norm": 1.084692358970642, "learning_rate": 0.00012128197479694983, "loss": 1.5881, "step": 282 }, { "epoch": 0.020933146605734504, "grad_norm": 0.44916099309921265, "learning_rate": 0.00012128194604037253, "loss": 0.0187, "step": 284 }, { "epoch": 0.021080563131126998, "grad_norm": 0.11146622151136398, "learning_rate": 0.00012128191708057311, "loss": 0.0025, "step": 286 }, { "epoch": 0.021227979656519496, "grad_norm": 0.05726571008563042, "learning_rate": 0.00012128188791755172, "loss": 0.0004, "step": 288 }, { "epoch": 0.021227979656519496, "eval_1_ratio_diff": 0.09119251753702262, "eval_accuracy": 0.8277474668745128, "eval_f1": 0.8420300214438885, "eval_loss": 1.1355745792388916, "eval_precision": 0.7770448548812665, "eval_recall": 0.9188767550702028, "eval_runtime": 1440.4727, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 288 }, { "epoch": 0.021375396181911994, "grad_norm": 63.95652770996094, "learning_rate": 0.0001212818585513084, "loss": 2.2186, "step": 290 }, { "epoch": 0.021522812707304488, "grad_norm": 0.041420936584472656, "learning_rate": 0.00012128182898184326, "loss": 2.2755, "step": 292 }, { "epoch": 0.021670229232696986, "grad_norm": 0.19315005838871002, "learning_rate": 0.00012128179920915643, "loss": 1.7156, "step": 294 }, { "epoch": 0.02181764575808948, "grad_norm": 0.06642986834049225, "learning_rate": 0.00012128176923324799, "loss": 0.0021, "step": 296 }, { "epoch": 0.021965062283481978, "grad_norm": 0.22619064152240753, "learning_rate": 0.00012128173905411805, "loss": 1.2636, "step": 298 }, { "epoch": 0.022112478808874476, "grad_norm": 0.30320611596107483, "learning_rate": 0.00012128170867176669, "loss": 0.0031, "step": 300 }, { "epoch": 0.02225989533426697, "grad_norm": 62.3597412109375, "learning_rate": 0.00012128167808619403, "loss": 1.3432, "step": 302 }, { "epoch": 0.022407311859659468, "grad_norm": 63.980323791503906, "learning_rate": 0.00012128164729740015, "loss": 0.8526, "step": 304 }, { "epoch": 0.022407311859659468, "eval_1_ratio_diff": 0.15354637568199536, "eval_accuracy": 0.8028059236165238, "eval_f1": 0.8289384719405003, "eval_loss": 0.781088650226593, "eval_precision": 0.7315035799522673, "eval_recall": 0.9563182527301092, "eval_runtime": 1439.8087, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 304 }, { "epoch": 0.022554728385051966, "grad_norm": 0.5441477298736572, "learning_rate": 0.0001212816163053852, "loss": 0.023, "step": 306 }, { "epoch": 0.02270214491044446, "grad_norm": 60.2026252746582, "learning_rate": 0.00012128158511014924, "loss": 0.4811, "step": 308 }, { "epoch": 0.022849561435836958, "grad_norm": 3.5183231830596924, "learning_rate": 0.00012128155371169238, "loss": 0.0164, "step": 310 }, { "epoch": 0.022996977961229455, "grad_norm": 49.883365631103516, "learning_rate": 0.00012128152211001475, "loss": 2.6559, "step": 312 }, { "epoch": 0.02314439448662195, "grad_norm": 0.21442897617816925, "learning_rate": 0.00012128149030511643, "loss": 1.0737, "step": 314 }, { "epoch": 0.023291811012014448, "grad_norm": 66.95639038085938, "learning_rate": 0.00012128145829699753, "loss": 2.2649, "step": 316 }, { "epoch": 0.023439227537406942, "grad_norm": 41.275150299072266, "learning_rate": 0.00012128142608565818, "loss": 1.4307, "step": 318 }, { "epoch": 0.02358664406279944, "grad_norm": 60.39665603637695, "learning_rate": 0.00012128139367109845, "loss": 0.8912, "step": 320 }, { "epoch": 0.02358664406279944, "eval_1_ratio_diff": 0.15666406858924398, "eval_accuracy": 0.7903351519875292, "eval_f1": 0.8186109238031019, "eval_loss": 0.6988638043403625, "eval_precision": 0.7209026128266033, "eval_recall": 0.9469578783151326, "eval_runtime": 1440.1147, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 320 }, { "epoch": 0.023734060588191937, "grad_norm": 0.26957735419273376, "learning_rate": 0.0001212813610533185, "loss": 0.0109, "step": 322 }, { "epoch": 0.02388147711358443, "grad_norm": 1.1442532539367676, "learning_rate": 0.00012128132823231837, "loss": 0.8164, "step": 324 }, { "epoch": 0.02402889363897693, "grad_norm": 2.7633121013641357, "learning_rate": 0.00012128129520809825, "loss": 0.0146, "step": 326 }, { "epoch": 0.024176310164369427, "grad_norm": 103.85281372070312, "learning_rate": 0.00012128126198065819, "loss": 2.8926, "step": 328 }, { "epoch": 0.02432372668976192, "grad_norm": 4.870635032653809, "learning_rate": 0.00012128122854999832, "loss": 0.0289, "step": 330 }, { "epoch": 0.02447114321515442, "grad_norm": 0.17178401350975037, "learning_rate": 0.00012128119491611876, "loss": 0.7425, "step": 332 }, { "epoch": 0.024618559740546914, "grad_norm": 37.24171447753906, "learning_rate": 0.00012128116107901961, "loss": 3.577, "step": 334 }, { "epoch": 0.02476597626593941, "grad_norm": 12.520587921142578, "learning_rate": 0.00012128112703870099, "loss": 0.0673, "step": 336 }, { "epoch": 0.02476597626593941, "eval_1_ratio_diff": -0.025720966484801266, "eval_accuracy": 0.857365549493375, "eval_f1": 0.8534827862289832, "eval_loss": 0.4316674470901489, "eval_precision": 0.8766447368421053, "eval_recall": 0.8315132605304212, "eval_runtime": 1440.3285, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 336 }, { "epoch": 0.02491339279133191, "grad_norm": 41.28479766845703, "learning_rate": 0.00012128109279516303, "loss": 0.2896, "step": 338 }, { "epoch": 0.025060809316724404, "grad_norm": 6.806232452392578, "learning_rate": 0.00012128105834840581, "loss": 0.0378, "step": 340 }, { "epoch": 0.0252082258421169, "grad_norm": 2.091874361038208, "learning_rate": 0.00012128102369842947, "loss": 0.0118, "step": 342 }, { "epoch": 0.0253556423675094, "grad_norm": 57.055580139160156, "learning_rate": 0.00012128098884523412, "loss": 0.6633, "step": 344 }, { "epoch": 0.025503058892901893, "grad_norm": 59.19140625, "learning_rate": 0.00012128095378881987, "loss": 0.4166, "step": 346 }, { "epoch": 0.02565047541829439, "grad_norm": 0.08690566569566727, "learning_rate": 0.00012128091852918686, "loss": 0.0041, "step": 348 }, { "epoch": 0.02579789194368689, "grad_norm": 0.4953851103782654, "learning_rate": 0.00012128088306633519, "loss": 0.0058, "step": 350 }, { "epoch": 0.025945308469079383, "grad_norm": 0.8310350179672241, "learning_rate": 0.00012128084740026497, "loss": 0.0115, "step": 352 }, { "epoch": 0.025945308469079383, "eval_1_ratio_diff": -0.05455962587685115, "eval_accuracy": 0.8659392049883087, "eval_f1": 0.858085808580858, "eval_loss": 0.6554389595985413, "eval_precision": 0.9106830122591943, "eval_recall": 0.8112324492979719, "eval_runtime": 1441.1917, "eval_samples_per_second": 0.89, "eval_steps_per_second": 0.445, "step": 352 }, { "epoch": 0.02609272499447188, "grad_norm": 75.97391510009766, "learning_rate": 0.00012128081153097633, "loss": 1.0946, "step": 354 }, { "epoch": 0.026240141519864375, "grad_norm": 0.1318621188402176, "learning_rate": 0.0001212807754584694, "loss": 0.0013, "step": 356 }, { "epoch": 0.026387558045256873, "grad_norm": 0.07249584794044495, "learning_rate": 0.0001212807391827443, "loss": 0.2854, "step": 358 }, { "epoch": 0.02653497457064937, "grad_norm": 23.931421279907227, "learning_rate": 0.00012128070270380113, "loss": 0.0587, "step": 360 }, { "epoch": 0.026682391096041865, "grad_norm": 228.77931213378906, "learning_rate": 0.00012128066602164004, "loss": 0.6358, "step": 362 }, { "epoch": 0.026829807621434363, "grad_norm": 0.020578529685735703, "learning_rate": 0.00012128062913626113, "loss": 0.0003, "step": 364 }, { "epoch": 0.02697722414682686, "grad_norm": 0.044141389429569244, "learning_rate": 0.00012128059204766453, "loss": 0.0003, "step": 366 }, { "epoch": 0.027124640672219355, "grad_norm": 35.83491516113281, "learning_rate": 0.00012128055475585035, "loss": 2.1523, "step": 368 }, { "epoch": 0.027124640672219355, "eval_1_ratio_diff": -0.044427123928293066, "eval_accuracy": 0.8604832424006236, "eval_f1": 0.8538775510204082, "eval_loss": 1.1068644523620605, "eval_precision": 0.8955479452054794, "eval_recall": 0.8159126365054602, "eval_runtime": 1440.348, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 368 }, { "epoch": 0.027272057197611853, "grad_norm": 0.004144140053540468, "learning_rate": 0.00012128051726081876, "loss": 0.0, "step": 370 }, { "epoch": 0.027419473723004347, "grad_norm": 0.0015425934689119458, "learning_rate": 0.00012128047956256984, "loss": 0.0002, "step": 372 }, { "epoch": 0.027566890248396845, "grad_norm": 36.42764663696289, "learning_rate": 0.00012128044166110374, "loss": 2.8486, "step": 374 }, { "epoch": 0.027714306773789343, "grad_norm": 0.6206398010253906, "learning_rate": 0.00012128040355642058, "loss": 2.924, "step": 376 }, { "epoch": 0.027861723299181837, "grad_norm": 97.60330963134766, "learning_rate": 0.00012128036524852049, "loss": 1.9209, "step": 378 }, { "epoch": 0.028009139824574335, "grad_norm": 2.1615848541259766, "learning_rate": 0.0001212803267374036, "loss": 0.0215, "step": 380 }, { "epoch": 0.028156556349966833, "grad_norm": 41.35491180419922, "learning_rate": 0.00012128028802307003, "loss": 0.8105, "step": 382 }, { "epoch": 0.028303972875359327, "grad_norm": 39.422916412353516, "learning_rate": 0.00012128024910551992, "loss": 1.131, "step": 384 }, { "epoch": 0.028303972875359327, "eval_1_ratio_diff": -0.3904910366328917, "eval_accuracy": 0.5876851130163678, "eval_f1": 0.322663252240717, "eval_loss": 1.1657379865646362, "eval_precision": 0.9, "eval_recall": 0.19656786271450857, "eval_runtime": 1441.4939, "eval_samples_per_second": 0.89, "eval_steps_per_second": 0.445, "step": 384 }, { "epoch": 0.028451389400751825, "grad_norm": 1.7290548086166382, "learning_rate": 0.0001212802099847534, "loss": 0.0986, "step": 386 }, { "epoch": 0.028598805926144322, "grad_norm": 40.167484283447266, "learning_rate": 0.00012128017066077058, "loss": 1.1352, "step": 388 }, { "epoch": 0.028746222451536817, "grad_norm": 36.6862678527832, "learning_rate": 0.00012128013113357162, "loss": 2.6405, "step": 390 }, { "epoch": 0.028893638976929314, "grad_norm": 1.1684958934783936, "learning_rate": 0.00012128009140315665, "loss": 1.1565, "step": 392 }, { "epoch": 0.02904105550232181, "grad_norm": 28.306957244873047, "learning_rate": 0.00012128005146952578, "loss": 1.6548, "step": 394 }, { "epoch": 0.029188472027714307, "grad_norm": 18.64267349243164, "learning_rate": 0.00012128001133267917, "loss": 1.1205, "step": 396 }, { "epoch": 0.029335888553106804, "grad_norm": 7.279528617858887, "learning_rate": 0.00012127997099261693, "loss": 0.6742, "step": 398 }, { "epoch": 0.0294833050784993, "grad_norm": 41.569854736328125, "learning_rate": 0.00012127993044933921, "loss": 0.6977, "step": 400 }, { "epoch": 0.0294833050784993, "eval_1_ratio_diff": -0.2704598597038192, "eval_accuracy": 0.6344505066250974, "eval_f1": 0.4983957219251337, "eval_loss": 0.6263108849525452, "eval_precision": 0.7925170068027211, "eval_recall": 0.36349453978159124, "eval_runtime": 1441.3891, "eval_samples_per_second": 0.89, "eval_steps_per_second": 0.445, "step": 400 }, { "epoch": 0.029630721603891796, "grad_norm": 14.647398948669434, "learning_rate": 0.00012127988970284616, "loss": 0.4508, "step": 402 }, { "epoch": 0.029778138129284294, "grad_norm": 21.75971221923828, "learning_rate": 0.00012127984875313788, "loss": 0.6282, "step": 404 }, { "epoch": 0.02992555465467679, "grad_norm": 32.292236328125, "learning_rate": 0.00012127980760021456, "loss": 1.0279, "step": 406 }, { "epoch": 0.030072971180069286, "grad_norm": 59.10111999511719, "learning_rate": 0.00012127976624407626, "loss": 1.7322, "step": 408 }, { "epoch": 0.03022038770546178, "grad_norm": 56.45620346069336, "learning_rate": 0.00012127972468472319, "loss": 2.3399, "step": 410 }, { "epoch": 0.03036780423085428, "grad_norm": 33.3152961730957, "learning_rate": 0.00012127968292215546, "loss": 1.1374, "step": 412 }, { "epoch": 0.030515220756246776, "grad_norm": 9.003528594970703, "learning_rate": 0.00012127964095637322, "loss": 0.531, "step": 414 }, { "epoch": 0.03066263728163927, "grad_norm": 11.181624412536621, "learning_rate": 0.00012127959878737659, "loss": 0.167, "step": 416 }, { "epoch": 0.03066263728163927, "eval_1_ratio_diff": -0.49961028838659394, "eval_accuracy": 0.5003897116134061, "eval_f1": 0.0, "eval_loss": 0.9164891839027405, "eval_precision": 0.0, "eval_recall": 0.0, "eval_runtime": 1441.9045, "eval_samples_per_second": 0.89, "eval_steps_per_second": 0.445, "step": 416 }, { "epoch": 0.030810053807031768, "grad_norm": 81.1378173828125, "learning_rate": 0.00012127955641516573, "loss": 1.5427, "step": 418 }, { "epoch": 0.030957470332424266, "grad_norm": 40.89067840576172, "learning_rate": 0.00012127951383974079, "loss": 0.8105, "step": 420 }, { "epoch": 0.03110488685781676, "grad_norm": 0.7650836706161499, "learning_rate": 0.00012127947106110188, "loss": 0.8716, "step": 422 }, { "epoch": 0.03125230338320926, "grad_norm": 41.49223709106445, "learning_rate": 0.00012127942807924917, "loss": 3.4998, "step": 424 }, { "epoch": 0.031399719908601756, "grad_norm": 0.12294773012399673, "learning_rate": 0.00012127938489418281, "loss": 1.8698, "step": 426 }, { "epoch": 0.031547136433994254, "grad_norm": 35.12305450439453, "learning_rate": 0.00012127934150590295, "loss": 1.6532, "step": 428 }, { "epoch": 0.031694552959386744, "grad_norm": 27.799177169799805, "learning_rate": 0.00012127929791440968, "loss": 0.5514, "step": 430 }, { "epoch": 0.03184196948477924, "grad_norm": 24.18194580078125, "learning_rate": 0.00012127925411970319, "loss": 0.6588, "step": 432 }, { "epoch": 0.03184196948477924, "eval_1_ratio_diff": 0.0615744349181605, "eval_accuracy": 0.8074824629773967, "eval_f1": 0.8185157972079353, "eval_loss": 0.46238815784454346, "eval_precision": 0.7736111111111111, "eval_recall": 0.8689547581903276, "eval_runtime": 1441.3065, "eval_samples_per_second": 0.89, "eval_steps_per_second": 0.445, "step": 432 }, { "epoch": 0.03198938601017174, "grad_norm": 39.476436614990234, "learning_rate": 0.00012127921012178362, "loss": 0.5056, "step": 434 }, { "epoch": 0.03213680253556424, "grad_norm": 17.45188331604004, "learning_rate": 0.00012127916592065112, "loss": 1.9197, "step": 436 }, { "epoch": 0.032284219060956736, "grad_norm": 37.614906311035156, "learning_rate": 0.00012127912151630586, "loss": 1.4371, "step": 438 }, { "epoch": 0.032431635586349226, "grad_norm": 6.937824726104736, "learning_rate": 0.00012127907690874794, "loss": 0.1527, "step": 440 }, { "epoch": 0.032579052111741724, "grad_norm": 1.9573392868041992, "learning_rate": 0.00012127903209797754, "loss": 0.0619, "step": 442 }, { "epoch": 0.03272646863713422, "grad_norm": 5.234042167663574, "learning_rate": 0.00012127898708399481, "loss": 0.0308, "step": 444 }, { "epoch": 0.03287388516252672, "grad_norm": 19.76664161682129, "learning_rate": 0.00012127894186679988, "loss": 2.5914, "step": 446 }, { "epoch": 0.03302130168791922, "grad_norm": 48.643428802490234, "learning_rate": 0.00012127889644639293, "loss": 3.5738, "step": 448 }, { "epoch": 0.03302130168791922, "eval_1_ratio_diff": -0.4964925954793453, "eval_accuracy": 0.5035074045206547, "eval_f1": 0.012403100775193798, "eval_loss": 2.0848419666290283, "eval_precision": 1.0, "eval_recall": 0.0062402496099844, "eval_runtime": 1441.7896, "eval_samples_per_second": 0.89, "eval_steps_per_second": 0.445, "step": 448 }, { "epoch": 0.033168718213311715, "grad_norm": 41.91992950439453, "learning_rate": 0.0001212788508227741, "loss": 3.656, "step": 450 }, { "epoch": 0.033316134738704206, "grad_norm": 58.21712112426758, "learning_rate": 0.00012127880499594355, "loss": 2.5973, "step": 452 }, { "epoch": 0.033463551264096704, "grad_norm": 14.196877479553223, "learning_rate": 0.00012127875896590141, "loss": 0.9817, "step": 454 }, { "epoch": 0.0336109677894892, "grad_norm": 21.982349395751953, "learning_rate": 0.00012127871273264783, "loss": 0.6516, "step": 456 }, { "epoch": 0.0337583843148817, "grad_norm": 26.360563278198242, "learning_rate": 0.00012127866629618302, "loss": 0.5606, "step": 458 }, { "epoch": 0.0339058008402742, "grad_norm": 15.224770545959473, "learning_rate": 0.00012127861965650708, "loss": 0.4791, "step": 460 }, { "epoch": 0.03405321736566669, "grad_norm": 40.95515441894531, "learning_rate": 0.0001212785728136202, "loss": 0.8481, "step": 462 }, { "epoch": 0.034200633891059186, "grad_norm": 0.4365566670894623, "learning_rate": 0.00012127852576752252, "loss": 0.2475, "step": 464 }, { "epoch": 0.034200633891059186, "eval_1_ratio_diff": 0.2938425565081839, "eval_accuracy": 0.6890101325019485, "eval_f1": 0.759493670886076, "eval_loss": 0.8622868061065674, "eval_precision": 0.618860510805501, "eval_recall": 0.982839313572543, "eval_runtime": 1441.2401, "eval_samples_per_second": 0.89, "eval_steps_per_second": 0.445, "step": 464 }, { "epoch": 0.034348050416451684, "grad_norm": 15.40101432800293, "learning_rate": 0.0001212784785182142, "loss": 0.6156, "step": 466 }, { "epoch": 0.03449546694184418, "grad_norm": 5.0568013191223145, "learning_rate": 0.00012127843106569541, "loss": 0.4877, "step": 468 }, { "epoch": 0.03464288346723668, "grad_norm": 0.277358740568161, "learning_rate": 0.00012127838340996629, "loss": 0.6857, "step": 470 }, { "epoch": 0.03479029999262918, "grad_norm": 0.04443424195051193, "learning_rate": 0.00012127833555102701, "loss": 0.0286, "step": 472 }, { "epoch": 0.03493771651802167, "grad_norm": 35.34669876098633, "learning_rate": 0.00012127828748887773, "loss": 1.7842, "step": 474 }, { "epoch": 0.035085133043414166, "grad_norm": 0.08662135899066925, "learning_rate": 0.00012127823922351861, "loss": 0.0011, "step": 476 }, { "epoch": 0.03523254956880666, "grad_norm": 0.021065138280391693, "learning_rate": 0.00012127819075494979, "loss": 0.0013, "step": 478 }, { "epoch": 0.03537996609419916, "grad_norm": 39.31500244140625, "learning_rate": 0.00012127814208317148, "loss": 1.3799, "step": 480 }, { "epoch": 0.03537996609419916, "eval_1_ratio_diff": 0.03975058456742009, "eval_accuracy": 0.838659392049883, "eval_f1": 0.8447111777944486, "eval_loss": 0.967132568359375, "eval_precision": 0.8135838150289018, "eval_recall": 0.8783151326053042, "eval_runtime": 1441.5685, "eval_samples_per_second": 0.89, "eval_steps_per_second": 0.445, "step": 480 }, { "epoch": 0.03552738261959166, "grad_norm": 0.021114541217684746, "learning_rate": 0.0001212780932081838, "loss": 0.0022, "step": 482 }, { "epoch": 0.03567479914498415, "grad_norm": 0.15021076798439026, "learning_rate": 0.00012127804412998695, "loss": 0.0023, "step": 484 }, { "epoch": 0.03582221567037665, "grad_norm": 0.017235957086086273, "learning_rate": 0.00012127799484858106, "loss": 0.0157, "step": 486 }, { "epoch": 0.035969632195769145, "grad_norm": 0.07619292289018631, "learning_rate": 0.00012127794536396632, "loss": 0.0006, "step": 488 }, { "epoch": 0.03611704872116164, "grad_norm": 0.35548681020736694, "learning_rate": 0.0001212778956761429, "loss": 0.0025, "step": 490 }, { "epoch": 0.03626446524655414, "grad_norm": 0.019310960546135902, "learning_rate": 0.00012127784578511092, "loss": 0.0006, "step": 492 }, { "epoch": 0.03641188177194664, "grad_norm": 0.0059149437583982944, "learning_rate": 0.00012127779569087061, "loss": 0.0222, "step": 494 }, { "epoch": 0.03655929829733913, "grad_norm": 0.0023440527729690075, "learning_rate": 0.00012127774539342209, "loss": 2.0713, "step": 496 }, { "epoch": 0.03655929829733913, "eval_1_ratio_diff": 0.0615744349181605, "eval_accuracy": 0.848012470771629, "eval_f1": 0.8567229977957385, "eval_loss": 1.1258606910705566, "eval_precision": 0.8097222222222222, "eval_recall": 0.9095163806552262, "eval_runtime": 1442.1776, "eval_samples_per_second": 0.89, "eval_steps_per_second": 0.445, "step": 496 }, { "epoch": 0.03670671482273163, "grad_norm": 0.4357898235321045, "learning_rate": 0.00012127769489276555, "loss": 0.0017, "step": 498 }, { "epoch": 0.036854131348124125, "grad_norm": 0.0051942430436611176, "learning_rate": 0.00012127764418890117, "loss": 0.0001, "step": 500 }, { "epoch": 0.03700154787351662, "grad_norm": 0.048877667635679245, "learning_rate": 0.0001212775932818291, "loss": 1.0276, "step": 502 }, { "epoch": 0.03714896439890912, "grad_norm": 0.030356034636497498, "learning_rate": 0.00012127754217154949, "loss": 2.3301, "step": 504 }, { "epoch": 0.03729638092430161, "grad_norm": 0.06719710677862167, "learning_rate": 0.00012127749085806257, "loss": 0.0008, "step": 506 }, { "epoch": 0.03744379744969411, "grad_norm": 0.8071137070655823, "learning_rate": 0.00012127743934136846, "loss": 0.0034, "step": 508 }, { "epoch": 0.03759121397508661, "grad_norm": 66.58085632324219, "learning_rate": 0.00012127738762146735, "loss": 2.0918, "step": 510 }, { "epoch": 0.037738630500479105, "grad_norm": 0.5617576241493225, "learning_rate": 0.00012127733569835943, "loss": 0.004, "step": 512 }, { "epoch": 0.037738630500479105, "eval_1_ratio_diff": 0.13795791114575218, "eval_accuracy": 0.8152766952455183, "eval_f1": 0.8375599725839616, "eval_loss": 1.003125548362732, "eval_precision": 0.7469437652811736, "eval_recall": 0.953198127925117, "eval_runtime": 1441.8288, "eval_samples_per_second": 0.89, "eval_steps_per_second": 0.445, "step": 512 }, { "epoch": 0.0378860470258716, "grad_norm": 0.019583938643336296, "learning_rate": 0.00012127728357204487, "loss": 0.0029, "step": 514 }, { "epoch": 0.03803346355126409, "grad_norm": 66.44640350341797, "learning_rate": 0.00012127723124252383, "loss": 1.346, "step": 516 }, { "epoch": 0.03818088007665659, "grad_norm": 0.05073532462120056, "learning_rate": 0.00012127717870979647, "loss": 1.726, "step": 518 }, { "epoch": 0.03832829660204909, "grad_norm": 0.008476372808218002, "learning_rate": 0.000121277125973863, "loss": 0.0002, "step": 520 }, { "epoch": 0.03847571312744159, "grad_norm": 78.07063293457031, "learning_rate": 0.00012127707303472356, "loss": 3.8118, "step": 522 }, { "epoch": 0.038623129652834085, "grad_norm": 37.921451568603516, "learning_rate": 0.00012127701989237836, "loss": 3.374, "step": 524 }, { "epoch": 0.03877054617822658, "grad_norm": 38.97615432739258, "learning_rate": 0.0001212769665468276, "loss": 1.849, "step": 526 }, { "epoch": 0.03891796270361907, "grad_norm": 1.3990278244018555, "learning_rate": 0.0001212769129980714, "loss": 0.2307, "step": 528 }, { "epoch": 0.03891796270361907, "eval_1_ratio_diff": -0.002338269680436489, "eval_accuracy": 0.8511301636788776, "eval_f1": 0.8506645817044566, "eval_loss": 0.5836467742919922, "eval_precision": 0.8526645768025078, "eval_recall": 0.8486739469578783, "eval_runtime": 1442.6344, "eval_samples_per_second": 0.889, "eval_steps_per_second": 0.445, "step": 528 }, { "epoch": 0.03906537922901157, "grad_norm": 0.5216283798217773, "learning_rate": 0.00012127685924610997, "loss": 0.0092, "step": 530 }, { "epoch": 0.03921279575440407, "grad_norm": 0.716465950012207, "learning_rate": 0.00012127680529094349, "loss": 0.0057, "step": 532 }, { "epoch": 0.039360212279796566, "grad_norm": 0.17090915143489838, "learning_rate": 0.00012127675113257214, "loss": 0.0031, "step": 534 }, { "epoch": 0.039507628805189064, "grad_norm": 62.14753723144531, "learning_rate": 0.00012127669677099608, "loss": 1.6501, "step": 536 }, { "epoch": 0.039655045330581555, "grad_norm": 35.18620681762695, "learning_rate": 0.00012127664220621553, "loss": 0.8287, "step": 538 }, { "epoch": 0.03980246185597405, "grad_norm": 34.50994873046875, "learning_rate": 0.00012127658743823064, "loss": 2.5161, "step": 540 }, { "epoch": 0.03994987838136655, "grad_norm": 0.9479020237922668, "learning_rate": 0.00012127653246704162, "loss": 0.0155, "step": 542 }, { "epoch": 0.04009729490675905, "grad_norm": 0.040624819695949554, "learning_rate": 0.00012127647729264862, "loss": 1.536, "step": 544 }, { "epoch": 0.04009729490675905, "eval_1_ratio_diff": -0.008573655494933774, "eval_accuracy": 0.8791893998441154, "eval_f1": 0.8780487804878049, "eval_loss": 0.49514248967170715, "eval_precision": 0.8857142857142857, "eval_recall": 0.8705148205928237, "eval_runtime": 1441.6693, "eval_samples_per_second": 0.89, "eval_steps_per_second": 0.445, "step": 544 }, { "epoch": 0.040244711432151546, "grad_norm": 0.08040345460176468, "learning_rate": 0.00012127642191505187, "loss": 0.0205, "step": 546 }, { "epoch": 0.040392127957544044, "grad_norm": 58.783809661865234, "learning_rate": 0.00012127636633425152, "loss": 1.1192, "step": 548 }, { "epoch": 0.040539544482936535, "grad_norm": 0.25617870688438416, "learning_rate": 0.00012127631055024779, "loss": 1.0263, "step": 550 }, { "epoch": 0.04068696100832903, "grad_norm": 46.056339263916016, "learning_rate": 0.00012127625456304081, "loss": 1.1183, "step": 552 }, { "epoch": 0.04083437753372153, "grad_norm": 0.17480018734931946, "learning_rate": 0.00012127619837263082, "loss": 0.0055, "step": 554 }, { "epoch": 0.04098179405911403, "grad_norm": 0.37528491020202637, "learning_rate": 0.000121276141979018, "loss": 0.0032, "step": 556 }, { "epoch": 0.041129210584506526, "grad_norm": 0.35542991757392883, "learning_rate": 0.00012127608538220252, "loss": 0.9512, "step": 558 }, { "epoch": 0.04127662710989902, "grad_norm": 0.08831676840782166, "learning_rate": 0.00012127602858218457, "loss": 0.0184, "step": 560 }, { "epoch": 0.04127662710989902, "eval_1_ratio_diff": 0.12860483242400622, "eval_accuracy": 0.8402182385035074, "eval_f1": 0.8583275742916379, "eval_loss": 1.0018821954727173, "eval_precision": 0.7704714640198511, "eval_recall": 0.968798751950078, "eval_runtime": 1442.4789, "eval_samples_per_second": 0.889, "eval_steps_per_second": 0.445, "step": 560 }, { "epoch": 0.041424043635291515, "grad_norm": 46.25735092163086, "learning_rate": 0.00012127597157896437, "loss": 0.6495, "step": 562 }, { "epoch": 0.04157146016068401, "grad_norm": 58.521575927734375, "learning_rate": 0.00012127591437254209, "loss": 1.4757, "step": 564 }, { "epoch": 0.04171887668607651, "grad_norm": 0.3296540379524231, "learning_rate": 0.0001212758569629179, "loss": 2.2725, "step": 566 }, { "epoch": 0.04186629321146901, "grad_norm": 0.03395453095436096, "learning_rate": 0.00012127579935009204, "loss": 0.0006, "step": 568 }, { "epoch": 0.042013709736861506, "grad_norm": 0.02328958362340927, "learning_rate": 0.00012127574153406467, "loss": 0.0004, "step": 570 }, { "epoch": 0.042161126262253996, "grad_norm": 58.99131774902344, "learning_rate": 0.000121275683514836, "loss": 2.0081, "step": 572 }, { "epoch": 0.042308542787646494, "grad_norm": 0.9085908532142639, "learning_rate": 0.0001212756252924062, "loss": 0.006, "step": 574 }, { "epoch": 0.04245595931303899, "grad_norm": 0.5718927383422852, "learning_rate": 0.00012127556686677549, "loss": 2.0144, "step": 576 }, { "epoch": 0.04245595931303899, "eval_1_ratio_diff": 0.22291504286827746, "eval_accuracy": 0.7443491816056118, "eval_f1": 0.7908163265306123, "eval_loss": 0.9025093913078308, "eval_precision": 0.668824163969795, "eval_recall": 0.9672386895475819, "eval_runtime": 1442.0314, "eval_samples_per_second": 0.89, "eval_steps_per_second": 0.445, "step": 576 }, { "epoch": 0.04260337583843149, "grad_norm": 0.4269089698791504, "learning_rate": 0.00012127550823794406, "loss": 1.8595, "step": 578 }, { "epoch": 0.04275079236382399, "grad_norm": 1.5817714929580688, "learning_rate": 0.00012127544940591211, "loss": 0.4153, "step": 580 }, { "epoch": 0.04289820888921648, "grad_norm": 56.673728942871094, "learning_rate": 0.00012127539037067981, "loss": 1.8132, "step": 582 }, { "epoch": 0.043045625414608976, "grad_norm": 4.291464805603027, "learning_rate": 0.0001212753311322474, "loss": 0.3818, "step": 584 }, { "epoch": 0.043193041940001474, "grad_norm": 18.92963981628418, "learning_rate": 0.00012127527169061505, "loss": 0.0941, "step": 586 }, { "epoch": 0.04334045846539397, "grad_norm": 27.108686447143555, "learning_rate": 0.00012127521204578297, "loss": 0.1314, "step": 588 }, { "epoch": 0.04348787499078647, "grad_norm": 33.73942184448242, "learning_rate": 0.00012127515219775134, "loss": 0.1772, "step": 590 }, { "epoch": 0.04363529151617896, "grad_norm": 52.08650588989258, "learning_rate": 0.00012127509214652041, "loss": 0.4505, "step": 592 }, { "epoch": 0.04363529151617896, "eval_1_ratio_diff": -0.1184723304754482, "eval_accuracy": 0.798908807482463, "eval_f1": 0.7716814159292036, "eval_loss": 0.7536761164665222, "eval_precision": 0.8916155419222904, "eval_recall": 0.6801872074882995, "eval_runtime": 1442.1268, "eval_samples_per_second": 0.89, "eval_steps_per_second": 0.445, "step": 592 }, { "epoch": 0.04378270804157146, "grad_norm": 0.05625031143426895, "learning_rate": 0.00012127503189209032, "loss": 0.3175, "step": 594 }, { "epoch": 0.043930124566963956, "grad_norm": 0.10953383892774582, "learning_rate": 0.0001212749714344613, "loss": 0.0059, "step": 596 }, { "epoch": 0.044077541092356454, "grad_norm": 71.34505462646484, "learning_rate": 0.00012127491077363357, "loss": 0.5113, "step": 598 }, { "epoch": 0.04422495761774895, "grad_norm": 0.012292311526834965, "learning_rate": 0.00012127484990960732, "loss": 0.0008, "step": 600 }, { "epoch": 0.04437237414314145, "grad_norm": 0.010139914229512215, "learning_rate": 0.00012127478884238274, "loss": 0.0002, "step": 602 }, { "epoch": 0.04451979066853394, "grad_norm": 58.99741744995117, "learning_rate": 0.00012127472757196004, "loss": 3.6273, "step": 604 }, { "epoch": 0.04466720719392644, "grad_norm": 56.25634765625, "learning_rate": 0.00012127466609833943, "loss": 3.663, "step": 606 }, { "epoch": 0.044814623719318936, "grad_norm": 56.98939895629883, "learning_rate": 0.00012127460442152114, "loss": 1.6247, "step": 608 }, { "epoch": 0.044814623719318936, "eval_1_ratio_diff": -0.03273577552611068, "eval_accuracy": 0.8456742010911925, "eval_f1": 0.8403225806451613, "eval_loss": 0.6838305592536926, "eval_precision": 0.8697829716193656, "eval_recall": 0.8127925117004681, "eval_runtime": 1441.7869, "eval_samples_per_second": 0.89, "eval_steps_per_second": 0.445, "step": 608 }, { "epoch": 0.04496204024471143, "grad_norm": 41.00777053833008, "learning_rate": 0.00012127454254150532, "loss": 3.2637, "step": 610 }, { "epoch": 0.04510945677010393, "grad_norm": 15.958291053771973, "learning_rate": 0.00012127448045829223, "loss": 0.0749, "step": 612 }, { "epoch": 0.04525687329549642, "grad_norm": 52.62068176269531, "learning_rate": 0.00012127441817188204, "loss": 1.1452, "step": 614 }, { "epoch": 0.04540428982088892, "grad_norm": 0.8104878067970276, "learning_rate": 0.00012127435568227499, "loss": 0.0086, "step": 616 }, { "epoch": 0.04555170634628142, "grad_norm": 6.7712883949279785, "learning_rate": 0.00012127429298947129, "loss": 0.035, "step": 618 }, { "epoch": 0.045699122871673915, "grad_norm": 1.2900152206420898, "learning_rate": 0.00012127423009347112, "loss": 0.0133, "step": 620 }, { "epoch": 0.04584653939706641, "grad_norm": 0.5468306541442871, "learning_rate": 0.00012127416699427471, "loss": 0.0066, "step": 622 }, { "epoch": 0.04599395592245891, "grad_norm": 0.16869762539863586, "learning_rate": 0.00012127410369188226, "loss": 0.0026, "step": 624 }, { "epoch": 0.04599395592245891, "eval_1_ratio_diff": 0.015588464536243185, "eval_accuracy": 0.8862042088854248, "eval_f1": 0.8878648233486943, "eval_loss": 0.570717990398407, "eval_precision": 0.8744326777609682, "eval_recall": 0.9017160686427457, "eval_runtime": 1441.498, "eval_samples_per_second": 0.89, "eval_steps_per_second": 0.445, "step": 624 }, { "epoch": 0.0461413724478514, "grad_norm": 0.20163878798484802, "learning_rate": 0.00012127404018629401, "loss": 0.0013, "step": 626 }, { "epoch": 0.0462887889732439, "grad_norm": 0.1430014669895172, "learning_rate": 0.00012127397647751014, "loss": 0.0016, "step": 628 }, { "epoch": 0.0464362054986364, "grad_norm": 60.50364303588867, "learning_rate": 0.00012127391256553088, "loss": 1.6526, "step": 630 }, { "epoch": 0.046583622024028895, "grad_norm": 0.009336289949715137, "learning_rate": 0.00012127384845035646, "loss": 0.0005, "step": 632 }, { "epoch": 0.04673103854942139, "grad_norm": 0.02924017794430256, "learning_rate": 0.00012127378413198706, "loss": 2.0099, "step": 634 }, { "epoch": 0.046878455074813884, "grad_norm": 0.1369701325893402, "learning_rate": 0.00012127371961042292, "loss": 1.9002, "step": 636 }, { "epoch": 0.04702587160020638, "grad_norm": 77.09698486328125, "learning_rate": 0.00012127365488566423, "loss": 1.0021, "step": 638 }, { "epoch": 0.04717328812559888, "grad_norm": 4.486428260803223, "learning_rate": 0.00012127358995771124, "loss": 1.8971, "step": 640 }, { "epoch": 0.04717328812559888, "eval_1_ratio_diff": -0.17225253312548716, "eval_accuracy": 0.7669524551831645, "eval_f1": 0.7181903864278982, "eval_loss": 1.282883644104004, "eval_precision": 0.9071428571428571, "eval_recall": 0.594383775351014, "eval_runtime": 1441.6631, "eval_samples_per_second": 0.89, "eval_steps_per_second": 0.445, "step": 640 }, { "epoch": 0.04732070465099138, "grad_norm": 0.3835877478122711, "learning_rate": 0.00012127352482656414, "loss": 1.5125, "step": 642 }, { "epoch": 0.047468121176383875, "grad_norm": 0.3453172445297241, "learning_rate": 0.00012127345949222316, "loss": 1.4256, "step": 644 }, { "epoch": 0.04761553770177637, "grad_norm": 56.087467193603516, "learning_rate": 0.00012127339395468855, "loss": 1.389, "step": 646 }, { "epoch": 0.04776295422716886, "grad_norm": 39.20930099487305, "learning_rate": 0.00012127332821396047, "loss": 2.2849, "step": 648 }, { "epoch": 0.04791037075256136, "grad_norm": 2.4249165058135986, "learning_rate": 0.00012127326227003918, "loss": 0.0286, "step": 650 }, { "epoch": 0.04805778727795386, "grad_norm": 1.4587557315826416, "learning_rate": 0.0001212731961229249, "loss": 0.775, "step": 652 }, { "epoch": 0.04820520380334636, "grad_norm": 45.33637237548828, "learning_rate": 0.00012127312977261783, "loss": 0.2852, "step": 654 }, { "epoch": 0.048352620328738855, "grad_norm": 0.07065322250127792, "learning_rate": 0.0001212730632191182, "loss": 0.7127, "step": 656 }, { "epoch": 0.048352620328738855, "eval_1_ratio_diff": -0.05455962587685115, "eval_accuracy": 0.8487918939984411, "eval_f1": 0.8399339933993399, "eval_loss": 0.5940015316009521, "eval_precision": 0.8914185639229422, "eval_recall": 0.7940717628705148, "eval_runtime": 1441.517, "eval_samples_per_second": 0.89, "eval_steps_per_second": 0.445, "step": 656 }, { "epoch": 0.048500036854131345, "grad_norm": 35.70323181152344, "learning_rate": 0.00012127299646242624, "loss": 0.6816, "step": 658 }, { "epoch": 0.04864745337952384, "grad_norm": 1.4870625734329224, "learning_rate": 0.00012127292950254218, "loss": 0.1488, "step": 660 }, { "epoch": 0.04879486990491634, "grad_norm": 0.6423426866531372, "learning_rate": 0.00012127286233946625, "loss": 0.0136, "step": 662 }, { "epoch": 0.04894228643030884, "grad_norm": 0.3320056200027466, "learning_rate": 0.00012127279497319864, "loss": 0.0058, "step": 664 }, { "epoch": 0.049089702955701336, "grad_norm": 4.33368444442749, "learning_rate": 0.00012127272740373959, "loss": 0.5196, "step": 666 }, { "epoch": 0.04923711948109383, "grad_norm": 71.66387939453125, "learning_rate": 0.00012127265963108935, "loss": 2.7961, "step": 668 }, { "epoch": 0.049384536006486325, "grad_norm": 94.96151733398438, "learning_rate": 0.00012127259165524814, "loss": 3.8152, "step": 670 }, { "epoch": 0.04953195253187882, "grad_norm": 39.40300369262695, "learning_rate": 0.00012127252347621616, "loss": 1.1659, "step": 672 }, { "epoch": 0.04953195253187882, "eval_1_ratio_diff": -0.11301636788776309, "eval_accuracy": 0.779423226812159, "eval_f1": 0.751099384344767, "eval_loss": 1.093988060951233, "eval_precision": 0.8608870967741935, "eval_recall": 0.6661466458658346, "eval_runtime": 1441.6444, "eval_samples_per_second": 0.89, "eval_steps_per_second": 0.445, "step": 672 }, { "epoch": 0.04967936905727132, "grad_norm": 142.39564514160156, "learning_rate": 0.00012127245509399365, "loss": 1.9772, "step": 674 }, { "epoch": 0.04982678558266382, "grad_norm": 0.43099793791770935, "learning_rate": 0.00012127238650858088, "loss": 0.0056, "step": 676 }, { "epoch": 0.049974202108056316, "grad_norm": 0.22017613053321838, "learning_rate": 0.00012127231771997801, "loss": 0.0026, "step": 678 }, { "epoch": 0.05012161863344881, "grad_norm": 0.06024312227964401, "learning_rate": 0.00012127224872818532, "loss": 1.4556, "step": 680 }, { "epoch": 0.050269035158841305, "grad_norm": 30.382848739624023, "learning_rate": 0.00012127217953320302, "loss": 0.959, "step": 682 }, { "epoch": 0.0504164516842338, "grad_norm": 0.12178266048431396, "learning_rate": 0.00012127211013503136, "loss": 0.0025, "step": 684 }, { "epoch": 0.0505638682096263, "grad_norm": 0.2670276165008545, "learning_rate": 0.00012127204053367056, "loss": 0.0059, "step": 686 }, { "epoch": 0.0507112847350188, "grad_norm": 0.7420686483383179, "learning_rate": 0.00012127197072912085, "loss": 0.0205, "step": 688 }, { "epoch": 0.0507112847350188, "eval_1_ratio_diff": -0.05300077942322684, "eval_accuracy": 0.828526890101325, "eval_f1": 0.8187808896210873, "eval_loss": 0.5867729783058167, "eval_precision": 0.8673647469458988, "eval_recall": 0.7753510140405616, "eval_runtime": 1441.426, "eval_samples_per_second": 0.89, "eval_steps_per_second": 0.445, "step": 688 }, { "epoch": 0.05085870126041129, "grad_norm": 69.81874084472656, "learning_rate": 0.00012127190072138247, "loss": 0.853, "step": 690 }, { "epoch": 0.05100611778580379, "grad_norm": 0.51251220703125, "learning_rate": 0.00012127183051045567, "loss": 0.037, "step": 692 }, { "epoch": 0.051153534311196285, "grad_norm": 32.83553695678711, "learning_rate": 0.00012127176009634066, "loss": 1.7711, "step": 694 }, { "epoch": 0.05130095083658878, "grad_norm": 0.029091738164424896, "learning_rate": 0.00012127168947903768, "loss": 0.0006, "step": 696 }, { "epoch": 0.05144836736198128, "grad_norm": 59.94422912597656, "learning_rate": 0.00012127161865854698, "loss": 1.6607, "step": 698 }, { "epoch": 0.05159578388737378, "grad_norm": 60.350067138671875, "learning_rate": 0.00012127154763486877, "loss": 2.055, "step": 700 }, { "epoch": 0.05174320041276627, "grad_norm": 0.08221148699522018, "learning_rate": 0.00012127147640800332, "loss": 1.6475, "step": 702 }, { "epoch": 0.051890616938158766, "grad_norm": 39.905357360839844, "learning_rate": 0.00012127140497795086, "loss": 1.2104, "step": 704 }, { "epoch": 0.051890616938158766, "eval_1_ratio_diff": -0.0919719407638348, "eval_accuracy": 0.8316445830085737, "eval_f1": 0.8144329896907216, "eval_loss": 0.7349568605422974, "eval_precision": 0.9063097514340345, "eval_recall": 0.7394695787831513, "eval_runtime": 1441.7333, "eval_samples_per_second": 0.89, "eval_steps_per_second": 0.445, "step": 704 }, { "epoch": 0.052038033463551264, "grad_norm": 4.226317882537842, "learning_rate": 0.00012127133334471161, "loss": 0.4275, "step": 706 }, { "epoch": 0.05218544998894376, "grad_norm": 0.05035168305039406, "learning_rate": 0.00012127126150828585, "loss": 1.3166, "step": 708 }, { "epoch": 0.05233286651433626, "grad_norm": 0.25760674476623535, "learning_rate": 0.00012127118946867378, "loss": 0.0081, "step": 710 }, { "epoch": 0.05248028303972875, "grad_norm": 36.74332809448242, "learning_rate": 0.00012127111722587565, "loss": 1.1506, "step": 712 }, { "epoch": 0.05262769956512125, "grad_norm": 36.16116714477539, "learning_rate": 0.00012127104477989172, "loss": 1.2632, "step": 714 }, { "epoch": 0.052775116090513746, "grad_norm": 37.083343505859375, "learning_rate": 0.00012127097213072223, "loss": 1.8408, "step": 716 }, { "epoch": 0.052922532615906244, "grad_norm": 3.0497827529907227, "learning_rate": 0.0001212708992783674, "loss": 0.0247, "step": 718 }, { "epoch": 0.05306994914129874, "grad_norm": 4.117802619934082, "learning_rate": 0.00012127082622282751, "loss": 0.0342, "step": 720 }, { "epoch": 0.05306994914129874, "eval_1_ratio_diff": -0.08183943881527672, "eval_accuracy": 0.8121590023382697, "eval_f1": 0.7952421410365336, "eval_loss": 0.5786097645759583, "eval_precision": 0.8731343283582089, "eval_recall": 0.7301092043681747, "eval_runtime": 1442.0276, "eval_samples_per_second": 0.89, "eval_steps_per_second": 0.445, "step": 720 }, { "epoch": 0.05321736566669124, "grad_norm": 0.633588969707489, "learning_rate": 0.00012127075296410277, "loss": 0.0056, "step": 722 }, { "epoch": 0.05336478219208373, "grad_norm": 36.505218505859375, "learning_rate": 0.00012127067950219344, "loss": 0.7263, "step": 724 }, { "epoch": 0.05351219871747623, "grad_norm": 0.6716632843017578, "learning_rate": 0.00012127060583709976, "loss": 0.0045, "step": 726 }, { "epoch": 0.053659615242868726, "grad_norm": 36.19940948486328, "learning_rate": 0.000121270531968822, "loss": 0.377, "step": 728 }, { "epoch": 0.053807031768261224, "grad_norm": 81.25736236572266, "learning_rate": 0.00012127045789736038, "loss": 0.6006, "step": 730 }, { "epoch": 0.05395444829365372, "grad_norm": 29.044986724853516, "learning_rate": 0.00012127038362271517, "loss": 1.1609, "step": 732 }, { "epoch": 0.05410186481904621, "grad_norm": 0.1593562811613083, "learning_rate": 0.0001212703091448866, "loss": 0.0055, "step": 734 }, { "epoch": 0.05424928134443871, "grad_norm": 7.988092422485352, "learning_rate": 0.00012127023446387492, "loss": 0.0238, "step": 736 }, { "epoch": 0.05424928134443871, "eval_1_ratio_diff": -0.07092751363990646, "eval_accuracy": 0.838659392049883, "eval_f1": 0.8261964735516373, "eval_loss": 0.6887457370758057, "eval_precision": 0.8945454545454545, "eval_recall": 0.7675507020280812, "eval_runtime": 1441.3319, "eval_samples_per_second": 0.89, "eval_steps_per_second": 0.445, "step": 736 }, { "epoch": 0.05439669786983121, "grad_norm": 58.56552505493164, "learning_rate": 0.00012127015957968041, "loss": 2.3194, "step": 738 }, { "epoch": 0.054544114395223706, "grad_norm": 0.37152421474456787, "learning_rate": 0.00012127008449230329, "loss": 0.0029, "step": 740 }, { "epoch": 0.0546915309206162, "grad_norm": 33.52932357788086, "learning_rate": 0.00012127000920174381, "loss": 1.1549, "step": 742 }, { "epoch": 0.054838947446008694, "grad_norm": 0.02616913430392742, "learning_rate": 0.00012126993370800224, "loss": 0.0021, "step": 744 }, { "epoch": 0.05498636397140119, "grad_norm": 36.83317565917969, "learning_rate": 0.00012126985801107882, "loss": 1.2016, "step": 746 }, { "epoch": 0.05513378049679369, "grad_norm": 0.006011671852320433, "learning_rate": 0.00012126978211097381, "loss": 2.834, "step": 748 }, { "epoch": 0.05528119702218619, "grad_norm": 58.966102600097656, "learning_rate": 0.00012126970600768747, "loss": 2.0661, "step": 750 }, { "epoch": 0.055428613547578685, "grad_norm": 57.80133819580078, "learning_rate": 0.00012126962970122005, "loss": 1.2417, "step": 752 }, { "epoch": 0.055428613547578685, "eval_1_ratio_diff": 0.09508963367108336, "eval_accuracy": 0.8503507404520655, "eval_f1": 0.8632478632478633, "eval_loss": 0.7459388375282288, "eval_precision": 0.7942332896461337, "eval_recall": 0.9453978159126365, "eval_runtime": 1440.976, "eval_samples_per_second": 0.89, "eval_steps_per_second": 0.446, "step": 752 }, { "epoch": 0.05557603007297118, "grad_norm": 0.10538947582244873, "learning_rate": 0.00012126955319157181, "loss": 1.5568, "step": 754 }, { "epoch": 0.055723446598363674, "grad_norm": 0.3577294647693634, "learning_rate": 0.000121269476478743, "loss": 1.3633, "step": 756 }, { "epoch": 0.05587086312375617, "grad_norm": 111.04033660888672, "learning_rate": 0.00012126939956273387, "loss": 1.5691, "step": 758 }, { "epoch": 0.05601827964914867, "grad_norm": 8.450987815856934, "learning_rate": 0.00012126932244354469, "loss": 0.6036, "step": 760 }, { "epoch": 0.05616569617454117, "grad_norm": 6.646569728851318, "learning_rate": 0.00012126924512117572, "loss": 0.0554, "step": 762 }, { "epoch": 0.056313112699933665, "grad_norm": 10.05777359008789, "learning_rate": 0.00012126916759562719, "loss": 0.0507, "step": 764 }, { "epoch": 0.056460529225326156, "grad_norm": 1.5429670810699463, "learning_rate": 0.00012126908986689941, "loss": 0.5476, "step": 766 }, { "epoch": 0.056607945750718654, "grad_norm": 0.7471988201141357, "learning_rate": 0.0001212690119349926, "loss": 0.0357, "step": 768 }, { "epoch": 0.056607945750718654, "eval_1_ratio_diff": -0.03273577552611068, "eval_accuracy": 0.8534684333593141, "eval_f1": 0.8483870967741935, "eval_loss": 0.5041674971580505, "eval_precision": 0.8781302170283807, "eval_recall": 0.8205928237129485, "eval_runtime": 1441.5634, "eval_samples_per_second": 0.89, "eval_steps_per_second": 0.445, "step": 768 }, { "epoch": 0.05675536227611115, "grad_norm": 25.423622131347656, "learning_rate": 0.00012126893379990705, "loss": 0.0991, "step": 770 }, { "epoch": 0.05690277880150365, "grad_norm": 8.131854057312012, "learning_rate": 0.00012126885546164299, "loss": 0.0467, "step": 772 }, { "epoch": 0.05705019532689615, "grad_norm": 0.7007619738578796, "learning_rate": 0.00012126877692020069, "loss": 0.0319, "step": 774 }, { "epoch": 0.057197611852288645, "grad_norm": 0.0242279302328825, "learning_rate": 0.00012126869817558045, "loss": 0.5106, "step": 776 }, { "epoch": 0.057345028377681136, "grad_norm": 1.126301646232605, "learning_rate": 0.00012126861922778249, "loss": 0.0068, "step": 778 }, { "epoch": 0.05749244490307363, "grad_norm": 2.2255496978759766, "learning_rate": 0.0001212685400768071, "loss": 0.0125, "step": 780 }, { "epoch": 0.05763986142846613, "grad_norm": 53.08203125, "learning_rate": 0.00012126846072265453, "loss": 3.4784, "step": 782 }, { "epoch": 0.05778727795385863, "grad_norm": 53.75185012817383, "learning_rate": 0.00012126838116532506, "loss": 5.3382, "step": 784 }, { "epoch": 0.05778727795385863, "eval_1_ratio_diff": 0.024162120031176904, "eval_accuracy": 0.8074824629773967, "eval_f1": 0.8118811881188119, "eval_loss": 1.1864495277404785, "eval_precision": 0.7931547619047619, "eval_recall": 0.8315132605304212, "eval_runtime": 1441.6478, "eval_samples_per_second": 0.89, "eval_steps_per_second": 0.445, "step": 784 }, { "epoch": 0.05793469447925113, "grad_norm": 20.95121955871582, "learning_rate": 0.00012126830140481893, "loss": 3.3432, "step": 786 }, { "epoch": 0.05808211100464362, "grad_norm": 49.42118453979492, "learning_rate": 0.00012126822144113646, "loss": 0.762, "step": 788 }, { "epoch": 0.058229527530036115, "grad_norm": 16.03618812561035, "learning_rate": 0.00012126814127427784, "loss": 0.1045, "step": 790 }, { "epoch": 0.05837694405542861, "grad_norm": 34.1168212890625, "learning_rate": 0.00012126806090424342, "loss": 3.1091, "step": 792 }, { "epoch": 0.05852436058082111, "grad_norm": 34.757083892822266, "learning_rate": 0.00012126798033103342, "loss": 2.0632, "step": 794 }, { "epoch": 0.05867177710621361, "grad_norm": 1.412405014038086, "learning_rate": 0.00012126789955464813, "loss": 0.2568, "step": 796 }, { "epoch": 0.058819193631606106, "grad_norm": 55.76416015625, "learning_rate": 0.00012126781857508779, "loss": 0.497, "step": 798 }, { "epoch": 0.0589666101569986, "grad_norm": 0.3345389664173126, "learning_rate": 0.00012126773739235272, "loss": 0.0121, "step": 800 }, { "epoch": 0.0589666101569986, "eval_1_ratio_diff": 0.06703039750584561, "eval_accuracy": 0.8363211223694466, "eval_f1": 0.8464912280701754, "eval_loss": 0.7451047897338867, "eval_precision": 0.796423658872077, "eval_recall": 0.9032761310452418, "eval_runtime": 1440.5179, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 800 }, { "epoch": 0.059114026682391095, "grad_norm": 0.15297777950763702, "learning_rate": 0.00012126765600644314, "loss": 0.0082, "step": 802 }, { "epoch": 0.05926144320778359, "grad_norm": 0.19080302119255066, "learning_rate": 0.00012126757441735937, "loss": 0.0026, "step": 804 }, { "epoch": 0.05940885973317609, "grad_norm": 0.43317776918411255, "learning_rate": 0.00012126749262510164, "loss": 0.0055, "step": 806 }, { "epoch": 0.05955627625856859, "grad_norm": 6.4003984334704e-06, "learning_rate": 0.00012126741062967027, "loss": 0.0031, "step": 808 }, { "epoch": 0.05970369278396108, "grad_norm": 0.03450751677155495, "learning_rate": 0.00012126732843106551, "loss": 0.0052, "step": 810 }, { "epoch": 0.05985110930935358, "grad_norm": 52.02117156982422, "learning_rate": 0.00012126724602928764, "loss": 5.0919, "step": 812 }, { "epoch": 0.059998525834746075, "grad_norm": 50.249900817871094, "learning_rate": 0.00012126716342433692, "loss": 4.0749, "step": 814 }, { "epoch": 0.06014594236013857, "grad_norm": 0.0067368014715611935, "learning_rate": 0.00012126708061621366, "loss": 0.0001, "step": 816 }, { "epoch": 0.06014594236013857, "eval_1_ratio_diff": 0.06469212782540923, "eval_accuracy": 0.8651597817614964, "eval_f1": 0.8732600732600733, "eval_loss": 0.9449532628059387, "eval_precision": 0.8232044198895028, "eval_recall": 0.9297971918876755, "eval_runtime": 1440.6727, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 816 }, { "epoch": 0.06029335888553107, "grad_norm": 44.037471771240234, "learning_rate": 0.00012126699760491808, "loss": 2.1184, "step": 818 }, { "epoch": 0.06044077541092356, "grad_norm": 31.20966148376465, "learning_rate": 0.00012126691439045052, "loss": 2.3532, "step": 820 }, { "epoch": 0.06058819193631606, "grad_norm": 1.108382225036621, "learning_rate": 0.00012126683097281125, "loss": 0.0093, "step": 822 }, { "epoch": 0.06073560846170856, "grad_norm": 1.2753050327301025, "learning_rate": 0.0001212667473520005, "loss": 0.011, "step": 824 }, { "epoch": 0.060883024987101055, "grad_norm": 4.512105941772461, "learning_rate": 0.00012126666352801861, "loss": 0.0212, "step": 826 }, { "epoch": 0.06103044151249355, "grad_norm": 0.3488874137401581, "learning_rate": 0.00012126657950086582, "loss": 1.2435, "step": 828 }, { "epoch": 0.06117785803788605, "grad_norm": 0.11297665536403656, "learning_rate": 0.00012126649527054243, "loss": 0.0027, "step": 830 }, { "epoch": 0.06132527456327854, "grad_norm": 0.07631942629814148, "learning_rate": 0.00012126641083704874, "loss": 0.0032, "step": 832 }, { "epoch": 0.06132527456327854, "eval_1_ratio_diff": 0.08885424785658613, "eval_accuracy": 0.8799688230709275, "eval_f1": 0.8896848137535817, "eval_loss": 0.7254036068916321, "eval_precision": 0.8225165562913908, "eval_recall": 0.968798751950078, "eval_runtime": 1440.5593, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 832 }, { "epoch": 0.06147269108867104, "grad_norm": 41.37874984741211, "learning_rate": 0.00012126632620038498, "loss": 1.3108, "step": 834 }, { "epoch": 0.061620107614063536, "grad_norm": 55.71513366699219, "learning_rate": 0.00012126624136055149, "loss": 1.7068, "step": 836 }, { "epoch": 0.061767524139456034, "grad_norm": 1.5174663066864014, "learning_rate": 0.0001212661563175485, "loss": 0.0173, "step": 838 }, { "epoch": 0.06191494066484853, "grad_norm": 0.10353035479784012, "learning_rate": 0.00012126607107137636, "loss": 1.2081, "step": 840 }, { "epoch": 0.06206235719024102, "grad_norm": 0.05997217819094658, "learning_rate": 0.00012126598562203531, "loss": 1.8296, "step": 842 }, { "epoch": 0.06220977371563352, "grad_norm": 0.17887941002845764, "learning_rate": 0.00012126589996952563, "loss": 0.0016, "step": 844 }, { "epoch": 0.06235719024102602, "grad_norm": 0.08932141214609146, "learning_rate": 0.00012126581411384764, "loss": 1.5849, "step": 846 }, { "epoch": 0.06250460676641852, "grad_norm": 41.82356643676758, "learning_rate": 0.0001212657280550016, "loss": 1.2425, "step": 848 }, { "epoch": 0.06250460676641852, "eval_1_ratio_diff": 0.09586905689789549, "eval_accuracy": 0.8542478565861262, "eval_f1": 0.8669039145907473, "eval_loss": 0.6706948280334473, "eval_precision": 0.7971204188481675, "eval_recall": 0.9500780031201248, "eval_runtime": 1441.8264, "eval_samples_per_second": 0.89, "eval_steps_per_second": 0.445, "step": 848 }, { "epoch": 0.06265202329181101, "grad_norm": 45.45724105834961, "learning_rate": 0.00012126564179298783, "loss": 0.7189, "step": 850 }, { "epoch": 0.06279943981720351, "grad_norm": 1.7170765399932861, "learning_rate": 0.00012126555532780658, "loss": 0.014, "step": 852 }, { "epoch": 0.062946856342596, "grad_norm": 1.8296376466751099, "learning_rate": 0.00012126546865945818, "loss": 0.0249, "step": 854 }, { "epoch": 0.06309427286798851, "grad_norm": 28.392093658447266, "learning_rate": 0.00012126538178794288, "loss": 2.1082, "step": 856 }, { "epoch": 0.063241689393381, "grad_norm": 0.024955546483397484, "learning_rate": 0.00012126529471326101, "loss": 0.0026, "step": 858 }, { "epoch": 0.06338910591877349, "grad_norm": 29.141136169433594, "learning_rate": 0.00012126520743541283, "loss": 1.5827, "step": 860 }, { "epoch": 0.063536522444166, "grad_norm": 0.3031620383262634, "learning_rate": 0.00012126511995439865, "loss": 1.4029, "step": 862 }, { "epoch": 0.06368393896955848, "grad_norm": 0.2821040451526642, "learning_rate": 0.00012126503227021874, "loss": 1.3931, "step": 864 }, { "epoch": 0.06368393896955848, "eval_1_ratio_diff": -0.04053000779423227, "eval_accuracy": 0.8581449727201871, "eval_f1": 0.8520325203252033, "eval_loss": 0.5900216102600098, "eval_precision": 0.8896434634974533, "eval_recall": 0.8174726989079563, "eval_runtime": 1439.3591, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 864 }, { "epoch": 0.06383135549495099, "grad_norm": 4.8987860679626465, "learning_rate": 0.00012126494438287343, "loss": 0.0265, "step": 866 }, { "epoch": 0.06397877202034348, "grad_norm": 0.27837908267974854, "learning_rate": 0.000121264856292363, "loss": 0.0093, "step": 868 }, { "epoch": 0.06412618854573597, "grad_norm": 0.5379538536071777, "learning_rate": 0.00012126476799868773, "loss": 0.0095, "step": 870 }, { "epoch": 0.06427360507112848, "grad_norm": 23.87804412841797, "learning_rate": 0.00012126467950184793, "loss": 0.8342, "step": 872 }, { "epoch": 0.06442102159652097, "grad_norm": 1.2284973859786987, "learning_rate": 0.0001212645908018439, "loss": 0.0162, "step": 874 }, { "epoch": 0.06456843812191347, "grad_norm": 36.555442810058594, "learning_rate": 0.00012126450189867592, "loss": 2.2561, "step": 876 }, { "epoch": 0.06471585464730596, "grad_norm": 24.54311180114746, "learning_rate": 0.00012126441279234432, "loss": 3.1743, "step": 878 }, { "epoch": 0.06486327117269845, "grad_norm": 0.13615825772285461, "learning_rate": 0.00012126432348284936, "loss": 0.0021, "step": 880 }, { "epoch": 0.06486327117269845, "eval_1_ratio_diff": -0.2899454403741232, "eval_accuracy": 0.6975837879968823, "eval_f1": 0.5736263736263736, "eval_loss": 1.3224732875823975, "eval_precision": 0.9702602230483272, "eval_recall": 0.40717628705148207, "eval_runtime": 1440.3311, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 880 }, { "epoch": 0.06501068769809096, "grad_norm": 3.8478543758392334, "learning_rate": 0.00012126423397019136, "loss": 0.021, "step": 882 }, { "epoch": 0.06515810422348345, "grad_norm": 0.08823257684707642, "learning_rate": 0.00012126414425437062, "loss": 0.0016, "step": 884 }, { "epoch": 0.06530552074887595, "grad_norm": 27.02589988708496, "learning_rate": 0.00012126405433538744, "loss": 2.9462, "step": 886 }, { "epoch": 0.06545293727426844, "grad_norm": 24.244503021240234, "learning_rate": 0.00012126396421324212, "loss": 0.8423, "step": 888 }, { "epoch": 0.06560035379966095, "grad_norm": 0.3652421236038208, "learning_rate": 0.00012126387388793495, "loss": 0.0081, "step": 890 }, { "epoch": 0.06574777032505344, "grad_norm": 22.919225692749023, "learning_rate": 0.00012126378335946625, "loss": 1.1268, "step": 892 }, { "epoch": 0.06589518685044593, "grad_norm": 0.18866397440433502, "learning_rate": 0.00012126369262783633, "loss": 1.8645, "step": 894 }, { "epoch": 0.06604260337583844, "grad_norm": 2.1540791988372803, "learning_rate": 0.00012126360169304547, "loss": 0.036, "step": 896 }, { "epoch": 0.06604260337583844, "eval_1_ratio_diff": 0.017147310989867437, "eval_accuracy": 0.8862042088854248, "eval_f1": 0.8880368098159509, "eval_loss": 0.49060943722724915, "eval_precision": 0.8733031674208145, "eval_recall": 0.9032761310452418, "eval_runtime": 1439.5517, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 896 }, { "epoch": 0.06619001990123093, "grad_norm": 0.7136353850364685, "learning_rate": 0.00012126351055509399, "loss": 1.4136, "step": 898 }, { "epoch": 0.06633743642662343, "grad_norm": 1.6063231229782104, "learning_rate": 0.00012126341921398221, "loss": 0.0358, "step": 900 }, { "epoch": 0.06648485295201592, "grad_norm": 4.673253536224365, "learning_rate": 0.00012126332766971038, "loss": 0.0494, "step": 902 }, { "epoch": 0.06663226947740841, "grad_norm": 0.21607956290245056, "learning_rate": 0.00012126323592227886, "loss": 0.0053, "step": 904 }, { "epoch": 0.06677968600280092, "grad_norm": 32.70335006713867, "learning_rate": 0.00012126314397168796, "loss": 1.5106, "step": 906 }, { "epoch": 0.06692710252819341, "grad_norm": 38.56415557861328, "learning_rate": 0.00012126305181793794, "loss": 0.8798, "step": 908 }, { "epoch": 0.06707451905358591, "grad_norm": 0.018692007288336754, "learning_rate": 0.00012126295946102917, "loss": 0.0004, "step": 910 }, { "epoch": 0.0672219355789784, "grad_norm": 22.49344825744629, "learning_rate": 0.00012126286690096191, "loss": 0.9364, "step": 912 }, { "epoch": 0.0672219355789784, "eval_1_ratio_diff": -0.18706157443491817, "eval_accuracy": 0.7802026500389712, "eval_f1": 0.7293666026871402, "eval_loss": 1.4581658840179443, "eval_precision": 0.9476309226932669, "eval_recall": 0.592823712948518, "eval_runtime": 1440.0947, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 912 }, { "epoch": 0.0673693521043709, "grad_norm": 182.47938537597656, "learning_rate": 0.00012126277413773649, "loss": 1.0293, "step": 914 }, { "epoch": 0.0675167686297634, "grad_norm": 4.0591816902160645, "learning_rate": 0.00012126268117135323, "loss": 0.0308, "step": 916 }, { "epoch": 0.06766418515515589, "grad_norm": 6.380730628967285, "learning_rate": 0.00012126258800181242, "loss": 1.2327, "step": 918 }, { "epoch": 0.0678116016805484, "grad_norm": 31.462488174438477, "learning_rate": 0.00012126249462911438, "loss": 0.8761, "step": 920 }, { "epoch": 0.06795901820594089, "grad_norm": 0.0057801539078354836, "learning_rate": 0.00012126240105325944, "loss": 0.0077, "step": 922 }, { "epoch": 0.06810643473133338, "grad_norm": 1.9855010509490967, "learning_rate": 0.0001212623072742479, "loss": 0.0239, "step": 924 }, { "epoch": 0.06825385125672588, "grad_norm": 0.4254453480243683, "learning_rate": 0.00012126221329208006, "loss": 0.0032, "step": 926 }, { "epoch": 0.06840126778211837, "grad_norm": 0.502257227897644, "learning_rate": 0.00012126211910675626, "loss": 0.0103, "step": 928 }, { "epoch": 0.06840126778211837, "eval_1_ratio_diff": 0.026500389711613392, "eval_accuracy": 0.8768511301636789, "eval_f1": 0.8799392097264438, "eval_loss": 0.7347307205200195, "eval_precision": 0.8577777777777778, "eval_recall": 0.9032761310452418, "eval_runtime": 1440.0126, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 928 }, { "epoch": 0.06854868430751088, "grad_norm": 0.6557896733283997, "learning_rate": 0.00012126202471827679, "loss": 0.003, "step": 930 }, { "epoch": 0.06869610083290337, "grad_norm": 0.020085789263248444, "learning_rate": 0.00012126193012664201, "loss": 0.0004, "step": 932 }, { "epoch": 0.06884351735829586, "grad_norm": 0.006013574078679085, "learning_rate": 0.00012126183533185218, "loss": 0.0001, "step": 934 }, { "epoch": 0.06899093388368836, "grad_norm": 252.07472229003906, "learning_rate": 0.00012126174033390767, "loss": 0.8075, "step": 936 }, { "epoch": 0.06913835040908085, "grad_norm": 0.002460025018081069, "learning_rate": 0.00012126164513280875, "loss": 0.0011, "step": 938 }, { "epoch": 0.06928576693447336, "grad_norm": 0.01136123575270176, "learning_rate": 0.00012126154972855578, "loss": 0.0003, "step": 940 }, { "epoch": 0.06943318345986585, "grad_norm": 37.112640380859375, "learning_rate": 0.00012126145412114907, "loss": 2.9468, "step": 942 }, { "epoch": 0.06958059998525835, "grad_norm": 0.006933971308171749, "learning_rate": 0.00012126135831058891, "loss": 0.0001, "step": 944 }, { "epoch": 0.06958059998525835, "eval_1_ratio_diff": 0.2478565861262666, "eval_accuracy": 0.7443491816056118, "eval_f1": 0.795, "eval_loss": 2.4577670097351074, "eval_precision": 0.6631908237747653, "eval_recall": 0.9921996879875195, "eval_runtime": 1439.5508, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 944 }, { "epoch": 0.06972801651065084, "grad_norm": 36.87862777709961, "learning_rate": 0.00012126126229687566, "loss": 5.0295, "step": 946 }, { "epoch": 0.06987543303604334, "grad_norm": 70.17023468017578, "learning_rate": 0.00012126116608000961, "loss": 4.0308, "step": 948 }, { "epoch": 0.07002284956143584, "grad_norm": 37.03538513183594, "learning_rate": 0.00012126106965999112, "loss": 1.8733, "step": 950 }, { "epoch": 0.07017026608682833, "grad_norm": 66.47712707519531, "learning_rate": 0.00012126097303682048, "loss": 4.2016, "step": 952 }, { "epoch": 0.07031768261222084, "grad_norm": 29.390884399414062, "learning_rate": 0.00012126087621049803, "loss": 1.9788, "step": 954 }, { "epoch": 0.07046509913761333, "grad_norm": 10.997523307800293, "learning_rate": 0.00012126077918102409, "loss": 0.1381, "step": 956 }, { "epoch": 0.07061251566300582, "grad_norm": 38.46750259399414, "learning_rate": 0.00012126068194839898, "loss": 0.8822, "step": 958 }, { "epoch": 0.07075993218839832, "grad_norm": 18.62594985961914, "learning_rate": 0.00012126058451262304, "loss": 0.3758, "step": 960 }, { "epoch": 0.07075993218839832, "eval_1_ratio_diff": -0.4505066250974279, "eval_accuracy": 0.5494933749025721, "eval_f1": 0.17897727272727273, "eval_loss": 0.5460181832313538, "eval_precision": 1.0, "eval_recall": 0.09828393135725429, "eval_runtime": 1440.4539, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 960 }, { "epoch": 0.07090734871379081, "grad_norm": 21.68712615966797, "learning_rate": 0.00012126048687369658, "loss": 0.3891, "step": 962 }, { "epoch": 0.07105476523918332, "grad_norm": 7.1598124504089355, "learning_rate": 0.00012126038903161995, "loss": 0.3555, "step": 964 }, { "epoch": 0.07120218176457581, "grad_norm": 28.80471420288086, "learning_rate": 0.00012126029098639344, "loss": 0.9078, "step": 966 }, { "epoch": 0.0713495982899683, "grad_norm": 18.606401443481445, "learning_rate": 0.00012126019273801743, "loss": 0.2927, "step": 968 }, { "epoch": 0.0714970148153608, "grad_norm": 21.51089859008789, "learning_rate": 0.0001212600942864922, "loss": 0.6348, "step": 970 }, { "epoch": 0.0716444313407533, "grad_norm": 4.713807582855225, "learning_rate": 0.00012125999563181809, "loss": 0.5351, "step": 972 }, { "epoch": 0.0717918478661458, "grad_norm": 11.428181648254395, "learning_rate": 0.00012125989677399546, "loss": 0.2465, "step": 974 }, { "epoch": 0.07193926439153829, "grad_norm": 13.697668075561523, "learning_rate": 0.00012125979771302464, "loss": 0.1411, "step": 976 }, { "epoch": 0.07193926439153829, "eval_1_ratio_diff": 0.03897116134060796, "eval_accuracy": 0.8846453624318005, "eval_f1": 0.8888888888888888, "eval_loss": 0.38700371980667114, "eval_precision": 0.8567293777134588, "eval_recall": 0.9235569422776911, "eval_runtime": 1440.5564, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 976 }, { "epoch": 0.07208668091693078, "grad_norm": 3.2123868465423584, "learning_rate": 0.0001212596984489059, "loss": 0.0295, "step": 978 }, { "epoch": 0.07223409744232329, "grad_norm": 0.026355383917689323, "learning_rate": 0.00012125959898163965, "loss": 0.0005, "step": 980 }, { "epoch": 0.07238151396771578, "grad_norm": 0.10228274017572403, "learning_rate": 0.00012125949931122618, "loss": 0.002, "step": 982 }, { "epoch": 0.07252893049310828, "grad_norm": 0.031520161777734756, "learning_rate": 0.00012125939943766583, "loss": 0.0008, "step": 984 }, { "epoch": 0.07267634701850077, "grad_norm": 0.1047026515007019, "learning_rate": 0.00012125929936095894, "loss": 0.0009, "step": 986 }, { "epoch": 0.07282376354389328, "grad_norm": 30.88459587097168, "learning_rate": 0.00012125919908110585, "loss": 2.267, "step": 988 }, { "epoch": 0.07297118006928577, "grad_norm": 0.029362376779317856, "learning_rate": 0.0001212590985981069, "loss": 0.0003, "step": 990 }, { "epoch": 0.07311859659467826, "grad_norm": 0.2791018784046173, "learning_rate": 0.0001212589979119624, "loss": 0.0017, "step": 992 }, { "epoch": 0.07311859659467826, "eval_1_ratio_diff": 0.05378020265003891, "eval_accuracy": 0.877630553390491, "eval_f1": 0.8837897853441895, "eval_loss": 0.7231972813606262, "eval_precision": 0.8408450704225352, "eval_recall": 0.9313572542901716, "eval_runtime": 1440.0578, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 992 }, { "epoch": 0.07326601312007076, "grad_norm": 0.056903205811977386, "learning_rate": 0.00012125889702267272, "loss": 0.0007, "step": 994 }, { "epoch": 0.07341342964546325, "grad_norm": 0.015094200149178505, "learning_rate": 0.00012125879593023818, "loss": 0.0002, "step": 996 }, { "epoch": 0.07356084617085576, "grad_norm": 0.6008047461509705, "learning_rate": 0.00012125869463465912, "loss": 0.0045, "step": 998 }, { "epoch": 0.07370826269624825, "grad_norm": 0.6626961827278137, "learning_rate": 0.00012125859313593587, "loss": 0.004, "step": 1000 }, { "epoch": 0.07385567922164074, "grad_norm": 0.009313930757343769, "learning_rate": 0.0001212584914340688, "loss": 0.0002, "step": 1002 }, { "epoch": 0.07400309574703325, "grad_norm": 0.01076335646212101, "learning_rate": 0.00012125838952905822, "loss": 0.0004, "step": 1004 }, { "epoch": 0.07415051227242574, "grad_norm": 0.008014670573174953, "learning_rate": 0.00012125828742090447, "loss": 0.0001, "step": 1006 }, { "epoch": 0.07429792879781824, "grad_norm": 33.344932556152344, "learning_rate": 0.00012125818510960795, "loss": 2.0841, "step": 1008 }, { "epoch": 0.07429792879781824, "eval_1_ratio_diff": 0.08261886204208879, "eval_accuracy": 0.8643803585346843, "eval_f1": 0.8746397694524496, "eval_loss": 0.954525887966156, "eval_precision": 0.8125836680053548, "eval_recall": 0.9469578783151326, "eval_runtime": 1439.8312, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 1008 }, { "epoch": 0.07444534532321073, "grad_norm": 0.009119726717472076, "learning_rate": 0.00012125808259516893, "loss": 2.2253, "step": 1010 }, { "epoch": 0.07459276184860322, "grad_norm": 0.08696369081735611, "learning_rate": 0.00012125797987758778, "loss": 0.0009, "step": 1012 }, { "epoch": 0.07474017837399573, "grad_norm": 26.136661529541016, "learning_rate": 0.00012125787695686484, "loss": 1.5774, "step": 1014 }, { "epoch": 0.07488759489938822, "grad_norm": 32.24976348876953, "learning_rate": 0.00012125777383300048, "loss": 1.1735, "step": 1016 }, { "epoch": 0.07503501142478072, "grad_norm": 0.5457736253738403, "learning_rate": 0.00012125767050599501, "loss": 0.0112, "step": 1018 }, { "epoch": 0.07518242795017321, "grad_norm": 0.7166759967803955, "learning_rate": 0.0001212575669758488, "loss": 0.2859, "step": 1020 }, { "epoch": 0.0753298444755657, "grad_norm": 0.32718005776405334, "learning_rate": 0.00012125746324256221, "loss": 1.5148, "step": 1022 }, { "epoch": 0.07547726100095821, "grad_norm": 88.85284423828125, "learning_rate": 0.00012125735930613554, "loss": 2.6444, "step": 1024 }, { "epoch": 0.07547726100095821, "eval_1_ratio_diff": 0.024162120031176904, "eval_accuracy": 0.8838659392049883, "eval_f1": 0.8865194211728865, "eval_loss": 0.48203912377357483, "eval_precision": 0.8660714285714286, "eval_recall": 0.9079563182527302, "eval_runtime": 1439.7938, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 1024 }, { "epoch": 0.0756246775263507, "grad_norm": 0.2224024385213852, "learning_rate": 0.00012125725516656918, "loss": 0.0068, "step": 1026 }, { "epoch": 0.0757720940517432, "grad_norm": 0.2110309898853302, "learning_rate": 0.00012125715082386346, "loss": 0.0036, "step": 1028 }, { "epoch": 0.0759195105771357, "grad_norm": 0.20480689406394958, "learning_rate": 0.00012125704627801874, "loss": 0.0101, "step": 1030 }, { "epoch": 0.07606692710252819, "grad_norm": 24.321718215942383, "learning_rate": 0.00012125694152903538, "loss": 2.3569, "step": 1032 }, { "epoch": 0.07621434362792069, "grad_norm": 0.3324243426322937, "learning_rate": 0.00012125683657691368, "loss": 0.0101, "step": 1034 }, { "epoch": 0.07636176015331318, "grad_norm": 1.0518757104873657, "learning_rate": 0.00012125673142165406, "loss": 0.013, "step": 1036 }, { "epoch": 0.07650917667870569, "grad_norm": 25.96786880493164, "learning_rate": 0.00012125662606325683, "loss": 1.3031, "step": 1038 }, { "epoch": 0.07665659320409818, "grad_norm": 0.12808893620967865, "learning_rate": 0.00012125652050172236, "loss": 0.0051, "step": 1040 }, { "epoch": 0.07665659320409818, "eval_1_ratio_diff": -0.05689789555728764, "eval_accuracy": 0.8698363211223694, "eval_f1": 0.8618693134822167, "eval_loss": 0.5904788970947266, "eval_precision": 0.9172535211267606, "eval_recall": 0.8127925117004681, "eval_runtime": 1440.2519, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 1040 }, { "epoch": 0.07680400972949068, "grad_norm": 30.74445343017578, "learning_rate": 0.00012125641473705098, "loss": 1.479, "step": 1042 }, { "epoch": 0.07695142625488317, "grad_norm": 0.33330148458480835, "learning_rate": 0.00012125630876924309, "loss": 2.7544, "step": 1044 }, { "epoch": 0.07709884278027566, "grad_norm": 0.4779714047908783, "learning_rate": 0.00012125620259829898, "loss": 0.0059, "step": 1046 }, { "epoch": 0.07724625930566817, "grad_norm": 0.4376041889190674, "learning_rate": 0.00012125609622421907, "loss": 0.0273, "step": 1048 }, { "epoch": 0.07739367583106066, "grad_norm": 0.14147210121154785, "learning_rate": 0.00012125598964700367, "loss": 1.3617, "step": 1050 }, { "epoch": 0.07754109235645316, "grad_norm": 20.751298904418945, "learning_rate": 0.00012125588286665319, "loss": 2.4864, "step": 1052 }, { "epoch": 0.07768850888184566, "grad_norm": 0.3589191138744354, "learning_rate": 0.00012125577588316793, "loss": 0.0102, "step": 1054 }, { "epoch": 0.07783592540723815, "grad_norm": 31.519622802734375, "learning_rate": 0.00012125566869654828, "loss": 1.8161, "step": 1056 }, { "epoch": 0.07783592540723815, "eval_1_ratio_diff": -0.05455962587685115, "eval_accuracy": 0.8472330475448169, "eval_f1": 0.8382838283828383, "eval_loss": 0.5423593521118164, "eval_precision": 0.8896672504378283, "eval_recall": 0.7925117004680188, "eval_runtime": 1440.6162, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 1056 }, { "epoch": 0.07798334193263065, "grad_norm": 29.618946075439453, "learning_rate": 0.00012125556130679457, "loss": 0.9249, "step": 1058 }, { "epoch": 0.07813075845802314, "grad_norm": 24.92931365966797, "learning_rate": 0.0001212554537139072, "loss": 1.3237, "step": 1060 }, { "epoch": 0.07827817498341565, "grad_norm": 6.922366142272949, "learning_rate": 0.00012125534591788653, "loss": 0.0954, "step": 1062 }, { "epoch": 0.07842559150880814, "grad_norm": 1.4033849239349365, "learning_rate": 0.00012125523791873287, "loss": 0.1059, "step": 1064 }, { "epoch": 0.07857300803420063, "grad_norm": 0.5430750846862793, "learning_rate": 0.00012125512971644664, "loss": 0.0167, "step": 1066 }, { "epoch": 0.07872042455959313, "grad_norm": 25.31169319152832, "learning_rate": 0.00012125502131102817, "loss": 1.4498, "step": 1068 }, { "epoch": 0.07886784108498562, "grad_norm": 24.96006965637207, "learning_rate": 0.00012125491270247783, "loss": 1.3258, "step": 1070 }, { "epoch": 0.07901525761037813, "grad_norm": 1.3635300397872925, "learning_rate": 0.000121254803890796, "loss": 0.0339, "step": 1072 }, { "epoch": 0.07901525761037813, "eval_1_ratio_diff": -0.029618082618862063, "eval_accuracy": 0.8581449727201871, "eval_f1": 0.8536977491961415, "eval_loss": 0.6108663082122803, "eval_precision": 0.8805970149253731, "eval_recall": 0.828393135725429, "eval_runtime": 1440.3267, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 1072 }, { "epoch": 0.07916267413577062, "grad_norm": 41.34056091308594, "learning_rate": 0.00012125469487598301, "loss": 1.665, "step": 1074 }, { "epoch": 0.07931009066116311, "grad_norm": 0.8467972278594971, "learning_rate": 0.00012125458565803925, "loss": 0.0102, "step": 1076 }, { "epoch": 0.07945750718655561, "grad_norm": 0.09642868489027023, "learning_rate": 0.00012125447623696508, "loss": 0.0053, "step": 1078 }, { "epoch": 0.0796049237119481, "grad_norm": 0.06861916184425354, "learning_rate": 0.00012125436661276089, "loss": 0.5701, "step": 1080 }, { "epoch": 0.07975234023734061, "grad_norm": 22.139467239379883, "learning_rate": 0.000121254256785427, "loss": 1.088, "step": 1082 }, { "epoch": 0.0798997567627331, "grad_norm": 0.037754353135824203, "learning_rate": 0.00012125414675496381, "loss": 0.0046, "step": 1084 }, { "epoch": 0.08004717328812559, "grad_norm": 32.785037994384766, "learning_rate": 0.00012125403652137169, "loss": 0.956, "step": 1086 }, { "epoch": 0.0801945898135181, "grad_norm": 21.96536636352539, "learning_rate": 0.000121253926084651, "loss": 3.1959, "step": 1088 }, { "epoch": 0.0801945898135181, "eval_1_ratio_diff": -0.04364770070148094, "eval_accuracy": 0.8659392049883087, "eval_f1": 0.8597063621533442, "eval_loss": 0.6232408285140991, "eval_precision": 0.9008547008547009, "eval_recall": 0.8221528861154446, "eval_runtime": 1440.7525, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 1088 }, { "epoch": 0.08034200633891059, "grad_norm": 0.09767896682024002, "learning_rate": 0.00012125381544480211, "loss": 1.9769, "step": 1090 }, { "epoch": 0.08048942286430309, "grad_norm": 4.43467378616333, "learning_rate": 0.0001212537046018254, "loss": 1.8357, "step": 1092 }, { "epoch": 0.08063683938969558, "grad_norm": 22.267379760742188, "learning_rate": 0.00012125359355572121, "loss": 0.9555, "step": 1094 }, { "epoch": 0.08078425591508809, "grad_norm": 0.7281066179275513, "learning_rate": 0.00012125348230648997, "loss": 0.0391, "step": 1096 }, { "epoch": 0.08093167244048058, "grad_norm": 0.4542294442653656, "learning_rate": 0.000121253370854132, "loss": 0.9409, "step": 1098 }, { "epoch": 0.08107908896587307, "grad_norm": 22.38312339782715, "learning_rate": 0.0001212532591986477, "loss": 1.0271, "step": 1100 }, { "epoch": 0.08122650549126557, "grad_norm": 26.339080810546875, "learning_rate": 0.00012125314734003743, "loss": 1.4919, "step": 1102 }, { "epoch": 0.08137392201665807, "grad_norm": 3.4797956943511963, "learning_rate": 0.00012125303527830157, "loss": 0.0342, "step": 1104 }, { "epoch": 0.08137392201665807, "eval_1_ratio_diff": -0.15354637568199536, "eval_accuracy": 0.7887763055339049, "eval_f1": 0.7502304147465437, "eval_loss": 0.8048840761184692, "eval_precision": 0.9166666666666666, "eval_recall": 0.6349453978159126, "eval_runtime": 1440.2595, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 1104 }, { "epoch": 0.08152133854205057, "grad_norm": 27.387937545776367, "learning_rate": 0.0001212529230134405, "loss": 2.2878, "step": 1106 }, { "epoch": 0.08166875506744306, "grad_norm": 0.27098074555397034, "learning_rate": 0.00012125281054545459, "loss": 0.0115, "step": 1108 }, { "epoch": 0.08181617159283555, "grad_norm": 0.17622074484825134, "learning_rate": 0.00012125269787434425, "loss": 0.9066, "step": 1110 }, { "epoch": 0.08196358811822806, "grad_norm": 13.168516159057617, "learning_rate": 0.00012125258500010979, "loss": 1.0468, "step": 1112 }, { "epoch": 0.08211100464362055, "grad_norm": 15.512298583984375, "learning_rate": 0.00012125247192275165, "loss": 0.1845, "step": 1114 }, { "epoch": 0.08225842116901305, "grad_norm": 31.888328552246094, "learning_rate": 0.00012125235864227018, "loss": 0.3136, "step": 1116 }, { "epoch": 0.08240583769440554, "grad_norm": 9.891843795776367, "learning_rate": 0.00012125224515866574, "loss": 0.8436, "step": 1118 }, { "epoch": 0.08255325421979803, "grad_norm": 46.16787338256836, "learning_rate": 0.00012125213147193877, "loss": 2.5811, "step": 1120 }, { "epoch": 0.08255325421979803, "eval_1_ratio_diff": 0.07560405300077944, "eval_accuracy": 0.848012470771629, "eval_f1": 0.8585931834662799, "eval_loss": 0.6410078406333923, "eval_precision": 0.8021680216802168, "eval_recall": 0.9235569422776911, "eval_runtime": 1440.0548, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 1120 }, { "epoch": 0.08270067074519054, "grad_norm": 7.604285717010498, "learning_rate": 0.00012125201758208962, "loss": 1.1177, "step": 1122 }, { "epoch": 0.08284808727058303, "grad_norm": 0.39338427782058716, "learning_rate": 0.00012125190348911864, "loss": 1.5911, "step": 1124 }, { "epoch": 0.08299550379597553, "grad_norm": 36.94788360595703, "learning_rate": 0.00012125178919302626, "loss": 1.3629, "step": 1126 }, { "epoch": 0.08314292032136802, "grad_norm": 0.8372169137001038, "learning_rate": 0.00012125167469381283, "loss": 0.0102, "step": 1128 }, { "epoch": 0.08329033684676052, "grad_norm": 0.12225531786680222, "learning_rate": 0.00012125155999147876, "loss": 0.0043, "step": 1130 }, { "epoch": 0.08343775337215302, "grad_norm": 62.011695861816406, "learning_rate": 0.0001212514450860244, "loss": 1.6697, "step": 1132 }, { "epoch": 0.08358516989754551, "grad_norm": 0.02834857441484928, "learning_rate": 0.00012125132997745018, "loss": 0.0044, "step": 1134 }, { "epoch": 0.08373258642293802, "grad_norm": 0.007508635055273771, "learning_rate": 0.00012125121466575647, "loss": 0.0067, "step": 1136 }, { "epoch": 0.08373258642293802, "eval_1_ratio_diff": -0.059236165237724125, "eval_accuracy": 0.8534684333593141, "eval_f1": 0.8441127694859039, "eval_loss": 0.8524520993232727, "eval_precision": 0.9008849557522124, "eval_recall": 0.7940717628705148, "eval_runtime": 1440.1118, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 1136 }, { "epoch": 0.0838800029483305, "grad_norm": 70.38623046875, "learning_rate": 0.00012125109915094362, "loss": 2.694, "step": 1138 }, { "epoch": 0.08402741947372301, "grad_norm": 1.548732042312622, "learning_rate": 0.00012125098343301206, "loss": 0.0213, "step": 1140 }, { "epoch": 0.0841748359991155, "grad_norm": 1.2770323753356934, "learning_rate": 0.00012125086751196217, "loss": 0.0099, "step": 1142 }, { "epoch": 0.08432225252450799, "grad_norm": 30.610591888427734, "learning_rate": 0.00012125075138779432, "loss": 2.0352, "step": 1144 }, { "epoch": 0.0844696690499005, "grad_norm": 0.8128361701965332, "learning_rate": 0.0001212506350605089, "loss": 1.0719, "step": 1146 }, { "epoch": 0.08461708557529299, "grad_norm": 1.6853057146072388, "learning_rate": 0.00012125051853010634, "loss": 0.0092, "step": 1148 }, { "epoch": 0.0847645021006855, "grad_norm": 39.670047760009766, "learning_rate": 0.000121250401796587, "loss": 1.7653, "step": 1150 }, { "epoch": 0.08491191862607798, "grad_norm": 36.04311752319336, "learning_rate": 0.00012125028485995127, "loss": 1.3473, "step": 1152 }, { "epoch": 0.08491191862607798, "eval_1_ratio_diff": 0.03897116134060796, "eval_accuracy": 0.8737334372564303, "eval_f1": 0.8783783783783784, "eval_loss": 0.6749188899993896, "eval_precision": 0.8465991316931982, "eval_recall": 0.9126365054602185, "eval_runtime": 1440.4215, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 1152 }, { "epoch": 0.08505933515147047, "grad_norm": 0.17764577269554138, "learning_rate": 0.00012125016772019952, "loss": 0.0023, "step": 1154 }, { "epoch": 0.08520675167686298, "grad_norm": 0.3527587652206421, "learning_rate": 0.0001212500503773322, "loss": 0.0055, "step": 1156 }, { "epoch": 0.08535416820225547, "grad_norm": 0.1379138082265854, "learning_rate": 0.00012124993283134963, "loss": 1.6429, "step": 1158 }, { "epoch": 0.08550158472764798, "grad_norm": 0.14264832437038422, "learning_rate": 0.0001212498150822523, "loss": 0.0089, "step": 1160 }, { "epoch": 0.08564900125304047, "grad_norm": 30.086095809936523, "learning_rate": 0.00012124969713004051, "loss": 2.4261, "step": 1162 }, { "epoch": 0.08579641777843296, "grad_norm": 0.26527953147888184, "learning_rate": 0.00012124957897471469, "loss": 0.6917, "step": 1164 }, { "epoch": 0.08594383430382546, "grad_norm": 8.70952320098877, "learning_rate": 0.00012124946061627526, "loss": 0.0826, "step": 1166 }, { "epoch": 0.08609125082921795, "grad_norm": 0.031940966844558716, "learning_rate": 0.0001212493420547226, "loss": 0.0008, "step": 1168 }, { "epoch": 0.08609125082921795, "eval_1_ratio_diff": -0.010132501948558081, "eval_accuracy": 0.8636009353078722, "eval_f1": 0.8620961386918834, "eval_loss": 0.5565428137779236, "eval_precision": 0.8710191082802548, "eval_recall": 0.8533541341653667, "eval_runtime": 1440.6772, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 1168 }, { "epoch": 0.08623866735461046, "grad_norm": 1.1032943725585938, "learning_rate": 0.0001212492232900571, "loss": 0.0135, "step": 1170 }, { "epoch": 0.08638608388000295, "grad_norm": 0.6731190085411072, "learning_rate": 0.00012124910432227916, "loss": 0.0145, "step": 1172 }, { "epoch": 0.08653350040539544, "grad_norm": 0.2941815257072449, "learning_rate": 0.00012124898515138918, "loss": 0.005, "step": 1174 }, { "epoch": 0.08668091693078794, "grad_norm": 0.060058582574129105, "learning_rate": 0.00012124886577738757, "loss": 0.0024, "step": 1176 }, { "epoch": 0.08682833345618043, "grad_norm": 0.029819436371326447, "learning_rate": 0.0001212487462002747, "loss": 0.0015, "step": 1178 }, { "epoch": 0.08697574998157294, "grad_norm": 0.1549704670906067, "learning_rate": 0.000121248626420051, "loss": 0.0023, "step": 1180 }, { "epoch": 0.08712316650696543, "grad_norm": 1.1005401611328125, "learning_rate": 0.00012124850643671686, "loss": 0.0065, "step": 1182 }, { "epoch": 0.08727058303235792, "grad_norm": 200.2630157470703, "learning_rate": 0.00012124838625027271, "loss": 0.7416, "step": 1184 }, { "epoch": 0.08727058303235792, "eval_1_ratio_diff": 0.04520654715510519, "eval_accuracy": 0.8752922837100545, "eval_f1": 0.8805970149253731, "eval_loss": 0.8647755980491638, "eval_precision": 0.844062947067239, "eval_recall": 0.9204368174726989, "eval_runtime": 1441.0897, "eval_samples_per_second": 0.89, "eval_steps_per_second": 0.445, "step": 1184 }, { "epoch": 0.08741799955775043, "grad_norm": 0.012469271197915077, "learning_rate": 0.0001212482658607189, "loss": 0.0003, "step": 1186 }, { "epoch": 0.08756541608314292, "grad_norm": 0.017095841467380524, "learning_rate": 0.00012124814526805586, "loss": 0.0003, "step": 1188 }, { "epoch": 0.08771283260853542, "grad_norm": 23.186222076416016, "learning_rate": 0.00012124802447228401, "loss": 2.0149, "step": 1190 }, { "epoch": 0.08786024913392791, "grad_norm": 0.010486994870007038, "learning_rate": 0.00012124790347340374, "loss": 0.0006, "step": 1192 }, { "epoch": 0.08800766565932042, "grad_norm": 40.754051208496094, "learning_rate": 0.00012124778227141545, "loss": 2.8077, "step": 1194 }, { "epoch": 0.08815508218471291, "grad_norm": 0.08611409366130829, "learning_rate": 0.00012124766086631955, "loss": 0.0013, "step": 1196 }, { "epoch": 0.0883024987101054, "grad_norm": 0.28396108746528625, "learning_rate": 0.00012124753925811646, "loss": 2.2785, "step": 1198 }, { "epoch": 0.0884499152354979, "grad_norm": 0.03215723857283592, "learning_rate": 0.00012124741744680656, "loss": 0.0026, "step": 1200 }, { "epoch": 0.0884499152354979, "eval_1_ratio_diff": 0.04598597038191732, "eval_accuracy": 0.877630553390491, "eval_f1": 0.8829231916480239, "eval_loss": 0.7880816459655762, "eval_precision": 0.8457142857142858, "eval_recall": 0.9235569422776911, "eval_runtime": 1441.3958, "eval_samples_per_second": 0.89, "eval_steps_per_second": 0.445, "step": 1200 }, { "epoch": 0.0885973317608904, "grad_norm": 0.03621472418308258, "learning_rate": 0.00012124729543239029, "loss": 0.7748, "step": 1202 }, { "epoch": 0.0887447482862829, "grad_norm": 0.09097783267498016, "learning_rate": 0.00012124717321486803, "loss": 1.8821, "step": 1204 }, { "epoch": 0.08889216481167539, "grad_norm": 0.3395259976387024, "learning_rate": 0.00012124705079424022, "loss": 0.0073, "step": 1206 }, { "epoch": 0.08903958133706788, "grad_norm": 0.04736631363630295, "learning_rate": 0.00012124692817050723, "loss": 1.567, "step": 1208 }, { "epoch": 0.08918699786246038, "grad_norm": 0.08807298541069031, "learning_rate": 0.00012124680534366952, "loss": 0.0014, "step": 1210 }, { "epoch": 0.08933441438785288, "grad_norm": 0.05549991875886917, "learning_rate": 0.00012124668231372745, "loss": 0.0021, "step": 1212 }, { "epoch": 0.08948183091324538, "grad_norm": 0.06815358251333237, "learning_rate": 0.00012124655908068146, "loss": 0.9174, "step": 1214 }, { "epoch": 0.08962924743863787, "grad_norm": 0.03639994189143181, "learning_rate": 0.00012124643564453199, "loss": 0.0199, "step": 1216 }, { "epoch": 0.08962924743863787, "eval_1_ratio_diff": 0.020265003897116163, "eval_accuracy": 0.8628215120810601, "eval_f1": 0.8654434250764526, "eval_loss": 0.648876965045929, "eval_precision": 0.848575712143928, "eval_recall": 0.8829953198127926, "eval_runtime": 1441.2405, "eval_samples_per_second": 0.89, "eval_steps_per_second": 0.445, "step": 1216 }, { "epoch": 0.08977666396403036, "grad_norm": 23.13437271118164, "learning_rate": 0.00012124631200527941, "loss": 1.6889, "step": 1218 }, { "epoch": 0.08992408048942287, "grad_norm": 0.2734740674495697, "learning_rate": 0.00012124618816292414, "loss": 0.0059, "step": 1220 }, { "epoch": 0.09007149701481536, "grad_norm": 12.39369010925293, "learning_rate": 0.00012124606411746661, "loss": 0.0533, "step": 1222 }, { "epoch": 0.09021891354020786, "grad_norm": 0.036048658192157745, "learning_rate": 0.00012124593986890722, "loss": 0.0011, "step": 1224 }, { "epoch": 0.09036633006560035, "grad_norm": 0.3171124756336212, "learning_rate": 0.00012124581541724642, "loss": 1.5207, "step": 1226 }, { "epoch": 0.09051374659099284, "grad_norm": 4.317696571350098, "learning_rate": 0.00012124569076248459, "loss": 1.5358, "step": 1228 }, { "epoch": 0.09066116311638535, "grad_norm": 0.12044669687747955, "learning_rate": 0.00012124556590462215, "loss": 0.0053, "step": 1230 }, { "epoch": 0.09080857964177784, "grad_norm": 0.21298988163471222, "learning_rate": 0.00012124544084365953, "loss": 0.0081, "step": 1232 }, { "epoch": 0.09080857964177784, "eval_1_ratio_diff": 0.009353078721745844, "eval_accuracy": 0.8222915042868277, "eval_f1": 0.8238021638330757, "eval_loss": 0.7862046360969543, "eval_precision": 0.8162327718223583, "eval_recall": 0.8315132605304212, "eval_runtime": 1439.2896, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 1232 }, { "epoch": 0.09095599616717034, "grad_norm": 20.541194915771484, "learning_rate": 0.00012124531557959717, "loss": 1.194, "step": 1234 }, { "epoch": 0.09110341269256284, "grad_norm": 0.2897285223007202, "learning_rate": 0.00012124519011243545, "loss": 0.8952, "step": 1236 }, { "epoch": 0.09125082921795533, "grad_norm": 0.08111666887998581, "learning_rate": 0.0001212450644421748, "loss": 0.006, "step": 1238 }, { "epoch": 0.09139824574334783, "grad_norm": 0.4867294430732727, "learning_rate": 0.00012124493856881568, "loss": 1.7795, "step": 1240 }, { "epoch": 0.09154566226874032, "grad_norm": 0.9198406934738159, "learning_rate": 0.00012124481249235846, "loss": 0.0259, "step": 1242 }, { "epoch": 0.09169307879413283, "grad_norm": 0.08149991929531097, "learning_rate": 0.0001212446862128036, "loss": 1.2016, "step": 1244 }, { "epoch": 0.09184049531952532, "grad_norm": 0.1457146853208542, "learning_rate": 0.0001212445597301515, "loss": 0.9302, "step": 1246 }, { "epoch": 0.09198791184491782, "grad_norm": 0.24497820436954498, "learning_rate": 0.00012124443304440259, "loss": 0.0051, "step": 1248 }, { "epoch": 0.09198791184491782, "eval_1_ratio_diff": 0.02260327357755254, "eval_accuracy": 0.8339828526890102, "eval_f1": 0.8375286041189931, "eval_loss": 0.747604489326477, "eval_precision": 0.8194029850746268, "eval_recall": 0.8564742589703588, "eval_runtime": 1440.6099, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 1248 }, { "epoch": 0.09213532837031031, "grad_norm": 0.10772482305765152, "learning_rate": 0.0001212443061555573, "loss": 0.0032, "step": 1250 }, { "epoch": 0.0922827448957028, "grad_norm": 3.8056480884552, "learning_rate": 0.00012124417906361605, "loss": 0.838, "step": 1252 }, { "epoch": 0.09243016142109531, "grad_norm": 21.590364456176758, "learning_rate": 0.00012124405176857927, "loss": 2.5474, "step": 1254 }, { "epoch": 0.0925775779464878, "grad_norm": 21.33682632446289, "learning_rate": 0.00012124392427044737, "loss": 2.7454, "step": 1256 }, { "epoch": 0.0927249944718803, "grad_norm": 0.21534398198127747, "learning_rate": 0.00012124379656922081, "loss": 0.0068, "step": 1258 }, { "epoch": 0.0928724109972728, "grad_norm": 20.76007843017578, "learning_rate": 0.0001212436686649, "loss": 1.2547, "step": 1260 }, { "epoch": 0.09301982752266529, "grad_norm": 20.636024475097656, "learning_rate": 0.00012124354055748535, "loss": 1.5976, "step": 1262 }, { "epoch": 0.09316724404805779, "grad_norm": 2.3518083095550537, "learning_rate": 0.00012124341224697731, "loss": 0.0369, "step": 1264 }, { "epoch": 0.09316724404805779, "eval_1_ratio_diff": -0.03117692907248637, "eval_accuracy": 0.8565861262665627, "eval_f1": 0.8518518518518519, "eval_loss": 0.43984636664390564, "eval_precision": 0.8801996672212978, "eval_recall": 0.8252730109204368, "eval_runtime": 1440.9991, "eval_samples_per_second": 0.89, "eval_steps_per_second": 0.446, "step": 1264 }, { "epoch": 0.09331466057345028, "grad_norm": 1.4304808378219604, "learning_rate": 0.0001212432837333763, "loss": 0.03, "step": 1266 }, { "epoch": 0.09346207709884279, "grad_norm": 0.6885532736778259, "learning_rate": 0.00012124315501668278, "loss": 0.7603, "step": 1268 }, { "epoch": 0.09360949362423528, "grad_norm": 0.5777420997619629, "learning_rate": 0.00012124302609689715, "loss": 1.1026, "step": 1270 }, { "epoch": 0.09375691014962777, "grad_norm": 1.5885238647460938, "learning_rate": 0.00012124289697401986, "loss": 0.041, "step": 1272 }, { "epoch": 0.09390432667502027, "grad_norm": 0.37640276551246643, "learning_rate": 0.00012124276764805132, "loss": 0.0182, "step": 1274 }, { "epoch": 0.09405174320041276, "grad_norm": 25.54754066467285, "learning_rate": 0.00012124263811899196, "loss": 1.2952, "step": 1276 }, { "epoch": 0.09419915972580527, "grad_norm": 41.04960632324219, "learning_rate": 0.00012124250838684226, "loss": 2.126, "step": 1278 }, { "epoch": 0.09434657625119776, "grad_norm": 0.16556452214717865, "learning_rate": 0.00012124237845160263, "loss": 0.0078, "step": 1280 }, { "epoch": 0.09434657625119776, "eval_1_ratio_diff": -0.08573655494933752, "eval_accuracy": 0.8487918939984411, "eval_f1": 0.8344709897610921, "eval_loss": 0.6657168865203857, "eval_precision": 0.9209039548022598, "eval_recall": 0.7628705148205929, "eval_runtime": 1440.6129, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 1280 }, { "epoch": 0.09449399277659025, "grad_norm": 17.6622314453125, "learning_rate": 0.00012124224831327347, "loss": 0.1561, "step": 1282 }, { "epoch": 0.09464140930198275, "grad_norm": 0.15980716049671173, "learning_rate": 0.00012124211797185528, "loss": 0.0042, "step": 1284 }, { "epoch": 0.09478882582737524, "grad_norm": 0.04221845418214798, "learning_rate": 0.00012124198742734845, "loss": 1.4535, "step": 1286 }, { "epoch": 0.09493624235276775, "grad_norm": 0.056126296520233154, "learning_rate": 0.00012124185667975342, "loss": 0.0031, "step": 1288 }, { "epoch": 0.09508365887816024, "grad_norm": 0.08041621744632721, "learning_rate": 0.00012124172572907067, "loss": 0.0018, "step": 1290 }, { "epoch": 0.09523107540355275, "grad_norm": 28.64826011657715, "learning_rate": 0.00012124159457530059, "loss": 1.6516, "step": 1292 }, { "epoch": 0.09537849192894524, "grad_norm": 0.31489408016204834, "learning_rate": 0.00012124146321844365, "loss": 0.0038, "step": 1294 }, { "epoch": 0.09552590845433773, "grad_norm": 1.7656670808792114, "learning_rate": 0.00012124133165850026, "loss": 0.0131, "step": 1296 }, { "epoch": 0.09552590845433773, "eval_1_ratio_diff": 0.04832424006235381, "eval_accuracy": 0.8176149649259548, "eval_f1": 0.8258928571428571, "eval_loss": 0.8926898241043091, "eval_precision": 0.7894736842105263, "eval_recall": 0.8658346333853354, "eval_runtime": 1440.8824, "eval_samples_per_second": 0.89, "eval_steps_per_second": 0.446, "step": 1296 }, { "epoch": 0.09567332497973023, "grad_norm": 0.3228819668292999, "learning_rate": 0.00012124119989547089, "loss": 0.8356, "step": 1298 }, { "epoch": 0.09582074150512272, "grad_norm": 58.03204345703125, "learning_rate": 0.00012124106792935597, "loss": 0.5161, "step": 1300 }, { "epoch": 0.09596815803051523, "grad_norm": 0.094666488468647, "learning_rate": 0.00012124093576015595, "loss": 0.0014, "step": 1302 }, { "epoch": 0.09611557455590772, "grad_norm": 0.054852358996868134, "learning_rate": 0.00012124080338787127, "loss": 0.0025, "step": 1304 }, { "epoch": 0.09626299108130021, "grad_norm": 2.4614083766937256, "learning_rate": 0.00012124067081250235, "loss": 0.0231, "step": 1306 }, { "epoch": 0.09641040760669271, "grad_norm": 0.13067440688610077, "learning_rate": 0.00012124053803404966, "loss": 0.0019, "step": 1308 }, { "epoch": 0.0965578241320852, "grad_norm": 0.05831296741962433, "learning_rate": 0.00012124040505251365, "loss": 1.1599, "step": 1310 }, { "epoch": 0.09670524065747771, "grad_norm": 22.675302505493164, "learning_rate": 0.00012124027186789477, "loss": 1.7971, "step": 1312 }, { "epoch": 0.09670524065747771, "eval_1_ratio_diff": -0.07638347622759162, "eval_accuracy": 0.8207326578332035, "eval_f1": 0.8057432432432432, "eval_loss": 0.9711058735847473, "eval_precision": 0.8784530386740331, "eval_recall": 0.7441497659906396, "eval_runtime": 1440.5355, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 1312 }, { "epoch": 0.0968526571828702, "grad_norm": 245.76840209960938, "learning_rate": 0.00012124013848019342, "loss": 2.4617, "step": 1314 }, { "epoch": 0.09700007370826269, "grad_norm": 21.968021392822266, "learning_rate": 0.00012124000488941008, "loss": 1.4503, "step": 1316 }, { "epoch": 0.0971474902336552, "grad_norm": 0.03653848171234131, "learning_rate": 0.00012123987109554522, "loss": 0.0015, "step": 1318 }, { "epoch": 0.09729490675904769, "grad_norm": 0.16115568578243256, "learning_rate": 0.00012123973709859925, "loss": 0.0201, "step": 1320 }, { "epoch": 0.09744232328444019, "grad_norm": 34.74784851074219, "learning_rate": 0.00012123960289857264, "loss": 1.092, "step": 1322 }, { "epoch": 0.09758973980983268, "grad_norm": 17.326068878173828, "learning_rate": 0.00012123946849546582, "loss": 0.0826, "step": 1324 }, { "epoch": 0.09773715633522517, "grad_norm": 22.532522201538086, "learning_rate": 0.00012123933388927926, "loss": 2.0905, "step": 1326 }, { "epoch": 0.09788457286061768, "grad_norm": 0.09820098429918289, "learning_rate": 0.0001212391990800134, "loss": 0.002, "step": 1328 }, { "epoch": 0.09788457286061768, "eval_1_ratio_diff": 0.05689789555728764, "eval_accuracy": 0.8106001558846454, "eval_f1": 0.8206642066420664, "eval_loss": 0.7345473170280457, "eval_precision": 0.7787114845938375, "eval_recall": 0.8673946957878315, "eval_runtime": 1439.7279, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 1328 }, { "epoch": 0.09803198938601017, "grad_norm": 0.11757276207208633, "learning_rate": 0.00012123906406766871, "loss": 0.0079, "step": 1330 }, { "epoch": 0.09817940591140267, "grad_norm": 24.76763153076172, "learning_rate": 0.00012123892885224563, "loss": 1.3389, "step": 1332 }, { "epoch": 0.09832682243679516, "grad_norm": 0.0959400087594986, "learning_rate": 0.0001212387934337446, "loss": 0.9421, "step": 1334 }, { "epoch": 0.09847423896218765, "grad_norm": 0.3935282826423645, "learning_rate": 0.00012123865781216609, "loss": 0.0104, "step": 1336 }, { "epoch": 0.09862165548758016, "grad_norm": 22.505558013916016, "learning_rate": 0.00012123852198751054, "loss": 0.7555, "step": 1338 }, { "epoch": 0.09876907201297265, "grad_norm": 1.3673774003982544, "learning_rate": 0.00012123838595977844, "loss": 0.0409, "step": 1340 }, { "epoch": 0.09891648853836515, "grad_norm": 0.6889051198959351, "learning_rate": 0.0001212382497289702, "loss": 0.0269, "step": 1342 }, { "epoch": 0.09906390506375765, "grad_norm": 0.2218835949897766, "learning_rate": 0.0001212381132950863, "loss": 0.9572, "step": 1344 }, { "epoch": 0.09906390506375765, "eval_1_ratio_diff": 0.07560405300077944, "eval_accuracy": 0.8277474668745128, "eval_f1": 0.8397389412617839, "eval_loss": 0.7541435360908508, "eval_precision": 0.7845528455284553, "eval_recall": 0.9032761310452418, "eval_runtime": 1440.149, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 1344 }, { "epoch": 0.09921132158915015, "grad_norm": 0.08860000967979431, "learning_rate": 0.0001212379766581272, "loss": 0.0038, "step": 1346 }, { "epoch": 0.09935873811454264, "grad_norm": 0.1549777388572693, "learning_rate": 0.00012123783981809338, "loss": 0.6904, "step": 1348 }, { "epoch": 0.09950615463993513, "grad_norm": 0.4857753813266754, "learning_rate": 0.00012123770277498524, "loss": 0.005, "step": 1350 }, { "epoch": 0.09965357116532764, "grad_norm": 0.5475670099258423, "learning_rate": 0.00012123756552880328, "loss": 0.0057, "step": 1352 }, { "epoch": 0.09980098769072013, "grad_norm": 0.8644952178001404, "learning_rate": 0.00012123742807954794, "loss": 2.7045, "step": 1354 }, { "epoch": 0.09994840421611263, "grad_norm": 0.15051943063735962, "learning_rate": 0.0001212372904272197, "loss": 0.7707, "step": 1356 }, { "epoch": 0.10009582074150512, "grad_norm": 0.04434569925069809, "learning_rate": 0.00012123715257181902, "loss": 0.0007, "step": 1358 }, { "epoch": 0.10024323726689761, "grad_norm": 0.03767779842019081, "learning_rate": 0.00012123701451334634, "loss": 1.7987, "step": 1360 }, { "epoch": 0.10024323726689761, "eval_1_ratio_diff": 0.10054559625876847, "eval_accuracy": 0.8589243959469992, "eval_f1": 0.8717221828490432, "eval_loss": 0.7392542958259583, "eval_precision": 0.7987012987012987, "eval_recall": 0.9594383775351014, "eval_runtime": 1439.9484, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 1360 }, { "epoch": 0.10039065379229012, "grad_norm": 0.032404810190200806, "learning_rate": 0.00012123687625180216, "loss": 1.3724, "step": 1362 }, { "epoch": 0.10053807031768261, "grad_norm": 0.02649116888642311, "learning_rate": 0.00012123673778718691, "loss": 1.3162, "step": 1364 }, { "epoch": 0.10068548684307511, "grad_norm": 0.120023712515831, "learning_rate": 0.00012123659911950106, "loss": 0.0026, "step": 1366 }, { "epoch": 0.1008329033684676, "grad_norm": 0.28818804025650024, "learning_rate": 0.00012123646024874507, "loss": 0.0048, "step": 1368 }, { "epoch": 0.1009803198938601, "grad_norm": 0.5911560654640198, "learning_rate": 0.00012123632117491944, "loss": 0.0142, "step": 1370 }, { "epoch": 0.1011277364192526, "grad_norm": 22.85379409790039, "learning_rate": 0.00012123618189802459, "loss": 1.8439, "step": 1372 }, { "epoch": 0.10127515294464509, "grad_norm": 0.37168049812316895, "learning_rate": 0.00012123604241806102, "loss": 0.0065, "step": 1374 }, { "epoch": 0.1014225694700376, "grad_norm": 0.10927151888608932, "learning_rate": 0.00012123590273502919, "loss": 1.1801, "step": 1376 }, { "epoch": 0.1014225694700376, "eval_1_ratio_diff": 0.021823850350740415, "eval_accuracy": 0.8862042088854248, "eval_f1": 0.8885496183206106, "eval_loss": 0.5426926612854004, "eval_precision": 0.8699551569506726, "eval_recall": 0.9079563182527302, "eval_runtime": 1440.2334, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 1376 }, { "epoch": 0.10156998599543009, "grad_norm": 0.038460321724414825, "learning_rate": 0.00012123576284892955, "loss": 0.0101, "step": 1378 }, { "epoch": 0.10171740252082258, "grad_norm": 25.498838424682617, "learning_rate": 0.00012123562275976258, "loss": 1.3981, "step": 1380 }, { "epoch": 0.10186481904621508, "grad_norm": 159.9862060546875, "learning_rate": 0.00012123548246752878, "loss": 1.2495, "step": 1382 }, { "epoch": 0.10201223557160757, "grad_norm": 0.06094611436128616, "learning_rate": 0.00012123534197222857, "loss": 0.0046, "step": 1384 }, { "epoch": 0.10215965209700008, "grad_norm": 26.12101173400879, "learning_rate": 0.00012123520127386245, "loss": 1.3714, "step": 1386 }, { "epoch": 0.10230706862239257, "grad_norm": 48.13339614868164, "learning_rate": 0.00012123506037243086, "loss": 0.0869, "step": 1388 }, { "epoch": 0.10245448514778506, "grad_norm": 0.5880022644996643, "learning_rate": 0.00012123491926793433, "loss": 0.6204, "step": 1390 }, { "epoch": 0.10260190167317756, "grad_norm": 24.889034271240234, "learning_rate": 0.00012123477796037328, "loss": 0.9381, "step": 1392 }, { "epoch": 0.10260190167317756, "eval_1_ratio_diff": 0.09664848012470773, "eval_accuracy": 0.8487918939984411, "eval_f1": 0.8620199146514936, "eval_loss": 0.5980536937713623, "eval_precision": 0.792156862745098, "eval_recall": 0.9453978159126365, "eval_runtime": 1440.6605, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 1392 }, { "epoch": 0.10274931819857006, "grad_norm": 23.767898559570312, "learning_rate": 0.00012123463644974822, "loss": 1.3434, "step": 1394 }, { "epoch": 0.10289673472396256, "grad_norm": 0.05240378528833389, "learning_rate": 0.0001212344947360596, "loss": 0.006, "step": 1396 }, { "epoch": 0.10304415124935505, "grad_norm": 0.05574984475970268, "learning_rate": 0.00012123435281930789, "loss": 0.0062, "step": 1398 }, { "epoch": 0.10319156777474756, "grad_norm": 25.049999237060547, "learning_rate": 0.00012123421069949359, "loss": 0.7515, "step": 1400 }, { "epoch": 0.10333898430014005, "grad_norm": 0.6514810919761658, "learning_rate": 0.00012123406837661717, "loss": 0.0286, "step": 1402 }, { "epoch": 0.10348640082553254, "grad_norm": 25.315319061279297, "learning_rate": 0.00012123392585067908, "loss": 0.6189, "step": 1404 }, { "epoch": 0.10363381735092504, "grad_norm": 24.714847564697266, "learning_rate": 0.00012123378312167983, "loss": 0.7992, "step": 1406 }, { "epoch": 0.10378123387631753, "grad_norm": 21.79236602783203, "learning_rate": 0.00012123364018961989, "loss": 1.8653, "step": 1408 }, { "epoch": 0.10378123387631753, "eval_1_ratio_diff": -0.018706157443491855, "eval_accuracy": 0.8752922837100545, "eval_f1": 0.8728139904610492, "eval_loss": 0.573785662651062, "eval_precision": 0.8897893030794165, "eval_recall": 0.8564742589703588, "eval_runtime": 1440.8628, "eval_samples_per_second": 0.89, "eval_steps_per_second": 0.446, "step": 1408 }, { "epoch": 0.10392865040171004, "grad_norm": 21.346384048461914, "learning_rate": 0.00012123349705449974, "loss": 1.923, "step": 1410 }, { "epoch": 0.10407606692710253, "grad_norm": 10.506868362426758, "learning_rate": 0.00012123335371631985, "loss": 0.5301, "step": 1412 }, { "epoch": 0.10422348345249502, "grad_norm": 1.1288862228393555, "learning_rate": 0.00012123321017508069, "loss": 0.0411, "step": 1414 }, { "epoch": 0.10437089997788752, "grad_norm": 0.11825437843799591, "learning_rate": 0.00012123306643078279, "loss": 0.0026, "step": 1416 }, { "epoch": 0.10451831650328001, "grad_norm": 0.14662548899650574, "learning_rate": 0.00012123292248342657, "loss": 1.3863, "step": 1418 }, { "epoch": 0.10466573302867252, "grad_norm": 1.1349258422851562, "learning_rate": 0.00012123277833301255, "loss": 0.0148, "step": 1420 }, { "epoch": 0.10481314955406501, "grad_norm": 20.21559715270996, "learning_rate": 0.00012123263397954121, "loss": 2.3576, "step": 1422 }, { "epoch": 0.1049605660794575, "grad_norm": 27.789064407348633, "learning_rate": 0.00012123248942301302, "loss": 1.3553, "step": 1424 }, { "epoch": 0.1049605660794575, "eval_1_ratio_diff": -0.18082618862042088, "eval_accuracy": 0.7833203429462198, "eval_f1": 0.7352380952380952, "eval_loss": 0.8213497400283813, "eval_precision": 0.9437652811735942, "eval_recall": 0.6021840873634945, "eval_runtime": 1440.567, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 1424 }, { "epoch": 0.10510798260485, "grad_norm": 0.472825288772583, "learning_rate": 0.00012123234466342849, "loss": 0.0161, "step": 1426 }, { "epoch": 0.1052553991302425, "grad_norm": 44.982635498046875, "learning_rate": 0.00012123219970078806, "loss": 0.216, "step": 1428 }, { "epoch": 0.105402815655635, "grad_norm": 20.85587501525879, "learning_rate": 0.00012123205453509228, "loss": 1.7555, "step": 1430 }, { "epoch": 0.10555023218102749, "grad_norm": 19.432729721069336, "learning_rate": 0.00012123190916634158, "loss": 0.9614, "step": 1432 }, { "epoch": 0.10569764870641998, "grad_norm": 1.2885982990264893, "learning_rate": 0.00012123176359453646, "loss": 0.7221, "step": 1434 }, { "epoch": 0.10584506523181249, "grad_norm": 39.255924224853516, "learning_rate": 0.00012123161781967742, "loss": 0.7135, "step": 1436 }, { "epoch": 0.10599248175720498, "grad_norm": 1.8398678302764893, "learning_rate": 0.00012123147184176495, "loss": 1.7681, "step": 1438 }, { "epoch": 0.10613989828259748, "grad_norm": 0.04480309039354324, "learning_rate": 0.00012123132566079952, "loss": 0.0198, "step": 1440 }, { "epoch": 0.10613989828259748, "eval_1_ratio_diff": 0.013250194855806696, "eval_accuracy": 0.8495713172252534, "eval_f1": 0.8514241724403387, "eval_loss": 0.5520654916763306, "eval_precision": 0.8404255319148937, "eval_recall": 0.8627145085803433, "eval_runtime": 1441.2669, "eval_samples_per_second": 0.89, "eval_steps_per_second": 0.445, "step": 1440 }, { "epoch": 0.10628731480798997, "grad_norm": 0.10228992253541946, "learning_rate": 0.00012123117927678164, "loss": 0.0767, "step": 1442 }, { "epoch": 0.10643473133338248, "grad_norm": 0.14043979346752167, "learning_rate": 0.0001212310326897118, "loss": 0.062, "step": 1444 }, { "epoch": 0.10658214785877497, "grad_norm": 18.650835037231445, "learning_rate": 0.00012123088589959048, "loss": 0.5735, "step": 1446 }, { "epoch": 0.10672956438416746, "grad_norm": 18.65635871887207, "learning_rate": 0.00012123073890641816, "loss": 0.697, "step": 1448 }, { "epoch": 0.10687698090955997, "grad_norm": 24.889253616333008, "learning_rate": 0.00012123059171019538, "loss": 1.1449, "step": 1450 }, { "epoch": 0.10702439743495246, "grad_norm": 0.32461315393447876, "learning_rate": 0.00012123044431092258, "loss": 0.0108, "step": 1452 }, { "epoch": 0.10717181396034496, "grad_norm": 0.195255309343338, "learning_rate": 0.00012123029670860029, "loss": 0.0082, "step": 1454 }, { "epoch": 0.10731923048573745, "grad_norm": 0.3942672312259674, "learning_rate": 0.00012123014890322897, "loss": 0.0278, "step": 1456 }, { "epoch": 0.10731923048573745, "eval_1_ratio_diff": -0.05455962587685115, "eval_accuracy": 0.8487918939984411, "eval_f1": 0.8399339933993399, "eval_loss": 0.6235100626945496, "eval_precision": 0.8914185639229422, "eval_recall": 0.7940717628705148, "eval_runtime": 1441.051, "eval_samples_per_second": 0.89, "eval_steps_per_second": 0.446, "step": 1456 }, { "epoch": 0.10746664701112994, "grad_norm": 20.8675537109375, "learning_rate": 0.00012123000089480917, "loss": 2.0488, "step": 1458 }, { "epoch": 0.10761406353652245, "grad_norm": 19.674894332885742, "learning_rate": 0.00012122985268334132, "loss": 0.9135, "step": 1460 }, { "epoch": 0.10776148006191494, "grad_norm": 0.16670210659503937, "learning_rate": 0.00012122970426882597, "loss": 0.0074, "step": 1462 }, { "epoch": 0.10790889658730744, "grad_norm": 20.293106079101562, "learning_rate": 0.00012122955565126358, "loss": 1.0217, "step": 1464 }, { "epoch": 0.10805631311269993, "grad_norm": 0.6973972916603088, "learning_rate": 0.00012122940683065467, "loss": 0.9069, "step": 1466 }, { "epoch": 0.10820372963809242, "grad_norm": 25.440162658691406, "learning_rate": 0.00012122925780699975, "loss": 1.5865, "step": 1468 }, { "epoch": 0.10835114616348493, "grad_norm": 4.310685157775879, "learning_rate": 0.00012122910858029928, "loss": 0.4176, "step": 1470 }, { "epoch": 0.10849856268887742, "grad_norm": 0.3989110291004181, "learning_rate": 0.00012122895915055379, "loss": 1.2954, "step": 1472 }, { "epoch": 0.10849856268887742, "eval_1_ratio_diff": 0.021823850350740415, "eval_accuracy": 0.8581449727201871, "eval_f1": 0.8610687022900764, "eval_loss": 0.4833138585090637, "eval_precision": 0.8430493273542601, "eval_recall": 0.8798751950078003, "eval_runtime": 1440.7462, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 1472 }, { "epoch": 0.10864597921426992, "grad_norm": 0.3485046923160553, "learning_rate": 0.00012122880951776379, "loss": 0.0092, "step": 1474 }, { "epoch": 0.10879339573966242, "grad_norm": 31.38138198852539, "learning_rate": 0.00012122865968192974, "loss": 2.2038, "step": 1476 }, { "epoch": 0.1089408122650549, "grad_norm": 0.1756962537765503, "learning_rate": 0.00012122850964305218, "loss": 0.0039, "step": 1478 }, { "epoch": 0.10908822879044741, "grad_norm": 0.4892203211784363, "learning_rate": 0.0001212283594011316, "loss": 1.2883, "step": 1480 }, { "epoch": 0.1092356453158399, "grad_norm": 0.38502997159957886, "learning_rate": 0.00012122820895616849, "loss": 0.015, "step": 1482 }, { "epoch": 0.1093830618412324, "grad_norm": 0.3273461163043976, "learning_rate": 0.00012122805830816339, "loss": 0.0328, "step": 1484 }, { "epoch": 0.1095304783666249, "grad_norm": 53.52883529663086, "learning_rate": 0.00012122790745711678, "loss": 1.4843, "step": 1486 }, { "epoch": 0.10967789489201739, "grad_norm": 0.2854032814502716, "learning_rate": 0.00012122775640302914, "loss": 0.0227, "step": 1488 }, { "epoch": 0.10967789489201739, "eval_1_ratio_diff": 0.014809041309431059, "eval_accuracy": 0.8448947778643804, "eval_f1": 0.8470407378939278, "eval_loss": 0.6297035217285156, "eval_precision": 0.8348484848484848, "eval_recall": 0.859594383775351, "eval_runtime": 1441.3865, "eval_samples_per_second": 0.89, "eval_steps_per_second": 0.445, "step": 1488 }, { "epoch": 0.1098253114174099, "grad_norm": 0.2311754673719406, "learning_rate": 0.00012122760514590104, "loss": 0.0063, "step": 1490 }, { "epoch": 0.10997272794280238, "grad_norm": 21.77858543395996, "learning_rate": 0.00012122745368573293, "loss": 1.6042, "step": 1492 }, { "epoch": 0.11012014446819489, "grad_norm": 0.12185559421777725, "learning_rate": 0.00012122730202252534, "loss": 0.0054, "step": 1494 }, { "epoch": 0.11026756099358738, "grad_norm": 0.07674361765384674, "learning_rate": 0.00012122715015627879, "loss": 1.2277, "step": 1496 }, { "epoch": 0.11041497751897988, "grad_norm": 1.0588175058364868, "learning_rate": 0.00012122699808699376, "loss": 0.0121, "step": 1498 }, { "epoch": 0.11056239404437238, "grad_norm": 148.854248046875, "learning_rate": 0.00012122684581467078, "loss": 1.6651, "step": 1500 }, { "epoch": 0.11070981056976487, "grad_norm": 0.07673851400613785, "learning_rate": 0.00012122669333931036, "loss": 0.0037, "step": 1502 }, { "epoch": 0.11085722709515737, "grad_norm": 0.14825621247291565, "learning_rate": 0.00012122654066091301, "loss": 0.0033, "step": 1504 }, { "epoch": 0.11085722709515737, "eval_1_ratio_diff": 0.0, "eval_accuracy": 0.8394388152766953, "eval_f1": 0.8393135725429017, "eval_loss": 0.7106738686561584, "eval_precision": 0.8393135725429017, "eval_recall": 0.8393135725429017, "eval_runtime": 1440.7668, "eval_samples_per_second": 0.89, "eval_steps_per_second": 0.446, "step": 1504 }, { "epoch": 0.11100464362054986, "grad_norm": 0.3325727880001068, "learning_rate": 0.00012122638777947923, "loss": 0.0043, "step": 1506 }, { "epoch": 0.11115206014594237, "grad_norm": 0.16898727416992188, "learning_rate": 0.00012122623469500956, "loss": 1.3778, "step": 1508 }, { "epoch": 0.11129947667133486, "grad_norm": 24.855741500854492, "learning_rate": 0.00012122608140750447, "loss": 1.1577, "step": 1510 }, { "epoch": 0.11144689319672735, "grad_norm": 0.15268811583518982, "learning_rate": 0.0001212259279169645, "loss": 0.0057, "step": 1512 }, { "epoch": 0.11159430972211985, "grad_norm": 37.5292854309082, "learning_rate": 0.00012122577422339017, "loss": 2.6301, "step": 1514 }, { "epoch": 0.11174172624751234, "grad_norm": 0.23876796662807465, "learning_rate": 0.000121225620326782, "loss": 0.0067, "step": 1516 }, { "epoch": 0.11188914277290485, "grad_norm": 0.14355158805847168, "learning_rate": 0.00012122546622714046, "loss": 0.0082, "step": 1518 }, { "epoch": 0.11203655929829734, "grad_norm": 0.14837191998958588, "learning_rate": 0.00012122531192446613, "loss": 1.1954, "step": 1520 }, { "epoch": 0.11203655929829734, "eval_1_ratio_diff": -0.006235385814497285, "eval_accuracy": 0.8456742010911925, "eval_f1": 0.8445839874411303, "eval_loss": 0.5836101174354553, "eval_precision": 0.8499210110584519, "eval_recall": 0.8393135725429017, "eval_runtime": 1440.5652, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 1520 }, { "epoch": 0.11218397582368983, "grad_norm": 1.0671629905700684, "learning_rate": 0.0001212251574187595, "loss": 0.0128, "step": 1522 }, { "epoch": 0.11233139234908233, "grad_norm": 22.311914443969727, "learning_rate": 0.00012122500271002106, "loss": 1.1378, "step": 1524 }, { "epoch": 0.11247880887447483, "grad_norm": 24.98206329345703, "learning_rate": 0.00012122484779825135, "loss": 1.4429, "step": 1526 }, { "epoch": 0.11262622539986733, "grad_norm": 0.10400061309337616, "learning_rate": 0.00012122469268345093, "loss": 0.8205, "step": 1528 }, { "epoch": 0.11277364192525982, "grad_norm": 0.1311234086751938, "learning_rate": 0.00012122453736562024, "loss": 0.0052, "step": 1530 }, { "epoch": 0.11292105845065231, "grad_norm": 24.459693908691406, "learning_rate": 0.00012122438184475986, "loss": 0.8169, "step": 1532 }, { "epoch": 0.11306847497604482, "grad_norm": 0.6599878072738647, "learning_rate": 0.0001212242261208703, "loss": 0.0172, "step": 1534 }, { "epoch": 0.11321589150143731, "grad_norm": 0.7011798024177551, "learning_rate": 0.00012122407019395205, "loss": 0.0101, "step": 1536 }, { "epoch": 0.11321589150143731, "eval_1_ratio_diff": -0.011691348402182389, "eval_accuracy": 0.852689010132502, "eval_f1": 0.850828729281768, "eval_loss": 0.616263747215271, "eval_precision": 0.8610223642172524, "eval_recall": 0.8408736349453978, "eval_runtime": 1440.4808, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 1536 }, { "epoch": 0.11336330802682981, "grad_norm": 0.11136188358068466, "learning_rate": 0.00012122391406400568, "loss": 0.0043, "step": 1538 }, { "epoch": 0.1135107245522223, "grad_norm": 0.09410673379898071, "learning_rate": 0.00012122375773103169, "loss": 0.0029, "step": 1540 }, { "epoch": 0.1136581410776148, "grad_norm": 0.0886264443397522, "learning_rate": 0.00012122360119503061, "loss": 0.0027, "step": 1542 }, { "epoch": 0.1138055576030073, "grad_norm": 0.06019139662384987, "learning_rate": 0.00012122344445600295, "loss": 0.0012, "step": 1544 }, { "epoch": 0.11395297412839979, "grad_norm": 24.27945327758789, "learning_rate": 0.00012122328751394924, "loss": 1.2476, "step": 1546 }, { "epoch": 0.1141003906537923, "grad_norm": 0.07040827721357346, "learning_rate": 0.00012122313036887001, "loss": 1.05, "step": 1548 }, { "epoch": 0.11424780717918478, "grad_norm": 21.743165969848633, "learning_rate": 0.00012122297302076579, "loss": 3.2561, "step": 1550 }, { "epoch": 0.11439522370457729, "grad_norm": 0.21815018355846405, "learning_rate": 0.00012122281546963711, "loss": 0.0085, "step": 1552 }, { "epoch": 0.11439522370457729, "eval_1_ratio_diff": 0.05845674201091189, "eval_accuracy": 0.8620420888542478, "eval_f1": 0.8695652173913043, "eval_loss": 0.5588727593421936, "eval_precision": 0.8240223463687151, "eval_recall": 0.9204368174726989, "eval_runtime": 1440.4266, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 1552 }, { "epoch": 0.11454264022996978, "grad_norm": 0.3978158235549927, "learning_rate": 0.0001212226577154845, "loss": 0.0087, "step": 1554 }, { "epoch": 0.11469005675536227, "grad_norm": 0.07042258977890015, "learning_rate": 0.00012122249975830848, "loss": 0.0021, "step": 1556 }, { "epoch": 0.11483747328075478, "grad_norm": 0.16607695817947388, "learning_rate": 0.00012122234159810957, "loss": 0.0024, "step": 1558 }, { "epoch": 0.11498488980614727, "grad_norm": 0.11605281382799149, "learning_rate": 0.00012122218323488832, "loss": 0.0026, "step": 1560 }, { "epoch": 0.11513230633153977, "grad_norm": 24.77876091003418, "learning_rate": 0.00012122202466864525, "loss": 1.4127, "step": 1562 }, { "epoch": 0.11527972285693226, "grad_norm": 0.17567309737205505, "learning_rate": 0.00012122186589938088, "loss": 0.0037, "step": 1564 }, { "epoch": 0.11542713938232475, "grad_norm": 0.19481156766414642, "learning_rate": 0.00012122170692709576, "loss": 0.6267, "step": 1566 }, { "epoch": 0.11557455590771726, "grad_norm": 24.115211486816406, "learning_rate": 0.00012122154775179043, "loss": 0.8964, "step": 1568 }, { "epoch": 0.11557455590771726, "eval_1_ratio_diff": 0.03975058456742009, "eval_accuracy": 0.8713951675759938, "eval_f1": 0.8762190547636909, "eval_loss": 0.5382638573646545, "eval_precision": 0.8439306358381503, "eval_recall": 0.9110764430577223, "eval_runtime": 1441.1253, "eval_samples_per_second": 0.89, "eval_steps_per_second": 0.445, "step": 1568 }, { "epoch": 0.11572197243310975, "grad_norm": 0.140619158744812, "learning_rate": 0.0001212213883734654, "loss": 0.0054, "step": 1570 }, { "epoch": 0.11586938895850225, "grad_norm": 0.12547695636749268, "learning_rate": 0.00012122122879212122, "loss": 0.3549, "step": 1572 }, { "epoch": 0.11601680548389474, "grad_norm": 0.12592053413391113, "learning_rate": 0.00012122106900775843, "loss": 0.0105, "step": 1574 }, { "epoch": 0.11616422200928724, "grad_norm": 0.11613775789737701, "learning_rate": 0.00012122090902037755, "loss": 0.0044, "step": 1576 }, { "epoch": 0.11631163853467974, "grad_norm": 0.06327944993972778, "learning_rate": 0.00012122074882997911, "loss": 0.0052, "step": 1578 }, { "epoch": 0.11645905506007223, "grad_norm": 0.26552170515060425, "learning_rate": 0.00012122058843656367, "loss": 0.0049, "step": 1580 }, { "epoch": 0.11660647158546474, "grad_norm": 0.05181106925010681, "learning_rate": 0.00012122042784013175, "loss": 0.8965, "step": 1582 }, { "epoch": 0.11675388811085723, "grad_norm": 0.07022108882665634, "learning_rate": 0.0001212202670406839, "loss": 1.4149, "step": 1584 }, { "epoch": 0.11675388811085723, "eval_1_ratio_diff": 0.014029618082618822, "eval_accuracy": 0.8565861262665627, "eval_f1": 0.8584615384615385, "eval_loss": 0.6103407144546509, "eval_precision": 0.8467374810318664, "eval_recall": 0.8705148205928237, "eval_runtime": 1439.8898, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 1584 }, { "epoch": 0.11690130463624972, "grad_norm": 0.20126762986183167, "learning_rate": 0.00012122010603822065, "loss": 0.0077, "step": 1586 }, { "epoch": 0.11704872116164222, "grad_norm": 0.09971367567777634, "learning_rate": 0.00012121994483274255, "loss": 0.0049, "step": 1588 }, { "epoch": 0.11719613768703471, "grad_norm": 0.06467089802026749, "learning_rate": 0.00012121978342425012, "loss": 0.005, "step": 1590 }, { "epoch": 0.11734355421242722, "grad_norm": 0.06981782615184784, "learning_rate": 0.00012121962181274392, "loss": 0.0028, "step": 1592 }, { "epoch": 0.11749097073781971, "grad_norm": 0.12012193351984024, "learning_rate": 0.00012121945999822448, "loss": 0.0022, "step": 1594 }, { "epoch": 0.11763838726321221, "grad_norm": 24.71665382385254, "learning_rate": 0.00012121929798069236, "loss": 1.756, "step": 1596 }, { "epoch": 0.1177858037886047, "grad_norm": 0.31951653957366943, "learning_rate": 0.0001212191357601481, "loss": 0.004, "step": 1598 }, { "epoch": 0.1179332203139972, "grad_norm": 0.03907225281000137, "learning_rate": 0.0001212189733365922, "loss": 0.0018, "step": 1600 }, { "epoch": 0.1179332203139972, "eval_1_ratio_diff": 0.003117692907248615, "eval_accuracy": 0.8628215120810601, "eval_f1": 0.8631415241057543, "eval_loss": 0.6979319453239441, "eval_precision": 0.8604651162790697, "eval_recall": 0.8658346333853354, "eval_runtime": 1440.2188, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 1600 }, { "epoch": 0.1180806368393897, "grad_norm": 0.14489419758319855, "learning_rate": 0.00012121881071002525, "loss": 0.004, "step": 1602 }, { "epoch": 0.11822805336478219, "grad_norm": 0.02964833378791809, "learning_rate": 0.00012121864788044781, "loss": 0.0014, "step": 1604 }, { "epoch": 0.1183754698901747, "grad_norm": 0.1308467835187912, "learning_rate": 0.00012121848484786039, "loss": 1.2428, "step": 1606 }, { "epoch": 0.11852288641556719, "grad_norm": 0.012196216732263565, "learning_rate": 0.00012121832161226353, "loss": 0.0039, "step": 1608 }, { "epoch": 0.11867030294095968, "grad_norm": 26.82729721069336, "learning_rate": 0.0001212181581736578, "loss": 0.9557, "step": 1610 }, { "epoch": 0.11881771946635218, "grad_norm": 55.06840515136719, "learning_rate": 0.00012121799453204374, "loss": 1.341, "step": 1612 }, { "epoch": 0.11896513599174467, "grad_norm": 0.10571928322315216, "learning_rate": 0.0001212178306874219, "loss": 0.0018, "step": 1614 }, { "epoch": 0.11911255251713718, "grad_norm": 23.6888427734375, "learning_rate": 0.00012121766663979284, "loss": 2.8349, "step": 1616 }, { "epoch": 0.11911255251713718, "eval_1_ratio_diff": 0.001558846453624252, "eval_accuracy": 0.8612626656274357, "eval_f1": 0.8613707165109035, "eval_loss": 0.6912267804145813, "eval_precision": 0.8600311041990669, "eval_recall": 0.8627145085803433, "eval_runtime": 1439.8805, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 1616 }, { "epoch": 0.11925996904252967, "grad_norm": 0.013893804512917995, "learning_rate": 0.00012121750238915708, "loss": 0.0039, "step": 1618 }, { "epoch": 0.11940738556792216, "grad_norm": 0.0326993353664875, "learning_rate": 0.00012121733793551521, "loss": 0.0071, "step": 1620 }, { "epoch": 0.11955480209331466, "grad_norm": 0.021896235644817352, "learning_rate": 0.00012121717327886775, "loss": 0.4694, "step": 1622 }, { "epoch": 0.11970221861870715, "grad_norm": 2.5759835243225098, "learning_rate": 0.00012121700841921524, "loss": 0.8411, "step": 1624 }, { "epoch": 0.11984963514409966, "grad_norm": 6.512516021728516, "learning_rate": 0.00012121684335655828, "loss": 1.2897, "step": 1626 }, { "epoch": 0.11999705166949215, "grad_norm": 1.0826752185821533, "learning_rate": 0.00012121667809089738, "loss": 0.067, "step": 1628 }, { "epoch": 0.12014446819488464, "grad_norm": 0.5020477771759033, "learning_rate": 0.00012121651262223313, "loss": 0.0061, "step": 1630 }, { "epoch": 0.12029188472027715, "grad_norm": 1.0385483503341675, "learning_rate": 0.00012121634695056605, "loss": 0.0162, "step": 1632 }, { "epoch": 0.12029188472027715, "eval_1_ratio_diff": 0.024162120031176904, "eval_accuracy": 0.8854247856586126, "eval_f1": 0.8880426504188881, "eval_loss": 0.4667970538139343, "eval_precision": 0.8675595238095238, "eval_recall": 0.9095163806552262, "eval_runtime": 1440.04, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 1632 }, { "epoch": 0.12043930124566964, "grad_norm": 0.3257231116294861, "learning_rate": 0.00012121618107589671, "loss": 0.0073, "step": 1634 }, { "epoch": 0.12058671777106214, "grad_norm": 0.17591340839862823, "learning_rate": 0.00012121601499822568, "loss": 0.7197, "step": 1636 }, { "epoch": 0.12073413429645463, "grad_norm": 20.77132797241211, "learning_rate": 0.0001212158487175535, "loss": 1.5072, "step": 1638 }, { "epoch": 0.12088155082184712, "grad_norm": 0.013665467500686646, "learning_rate": 0.00012121568223388071, "loss": 0.0014, "step": 1640 }, { "epoch": 0.12102896734723963, "grad_norm": 0.368145614862442, "learning_rate": 0.00012121551554720792, "loss": 1.0871, "step": 1642 }, { "epoch": 0.12117638387263212, "grad_norm": 0.2764877378940582, "learning_rate": 0.00012121534865753563, "loss": 0.0044, "step": 1644 }, { "epoch": 0.12132380039802462, "grad_norm": 0.15803444385528564, "learning_rate": 0.00012121518156486446, "loss": 0.0058, "step": 1646 }, { "epoch": 0.12147121692341711, "grad_norm": 21.269418716430664, "learning_rate": 0.0001212150142691949, "loss": 1.5637, "step": 1648 }, { "epoch": 0.12147121692341711, "eval_1_ratio_diff": -0.03117692907248637, "eval_accuracy": 0.8877630553390491, "eval_f1": 0.8840579710144928, "eval_loss": 0.5637651681900024, "eval_precision": 0.913477537437604, "eval_recall": 0.8564742589703588, "eval_runtime": 1440.5567, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 1648 }, { "epoch": 0.12161863344880962, "grad_norm": 21.965253829956055, "learning_rate": 0.00012121484677052757, "loss": 0.9775, "step": 1650 }, { "epoch": 0.12176604997420211, "grad_norm": 5.706968307495117, "learning_rate": 0.000121214679068863, "loss": 1.2593, "step": 1652 }, { "epoch": 0.1219134664995946, "grad_norm": 78.91386413574219, "learning_rate": 0.00012121451116420174, "loss": 1.8529, "step": 1654 }, { "epoch": 0.1220608830249871, "grad_norm": 20.03242301940918, "learning_rate": 0.00012121434305654442, "loss": 3.822, "step": 1656 }, { "epoch": 0.1222082995503796, "grad_norm": 18.92554473876953, "learning_rate": 0.00012121417474589151, "loss": 1.7478, "step": 1658 }, { "epoch": 0.1223557160757721, "grad_norm": 18.513463973999023, "learning_rate": 0.00012121400623224365, "loss": 0.9207, "step": 1660 }, { "epoch": 0.12250313260116459, "grad_norm": 2.1414077281951904, "learning_rate": 0.00012121383751560137, "loss": 0.0559, "step": 1662 }, { "epoch": 0.12265054912655708, "grad_norm": 1.9082714319229126, "learning_rate": 0.00012121366859596523, "loss": 0.0867, "step": 1664 }, { "epoch": 0.12265054912655708, "eval_1_ratio_diff": -0.11223694466095091, "eval_accuracy": 0.8316445830085737, "eval_f1": 0.8101933216168717, "eval_loss": 0.46373099088668823, "eval_precision": 0.9275653923541247, "eval_recall": 0.719188767550702, "eval_runtime": 1439.8045, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "step": 1664 }, { "epoch": 0.12279796565194959, "grad_norm": 17.66658592224121, "learning_rate": 0.0001212134994733358, "loss": 0.694, "step": 1666 }, { "epoch": 0.12294538217734208, "grad_norm": 0.5736209750175476, "learning_rate": 0.00012121333014771369, "loss": 0.5414, "step": 1668 }, { "epoch": 0.12309279870273458, "grad_norm": 16.726125717163086, "learning_rate": 0.0001212131606190994, "loss": 2.7414, "step": 1670 }, { "epoch": 0.12324021522812707, "grad_norm": 1.1649620532989502, "learning_rate": 0.00012121299088749353, "loss": 0.0285, "step": 1672 }, { "epoch": 0.12338763175351956, "grad_norm": 18.4560604095459, "learning_rate": 0.00012121282095289665, "loss": 0.9068, "step": 1674 }, { "epoch": 0.12353504827891207, "grad_norm": 0.3899083137512207, "learning_rate": 0.00012121265081530934, "loss": 0.0192, "step": 1676 }, { "epoch": 0.12368246480430456, "grad_norm": 0.6309532523155212, "learning_rate": 0.00012121248047473215, "loss": 0.0398, "step": 1678 }, { "epoch": 0.12382988132969706, "grad_norm": 25.81404685974121, "learning_rate": 0.00012121230993116564, "loss": 0.9268, "step": 1680 }, { "epoch": 0.12382988132969706, "eval_1_ratio_diff": -0.05222135619641466, "eval_accuracy": 0.8620420888542478, "eval_f1": 0.854320987654321, "eval_loss": 0.49660980701446533, "eval_precision": 0.9041811846689896, "eval_recall": 0.8096723868954758, "eval_runtime": 1439.1088, "eval_samples_per_second": 0.892, "eval_steps_per_second": 0.446, "step": 1680 }, { "epoch": 0.12397729785508955, "grad_norm": 0.17194198071956635, "learning_rate": 0.00012121213918461043, "loss": 0.0091, "step": 1682 }, { "epoch": 0.12412471438048205, "grad_norm": 0.1233774870634079, "learning_rate": 0.00012121196823506704, "loss": 0.033, "step": 1684 }, { "epoch": 0.12427213090587455, "grad_norm": 0.12911829352378845, "learning_rate": 0.00012121179708253609, "loss": 0.9894, "step": 1686 }, { "epoch": 0.12441954743126704, "grad_norm": 12.796908378601074, "learning_rate": 0.00012121162572701811, "loss": 0.2167, "step": 1688 }, { "epoch": 0.12456696395665955, "grad_norm": 19.411853790283203, "learning_rate": 0.0001212114541685137, "loss": 1.1343, "step": 1690 }, { "epoch": 0.12471438048205204, "grad_norm": 2.125748872756958, "learning_rate": 0.00012121128240702341, "loss": 0.0167, "step": 1692 }, { "epoch": 0.12486179700744453, "grad_norm": 0.23534013330936432, "learning_rate": 0.00012121111044254785, "loss": 0.0099, "step": 1694 }, { "epoch": 0.12500921353283703, "grad_norm": 0.2723231911659241, "learning_rate": 0.00012121093827508758, "loss": 0.0222, "step": 1696 }, { "epoch": 0.12500921353283703, "eval_1_ratio_diff": -0.05144193296960253, "eval_accuracy": 0.8721745908028059, "eval_f1": 0.8651315789473685, "eval_loss": 0.608511209487915, "eval_precision": 0.9147826086956522, "eval_recall": 0.8205928237129485, "eval_runtime": 1438.9329, "eval_samples_per_second": 0.892, "eval_steps_per_second": 0.446, "step": 1696 }, { "epoch": 0.12515663005822952, "grad_norm": 0.1575896292924881, "learning_rate": 0.00012121076590464316, "loss": 0.0045, "step": 1698 }, { "epoch": 0.12530404658362201, "grad_norm": 25.341609954833984, "learning_rate": 0.00012121059333121521, "loss": 2.9943, "step": 1700 }, { "epoch": 0.12545146310901453, "grad_norm": 0.13375264406204224, "learning_rate": 0.00012121042055480427, "loss": 0.0033, "step": 1702 }, { "epoch": 0.12559887963440702, "grad_norm": 0.06750854849815369, "learning_rate": 0.00012121024757541094, "loss": 0.0024, "step": 1704 }, { "epoch": 0.12574629615979951, "grad_norm": 0.05674993619322777, "learning_rate": 0.00012121007439303577, "loss": 1.2325, "step": 1706 }, { "epoch": 0.125893712685192, "grad_norm": 0.06746107339859009, "learning_rate": 0.00012120990100767938, "loss": 0.0016, "step": 1708 }, { "epoch": 0.1260411292105845, "grad_norm": 18.890642166137695, "learning_rate": 0.00012120972741934233, "loss": 1.5509, "step": 1710 }, { "epoch": 0.12618854573597701, "grad_norm": 0.0601690337061882, "learning_rate": 0.00012120955362802522, "loss": 0.0042, "step": 1712 }, { "epoch": 0.12618854573597701, "eval_1_ratio_diff": -0.21278254091971943, "eval_accuracy": 0.7575993764614185, "eval_f1": 0.6917740336967294, "eval_loss": 1.260048747062683, "eval_precision": 0.9483695652173914, "eval_recall": 0.5444617784711389, "eval_runtime": 1438.8451, "eval_samples_per_second": 0.892, "eval_steps_per_second": 0.446, "step": 1712 }, { "epoch": 0.1263359622613695, "grad_norm": 31.62714385986328, "learning_rate": 0.00012120937963372859, "loss": 2.3397, "step": 1714 }, { "epoch": 0.126483378786762, "grad_norm": 0.09423007071018219, "learning_rate": 0.00012120920543645306, "loss": 0.0056, "step": 1716 }, { "epoch": 0.1266307953121545, "grad_norm": 18.73729133605957, "learning_rate": 0.0001212090310361992, "loss": 1.3417, "step": 1718 }, { "epoch": 0.12677821183754698, "grad_norm": 0.16277751326560974, "learning_rate": 0.0001212088564329676, "loss": 0.0088, "step": 1720 }, { "epoch": 0.1269256283629395, "grad_norm": 18.30181884765625, "learning_rate": 0.00012120868162675886, "loss": 0.966, "step": 1722 }, { "epoch": 0.127073044888332, "grad_norm": 0.3613678812980652, "learning_rate": 0.00012120850661757353, "loss": 1.0053, "step": 1724 }, { "epoch": 0.12722046141372448, "grad_norm": 0.7345402836799622, "learning_rate": 0.00012120833140541222, "loss": 1.4195, "step": 1726 }, { "epoch": 0.12736787793911697, "grad_norm": 1.3485078811645508, "learning_rate": 0.00012120815599027552, "loss": 0.0247, "step": 1728 }, { "epoch": 0.12736787793911697, "eval_1_ratio_diff": -0.04130943102104445, "eval_accuracy": 0.8651597817614964, "eval_f1": 0.8592351505288853, "eval_loss": 0.4965825080871582, "eval_precision": 0.8979591836734694, "eval_recall": 0.8237129485179407, "eval_runtime": 1438.3328, "eval_samples_per_second": 0.892, "eval_steps_per_second": 0.446, "step": 1728 }, { "epoch": 0.12751529446450946, "grad_norm": 17.94972801208496, "learning_rate": 0.000121207980372164, "loss": 0.7518, "step": 1730 }, { "epoch": 0.12766271098990198, "grad_norm": 1.6150920391082764, "learning_rate": 0.00012120780455107827, "loss": 0.0328, "step": 1732 }, { "epoch": 0.12781012751529447, "grad_norm": 0.22876843810081482, "learning_rate": 0.00012120762852701892, "loss": 0.0105, "step": 1734 }, { "epoch": 0.12795754404068696, "grad_norm": 0.1126691922545433, "learning_rate": 0.0001212074522999865, "loss": 0.0038, "step": 1736 }, { "epoch": 0.12810496056607945, "grad_norm": 0.5277115702629089, "learning_rate": 0.00012120727586998164, "loss": 0.0094, "step": 1738 }, { "epoch": 0.12825237709147194, "grad_norm": 0.11928611248731613, "learning_rate": 0.00012120709923700492, "loss": 0.0054, "step": 1740 }, { "epoch": 0.12839979361686446, "grad_norm": 22.84393310546875, "learning_rate": 0.00012120692240105693, "loss": 1.7358, "step": 1742 }, { "epoch": 0.12854721014225695, "grad_norm": 0.08426441997289658, "learning_rate": 0.0001212067453621383, "loss": 0.0029, "step": 1744 }, { "epoch": 0.12854721014225695, "eval_1_ratio_diff": -0.014029618082618878, "eval_accuracy": 0.8784099766173032, "eval_f1": 0.8765822784810127, "eval_loss": 0.6492618322372437, "eval_precision": 0.8892455858747994, "eval_recall": 0.8642745709828393, "eval_runtime": 1438.7827, "eval_samples_per_second": 0.892, "eval_steps_per_second": 0.446, "step": 1744 }, { "epoch": 0.12869462666764944, "grad_norm": 22.079143524169922, "learning_rate": 0.00012120656812024955, "loss": 1.2809, "step": 1746 }, { "epoch": 0.12884204319304193, "grad_norm": 21.899768829345703, "learning_rate": 0.00012120639067539131, "loss": 3.0657, "step": 1748 }, { "epoch": 0.12898945971843442, "grad_norm": 0.1824941784143448, "learning_rate": 0.0001212062130275642, "loss": 0.0032, "step": 1750 }, { "epoch": 0.12913687624382694, "grad_norm": 0.1769951432943344, "learning_rate": 0.00012120603517676877, "loss": 1.2614, "step": 1752 }, { "epoch": 0.12928429276921943, "grad_norm": 21.305864334106445, "learning_rate": 0.00012120585712300566, "loss": 1.0725, "step": 1754 }, { "epoch": 0.12943170929461192, "grad_norm": 0.44233354926109314, "learning_rate": 0.00012120567886627544, "loss": 0.9641, "step": 1756 }, { "epoch": 0.12957912582000441, "grad_norm": 0.2779258191585541, "learning_rate": 0.00012120550040657871, "loss": 0.0096, "step": 1758 }, { "epoch": 0.1297265423453969, "grad_norm": 22.293994903564453, "learning_rate": 0.00012120532174391606, "loss": 0.9558, "step": 1760 }, { "epoch": 0.1297265423453969, "eval_1_ratio_diff": 0.031956352299298496, "eval_accuracy": 0.8901013250194856, "eval_f1": 0.8934240362811792, "eval_loss": 0.45321086049079895, "eval_precision": 0.8665689149560117, "eval_recall": 0.921996879875195, "eval_runtime": 1438.3028, "eval_samples_per_second": 0.892, "eval_steps_per_second": 0.446, "step": 1760 }, { "epoch": 0.12987395887078942, "grad_norm": 0.15532580018043518, "learning_rate": 0.00012120514287828811, "loss": 0.0082, "step": 1762 }, { "epoch": 0.13002137539618192, "grad_norm": 27.137800216674805, "learning_rate": 0.00012120496380969545, "loss": 0.8253, "step": 1764 }, { "epoch": 0.1301687919215744, "grad_norm": 0.12127237766981125, "learning_rate": 0.00012120478453813868, "loss": 0.007, "step": 1766 }, { "epoch": 0.1303162084469669, "grad_norm": 0.12471210211515427, "learning_rate": 0.00012120460506361839, "loss": 0.0118, "step": 1768 }, { "epoch": 0.1304636249723594, "grad_norm": 45.0229377746582, "learning_rate": 0.0001212044253861352, "loss": 3.5846, "step": 1770 }, { "epoch": 0.1306110414977519, "grad_norm": 0.4128153622150421, "learning_rate": 0.0001212042455056897, "loss": 0.0073, "step": 1772 }, { "epoch": 0.1307584580231444, "grad_norm": 0.40481987595558167, "learning_rate": 0.0001212040654222825, "loss": 0.0072, "step": 1774 }, { "epoch": 0.1309058745485369, "grad_norm": 0.11055589467287064, "learning_rate": 0.00012120388513591419, "loss": 1.0826, "step": 1776 }, { "epoch": 0.1309058745485369, "eval_1_ratio_diff": 0.1200311769290725, "eval_accuracy": 0.8332034294621979, "eval_f1": 0.850974930362117, "eval_loss": 0.6285108923912048, "eval_precision": 0.7685534591194969, "eval_recall": 0.953198127925117, "eval_runtime": 1438.3285, "eval_samples_per_second": 0.892, "eval_steps_per_second": 0.446, "step": 1776 }, { "epoch": 0.13105329107392938, "grad_norm": 0.12451104074716568, "learning_rate": 0.0001212037046465854, "loss": 1.0074, "step": 1778 }, { "epoch": 0.1312007075993219, "grad_norm": 0.27884507179260254, "learning_rate": 0.0001212035239542967, "loss": 0.0129, "step": 1780 }, { "epoch": 0.1313481241247144, "grad_norm": 0.992557168006897, "learning_rate": 0.00012120334305904872, "loss": 1.4174, "step": 1782 }, { "epoch": 0.13149554065010688, "grad_norm": 0.9067917466163635, "learning_rate": 0.00012120316196084206, "loss": 1.435, "step": 1784 }, { "epoch": 0.13164295717549937, "grad_norm": 20.08501625061035, "learning_rate": 0.00012120298065967733, "loss": 1.7277, "step": 1786 }, { "epoch": 0.13179037370089186, "grad_norm": 0.20194768905639648, "learning_rate": 0.00012120279915555515, "loss": 0.005, "step": 1788 }, { "epoch": 0.13193779022628438, "grad_norm": 0.29110512137413025, "learning_rate": 0.0001212026174484761, "loss": 0.0065, "step": 1790 }, { "epoch": 0.13208520675167687, "grad_norm": 0.3067338764667511, "learning_rate": 0.00012120243553844079, "loss": 0.006, "step": 1792 }, { "epoch": 0.13208520675167687, "eval_1_ratio_diff": -0.002338269680436489, "eval_accuracy": 0.8978955572876072, "eval_f1": 0.8975762314308053, "eval_loss": 0.42508459091186523, "eval_precision": 0.8996865203761756, "eval_recall": 0.8954758190327613, "eval_runtime": 1439.0957, "eval_samples_per_second": 0.892, "eval_steps_per_second": 0.446, "step": 1792 } ], "logging_steps": 2, "max_steps": 108536, "num_input_tokens_seen": 0, "num_train_epochs": 8, "save_steps": 64, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1000, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.5936070605815808e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }