{ "best_global_step": 1400, "best_metric": 0.6459359532496649, "best_model_checkpoint": "graphcodebert-swa-from-epoch-1/checkpoint-1400", "epoch": 2.8629856850715747, "eval_steps": 100, "global_step": 1400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.010224948875255624, "grad_norm": 2.4707133769989014, "learning_rate": 2.0512820512820512e-08, "loss": 0.8431, "step": 5 }, { "epoch": 0.02044989775051125, "grad_norm": 3.114851951599121, "learning_rate": 4.615384615384615e-08, "loss": 0.844, "step": 10 }, { "epoch": 0.03067484662576687, "grad_norm": 2.2256007194519043, "learning_rate": 7.179487179487178e-08, "loss": 0.847, "step": 15 }, { "epoch": 0.0408997955010225, "grad_norm": 2.5343081951141357, "learning_rate": 9.743589743589743e-08, "loss": 0.8492, "step": 20 }, { "epoch": 0.05112474437627812, "grad_norm": 3.1964163780212402, "learning_rate": 1.2307692307692308e-07, "loss": 0.8475, "step": 25 }, { "epoch": 0.06134969325153374, "grad_norm": 2.0466485023498535, "learning_rate": 1.4871794871794872e-07, "loss": 0.8445, "step": 30 }, { "epoch": 0.07157464212678936, "grad_norm": 2.164569139480591, "learning_rate": 1.7435897435897435e-07, "loss": 0.8452, "step": 35 }, { "epoch": 0.081799591002045, "grad_norm": 2.56343150138855, "learning_rate": 2e-07, "loss": 0.8473, "step": 40 }, { "epoch": 0.09202453987730061, "grad_norm": 2.5742437839508057, "learning_rate": 2.2564102564102563e-07, "loss": 0.848, "step": 45 }, { "epoch": 0.10224948875255624, "grad_norm": 2.587480306625366, "learning_rate": 2.5128205128205126e-07, "loss": 0.8409, "step": 50 }, { "epoch": 0.11247443762781185, "grad_norm": 2.5737764835357666, "learning_rate": 2.7692307692307693e-07, "loss": 0.8471, "step": 55 }, { "epoch": 0.12269938650306748, "grad_norm": 3.044358730316162, "learning_rate": 3.0256410256410254e-07, "loss": 0.8448, "step": 60 }, { "epoch": 0.1329243353783231, "grad_norm": 2.326373815536499, "learning_rate": 3.282051282051282e-07, "loss": 0.8517, "step": 65 }, { "epoch": 0.14314928425357873, "grad_norm": 2.267547607421875, "learning_rate": 3.5384615384615386e-07, "loss": 0.8387, "step": 70 }, { "epoch": 0.15337423312883436, "grad_norm": 2.609232187271118, "learning_rate": 3.7948717948717947e-07, "loss": 0.841, "step": 75 }, { "epoch": 0.16359918200409, "grad_norm": 2.9532523155212402, "learning_rate": 4.0512820512820514e-07, "loss": 0.8509, "step": 80 }, { "epoch": 0.1738241308793456, "grad_norm": 3.002154588699341, "learning_rate": 4.307692307692308e-07, "loss": 0.8482, "step": 85 }, { "epoch": 0.18404907975460122, "grad_norm": 2.701613187789917, "learning_rate": 4.5641025641025636e-07, "loss": 0.8422, "step": 90 }, { "epoch": 0.19427402862985685, "grad_norm": 2.7430365085601807, "learning_rate": 4.82051282051282e-07, "loss": 0.846, "step": 95 }, { "epoch": 0.20449897750511248, "grad_norm": 2.8101418018341064, "learning_rate": 5.076923076923076e-07, "loss": 0.8444, "step": 100 }, { "epoch": 0.20449897750511248, "eval_accuracy": 0.52033, "eval_loss": 0.6922348141670227, "eval_macro_f1": 0.4427650399783254, "eval_precision": 0.6036606007378691, "eval_recall": 0.5386742448919869, "eval_runtime": 80.6812, "eval_samples_per_second": 1239.446, "eval_steps_per_second": 0.607, "step": 100 }, { "epoch": 0.2147239263803681, "grad_norm": 2.5835089683532715, "learning_rate": 5.333333333333333e-07, "loss": 0.8437, "step": 105 }, { "epoch": 0.2249488752556237, "grad_norm": 2.7237253189086914, "learning_rate": 5.58974358974359e-07, "loss": 0.8431, "step": 110 }, { "epoch": 0.23517382413087934, "grad_norm": 2.4648072719573975, "learning_rate": 5.846153846153847e-07, "loss": 0.8399, "step": 115 }, { "epoch": 0.24539877300613497, "grad_norm": 2.7011852264404297, "learning_rate": 6.102564102564103e-07, "loss": 0.8409, "step": 120 }, { "epoch": 0.2556237218813906, "grad_norm": 2.3170969486236572, "learning_rate": 6.358974358974358e-07, "loss": 0.8361, "step": 125 }, { "epoch": 0.2658486707566462, "grad_norm": 2.517194986343384, "learning_rate": 6.615384615384615e-07, "loss": 0.839, "step": 130 }, { "epoch": 0.27607361963190186, "grad_norm": 2.5092124938964844, "learning_rate": 6.871794871794871e-07, "loss": 0.8438, "step": 135 }, { "epoch": 0.28629856850715746, "grad_norm": 2.3993237018585205, "learning_rate": 7.128205128205128e-07, "loss": 0.8349, "step": 140 }, { "epoch": 0.2965235173824131, "grad_norm": 2.1388165950775146, "learning_rate": 7.384615384615384e-07, "loss": 0.8363, "step": 145 }, { "epoch": 0.3067484662576687, "grad_norm": 1.8425891399383545, "learning_rate": 7.64102564102564e-07, "loss": 0.8325, "step": 150 }, { "epoch": 0.3169734151329243, "grad_norm": 1.8665552139282227, "learning_rate": 7.897435897435897e-07, "loss": 0.835, "step": 155 }, { "epoch": 0.32719836400818, "grad_norm": 1.8765455484390259, "learning_rate": 8.153846153846154e-07, "loss": 0.8328, "step": 160 }, { "epoch": 0.3374233128834356, "grad_norm": 2.640779495239258, "learning_rate": 8.41025641025641e-07, "loss": 0.8388, "step": 165 }, { "epoch": 0.3476482617586912, "grad_norm": 2.174116373062134, "learning_rate": 8.666666666666667e-07, "loss": 0.8336, "step": 170 }, { "epoch": 0.35787321063394684, "grad_norm": 1.8411178588867188, "learning_rate": 8.923076923076923e-07, "loss": 0.8384, "step": 175 }, { "epoch": 0.36809815950920244, "grad_norm": 2.3652143478393555, "learning_rate": 9.179487179487179e-07, "loss": 0.8318, "step": 180 }, { "epoch": 0.3783231083844581, "grad_norm": 1.9870903491973877, "learning_rate": 9.435897435897435e-07, "loss": 0.8306, "step": 185 }, { "epoch": 0.3885480572597137, "grad_norm": 2.458887815475464, "learning_rate": 9.692307692307691e-07, "loss": 0.8342, "step": 190 }, { "epoch": 0.3987730061349693, "grad_norm": 1.9105890989303589, "learning_rate": 9.948717948717949e-07, "loss": 0.8301, "step": 195 }, { "epoch": 0.40899795501022496, "grad_norm": 2.04896879196167, "learning_rate": 9.999490793845076e-07, "loss": 0.8291, "step": 200 }, { "epoch": 0.40899795501022496, "eval_accuracy": 0.52697, "eval_loss": 0.6913915872573853, "eval_macro_f1": 0.4511625248903547, "eval_precision": 0.6198512746424523, "eval_recall": 0.5452618609595298, "eval_runtime": 80.6395, "eval_samples_per_second": 1240.088, "eval_steps_per_second": 0.608, "step": 200 }, { "epoch": 0.41922290388548056, "grad_norm": 2.394630193710327, "learning_rate": 9.997422321595486e-07, "loss": 0.8311, "step": 205 }, { "epoch": 0.4294478527607362, "grad_norm": 1.7013665437698364, "learning_rate": 9.993763415653074e-07, "loss": 0.8264, "step": 210 }, { "epoch": 0.4396728016359918, "grad_norm": 2.1158103942871094, "learning_rate": 9.988515240467613e-07, "loss": 0.8262, "step": 215 }, { "epoch": 0.4498977505112474, "grad_norm": 1.5985370874404907, "learning_rate": 9.981679466275095e-07, "loss": 0.8296, "step": 220 }, { "epoch": 0.4601226993865031, "grad_norm": 2.0426042079925537, "learning_rate": 9.973258268566182e-07, "loss": 0.8233, "step": 225 }, { "epoch": 0.4703476482617587, "grad_norm": 1.7411834001541138, "learning_rate": 9.963254327393853e-07, "loss": 0.8269, "step": 230 }, { "epoch": 0.48057259713701433, "grad_norm": 2.1182405948638916, "learning_rate": 9.95167082652047e-07, "loss": 0.8247, "step": 235 }, { "epoch": 0.49079754601226994, "grad_norm": 2.0239953994750977, "learning_rate": 9.938511452404547e-07, "loss": 0.8308, "step": 240 }, { "epoch": 0.5010224948875256, "grad_norm": 2.366060495376587, "learning_rate": 9.923780393027534e-07, "loss": 0.8205, "step": 245 }, { "epoch": 0.5112474437627812, "grad_norm": 1.848169207572937, "learning_rate": 9.907482336560982e-07, "loss": 0.825, "step": 250 }, { "epoch": 0.5214723926380368, "grad_norm": 1.8216668367385864, "learning_rate": 9.889622469874535e-07, "loss": 0.8271, "step": 255 }, { "epoch": 0.5316973415132924, "grad_norm": 1.507730484008789, "learning_rate": 9.8702064768852e-07, "loss": 0.8147, "step": 260 }, { "epoch": 0.5419222903885481, "grad_norm": 1.7608263492584229, "learning_rate": 9.849240536748438e-07, "loss": 0.8221, "step": 265 }, { "epoch": 0.5521472392638037, "grad_norm": 2.203326940536499, "learning_rate": 9.826731321891641e-07, "loss": 0.8292, "step": 270 }, { "epoch": 0.5623721881390593, "grad_norm": 1.9529740810394287, "learning_rate": 9.802685995890632e-07, "loss": 0.8228, "step": 275 }, { "epoch": 0.5725971370143149, "grad_norm": 1.6214399337768555, "learning_rate": 9.777112211189841e-07, "loss": 0.8149, "step": 280 }, { "epoch": 0.5828220858895705, "grad_norm": 2.07482647895813, "learning_rate": 9.750018106666924e-07, "loss": 0.8143, "step": 285 }, { "epoch": 0.5930470347648262, "grad_norm": 1.7083203792572021, "learning_rate": 9.721412305042538e-07, "loss": 0.8188, "step": 290 }, { "epoch": 0.6032719836400818, "grad_norm": 2.0022943019866943, "learning_rate": 9.69130391013617e-07, "loss": 0.8195, "step": 295 }, { "epoch": 0.6134969325153374, "grad_norm": 1.5799461603164673, "learning_rate": 9.659702503968834e-07, "loss": 0.8146, "step": 300 }, { "epoch": 0.6134969325153374, "eval_accuracy": 0.55052, "eval_loss": 0.6896406412124634, "eval_macro_f1": 0.49869783315905847, "eval_precision": 0.6264643684302231, "eval_recall": 0.5665461014402418, "eval_runtime": 80.6145, "eval_samples_per_second": 1240.472, "eval_steps_per_second": 0.608, "step": 300 }, { "epoch": 0.623721881390593, "grad_norm": 1.9373347759246826, "learning_rate": 9.626618143713586e-07, "loss": 0.8166, "step": 305 }, { "epoch": 0.6339468302658486, "grad_norm": 1.6276922225952148, "learning_rate": 9.592061358494813e-07, "loss": 0.8176, "step": 310 }, { "epoch": 0.6441717791411042, "grad_norm": 1.9373250007629395, "learning_rate": 9.556043146037337e-07, "loss": 0.8168, "step": 315 }, { "epoch": 0.65439672801636, "grad_norm": 1.320465087890625, "learning_rate": 9.518574969166391e-07, "loss": 0.8101, "step": 320 }, { "epoch": 0.6646216768916156, "grad_norm": 1.8596330881118774, "learning_rate": 9.47966875215954e-07, "loss": 0.8167, "step": 325 }, { "epoch": 0.6748466257668712, "grad_norm": 1.304662823677063, "learning_rate": 9.439336876951793e-07, "loss": 0.815, "step": 330 }, { "epoch": 0.6850715746421268, "grad_norm": 1.8063029050827026, "learning_rate": 9.397592179195033e-07, "loss": 0.8121, "step": 335 }, { "epoch": 0.6952965235173824, "grad_norm": 1.7432739734649658, "learning_rate": 9.354447944173059e-07, "loss": 0.8104, "step": 340 }, { "epoch": 0.7055214723926381, "grad_norm": 1.4523797035217285, "learning_rate": 9.309917902573533e-07, "loss": 0.8098, "step": 345 }, { "epoch": 0.7157464212678937, "grad_norm": 1.681409478187561, "learning_rate": 9.264016226118188e-07, "loss": 0.8107, "step": 350 }, { "epoch": 0.7259713701431493, "grad_norm": 1.5168694257736206, "learning_rate": 9.216757523052652e-07, "loss": 0.8085, "step": 355 }, { "epoch": 0.7361963190184049, "grad_norm": 1.2200194597244263, "learning_rate": 9.168156833497371e-07, "loss": 0.8109, "step": 360 }, { "epoch": 0.7464212678936605, "grad_norm": 1.2745580673217773, "learning_rate": 9.118229624661078e-07, "loss": 0.8096, "step": 365 }, { "epoch": 0.7566462167689162, "grad_norm": 1.8339142799377441, "learning_rate": 9.066991785918333e-07, "loss": 0.808, "step": 370 }, { "epoch": 0.7668711656441718, "grad_norm": 1.2315114736557007, "learning_rate": 9.01445962375273e-07, "loss": 0.805, "step": 375 }, { "epoch": 0.7770961145194274, "grad_norm": 1.3081412315368652, "learning_rate": 8.960649856567333e-07, "loss": 0.8066, "step": 380 }, { "epoch": 0.787321063394683, "grad_norm": 1.5145998001098633, "learning_rate": 8.90557960936404e-07, "loss": 0.8028, "step": 385 }, { "epoch": 0.7975460122699386, "grad_norm": 1.5990959405899048, "learning_rate": 8.84926640829353e-07, "loss": 0.8035, "step": 390 }, { "epoch": 0.8077709611451943, "grad_norm": 1.2120558023452759, "learning_rate": 8.79172817507756e-07, "loss": 0.802, "step": 395 }, { "epoch": 0.8179959100204499, "grad_norm": 1.5799622535705566, "learning_rate": 8.73298322130535e-07, "loss": 0.8037, "step": 400 }, { "epoch": 0.8179959100204499, "eval_accuracy": 0.58537, "eval_loss": 0.6877263784408569, "eval_macro_f1": 0.5630337315451738, "eval_precision": 0.628845494567806, "eval_recall": 0.5970616303474306, "eval_runtime": 81.293, "eval_samples_per_second": 1230.118, "eval_steps_per_second": 0.603, "step": 400 }, { "epoch": 0.8282208588957055, "grad_norm": 1.3475037813186646, "learning_rate": 8.673050242605921e-07, "loss": 0.8067, "step": 405 }, { "epoch": 0.8384458077709611, "grad_norm": 1.2836309671401978, "learning_rate": 8.611948312698179e-07, "loss": 0.7996, "step": 410 }, { "epoch": 0.8486707566462167, "grad_norm": 1.460316777229309, "learning_rate": 8.5496968773207e-07, "loss": 0.802, "step": 415 }, { "epoch": 0.8588957055214724, "grad_norm": 1.33119797706604, "learning_rate": 8.486315748043109e-07, "loss": 0.798, "step": 420 }, { "epoch": 0.869120654396728, "grad_norm": 1.9951454401016235, "learning_rate": 8.42182509596102e-07, "loss": 0.8013, "step": 425 }, { "epoch": 0.8793456032719836, "grad_norm": 1.2590746879577637, "learning_rate": 8.356245445276584e-07, "loss": 0.7963, "step": 430 }, { "epoch": 0.8895705521472392, "grad_norm": 1.1192667484283447, "learning_rate": 8.28959766676663e-07, "loss": 0.8004, "step": 435 }, { "epoch": 0.8997955010224948, "grad_norm": 1.1180275678634644, "learning_rate": 8.221902971140535e-07, "loss": 0.8041, "step": 440 }, { "epoch": 0.9100204498977505, "grad_norm": 1.1210858821868896, "learning_rate": 8.153182902289897e-07, "loss": 0.7991, "step": 445 }, { "epoch": 0.9202453987730062, "grad_norm": 1.1266220808029175, "learning_rate": 8.083459330432164e-07, "loss": 0.8002, "step": 450 }, { "epoch": 0.9304703476482618, "grad_norm": 1.0373694896697998, "learning_rate": 8.012754445150434e-07, "loss": 0.7974, "step": 455 }, { "epoch": 0.9406952965235174, "grad_norm": 1.2223235368728638, "learning_rate": 7.941090748331589e-07, "loss": 0.8001, "step": 460 }, { "epoch": 0.950920245398773, "grad_norm": 1.4549195766448975, "learning_rate": 7.868491047005065e-07, "loss": 0.7993, "step": 465 }, { "epoch": 0.9611451942740287, "grad_norm": 1.3064852952957153, "learning_rate": 7.794978446084483e-07, "loss": 0.8006, "step": 470 }, { "epoch": 0.9713701431492843, "grad_norm": 1.2408719062805176, "learning_rate": 7.720576341014498e-07, "loss": 0.7983, "step": 475 }, { "epoch": 0.9815950920245399, "grad_norm": 1.2148370742797852, "learning_rate": 7.645308410325187e-07, "loss": 0.7959, "step": 480 }, { "epoch": 0.9918200408997955, "grad_norm": 1.0927603244781494, "learning_rate": 7.569198608096317e-07, "loss": 0.7978, "step": 485 }, { "SWA": "started", "epoch": 1.0, "step": 489 }, { "epoch": 1.0020449897750512, "grad_norm": 1.245108723640442, "learning_rate": 7.492271156333967e-07, "loss": 0.7965, "step": 490 }, { "epoch": 1.0122699386503067, "grad_norm": 1.3393553495407104, "learning_rate": 7.414550537261828e-07, "loss": 0.795, "step": 495 }, { "epoch": 1.0224948875255624, "grad_norm": 1.2823072671890259, "learning_rate": 7.336061485529738e-07, "loss": 0.8014, "step": 500 }, { "epoch": 1.0224948875255624, "eval_accuracy": 0.60723, "eval_loss": 0.6864892244338989, "eval_macro_f1": 0.5966241921587988, "eval_precision": 0.6341761761282843, "eval_recall": 0.6160142746967282, "eval_runtime": 81.931, "eval_samples_per_second": 1220.539, "eval_steps_per_second": 0.598, "step": 500 }, { "epoch": 1.032719836400818, "grad_norm": 1.1278107166290283, "learning_rate": 7.256828980341846e-07, "loss": 0.7977, "step": 505 }, { "epoch": 1.0429447852760736, "grad_norm": 1.110093355178833, "learning_rate": 7.176878237506965e-07, "loss": 0.7954, "step": 510 }, { "epoch": 1.0531697341513293, "grad_norm": 1.2248748540878296, "learning_rate": 7.096234701413617e-07, "loss": 0.7957, "step": 515 }, { "epoch": 1.0633946830265848, "grad_norm": 1.2420642375946045, "learning_rate": 7.014924036932345e-07, "loss": 0.7935, "step": 520 }, { "epoch": 1.0736196319018405, "grad_norm": 1.0777639150619507, "learning_rate": 6.932972121247831e-07, "loss": 0.796, "step": 525 }, { "epoch": 1.0838445807770962, "grad_norm": 1.3830324411392212, "learning_rate": 6.850405035623481e-07, "loss": 0.7929, "step": 530 }, { "epoch": 1.0940695296523517, "grad_norm": 0.9407713413238525, "learning_rate": 6.767249057101023e-07, "loss": 0.7964, "step": 535 }, { "epoch": 1.1042944785276074, "grad_norm": 1.1688194274902344, "learning_rate": 6.683530650137832e-07, "loss": 0.7944, "step": 540 }, { "epoch": 1.114519427402863, "grad_norm": 0.9509923458099365, "learning_rate": 6.599276458184588e-07, "loss": 0.7912, "step": 545 }, { "epoch": 1.1247443762781186, "grad_norm": 1.0683159828186035, "learning_rate": 6.514513295205969e-07, "loss": 0.7931, "step": 550 }, { "epoch": 1.1349693251533743, "grad_norm": 0.9022642374038696, "learning_rate": 6.429268137147104e-07, "loss": 0.7945, "step": 555 }, { "epoch": 1.1451942740286298, "grad_norm": 1.1609984636306763, "learning_rate": 6.343568113348441e-07, "loss": 0.7913, "step": 560 }, { "epoch": 1.1554192229038855, "grad_norm": 1.2184994220733643, "learning_rate": 6.257440497911815e-07, "loss": 0.7919, "step": 565 }, { "epoch": 1.165644171779141, "grad_norm": 1.0256582498550415, "learning_rate": 6.170912701020454e-07, "loss": 0.7912, "step": 570 }, { "epoch": 1.1758691206543967, "grad_norm": 0.8725862503051758, "learning_rate": 6.084012260215645e-07, "loss": 0.7907, "step": 575 }, { "epoch": 1.1860940695296525, "grad_norm": 1.5192348957061768, "learning_rate": 5.996766831632912e-07, "loss": 0.7913, "step": 580 }, { "epoch": 1.196319018404908, "grad_norm": 1.109052062034607, "learning_rate": 5.909204181200414e-07, "loss": 0.795, "step": 585 }, { "epoch": 1.2065439672801637, "grad_norm": 1.0413333177566528, "learning_rate": 5.821352175802419e-07, "loss": 0.7924, "step": 590 }, { "epoch": 1.2167689161554192, "grad_norm": 0.8926281929016113, "learning_rate": 5.733238774410647e-07, "loss": 0.7921, "step": 595 }, { "epoch": 1.2269938650306749, "grad_norm": 0.9231971502304077, "learning_rate": 5.644892019186307e-07, "loss": 0.7894, "step": 600 }, { "epoch": 1.2269938650306749, "eval_accuracy": 0.62182, "eval_loss": 0.6853985786437988, "eval_macro_f1": 0.6195549574374046, "eval_precision": 0.6317310781859349, "eval_recall": 0.6267089641577176, "eval_runtime": 81.4512, "eval_samples_per_second": 1227.728, "eval_steps_per_second": 0.602, "step": 600 }, { "epoch": 1.2372188139059306, "grad_norm": 0.9845394492149353, "learning_rate": 5.556340026555653e-07, "loss": 0.7918, "step": 605 }, { "epoch": 1.247443762781186, "grad_norm": 1.3759487867355347, "learning_rate": 5.467610978261906e-07, "loss": 0.7904, "step": 610 }, { "epoch": 1.2576687116564418, "grad_norm": 1.1568200588226318, "learning_rate": 5.378733112396398e-07, "loss": 0.7923, "step": 615 }, { "epoch": 1.2678936605316973, "grad_norm": 1.4351176023483276, "learning_rate": 5.289734714411775e-07, "loss": 0.7905, "step": 620 }, { "epoch": 1.278118609406953, "grad_norm": 1.178076982498169, "learning_rate": 5.200644108120121e-07, "loss": 0.7947, "step": 625 }, { "epoch": 1.2883435582822087, "grad_norm": 1.2398017644882202, "learning_rate": 5.111489646678896e-07, "loss": 0.796, "step": 630 }, { "epoch": 1.2985685071574642, "grad_norm": 1.1236284971237183, "learning_rate": 5.022299703567508e-07, "loss": 0.7895, "step": 635 }, { "epoch": 1.30879345603272, "grad_norm": 1.0112528800964355, "learning_rate": 4.933102663557439e-07, "loss": 0.79, "step": 640 }, { "epoch": 1.3190184049079754, "grad_norm": 1.3201746940612793, "learning_rate": 4.843926913678757e-07, "loss": 0.7897, "step": 645 }, { "epoch": 1.329243353783231, "grad_norm": 0.969918429851532, "learning_rate": 4.7548008341859384e-07, "loss": 0.7912, "step": 650 }, { "epoch": 1.3394683026584868, "grad_norm": 0.8914945125579834, "learning_rate": 4.665752789525812e-07, "loss": 0.7964, "step": 655 }, { "epoch": 1.3496932515337423, "grad_norm": 0.906989574432373, "learning_rate": 4.576811119310563e-07, "loss": 0.7924, "step": 660 }, { "epoch": 1.359918200408998, "grad_norm": 1.2423877716064453, "learning_rate": 4.488004129298618e-07, "loss": 0.7904, "step": 665 }, { "epoch": 1.3701431492842535, "grad_norm": 1.2455909252166748, "learning_rate": 4.3993600823863256e-07, "loss": 0.7875, "step": 670 }, { "epoch": 1.3803680981595092, "grad_norm": 1.4931528568267822, "learning_rate": 4.3109071896132574e-07, "loss": 0.7947, "step": 675 }, { "epoch": 1.390593047034765, "grad_norm": 1.0538350343704224, "learning_rate": 4.222673601184029e-07, "loss": 0.7886, "step": 680 }, { "epoch": 1.4008179959100204, "grad_norm": 0.9246828556060791, "learning_rate": 4.134687397509467e-07, "loss": 0.7884, "step": 685 }, { "epoch": 1.4110429447852761, "grad_norm": 1.0383715629577637, "learning_rate": 4.0469765802700033e-07, "loss": 0.7943, "step": 690 }, { "epoch": 1.4212678936605316, "grad_norm": 1.0180901288986206, "learning_rate": 3.9595690635041145e-07, "loss": 0.7895, "step": 695 }, { "epoch": 1.4314928425357873, "grad_norm": 0.9119181632995605, "learning_rate": 3.8724926647246536e-07, "loss": 0.7864, "step": 700 }, { "epoch": 1.4314928425357873, "eval_accuracy": 0.62357, "eval_loss": 0.6852650046348572, "eval_macro_f1": 0.6215147432652665, "eval_precision": 0.6330088346022082, "eval_recall": 0.628302383508456, "eval_runtime": 80.5998, "eval_samples_per_second": 1240.698, "eval_steps_per_second": 0.608, "step": 700 }, { "epoch": 1.441717791411043, "grad_norm": 0.8882152438163757, "learning_rate": 3.785775096065909e-07, "loss": 0.7858, "step": 705 }, { "epoch": 1.4519427402862985, "grad_norm": 1.5290203094482422, "learning_rate": 3.699443955464192e-07, "loss": 0.7837, "step": 710 }, { "epoch": 1.4621676891615543, "grad_norm": 0.881521463394165, "learning_rate": 3.613526717874774e-07, "loss": 0.7858, "step": 715 }, { "epoch": 1.4723926380368098, "grad_norm": 0.9955899119377136, "learning_rate": 3.5280507265279555e-07, "loss": 0.7907, "step": 720 }, { "epoch": 1.4826175869120655, "grad_norm": 1.3247544765472412, "learning_rate": 3.443043184227067e-07, "loss": 0.79, "step": 725 }, { "epoch": 1.4928425357873212, "grad_norm": 1.200223445892334, "learning_rate": 3.358531144691148e-07, "loss": 0.7874, "step": 730 }, { "epoch": 1.5030674846625767, "grad_norm": 0.9952226281166077, "learning_rate": 3.2745415039450867e-07, "loss": 0.7874, "step": 735 }, { "epoch": 1.5132924335378322, "grad_norm": 1.2515606880187988, "learning_rate": 3.19110099175993e-07, "loss": 0.789, "step": 740 }, { "epoch": 1.5235173824130879, "grad_norm": 0.8901408314704895, "learning_rate": 3.10823616314612e-07, "loss": 0.7853, "step": 745 }, { "epoch": 1.5337423312883436, "grad_norm": 1.0439373254776, "learning_rate": 3.0259733899023345e-07, "loss": 0.7899, "step": 750 }, { "epoch": 1.5439672801635993, "grad_norm": 1.0658971071243286, "learning_rate": 2.944338852222643e-07, "loss": 0.7868, "step": 755 }, { "epoch": 1.5541922290388548, "grad_norm": 0.927455484867096, "learning_rate": 2.8633585303646413e-07, "loss": 0.7904, "step": 760 }, { "epoch": 1.5644171779141103, "grad_norm": 0.9637423753738403, "learning_rate": 2.783058196381214e-07, "loss": 0.7856, "step": 765 }, { "epoch": 1.574642126789366, "grad_norm": 1.396472692489624, "learning_rate": 2.7034634059185437e-07, "loss": 0.7903, "step": 770 }, { "epoch": 1.5848670756646217, "grad_norm": 0.7922792434692383, "learning_rate": 2.6245994900830257e-07, "loss": 0.7843, "step": 775 }, { "epoch": 1.5950920245398774, "grad_norm": 0.8896881341934204, "learning_rate": 2.546491547379619e-07, "loss": 0.787, "step": 780 }, { "epoch": 1.605316973415133, "grad_norm": 0.8732028007507324, "learning_rate": 2.469164435724212e-07, "loss": 0.7856, "step": 785 }, { "epoch": 1.6155419222903884, "grad_norm": 1.0021744966506958, "learning_rate": 2.3926427645325875e-07, "loss": 0.7867, "step": 790 }, { "epoch": 1.6257668711656441, "grad_norm": 1.1783545017242432, "learning_rate": 2.3169508868884453e-07, "loss": 0.7897, "step": 795 }, { "epoch": 1.6359918200408998, "grad_norm": 0.9119800329208374, "learning_rate": 2.2421128917930243e-07, "loss": 0.7845, "step": 800 }, { "epoch": 1.6359918200408998, "eval_accuracy": 0.62896, "eval_loss": 0.6847647428512573, "eval_macro_f1": 0.6281943240633717, "eval_precision": 0.6346364525627035, "eval_recall": 0.6323959922867678, "eval_runtime": 80.6105, "eval_samples_per_second": 1240.533, "eval_steps_per_second": 0.608, "step": 800 }, { "epoch": 1.6462167689161555, "grad_norm": 0.8903971314430237, "learning_rate": 2.1681525964987474e-07, "loss": 0.7824, "step": 805 }, { "epoch": 1.656441717791411, "grad_norm": 1.115395188331604, "learning_rate": 2.0950935389293656e-07, "loss": 0.7824, "step": 810 }, { "epoch": 1.6666666666666665, "grad_norm": 0.9636144638061523, "learning_rate": 2.022958970189001e-07, "loss": 0.7917, "step": 815 }, { "epoch": 1.6768916155419222, "grad_norm": 0.8787257075309753, "learning_rate": 1.9517718471624532e-07, "loss": 0.7869, "step": 820 }, { "epoch": 1.687116564417178, "grad_norm": 1.0157173871994019, "learning_rate": 1.88155482520916e-07, "loss": 0.7844, "step": 825 }, { "epoch": 1.6973415132924337, "grad_norm": 0.9504719972610474, "learning_rate": 1.812330250953107e-07, "loss": 0.7872, "step": 830 }, { "epoch": 1.7075664621676891, "grad_norm": 0.893625795841217, "learning_rate": 1.7441201551710016e-07, "loss": 0.7879, "step": 835 }, { "epoch": 1.7177914110429446, "grad_norm": 0.8460310101509094, "learning_rate": 1.6769462457809536e-07, "loss": 0.7853, "step": 840 }, { "epoch": 1.7280163599182004, "grad_norm": 0.9349818229675293, "learning_rate": 1.610829900933917e-07, "loss": 0.7862, "step": 845 }, { "epoch": 1.738241308793456, "grad_norm": 0.859866738319397, "learning_rate": 1.545792162210074e-07, "loss": 0.7836, "step": 850 }, { "epoch": 1.7484662576687118, "grad_norm": 1.0148438215255737, "learning_rate": 1.481853727922341e-07, "loss": 0.7859, "step": 855 }, { "epoch": 1.7586912065439673, "grad_norm": 0.8861204385757446, "learning_rate": 1.4190349465291035e-07, "loss": 0.7909, "step": 860 }, { "epoch": 1.7689161554192228, "grad_norm": 0.7679073214530945, "learning_rate": 1.3573558101583105e-07, "loss": 0.785, "step": 865 }, { "epoch": 1.7791411042944785, "grad_norm": 0.7364144325256348, "learning_rate": 1.2968359482449636e-07, "loss": 0.7824, "step": 870 }, { "epoch": 1.7893660531697342, "grad_norm": 0.945924699306488, "learning_rate": 1.2374946212840288e-07, "loss": 0.7864, "step": 875 }, { "epoch": 1.79959100204499, "grad_norm": 1.1060514450073242, "learning_rate": 1.1793507147007714e-07, "loss": 0.7866, "step": 880 }, { "epoch": 1.8098159509202454, "grad_norm": 0.9230445623397827, "learning_rate": 1.1224227328404534e-07, "loss": 0.7895, "step": 885 }, { "epoch": 1.8200408997955009, "grad_norm": 0.9153196811676025, "learning_rate": 1.0667287930793151e-07, "loss": 0.7835, "step": 890 }, { "epoch": 1.8302658486707566, "grad_norm": 0.9513780474662781, "learning_rate": 1.0122866200586944e-07, "loss": 0.7846, "step": 895 }, { "epoch": 1.8404907975460123, "grad_norm": 0.8672247529029846, "learning_rate": 9.591135400441552e-08, "loss": 0.7839, "step": 900 }, { "epoch": 1.8404907975460123, "eval_accuracy": 0.63125, "eval_loss": 0.6845182776451111, "eval_macro_f1": 0.6309538076224105, "eval_precision": 0.6350446377333951, "eval_recall": 0.6339031903992685, "eval_runtime": 80.5646, "eval_samples_per_second": 1241.24, "eval_steps_per_second": 0.608, "step": 900 }, { "epoch": 1.850715746421268, "grad_norm": 1.2127219438552856, "learning_rate": 9.072264754113912e-08, "loss": 0.7876, "step": 905 }, { "epoch": 1.8609406952965235, "grad_norm": 0.875455379486084, "learning_rate": 8.566419392606544e-08, "loss": 0.787, "step": 910 }, { "epoch": 1.871165644171779, "grad_norm": 0.92503821849823, "learning_rate": 8.073760301614596e-08, "loss": 0.7834, "step": 915 }, { "epoch": 1.8813905930470347, "grad_norm": 1.1361068487167358, "learning_rate": 7.594444270291922e-08, "loss": 0.7821, "step": 920 }, { "epoch": 1.8916155419222904, "grad_norm": 1.1415101289749146, "learning_rate": 7.128623841352916e-08, "loss": 0.7877, "step": 925 }, { "epoch": 1.9018404907975461, "grad_norm": 0.9358757138252258, "learning_rate": 6.676447262525547e-08, "loss": 0.7867, "step": 930 }, { "epoch": 1.9120654396728016, "grad_norm": 0.912706732749939, "learning_rate": 6.238058439371479e-08, "loss": 0.7884, "step": 935 }, { "epoch": 1.9222903885480571, "grad_norm": 0.9449842572212219, "learning_rate": 5.813596889488009e-08, "loss": 0.7893, "step": 940 }, { "epoch": 1.9325153374233128, "grad_norm": 0.8449825048446655, "learning_rate": 5.403197698106432e-08, "loss": 0.7828, "step": 945 }, { "epoch": 1.9427402862985685, "grad_norm": 0.9307764768600464, "learning_rate": 5.0069914751010913e-08, "loss": 0.785, "step": 950 }, { "epoch": 1.9529652351738243, "grad_norm": 1.3704556226730347, "learning_rate": 4.625104313422673e-08, "loss": 0.7874, "step": 955 }, { "epoch": 1.9631901840490797, "grad_norm": 1.0163496732711792, "learning_rate": 4.257657748969046e-08, "loss": 0.7834, "step": 960 }, { "epoch": 1.9734151329243352, "grad_norm": 0.8112438321113586, "learning_rate": 3.904768721906304e-08, "loss": 0.7852, "step": 965 }, { "epoch": 1.983640081799591, "grad_norm": 0.885705828666687, "learning_rate": 3.566549539452529e-08, "loss": 0.7792, "step": 970 }, { "epoch": 1.9938650306748467, "grad_norm": 0.8692009449005127, "learning_rate": 3.243107840135878e-08, "loss": 0.7822, "step": 975 }, { "epoch": 2.0040899795501024, "grad_norm": 0.8909807205200195, "learning_rate": 2.9345465595385866e-08, "loss": 0.7826, "step": 980 }, { "epoch": 2.014314928425358, "grad_norm": 0.9065344333648682, "learning_rate": 2.6409638975375737e-08, "loss": 0.7849, "step": 985 }, { "epoch": 2.0245398773006134, "grad_norm": 0.8145809173583984, "learning_rate": 2.3624532870522962e-08, "loss": 0.7885, "step": 990 }, { "epoch": 2.034764826175869, "grad_norm": 0.9461153149604797, "learning_rate": 2.0991033643096457e-08, "loss": 0.7853, "step": 995 }, { "epoch": 2.044989775051125, "grad_norm": 0.8470706343650818, "learning_rate": 1.8509979406353794e-08, "loss": 0.7881, "step": 1000 }, { "epoch": 2.044989775051125, "eval_accuracy": 0.63202, "eval_loss": 0.6844514012336731, "eval_macro_f1": 0.6318036560759084, "eval_precision": 0.6354113747156731, "eval_recall": 0.6344858797364747, "eval_runtime": 81.1838, "eval_samples_per_second": 1231.772, "eval_steps_per_second": 0.604, "step": 1000 }, { "epoch": 2.0552147239263805, "grad_norm": 0.8817445635795593, "learning_rate": 1.6182159757810897e-08, "loss": 0.7879, "step": 1005 }, { "epoch": 2.065439672801636, "grad_norm": 0.856109082698822, "learning_rate": 1.400831552795234e-08, "loss": 0.7868, "step": 1010 }, { "epoch": 2.0756646216768915, "grad_norm": 0.956066370010376, "learning_rate": 1.1989138544461375e-08, "loss": 0.7845, "step": 1015 }, { "epoch": 2.085889570552147, "grad_norm": 0.930978000164032, "learning_rate": 1.0125271412044666e-08, "loss": 0.7876, "step": 1020 }, { "epoch": 2.096114519427403, "grad_norm": 0.9799636602401733, "learning_rate": 8.417307307923615e-09, "loss": 0.7861, "step": 1025 }, { "epoch": 2.1063394683026586, "grad_norm": 0.9991019368171692, "learning_rate": 6.8657897930547435e-09, "loss": 0.7852, "step": 1030 }, { "epoch": 2.116564417177914, "grad_norm": 1.076750636100769, "learning_rate": 5.471212639141132e-09, "loss": 0.7789, "step": 1035 }, { "epoch": 2.1267893660531696, "grad_norm": 0.9805507063865662, "learning_rate": 4.23401967148912e-09, "loss": 0.7829, "step": 1040 }, { "epoch": 2.1370143149284253, "grad_norm": 0.7899750471115112, "learning_rate": 3.154604627760571e-09, "loss": 0.7839, "step": 1045 }, { "epoch": 2.147239263803681, "grad_norm": 1.1698967218399048, "learning_rate": 2.2333110326655526e-09, "loss": 0.7869, "step": 1050 }, { "epoch": 2.1574642126789367, "grad_norm": 0.9302964806556702, "learning_rate": 1.4704320886352873e-09, "loss": 0.7832, "step": 1055 }, { "epoch": 2.1676891615541924, "grad_norm": 1.057986855506897, "learning_rate": 8.662105825103517e-10, "loss": 0.7864, "step": 1060 }, { "epoch": 2.1779141104294477, "grad_norm": 1.0347933769226074, "learning_rate": 4.208388082733161e-10, "loss": 0.7822, "step": 1065 }, { "epoch": 2.1881390593047034, "grad_norm": 0.9827083945274353, "learning_rate": 1.3445850585130924e-10, "loss": 0.784, "step": 1070 }, { "epoch": 2.198364008179959, "grad_norm": 0.8463678956031799, "learning_rate": 7.160816007045767e-12, "loss": 0.7811, "step": 1075 }, { "epoch": 2.208588957055215, "grad_norm": 0.9141009449958801, "learning_rate": 9.999610137486667e-07, "loss": 0.7828, "step": 1080 }, { "epoch": 2.21881390593047, "grad_norm": 0.8992940783500671, "learning_rate": 9.997700753166407e-07, "loss": 0.7843, "step": 1085 }, { "epoch": 2.229038854805726, "grad_norm": 0.9198014140129089, "learning_rate": 9.99420084654225e-07, "loss": 0.7867, "step": 1090 }, { "epoch": 2.2392638036809815, "grad_norm": 0.841385006904602, "learning_rate": 9.98911153146231e-07, "loss": 0.7899, "step": 1095 }, { "epoch": 2.2494887525562373, "grad_norm": 0.9428244233131409, "learning_rate": 9.982434427605222e-07, "loss": 0.783, "step": 1100 }, { "epoch": 2.2494887525562373, "eval_accuracy": 0.63535, "eval_loss": 0.6841139197349548, "eval_macro_f1": 0.6353491904387377, "eval_precision": 0.6368108503242846, "eval_recall": 0.6367719631437929, "eval_runtime": 81.1976, "eval_samples_per_second": 1231.563, "eval_steps_per_second": 0.603, "step": 1100 }, { "epoch": 2.259713701431493, "grad_norm": 0.8474355936050415, "learning_rate": 9.974171659964687e-07, "loss": 0.7805, "step": 1105 }, { "epoch": 2.2699386503067487, "grad_norm": 0.8366284370422363, "learning_rate": 9.964325858173184e-07, "loss": 0.7821, "step": 1110 }, { "epoch": 2.280163599182004, "grad_norm": 1.102426290512085, "learning_rate": 9.952900155665089e-07, "loss": 0.7854, "step": 1115 }, { "epoch": 2.2903885480572597, "grad_norm": 0.8815932273864746, "learning_rate": 9.939898188679462e-07, "loss": 0.7835, "step": 1120 }, { "epoch": 2.3006134969325154, "grad_norm": 0.8016415238380432, "learning_rate": 9.925324095102806e-07, "loss": 0.7842, "step": 1125 }, { "epoch": 2.310838445807771, "grad_norm": 0.8805480599403381, "learning_rate": 9.909182513152177e-07, "loss": 0.7791, "step": 1130 }, { "epoch": 2.3210633946830264, "grad_norm": 0.9736661314964294, "learning_rate": 9.891478579899078e-07, "loss": 0.7825, "step": 1135 }, { "epoch": 2.331288343558282, "grad_norm": 0.8331109285354614, "learning_rate": 9.872217929634573e-07, "loss": 0.7852, "step": 1140 }, { "epoch": 2.341513292433538, "grad_norm": 0.8597177267074585, "learning_rate": 9.851406692076183e-07, "loss": 0.7817, "step": 1145 }, { "epoch": 2.3517382413087935, "grad_norm": 0.7928445339202881, "learning_rate": 9.829051490417071e-07, "loss": 0.7765, "step": 1150 }, { "epoch": 2.361963190184049, "grad_norm": 0.8488237857818604, "learning_rate": 9.80515943921824e-07, "loss": 0.7836, "step": 1155 }, { "epoch": 2.372188139059305, "grad_norm": 0.7608004212379456, "learning_rate": 9.77973814214429e-07, "loss": 0.7834, "step": 1160 }, { "epoch": 2.38241308793456, "grad_norm": 0.8542405962944031, "learning_rate": 9.752795689543563e-07, "loss": 0.7777, "step": 1165 }, { "epoch": 2.392638036809816, "grad_norm": 0.8797897100448608, "learning_rate": 9.72434065587337e-07, "loss": 0.7823, "step": 1170 }, { "epoch": 2.4028629856850716, "grad_norm": 0.9687849283218384, "learning_rate": 9.69438209697118e-07, "loss": 0.7754, "step": 1175 }, { "epoch": 2.4130879345603273, "grad_norm": 0.9111893773078918, "learning_rate": 9.662929547172574e-07, "loss": 0.7806, "step": 1180 }, { "epoch": 2.4233128834355826, "grad_norm": 1.0323760509490967, "learning_rate": 9.629993016276944e-07, "loss": 0.7801, "step": 1185 }, { "epoch": 2.4335378323108383, "grad_norm": 0.79954594373703, "learning_rate": 9.595582986361872e-07, "loss": 0.7781, "step": 1190 }, { "epoch": 2.443762781186094, "grad_norm": 0.7106928825378418, "learning_rate": 9.559710408447184e-07, "loss": 0.7788, "step": 1195 }, { "epoch": 2.4539877300613497, "grad_norm": 0.77292400598526, "learning_rate": 9.522386699009795e-07, "loss": 0.7827, "step": 1200 }, { "epoch": 2.4539877300613497, "eval_accuracy": 0.645, "eval_loss": 0.6828427314758301, "eval_macro_f1": 0.6440359919423964, "eval_precision": 0.6441481409802297, "eval_recall": 0.6439695264773649, "eval_runtime": 81.1775, "eval_samples_per_second": 1231.869, "eval_steps_per_second": 0.604, "step": 1200 }, { "epoch": 2.4642126789366054, "grad_norm": 0.8576335310935974, "learning_rate": 9.483623736350402e-07, "loss": 0.7765, "step": 1205 }, { "epoch": 2.474437627811861, "grad_norm": 0.7940819263458252, "learning_rate": 9.443433856813196e-07, "loss": 0.7744, "step": 1210 }, { "epoch": 2.4846625766871164, "grad_norm": 0.9138656854629517, "learning_rate": 9.401829850859823e-07, "loss": 0.779, "step": 1215 }, { "epoch": 2.494887525562372, "grad_norm": 0.7292961478233337, "learning_rate": 9.358824958998804e-07, "loss": 0.7741, "step": 1220 }, { "epoch": 2.505112474437628, "grad_norm": 0.8346101641654968, "learning_rate": 9.314432867571731e-07, "loss": 0.7769, "step": 1225 }, { "epoch": 2.5153374233128836, "grad_norm": 0.7433446645736694, "learning_rate": 9.268667704397576e-07, "loss": 0.7811, "step": 1230 }, { "epoch": 2.525562372188139, "grad_norm": 0.7142143845558167, "learning_rate": 9.22154403427651e-07, "loss": 0.7739, "step": 1235 }, { "epoch": 2.5357873210633946, "grad_norm": 0.8269698023796082, "learning_rate": 9.173076854354633e-07, "loss": 0.7751, "step": 1240 }, { "epoch": 2.5460122699386503, "grad_norm": 0.7005385160446167, "learning_rate": 9.123281589351127e-07, "loss": 0.7747, "step": 1245 }, { "epoch": 2.556237218813906, "grad_norm": 0.7422548532485962, "learning_rate": 9.072174086649326e-07, "loss": 0.7764, "step": 1250 }, { "epoch": 2.5664621676891617, "grad_norm": 0.7844764590263367, "learning_rate": 9.01977061125327e-07, "loss": 0.7751, "step": 1255 }, { "epoch": 2.5766871165644174, "grad_norm": 0.8993695378303528, "learning_rate": 8.966087840611356e-07, "loss": 0.7771, "step": 1260 }, { "epoch": 2.5869120654396727, "grad_norm": 0.7648841738700867, "learning_rate": 8.911142859308729e-07, "loss": 0.7771, "step": 1265 }, { "epoch": 2.5971370143149284, "grad_norm": 0.789523720741272, "learning_rate": 8.854953153630096e-07, "loss": 0.7732, "step": 1270 }, { "epoch": 2.607361963190184, "grad_norm": 0.7698408365249634, "learning_rate": 8.7975366059947e-07, "loss": 0.769, "step": 1275 }, { "epoch": 2.61758691206544, "grad_norm": 1.019235610961914, "learning_rate": 8.738911489265233e-07, "loss": 0.7768, "step": 1280 }, { "epoch": 2.627811860940695, "grad_norm": 0.8915722966194153, "learning_rate": 8.679096460932475e-07, "loss": 0.774, "step": 1285 }, { "epoch": 2.638036809815951, "grad_norm": 1.0551347732543945, "learning_rate": 8.618110557177536e-07, "loss": 0.7711, "step": 1290 }, { "epoch": 2.6482617586912065, "grad_norm": 0.7630209922790527, "learning_rate": 8.555973186813575e-07, "loss": 0.7724, "step": 1295 }, { "epoch": 2.658486707566462, "grad_norm": 0.6783341765403748, "learning_rate": 8.49270412510893e-07, "loss": 0.773, "step": 1300 }, { "epoch": 2.658486707566462, "eval_accuracy": 0.64275, "eval_loss": 0.6818840503692627, "eval_macro_f1": 0.6366805441223703, "eval_precision": 0.6443671237738225, "eval_recall": 0.6381477730287184, "eval_runtime": 81.1862, "eval_samples_per_second": 1231.736, "eval_steps_per_second": 0.604, "step": 1300 }, { "epoch": 2.668711656441718, "grad_norm": 0.6987424492835999, "learning_rate": 8.428323507493627e-07, "loss": 0.7732, "step": 1305 }, { "epoch": 2.6789366053169736, "grad_norm": 1.0909086465835571, "learning_rate": 8.362851823151251e-07, "loss": 0.7753, "step": 1310 }, { "epoch": 2.689161554192229, "grad_norm": 0.8491300344467163, "learning_rate": 8.296309908498261e-07, "loss": 0.776, "step": 1315 }, { "epoch": 2.6993865030674846, "grad_norm": 0.739740788936615, "learning_rate": 8.228718940552775e-07, "loss": 0.7728, "step": 1320 }, { "epoch": 2.7096114519427403, "grad_norm": 0.8270577192306519, "learning_rate": 8.160100430194969e-07, "loss": 0.7703, "step": 1325 }, { "epoch": 2.719836400817996, "grad_norm": 0.836187481880188, "learning_rate": 8.090476215321226e-07, "loss": 0.7742, "step": 1330 }, { "epoch": 2.7300613496932513, "grad_norm": 0.7952380180358887, "learning_rate": 8.019868453894195e-07, "loss": 0.7736, "step": 1335 }, { "epoch": 2.740286298568507, "grad_norm": 0.8082153797149658, "learning_rate": 7.948299616891019e-07, "loss": 0.7742, "step": 1340 }, { "epoch": 2.7505112474437627, "grad_norm": 0.7107052206993103, "learning_rate": 7.875792481151917e-07, "loss": 0.7703, "step": 1345 }, { "epoch": 2.7607361963190185, "grad_norm": 0.7359179258346558, "learning_rate": 7.802370122131438e-07, "loss": 0.7738, "step": 1350 }, { "epoch": 2.770961145194274, "grad_norm": 0.7772730588912964, "learning_rate": 7.728055906554683e-07, "loss": 0.7715, "step": 1355 }, { "epoch": 2.78118609406953, "grad_norm": 1.2671797275543213, "learning_rate": 7.65287348498082e-07, "loss": 0.7735, "step": 1360 }, { "epoch": 2.791411042944785, "grad_norm": 0.8609771132469177, "learning_rate": 7.576846784276276e-07, "loss": 0.77, "step": 1365 }, { "epoch": 2.801635991820041, "grad_norm": 0.6689844131469727, "learning_rate": 7.500000000000002e-07, "loss": 0.7702, "step": 1370 }, { "epoch": 2.8118609406952966, "grad_norm": 0.80215984582901, "learning_rate": 7.422357588703195e-07, "loss": 0.7782, "step": 1375 }, { "epoch": 2.8220858895705523, "grad_norm": 0.7997626066207886, "learning_rate": 7.343944260146006e-07, "loss": 0.7705, "step": 1380 }, { "epoch": 2.8323108384458076, "grad_norm": 0.7380858659744263, "learning_rate": 7.264784969433624e-07, "loss": 0.7718, "step": 1385 }, { "epoch": 2.8425357873210633, "grad_norm": 0.7241907119750977, "learning_rate": 7.184904909074292e-07, "loss": 0.7668, "step": 1390 }, { "epoch": 2.852760736196319, "grad_norm": 0.7640359401702881, "learning_rate": 7.104329500961783e-07, "loss": 0.7726, "step": 1395 }, { "epoch": 2.8629856850715747, "grad_norm": 0.8806844353675842, "learning_rate": 7.023084388284846e-07, "loss": 0.7712, "step": 1400 }, { "epoch": 2.8629856850715747, "eval_accuracy": 0.65106, "eval_loss": 0.6813496947288513, "eval_macro_f1": 0.6459359532496649, "eval_precision": 0.6523398288087938, "eval_recall": 0.6468889448544942, "eval_runtime": 81.1842, "eval_samples_per_second": 1231.767, "eval_steps_per_second": 0.604, "step": 1400 } ], "logging_steps": 5, "max_steps": 1956, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 100, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.768087094906061e+17, "train_batch_size": 1024, "trial_name": null, "trial_params": null }