{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 3219, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "cosine_similarity": 0, "epoch": 0.0009319664492078285, "grad_norm": 5.171712448049341, "learning_rate": 1.5527950310559006e-07, "loss": 4.6296, "reason_loss": 2.0136165618896484, "step": 1, "utility_loss": 2.615985870361328 }, { "cosine_similarity": 0, "epoch": 0.001863932898415657, "grad_norm": 4.9084978666746375, "learning_rate": 3.1055900621118013e-07, "loss": 4.1935, "reason_loss": 2.020270347595215, "step": 2, "utility_loss": 2.173187017440796 }, { "cosine_similarity": 0, "epoch": 0.0027958993476234857, "grad_norm": 5.612985648681752, "learning_rate": 4.6583850931677024e-07, "loss": 4.7486, "reason_loss": 1.9590039253234863, "step": 3, "utility_loss": 2.7896132469177246 }, { "cosine_similarity": 0, "epoch": 0.003727865796831314, "grad_norm": 6.416128191894315, "learning_rate": 6.211180124223603e-07, "loss": 5.066, "reason_loss": 1.8795961141586304, "step": 4, "utility_loss": 3.186357021331787 }, { "cosine_similarity": 0, "epoch": 0.004659832246039142, "grad_norm": 5.997825789204954, "learning_rate": 7.763975155279503e-07, "loss": 4.9582, "reason_loss": 1.9500539302825928, "step": 5, "utility_loss": 3.0081613063812256 }, { "cosine_similarity": 0, "epoch": 0.005591798695246971, "grad_norm": 5.5207214975007535, "learning_rate": 9.316770186335405e-07, "loss": 4.9419, "reason_loss": 1.9188984632492065, "step": 6, "utility_loss": 3.0229721069335938 }, { "cosine_similarity": 0, "epoch": 0.0065237651444548, "grad_norm": 6.641332972179559, "learning_rate": 1.0869565217391306e-06, "loss": 5.1301, "reason_loss": 1.9163596630096436, "step": 7, "utility_loss": 3.213759183883667 }, { "cosine_similarity": 0, "epoch": 0.007455731593662628, "grad_norm": 6.3859333030298036, "learning_rate": 1.2422360248447205e-06, "loss": 4.9858, "reason_loss": 1.9151531457901, "step": 8, "utility_loss": 3.0706076622009277 }, { "cosine_similarity": 0, "epoch": 0.008387698042870456, "grad_norm": 5.951235763124147, "learning_rate": 1.3975155279503107e-06, "loss": 4.9382, "reason_loss": 1.9041388034820557, "step": 9, "utility_loss": 3.034071445465088 }, { "cosine_similarity": 0, "epoch": 0.009319664492078284, "grad_norm": 5.557522005874533, "learning_rate": 1.5527950310559006e-06, "loss": 4.5539, "reason_loss": 1.8546584844589233, "step": 10, "utility_loss": 2.6991968154907227 }, { "cosine_similarity": 0, "epoch": 0.010251630941286114, "grad_norm": 6.262799552135621, "learning_rate": 1.7080745341614908e-06, "loss": 4.7843, "reason_loss": 1.7738195657730103, "step": 11, "utility_loss": 3.0104751586914062 }, { "cosine_similarity": 0, "epoch": 0.011183597390493943, "grad_norm": 5.3510721187343995, "learning_rate": 1.863354037267081e-06, "loss": 4.8217, "reason_loss": 1.8177037239074707, "step": 12, "utility_loss": 3.0039944648742676 }, { "cosine_similarity": 0, "epoch": 0.012115563839701771, "grad_norm": 6.662364959945718, "learning_rate": 2.018633540372671e-06, "loss": 4.7589, "reason_loss": 1.691407561302185, "step": 13, "utility_loss": 3.0674450397491455 }, { "cosine_similarity": 0, "epoch": 0.0130475302889096, "grad_norm": 5.601090240152511, "learning_rate": 2.173913043478261e-06, "loss": 4.3123, "reason_loss": 1.6647298336029053, "step": 14, "utility_loss": 2.6475865840911865 }, { "cosine_similarity": 0, "epoch": 0.013979496738117428, "grad_norm": 4.556933245136975, "learning_rate": 2.329192546583851e-06, "loss": 3.8003, "reason_loss": 1.5788147449493408, "step": 15, "utility_loss": 2.2214913368225098 }, { "cosine_similarity": 0, "epoch": 0.014911463187325256, "grad_norm": 5.984998194459132, "learning_rate": 2.484472049689441e-06, "loss": 4.5816, "reason_loss": 1.5885989665985107, "step": 16, "utility_loss": 2.9929757118225098 }, { "cosine_similarity": 0, "epoch": 0.015843429636533086, "grad_norm": 4.5430388872287, "learning_rate": 2.639751552795031e-06, "loss": 3.974, "reason_loss": 1.4211088418960571, "step": 17, "utility_loss": 2.552917957305908 }, { "cosine_similarity": 0, "epoch": 0.016775396085740912, "grad_norm": 4.346512231183761, "learning_rate": 2.7950310559006214e-06, "loss": 3.7125, "reason_loss": 1.3518774509429932, "step": 18, "utility_loss": 2.360611915588379 }, { "cosine_similarity": 0, "epoch": 0.017707362534948742, "grad_norm": 4.199316851748084, "learning_rate": 2.9503105590062115e-06, "loss": 4.3199, "reason_loss": 1.293019413948059, "step": 19, "utility_loss": 3.0269148349761963 }, { "cosine_similarity": 0, "epoch": 0.01863932898415657, "grad_norm": 4.426182016910694, "learning_rate": 3.1055900621118013e-06, "loss": 3.6903, "reason_loss": 1.2041611671447754, "step": 20, "utility_loss": 2.486119270324707 }, { "cosine_similarity": 0, "epoch": 0.0195712954333644, "grad_norm": 5.625759956092665, "learning_rate": 3.2608695652173914e-06, "loss": 4.0983, "reason_loss": 1.201762080192566, "step": 21, "utility_loss": 2.896512985229492 }, { "cosine_similarity": 0, "epoch": 0.02050326188257223, "grad_norm": 3.892330309464417, "learning_rate": 3.4161490683229816e-06, "loss": 3.4996, "reason_loss": 1.1266008615493774, "step": 22, "utility_loss": 2.37296199798584 }, { "cosine_similarity": 0, "epoch": 0.021435228331780055, "grad_norm": 3.891994458221883, "learning_rate": 3.5714285714285714e-06, "loss": 3.4104, "reason_loss": 1.0821294784545898, "step": 23, "utility_loss": 2.3282227516174316 }, { "cosine_similarity": 0, "epoch": 0.022367194780987885, "grad_norm": 3.6385492425130375, "learning_rate": 3.726708074534162e-06, "loss": 3.5987, "reason_loss": 1.096297264099121, "step": 24, "utility_loss": 2.502371072769165 }, { "cosine_similarity": 0, "epoch": 0.023299161230195712, "grad_norm": 3.9312509057250686, "learning_rate": 3.881987577639752e-06, "loss": 3.492, "reason_loss": 1.0107749700546265, "step": 25, "utility_loss": 2.4812612533569336 }, { "cosine_similarity": 0, "epoch": 0.024231127679403542, "grad_norm": 4.87855203165763, "learning_rate": 4.037267080745342e-06, "loss": 2.9942, "reason_loss": 1.0036568641662598, "step": 26, "utility_loss": 1.990555763244629 }, { "cosine_similarity": 0, "epoch": 0.02516309412861137, "grad_norm": 3.7776250699269953, "learning_rate": 4.192546583850932e-06, "loss": 3.7395, "reason_loss": 0.9668811559677124, "step": 27, "utility_loss": 2.7725753784179688 }, { "cosine_similarity": 0, "epoch": 0.0260950605778192, "grad_norm": 4.261950118457909, "learning_rate": 4.347826086956522e-06, "loss": 3.3004, "reason_loss": 0.9362485408782959, "step": 28, "utility_loss": 2.364105701446533 }, { "cosine_similarity": 0, "epoch": 0.02702702702702703, "grad_norm": 3.332748998581826, "learning_rate": 4.503105590062112e-06, "loss": 3.4403, "reason_loss": 0.9172824621200562, "step": 29, "utility_loss": 2.523047685623169 }, { "cosine_similarity": 0, "epoch": 0.027958993476234855, "grad_norm": 2.579320146918576, "learning_rate": 4.658385093167702e-06, "loss": 3.0928, "reason_loss": 0.8971108198165894, "step": 30, "utility_loss": 2.195699691772461 }, { "cosine_similarity": 0, "epoch": 0.028890959925442685, "grad_norm": 3.387420003008262, "learning_rate": 4.813664596273292e-06, "loss": 3.3773, "reason_loss": 0.8920701742172241, "step": 31, "utility_loss": 2.485248565673828 }, { "cosine_similarity": 0, "epoch": 0.02982292637465051, "grad_norm": 3.414246554128951, "learning_rate": 4.968944099378882e-06, "loss": 3.2514, "reason_loss": 0.8720953464508057, "step": 32, "utility_loss": 2.3793282508850098 }, { "cosine_similarity": 0, "epoch": 0.03075489282385834, "grad_norm": 3.6750396419291946, "learning_rate": 5.124223602484472e-06, "loss": 3.4279, "reason_loss": 0.8371827006340027, "step": 33, "utility_loss": 2.590669631958008 }, { "cosine_similarity": 0, "epoch": 0.03168685927306617, "grad_norm": 3.3800016587162554, "learning_rate": 5.279503105590062e-06, "loss": 3.1477, "reason_loss": 0.8328840732574463, "step": 34, "utility_loss": 2.3148458003997803 }, { "cosine_similarity": 0, "epoch": 0.032618825722273995, "grad_norm": 3.2564539728903554, "learning_rate": 5.4347826086956525e-06, "loss": 3.5162, "reason_loss": 0.8789810538291931, "step": 35, "utility_loss": 2.637265682220459 }, { "cosine_similarity": 0, "epoch": 0.033550792171481825, "grad_norm": 3.098743117916929, "learning_rate": 5.590062111801243e-06, "loss": 3.2834, "reason_loss": 0.8236759305000305, "step": 36, "utility_loss": 2.4596972465515137 }, { "cosine_similarity": 0, "epoch": 0.034482758620689655, "grad_norm": 3.2370414322273033, "learning_rate": 5.745341614906832e-06, "loss": 3.3855, "reason_loss": 0.8296233415603638, "step": 37, "utility_loss": 2.555920124053955 }, { "cosine_similarity": 0, "epoch": 0.035414725069897485, "grad_norm": 3.200579492204642, "learning_rate": 5.900621118012423e-06, "loss": 3.405, "reason_loss": 0.840951681137085, "step": 38, "utility_loss": 2.564058780670166 }, { "cosine_similarity": 0, "epoch": 0.036346691519105315, "grad_norm": 3.251804746933613, "learning_rate": 6.055900621118013e-06, "loss": 3.4441, "reason_loss": 0.8096835613250732, "step": 39, "utility_loss": 2.6344001293182373 }, { "cosine_similarity": 0, "epoch": 0.03727865796831314, "grad_norm": 3.0560753731290258, "learning_rate": 6.2111801242236025e-06, "loss": 3.3831, "reason_loss": 0.8358979225158691, "step": 40, "utility_loss": 2.547192335128784 }, { "cosine_similarity": 0, "epoch": 0.03821062441752097, "grad_norm": 2.646464867708147, "learning_rate": 6.366459627329193e-06, "loss": 3.0495, "reason_loss": 0.7640071511268616, "step": 41, "utility_loss": 2.2855136394500732 }, { "cosine_similarity": 0, "epoch": 0.0391425908667288, "grad_norm": 3.972390769881573, "learning_rate": 6.521739130434783e-06, "loss": 3.2839, "reason_loss": 0.835463285446167, "step": 42, "utility_loss": 2.4484148025512695 }, { "cosine_similarity": 0, "epoch": 0.04007455731593663, "grad_norm": 2.8840704144374016, "learning_rate": 6.677018633540373e-06, "loss": 3.2226, "reason_loss": 0.7883830070495605, "step": 43, "utility_loss": 2.434232711791992 }, { "cosine_similarity": 0, "epoch": 0.04100652376514446, "grad_norm": 2.9671588082767575, "learning_rate": 6.832298136645963e-06, "loss": 3.2383, "reason_loss": 0.7429959774017334, "step": 44, "utility_loss": 2.4953083992004395 }, { "cosine_similarity": 0, "epoch": 0.04193849021435228, "grad_norm": 3.012506076561981, "learning_rate": 6.9875776397515525e-06, "loss": 3.3408, "reason_loss": 0.8103909492492676, "step": 45, "utility_loss": 2.530407428741455 }, { "cosine_similarity": 0, "epoch": 0.04287045666356011, "grad_norm": 2.908840922750312, "learning_rate": 7.142857142857143e-06, "loss": 2.6505, "reason_loss": 0.768646240234375, "step": 46, "utility_loss": 1.8818316459655762 }, { "cosine_similarity": 0, "epoch": 0.04380242311276794, "grad_norm": 3.277027652339635, "learning_rate": 7.298136645962733e-06, "loss": 3.0633, "reason_loss": 0.8158451318740845, "step": 47, "utility_loss": 2.2474207878112793 }, { "cosine_similarity": 0, "epoch": 0.04473438956197577, "grad_norm": 2.9207229851316243, "learning_rate": 7.453416149068324e-06, "loss": 3.466, "reason_loss": 0.7838253974914551, "step": 48, "utility_loss": 2.682223320007324 }, { "cosine_similarity": 0, "epoch": 0.045666356011183594, "grad_norm": 3.363883234091837, "learning_rate": 7.608695652173914e-06, "loss": 3.1933, "reason_loss": 0.768595814704895, "step": 49, "utility_loss": 2.4247374534606934 }, { "cosine_similarity": 0, "epoch": 0.046598322460391424, "grad_norm": 3.3513772284688383, "learning_rate": 7.763975155279503e-06, "loss": 3.0796, "reason_loss": 0.7085769772529602, "step": 50, "utility_loss": 2.371025323867798 }, { "cosine_similarity": 0, "epoch": 0.047530288909599254, "grad_norm": 5.619957365503719, "learning_rate": 7.919254658385093e-06, "loss": 3.0788, "reason_loss": 0.7149043083190918, "step": 51, "utility_loss": 2.3638863563537598 }, { "cosine_similarity": 0, "epoch": 0.048462255358807084, "grad_norm": 4.093499243655066, "learning_rate": 8.074534161490684e-06, "loss": 2.9897, "reason_loss": 0.7324376702308655, "step": 52, "utility_loss": 2.2572412490844727 }, { "cosine_similarity": 0, "epoch": 0.049394221808014914, "grad_norm": 2.801741369073214, "learning_rate": 8.229813664596275e-06, "loss": 3.3173, "reason_loss": 0.7505319118499756, "step": 53, "utility_loss": 2.5668153762817383 }, { "cosine_similarity": 0, "epoch": 0.05032618825722274, "grad_norm": 3.6677974691709014, "learning_rate": 8.385093167701864e-06, "loss": 3.2218, "reason_loss": 0.7191959023475647, "step": 54, "utility_loss": 2.502640724182129 }, { "cosine_similarity": 0, "epoch": 0.05125815470643057, "grad_norm": 3.572344537135671, "learning_rate": 8.540372670807453e-06, "loss": 3.3413, "reason_loss": 0.795028805732727, "step": 55, "utility_loss": 2.54630184173584 }, { "cosine_similarity": 0, "epoch": 0.0521901211556384, "grad_norm": 2.597540442198312, "learning_rate": 8.695652173913044e-06, "loss": 2.9341, "reason_loss": 0.7008624076843262, "step": 56, "utility_loss": 2.2332518100738525 }, { "cosine_similarity": 0, "epoch": 0.05312208760484623, "grad_norm": 6.369641340733517, "learning_rate": 8.850931677018634e-06, "loss": 3.3771, "reason_loss": 0.7110130786895752, "step": 57, "utility_loss": 2.666043281555176 }, { "cosine_similarity": 0, "epoch": 0.05405405405405406, "grad_norm": 2.5498920674443952, "learning_rate": 9.006211180124225e-06, "loss": 3.2961, "reason_loss": 0.7306719422340393, "step": 58, "utility_loss": 2.565472364425659 }, { "cosine_similarity": 0, "epoch": 0.05498602050326188, "grad_norm": 2.7217077236429454, "learning_rate": 9.161490683229814e-06, "loss": 3.3975, "reason_loss": 0.6788209676742554, "step": 59, "utility_loss": 2.7186994552612305 }, { "cosine_similarity": 0, "epoch": 0.05591798695246971, "grad_norm": 2.6677592631231573, "learning_rate": 9.316770186335403e-06, "loss": 3.3231, "reason_loss": 0.7006375789642334, "step": 60, "utility_loss": 2.6225016117095947 }, { "cosine_similarity": 0, "epoch": 0.05684995340167754, "grad_norm": 3.3485634326845424, "learning_rate": 9.472049689440994e-06, "loss": 2.8577, "reason_loss": 0.7211652994155884, "step": 61, "utility_loss": 2.13651442527771 }, { "cosine_similarity": 0, "epoch": 0.05778191985088537, "grad_norm": 3.2046161382721525, "learning_rate": 9.627329192546584e-06, "loss": 3.0817, "reason_loss": 0.710594892501831, "step": 62, "utility_loss": 2.371091842651367 }, { "cosine_similarity": 0, "epoch": 0.05871388630009319, "grad_norm": 2.430093224418146, "learning_rate": 9.782608695652175e-06, "loss": 2.597, "reason_loss": 0.6731866598129272, "step": 63, "utility_loss": 1.9238008260726929 }, { "cosine_similarity": 0, "epoch": 0.05964585274930102, "grad_norm": 2.96458956758876, "learning_rate": 9.937888198757764e-06, "loss": 3.0624, "reason_loss": 0.7096542119979858, "step": 64, "utility_loss": 2.352698802947998 }, { "cosine_similarity": 0, "epoch": 0.06057781919850885, "grad_norm": 2.508226380930372, "learning_rate": 1.0093167701863353e-05, "loss": 3.1915, "reason_loss": 0.6413389444351196, "step": 65, "utility_loss": 2.550208568572998 }, { "cosine_similarity": 0, "epoch": 0.06150978564771668, "grad_norm": 2.0130462531566264, "learning_rate": 1.0248447204968944e-05, "loss": 2.8815, "reason_loss": 0.707048237323761, "step": 66, "utility_loss": 2.1744492053985596 }, { "cosine_similarity": 0, "epoch": 0.06244175209692451, "grad_norm": 2.620041996474644, "learning_rate": 1.0403726708074535e-05, "loss": 2.9321, "reason_loss": 0.7045909762382507, "step": 67, "utility_loss": 2.227508544921875 }, { "cosine_similarity": 0, "epoch": 0.06337371854613234, "grad_norm": 2.6258327680490643, "learning_rate": 1.0559006211180125e-05, "loss": 2.8116, "reason_loss": 0.6753522157669067, "step": 68, "utility_loss": 2.136213541030884 }, { "cosine_similarity": 0, "epoch": 0.06430568499534017, "grad_norm": 2.9935280245036666, "learning_rate": 1.0714285714285714e-05, "loss": 3.2997, "reason_loss": 0.674191415309906, "step": 69, "utility_loss": 2.6254849433898926 }, { "cosine_similarity": 0, "epoch": 0.06523765144454799, "grad_norm": 2.4708098278724706, "learning_rate": 1.0869565217391305e-05, "loss": 3.1668, "reason_loss": 0.681716799736023, "step": 70, "utility_loss": 2.485119581222534 }, { "cosine_similarity": 0, "epoch": 0.06616961789375582, "grad_norm": 2.452225819147753, "learning_rate": 1.1024844720496894e-05, "loss": 3.211, "reason_loss": 0.6810117959976196, "step": 71, "utility_loss": 2.5300025939941406 }, { "cosine_similarity": 0, "epoch": 0.06710158434296365, "grad_norm": 2.505626447774423, "learning_rate": 1.1180124223602485e-05, "loss": 3.0791, "reason_loss": 0.690954327583313, "step": 72, "utility_loss": 2.388127565383911 }, { "cosine_similarity": 0, "epoch": 0.06803355079217148, "grad_norm": 2.958045307516623, "learning_rate": 1.1335403726708076e-05, "loss": 3.2258, "reason_loss": 0.6671375036239624, "step": 73, "utility_loss": 2.558684825897217 }, { "cosine_similarity": 0, "epoch": 0.06896551724137931, "grad_norm": 2.3367811720272633, "learning_rate": 1.1490683229813664e-05, "loss": 2.8338, "reason_loss": 0.6562087535858154, "step": 74, "utility_loss": 2.1776161193847656 }, { "cosine_similarity": 0, "epoch": 0.06989748369058714, "grad_norm": 3.7242552612748545, "learning_rate": 1.1645962732919255e-05, "loss": 3.0074, "reason_loss": 0.671761155128479, "step": 75, "utility_loss": 2.3356332778930664 }, { "cosine_similarity": 0, "epoch": 0.07082945013979497, "grad_norm": 2.4285284367398035, "learning_rate": 1.1801242236024846e-05, "loss": 2.8742, "reason_loss": 0.688969612121582, "step": 76, "utility_loss": 2.1852505207061768 }, { "cosine_similarity": 0, "epoch": 0.0717614165890028, "grad_norm": 2.285315076684582, "learning_rate": 1.1956521739130435e-05, "loss": 2.4674, "reason_loss": 0.6887001991271973, "step": 77, "utility_loss": 1.7787227630615234 }, { "cosine_similarity": 0, "epoch": 0.07269338303821063, "grad_norm": 2.9856020181981036, "learning_rate": 1.2111801242236026e-05, "loss": 2.935, "reason_loss": 0.6424647569656372, "step": 78, "utility_loss": 2.292525053024292 }, { "cosine_similarity": 0, "epoch": 0.07362534948741846, "grad_norm": 2.796404860343675, "learning_rate": 1.2267080745341616e-05, "loss": 2.8014, "reason_loss": 0.6334725022315979, "step": 79, "utility_loss": 2.1679577827453613 }, { "cosine_similarity": 0, "epoch": 0.07455731593662628, "grad_norm": 2.077307504373672, "learning_rate": 1.2422360248447205e-05, "loss": 2.7526, "reason_loss": 0.6897599697113037, "step": 80, "utility_loss": 2.062803268432617 }, { "cosine_similarity": 0, "epoch": 0.0754892823858341, "grad_norm": 3.1305435316977057, "learning_rate": 1.2577639751552794e-05, "loss": 2.9538, "reason_loss": 0.6715086698532104, "step": 81, "utility_loss": 2.28228759765625 }, { "cosine_similarity": 0, "epoch": 0.07642124883504194, "grad_norm": 2.4821759652428015, "learning_rate": 1.2732919254658385e-05, "loss": 3.0208, "reason_loss": 0.644418478012085, "step": 82, "utility_loss": 2.376352548599243 }, { "cosine_similarity": 0, "epoch": 0.07735321528424977, "grad_norm": 2.8438898916908264, "learning_rate": 1.2888198757763975e-05, "loss": 2.6497, "reason_loss": 0.647688090801239, "step": 83, "utility_loss": 2.0020151138305664 }, { "cosine_similarity": 0, "epoch": 0.0782851817334576, "grad_norm": 1.7073888065459784, "learning_rate": 1.3043478260869566e-05, "loss": 2.654, "reason_loss": 0.6590896844863892, "step": 84, "utility_loss": 1.9949254989624023 }, { "cosine_similarity": 0, "epoch": 0.07921714818266543, "grad_norm": 2.7619040851875813, "learning_rate": 1.3198757763975155e-05, "loss": 2.7152, "reason_loss": 0.6539163589477539, "step": 85, "utility_loss": 2.061284303665161 }, { "cosine_similarity": 0, "epoch": 0.08014911463187326, "grad_norm": 2.138241830149387, "learning_rate": 1.3354037267080746e-05, "loss": 3.1923, "reason_loss": 0.6738722324371338, "step": 86, "utility_loss": 2.518476963043213 }, { "cosine_similarity": 0, "epoch": 0.08108108108108109, "grad_norm": 2.453591771715559, "learning_rate": 1.3509316770186337e-05, "loss": 3.4838, "reason_loss": 0.6733725070953369, "step": 87, "utility_loss": 2.8104453086853027 }, { "cosine_similarity": 0, "epoch": 0.08201304753028892, "grad_norm": 2.6320954531380156, "learning_rate": 1.3664596273291926e-05, "loss": 3.0613, "reason_loss": 0.6789976954460144, "step": 88, "utility_loss": 2.3823049068450928 }, { "cosine_similarity": 0, "epoch": 0.08294501397949673, "grad_norm": 2.1932201560187212, "learning_rate": 1.3819875776397517e-05, "loss": 2.9647, "reason_loss": 0.7001315951347351, "step": 89, "utility_loss": 2.2645466327667236 }, { "cosine_similarity": 0, "epoch": 0.08387698042870456, "grad_norm": 2.3088074423061813, "learning_rate": 1.3975155279503105e-05, "loss": 2.5572, "reason_loss": 0.6097785234451294, "step": 90, "utility_loss": 1.9474167823791504 }, { "cosine_similarity": 0, "epoch": 0.08480894687791239, "grad_norm": 1.9364104347329165, "learning_rate": 1.4130434782608694e-05, "loss": 2.6561, "reason_loss": 0.6615878343582153, "step": 91, "utility_loss": 1.994476318359375 }, { "cosine_similarity": 0, "epoch": 0.08574091332712022, "grad_norm": 2.2478572438728923, "learning_rate": 1.4285714285714285e-05, "loss": 2.575, "reason_loss": 0.6466121673583984, "step": 92, "utility_loss": 1.9284162521362305 }, { "cosine_similarity": 0, "epoch": 0.08667287977632805, "grad_norm": 2.7261380185190305, "learning_rate": 1.4440993788819876e-05, "loss": 2.8331, "reason_loss": 0.6488088369369507, "step": 93, "utility_loss": 2.1843113899230957 }, { "cosine_similarity": 0, "epoch": 0.08760484622553588, "grad_norm": 2.6388825312685857, "learning_rate": 1.4596273291925466e-05, "loss": 2.7692, "reason_loss": 0.6453490853309631, "step": 94, "utility_loss": 2.1238887310028076 }, { "cosine_similarity": 0, "epoch": 0.08853681267474371, "grad_norm": 2.439468735245749, "learning_rate": 1.4751552795031057e-05, "loss": 2.8695, "reason_loss": 0.6328833103179932, "step": 95, "utility_loss": 2.236652374267578 }, { "cosine_similarity": 0, "epoch": 0.08946877912395154, "grad_norm": 2.5214077978444265, "learning_rate": 1.4906832298136648e-05, "loss": 2.9704, "reason_loss": 0.6670979261398315, "step": 96, "utility_loss": 2.3032889366149902 }, { "cosine_similarity": 0, "epoch": 0.09040074557315937, "grad_norm": 1.974854153557767, "learning_rate": 1.5062111801242237e-05, "loss": 2.6111, "reason_loss": 0.589017391204834, "step": 97, "utility_loss": 2.022090435028076 }, { "cosine_similarity": 0, "epoch": 0.09133271202236719, "grad_norm": 2.1576182004353868, "learning_rate": 1.5217391304347828e-05, "loss": 2.3665, "reason_loss": 0.6097136735916138, "step": 98, "utility_loss": 1.7567529678344727 }, { "cosine_similarity": 0, "epoch": 0.09226467847157502, "grad_norm": 2.4864601144902747, "learning_rate": 1.537267080745342e-05, "loss": 2.8854, "reason_loss": 0.6512236595153809, "step": 99, "utility_loss": 2.234175682067871 }, { "cosine_similarity": 0, "epoch": 0.09319664492078285, "grad_norm": 2.382590674482861, "learning_rate": 1.5527950310559007e-05, "loss": 2.6491, "reason_loss": 0.6448917984962463, "step": 100, "utility_loss": 2.0041921138763428 }, { "cosine_similarity": 0, "epoch": 0.09412861136999068, "grad_norm": 2.3295096335946446, "learning_rate": 1.5683229813664594e-05, "loss": 2.6103, "reason_loss": 0.6322693824768066, "step": 101, "utility_loss": 1.9780324697494507 }, { "cosine_similarity": 0, "epoch": 0.09506057781919851, "grad_norm": 2.4585907556807083, "learning_rate": 1.5838509316770185e-05, "loss": 2.7286, "reason_loss": 0.6161856651306152, "step": 102, "utility_loss": 2.1124072074890137 }, { "cosine_similarity": 0, "epoch": 0.09599254426840634, "grad_norm": 2.5755821068747093, "learning_rate": 1.5993788819875776e-05, "loss": 2.7421, "reason_loss": 0.6408790349960327, "step": 103, "utility_loss": 2.1012203693389893 }, { "cosine_similarity": 0, "epoch": 0.09692451071761417, "grad_norm": 1.8406426176552433, "learning_rate": 1.6149068322981367e-05, "loss": 2.5275, "reason_loss": 0.6320089101791382, "step": 104, "utility_loss": 1.8954819440841675 }, { "cosine_similarity": 0, "epoch": 0.097856477166822, "grad_norm": 2.7074181956982386, "learning_rate": 1.630434782608696e-05, "loss": 2.8717, "reason_loss": 0.6013627648353577, "step": 105, "utility_loss": 2.2703399658203125 }, { "cosine_similarity": 0, "epoch": 0.09878844361602983, "grad_norm": 2.318935011345901, "learning_rate": 1.645962732919255e-05, "loss": 3.2647, "reason_loss": 0.6251238584518433, "step": 106, "utility_loss": 2.639573335647583 }, { "cosine_similarity": 0, "epoch": 0.09972041006523766, "grad_norm": 2.0680667419269874, "learning_rate": 1.6614906832298137e-05, "loss": 2.5092, "reason_loss": 0.6087495684623718, "step": 107, "utility_loss": 1.9004161357879639 }, { "cosine_similarity": 0, "epoch": 0.10065237651444547, "grad_norm": 3.0862601396256113, "learning_rate": 1.6770186335403728e-05, "loss": 2.8073, "reason_loss": 0.6623332500457764, "step": 108, "utility_loss": 2.1449317932128906 }, { "cosine_similarity": 0, "epoch": 0.1015843429636533, "grad_norm": 2.0762418842478607, "learning_rate": 1.6925465838509316e-05, "loss": 2.3466, "reason_loss": 0.6048797965049744, "step": 109, "utility_loss": 1.7417500019073486 }, { "cosine_similarity": 0, "epoch": 0.10251630941286113, "grad_norm": 2.4513081885567924, "learning_rate": 1.7080745341614907e-05, "loss": 2.5111, "reason_loss": 0.6355952024459839, "step": 110, "utility_loss": 1.8755111694335938 }, { "cosine_similarity": 0, "epoch": 0.10344827586206896, "grad_norm": 2.2311396630369584, "learning_rate": 1.7236024844720498e-05, "loss": 2.7171, "reason_loss": 0.6091477274894714, "step": 111, "utility_loss": 2.107968807220459 }, { "cosine_similarity": 0, "epoch": 0.1043802423112768, "grad_norm": 2.2023363062375205, "learning_rate": 1.739130434782609e-05, "loss": 2.6951, "reason_loss": 0.6673604249954224, "step": 112, "utility_loss": 2.0276987552642822 }, { "cosine_similarity": 0, "epoch": 0.10531220876048462, "grad_norm": 3.096228028197411, "learning_rate": 1.7546583850931676e-05, "loss": 3.1732, "reason_loss": 0.6398952007293701, "step": 113, "utility_loss": 2.5333473682403564 }, { "cosine_similarity": 0, "epoch": 0.10624417520969245, "grad_norm": 2.2545526114698795, "learning_rate": 1.7701863354037267e-05, "loss": 2.5694, "reason_loss": 0.597078800201416, "step": 114, "utility_loss": 1.9723340272903442 }, { "cosine_similarity": 0, "epoch": 0.10717614165890028, "grad_norm": 2.7289681671750574, "learning_rate": 1.785714285714286e-05, "loss": 3.5796, "reason_loss": 0.6026936173439026, "step": 115, "utility_loss": 2.9768612384796143 }, { "cosine_similarity": 0, "epoch": 0.10810810810810811, "grad_norm": 2.4321608965498065, "learning_rate": 1.801242236024845e-05, "loss": 2.5578, "reason_loss": 0.604581356048584, "step": 116, "utility_loss": 1.9532043933868408 }, { "cosine_similarity": 0, "epoch": 0.10904007455731593, "grad_norm": 2.022510786946123, "learning_rate": 1.816770186335404e-05, "loss": 2.9012, "reason_loss": 0.6105254888534546, "step": 117, "utility_loss": 2.290656089782715 }, { "cosine_similarity": 0, "epoch": 0.10997204100652376, "grad_norm": 2.3392038902604293, "learning_rate": 1.8322981366459628e-05, "loss": 2.7298, "reason_loss": 0.6153237223625183, "step": 118, "utility_loss": 2.114452362060547 }, { "cosine_similarity": 0, "epoch": 0.11090400745573159, "grad_norm": 3.0528860300952054, "learning_rate": 1.8478260869565216e-05, "loss": 2.8074, "reason_loss": 0.611592173576355, "step": 119, "utility_loss": 2.1957621574401855 }, { "cosine_similarity": 0, "epoch": 0.11183597390493942, "grad_norm": 1.8032615162063519, "learning_rate": 1.8633540372670807e-05, "loss": 2.5153, "reason_loss": 0.6038700342178345, "step": 120, "utility_loss": 1.9114298820495605 }, { "cosine_similarity": 0, "epoch": 0.11276794035414725, "grad_norm": 1.9358410872151632, "learning_rate": 1.8788819875776398e-05, "loss": 2.859, "reason_loss": 0.6315078735351562, "step": 121, "utility_loss": 2.2275280952453613 }, { "cosine_similarity": 0, "epoch": 0.11369990680335508, "grad_norm": 4.245881012350622, "learning_rate": 1.894409937888199e-05, "loss": 2.4417, "reason_loss": 0.6283017992973328, "step": 122, "utility_loss": 1.813443660736084 }, { "cosine_similarity": 0, "epoch": 0.11463187325256291, "grad_norm": 2.1894450122826172, "learning_rate": 1.909937888198758e-05, "loss": 2.7898, "reason_loss": 0.5862406492233276, "step": 123, "utility_loss": 2.2035787105560303 }, { "cosine_similarity": 0, "epoch": 0.11556383970177074, "grad_norm": 2.691699521435998, "learning_rate": 1.9254658385093167e-05, "loss": 2.5515, "reason_loss": 0.6002707481384277, "step": 124, "utility_loss": 1.951228380203247 }, { "cosine_similarity": 0, "epoch": 0.11649580615097857, "grad_norm": 2.5139554182917276, "learning_rate": 1.940993788819876e-05, "loss": 2.8062, "reason_loss": 0.5721983909606934, "step": 125, "utility_loss": 2.2339887619018555 }, { "cosine_similarity": 0, "epoch": 0.11742777260018639, "grad_norm": 2.7138073625183874, "learning_rate": 1.956521739130435e-05, "loss": 2.9911, "reason_loss": 0.6168317198753357, "step": 126, "utility_loss": 2.374302864074707 }, { "cosine_similarity": 0, "epoch": 0.11835973904939422, "grad_norm": 2.4909021316325477, "learning_rate": 1.972049689440994e-05, "loss": 2.7905, "reason_loss": 0.6194929480552673, "step": 127, "utility_loss": 2.171020984649658 }, { "cosine_similarity": 0, "epoch": 0.11929170549860205, "grad_norm": 2.3560667441869008, "learning_rate": 1.9875776397515528e-05, "loss": 2.6872, "reason_loss": 0.6098010540008545, "step": 128, "utility_loss": 2.077359676361084 }, { "cosine_similarity": 0, "epoch": 0.12022367194780988, "grad_norm": 2.4022333648343372, "learning_rate": 2.003105590062112e-05, "loss": 2.8902, "reason_loss": 0.5987846851348877, "step": 129, "utility_loss": 2.2914295196533203 }, { "cosine_similarity": 0, "epoch": 0.1211556383970177, "grad_norm": 3.0172975037337033, "learning_rate": 2.0186335403726707e-05, "loss": 3.0669, "reason_loss": 0.6097142100334167, "step": 130, "utility_loss": 2.4571847915649414 }, { "cosine_similarity": 0, "epoch": 0.12208760484622554, "grad_norm": 2.6765346380401334, "learning_rate": 2.0341614906832298e-05, "loss": 2.7568, "reason_loss": 0.5911454558372498, "step": 131, "utility_loss": 2.1656222343444824 }, { "cosine_similarity": 0, "epoch": 0.12301957129543337, "grad_norm": 2.032681324884587, "learning_rate": 2.049689440993789e-05, "loss": 2.7803, "reason_loss": 0.6266502737998962, "step": 132, "utility_loss": 2.15366792678833 }, { "cosine_similarity": 0, "epoch": 0.1239515377446412, "grad_norm": 1.8693835117451096, "learning_rate": 2.065217391304348e-05, "loss": 2.4565, "reason_loss": 0.6171969771385193, "step": 133, "utility_loss": 1.8392715454101562 }, { "cosine_similarity": 0, "epoch": 0.12488350419384903, "grad_norm": 2.7524877158917227, "learning_rate": 2.080745341614907e-05, "loss": 3.0117, "reason_loss": 0.646472692489624, "step": 134, "utility_loss": 2.3652031421661377 }, { "cosine_similarity": 0, "epoch": 0.12581547064305684, "grad_norm": 3.4191071812453275, "learning_rate": 2.096273291925466e-05, "loss": 2.8536, "reason_loss": 0.580490231513977, "step": 135, "utility_loss": 2.2731008529663086 }, { "cosine_similarity": 0, "epoch": 0.1267474370922647, "grad_norm": 2.07929189823544, "learning_rate": 2.111801242236025e-05, "loss": 2.3633, "reason_loss": 0.5835025906562805, "step": 136, "utility_loss": 1.7797826528549194 }, { "cosine_similarity": 0, "epoch": 0.1276794035414725, "grad_norm": 2.336576181867284, "learning_rate": 2.127329192546584e-05, "loss": 2.4734, "reason_loss": 0.5914127230644226, "step": 137, "utility_loss": 1.8819609880447388 }, { "cosine_similarity": 0, "epoch": 0.12861136999068035, "grad_norm": 2.346424571360728, "learning_rate": 2.1428571428571428e-05, "loss": 2.9078, "reason_loss": 0.60760498046875, "step": 138, "utility_loss": 2.300218343734741 }, { "cosine_similarity": 0, "epoch": 0.12954333643988816, "grad_norm": 2.085082364512821, "learning_rate": 2.158385093167702e-05, "loss": 2.6166, "reason_loss": 0.5788588523864746, "step": 139, "utility_loss": 2.037783622741699 }, { "cosine_similarity": 0, "epoch": 0.13047530288909598, "grad_norm": 2.233028574372523, "learning_rate": 2.173913043478261e-05, "loss": 2.6278, "reason_loss": 0.6042324900627136, "step": 140, "utility_loss": 2.023557186126709 }, { "cosine_similarity": 0, "epoch": 0.13140726933830382, "grad_norm": 2.201754218929585, "learning_rate": 2.1894409937888198e-05, "loss": 2.8541, "reason_loss": 0.6556736826896667, "step": 141, "utility_loss": 2.1984357833862305 }, { "cosine_similarity": 0, "epoch": 0.13233923578751164, "grad_norm": 2.179427572797279, "learning_rate": 2.204968944099379e-05, "loss": 2.6583, "reason_loss": 0.608805239200592, "step": 142, "utility_loss": 2.049468517303467 }, { "cosine_similarity": 0, "epoch": 0.13327120223671948, "grad_norm": 2.6370763283852283, "learning_rate": 2.220496894409938e-05, "loss": 2.5786, "reason_loss": 0.585275411605835, "step": 143, "utility_loss": 1.9932770729064941 }, { "cosine_similarity": 0, "epoch": 0.1342031686859273, "grad_norm": 1.975711978742075, "learning_rate": 2.236024844720497e-05, "loss": 2.0894, "reason_loss": 0.6483074426651001, "step": 144, "utility_loss": 1.441128134727478 }, { "cosine_similarity": 0, "epoch": 0.13513513513513514, "grad_norm": 1.8587681433484238, "learning_rate": 2.2515527950310562e-05, "loss": 2.5085, "reason_loss": 0.6189985275268555, "step": 145, "utility_loss": 1.8895106315612793 }, { "cosine_similarity": 0, "epoch": 0.13606710158434296, "grad_norm": 1.735013515589718, "learning_rate": 2.2670807453416153e-05, "loss": 2.8147, "reason_loss": 0.6026166677474976, "step": 146, "utility_loss": 2.2121262550354004 }, { "cosine_similarity": 0, "epoch": 0.1369990680335508, "grad_norm": 1.9625932469356406, "learning_rate": 2.282608695652174e-05, "loss": 2.7251, "reason_loss": 0.6110028028488159, "step": 147, "utility_loss": 2.1140518188476562 }, { "cosine_similarity": 0, "epoch": 0.13793103448275862, "grad_norm": 2.470100163088774, "learning_rate": 2.2981366459627328e-05, "loss": 3.0385, "reason_loss": 0.5525586009025574, "step": 148, "utility_loss": 2.4859745502471924 }, { "cosine_similarity": 0, "epoch": 0.13886300093196646, "grad_norm": 2.154162324772856, "learning_rate": 2.313664596273292e-05, "loss": 2.9127, "reason_loss": 0.5716983079910278, "step": 149, "utility_loss": 2.34104061126709 }, { "cosine_similarity": 0, "epoch": 0.13979496738117428, "grad_norm": 1.5299840258575421, "learning_rate": 2.329192546583851e-05, "loss": 2.2319, "reason_loss": 0.6360281705856323, "step": 150, "utility_loss": 1.5958232879638672 }, { "cosine_similarity": 0, "epoch": 0.1407269338303821, "grad_norm": 1.9799969849044075, "learning_rate": 2.34472049689441e-05, "loss": 2.5059, "reason_loss": 0.6281657218933105, "step": 151, "utility_loss": 1.8777374029159546 }, { "cosine_similarity": 0, "epoch": 0.14165890027958994, "grad_norm": 1.888566824680166, "learning_rate": 2.3602484472049692e-05, "loss": 2.5569, "reason_loss": 0.6096579432487488, "step": 152, "utility_loss": 1.947288155555725 }, { "cosine_similarity": 0, "epoch": 0.14259086672879775, "grad_norm": 2.4860321829900984, "learning_rate": 2.375776397515528e-05, "loss": 2.5413, "reason_loss": 0.5935897827148438, "step": 153, "utility_loss": 1.9477182626724243 }, { "cosine_similarity": 0, "epoch": 0.1435228331780056, "grad_norm": 2.17821089152299, "learning_rate": 2.391304347826087e-05, "loss": 2.7019, "reason_loss": 0.6104370355606079, "step": 154, "utility_loss": 2.0914621353149414 }, { "cosine_similarity": 0, "epoch": 0.14445479962721341, "grad_norm": 3.0242102635360553, "learning_rate": 2.4068322981366462e-05, "loss": 2.7355, "reason_loss": 0.5820596218109131, "step": 155, "utility_loss": 2.1534159183502197 }, { "cosine_similarity": 0, "epoch": 0.14538676607642126, "grad_norm": 2.1683144612517675, "learning_rate": 2.4223602484472053e-05, "loss": 2.7057, "reason_loss": 0.5701632499694824, "step": 156, "utility_loss": 2.1355018615722656 }, { "cosine_similarity": 0, "epoch": 0.14631873252562907, "grad_norm": 2.3402728103080697, "learning_rate": 2.437888198757764e-05, "loss": 2.4603, "reason_loss": 0.5700958967208862, "step": 157, "utility_loss": 1.8902158737182617 }, { "cosine_similarity": 0, "epoch": 0.14725069897483692, "grad_norm": 2.019055467271246, "learning_rate": 2.453416149068323e-05, "loss": 2.4272, "reason_loss": 0.5990322232246399, "step": 158, "utility_loss": 1.828209638595581 }, { "cosine_similarity": 0, "epoch": 0.14818266542404473, "grad_norm": 2.6773454935072953, "learning_rate": 2.468944099378882e-05, "loss": 2.6617, "reason_loss": 0.590153694152832, "step": 159, "utility_loss": 2.0715084075927734 }, { "cosine_similarity": 0, "epoch": 0.14911463187325255, "grad_norm": 2.483092569383316, "learning_rate": 2.484472049689441e-05, "loss": 2.9441, "reason_loss": 0.5767627954483032, "step": 160, "utility_loss": 2.3673365116119385 }, { "cosine_similarity": 0, "epoch": 0.1500465983224604, "grad_norm": 2.014264061079864, "learning_rate": 2.5e-05, "loss": 2.6082, "reason_loss": 0.5756080746650696, "step": 161, "utility_loss": 2.032557487487793 }, { "cosine_similarity": 0, "epoch": 0.1509785647716682, "grad_norm": 2.2466892046518767, "learning_rate": 2.515527950310559e-05, "loss": 2.6308, "reason_loss": 0.585617184638977, "step": 162, "utility_loss": 2.0451412200927734 }, { "cosine_similarity": 0, "epoch": 0.15191053122087605, "grad_norm": 3.109171993357474, "learning_rate": 2.5310559006211183e-05, "loss": 2.8423, "reason_loss": 0.5954612493515015, "step": 163, "utility_loss": 2.2468795776367188 }, { "cosine_similarity": 0, "epoch": 0.15284249767008387, "grad_norm": 2.158242781012718, "learning_rate": 2.546583850931677e-05, "loss": 2.8018, "reason_loss": 0.6151527166366577, "step": 164, "utility_loss": 2.186641216278076 }, { "cosine_similarity": 0, "epoch": 0.15377446411929171, "grad_norm": 1.8993227413155296, "learning_rate": 2.5621118012422362e-05, "loss": 2.4177, "reason_loss": 0.6044272184371948, "step": 165, "utility_loss": 1.8132976293563843 }, { "cosine_similarity": 0, "epoch": 0.15470643056849953, "grad_norm": 2.4018262218903517, "learning_rate": 2.577639751552795e-05, "loss": 2.6716, "reason_loss": 0.587182879447937, "step": 166, "utility_loss": 2.084460735321045 }, { "cosine_similarity": 0, "epoch": 0.15563839701770738, "grad_norm": 2.11224059027666, "learning_rate": 2.5931677018633544e-05, "loss": 2.6542, "reason_loss": 0.5639519691467285, "step": 167, "utility_loss": 2.0902624130249023 }, { "cosine_similarity": 0, "epoch": 0.1565703634669152, "grad_norm": 2.393171453081305, "learning_rate": 2.608695652173913e-05, "loss": 2.6116, "reason_loss": 0.5848773121833801, "step": 168, "utility_loss": 2.026731014251709 }, { "cosine_similarity": 0, "epoch": 0.157502329916123, "grad_norm": 2.2499443235154786, "learning_rate": 2.6242236024844723e-05, "loss": 2.7026, "reason_loss": 0.5398585796356201, "step": 169, "utility_loss": 2.1627626419067383 }, { "cosine_similarity": 0, "epoch": 0.15843429636533085, "grad_norm": 1.577836705973763, "learning_rate": 2.639751552795031e-05, "loss": 2.0415, "reason_loss": 0.5573697090148926, "step": 170, "utility_loss": 1.4841787815093994 }, { "cosine_similarity": 0, "epoch": 0.15936626281453867, "grad_norm": 1.8136346500608957, "learning_rate": 2.6552795031055898e-05, "loss": 2.4602, "reason_loss": 0.5757802724838257, "step": 171, "utility_loss": 1.884374737739563 }, { "cosine_similarity": 0, "epoch": 0.1602982292637465, "grad_norm": 1.96577207939629, "learning_rate": 2.6708074534161492e-05, "loss": 2.8247, "reason_loss": 0.5703526735305786, "step": 172, "utility_loss": 2.254305839538574 }, { "cosine_similarity": 0, "epoch": 0.16123019571295433, "grad_norm": 2.2496441883154397, "learning_rate": 2.686335403726708e-05, "loss": 2.4962, "reason_loss": 0.6230277419090271, "step": 173, "utility_loss": 1.873192548751831 }, { "cosine_similarity": 0, "epoch": 0.16216216216216217, "grad_norm": 1.8983985705426845, "learning_rate": 2.7018633540372674e-05, "loss": 2.1818, "reason_loss": 0.5567182302474976, "step": 174, "utility_loss": 1.6250839233398438 }, { "cosine_similarity": 0, "epoch": 0.16309412861137, "grad_norm": 1.846746307913506, "learning_rate": 2.7173913043478262e-05, "loss": 2.5043, "reason_loss": 0.5791695713996887, "step": 175, "utility_loss": 1.9251105785369873 }, { "cosine_similarity": 0, "epoch": 0.16402609506057783, "grad_norm": 2.781905831929465, "learning_rate": 2.7329192546583853e-05, "loss": 2.7827, "reason_loss": 0.557723879814148, "step": 176, "utility_loss": 2.2249436378479004 }, { "cosine_similarity": 0, "epoch": 0.16495806150978565, "grad_norm": 1.8205435483539785, "learning_rate": 2.748447204968944e-05, "loss": 2.8625, "reason_loss": 0.5586204528808594, "step": 177, "utility_loss": 2.303894519805908 }, { "cosine_similarity": 0, "epoch": 0.16589002795899346, "grad_norm": 2.4392857022004195, "learning_rate": 2.7639751552795035e-05, "loss": 2.4781, "reason_loss": 0.5700114369392395, "step": 178, "utility_loss": 1.908052921295166 }, { "cosine_similarity": 0, "epoch": 0.1668219944082013, "grad_norm": 1.6953569050116448, "learning_rate": 2.7795031055900623e-05, "loss": 2.5839, "reason_loss": 0.6032040119171143, "step": 179, "utility_loss": 1.9806909561157227 }, { "cosine_similarity": 0, "epoch": 0.16775396085740912, "grad_norm": 1.718292198357977, "learning_rate": 2.795031055900621e-05, "loss": 2.2387, "reason_loss": 0.5766808986663818, "step": 180, "utility_loss": 1.6619904041290283 }, { "cosine_similarity": 0, "epoch": 0.16868592730661697, "grad_norm": 2.092963697463631, "learning_rate": 2.8105590062111805e-05, "loss": 2.7452, "reason_loss": 0.5570244789123535, "step": 181, "utility_loss": 2.188199996948242 }, { "cosine_similarity": 0, "epoch": 0.16961789375582478, "grad_norm": 2.228603678937117, "learning_rate": 2.826086956521739e-05, "loss": 2.6151, "reason_loss": 0.619562029838562, "step": 182, "utility_loss": 1.9955332279205322 }, { "cosine_similarity": 0, "epoch": 0.17054986020503263, "grad_norm": 2.268351820933109, "learning_rate": 2.8416149068322983e-05, "loss": 2.6418, "reason_loss": 0.555252730846405, "step": 183, "utility_loss": 2.08655047416687 }, { "cosine_similarity": 0, "epoch": 0.17148182665424044, "grad_norm": 2.451024861980115, "learning_rate": 2.857142857142857e-05, "loss": 2.5194, "reason_loss": 0.573601484298706, "step": 184, "utility_loss": 1.9458403587341309 }, { "cosine_similarity": 0, "epoch": 0.1724137931034483, "grad_norm": 1.909023120361615, "learning_rate": 2.8726708074534165e-05, "loss": 2.8155, "reason_loss": 0.5843527913093567, "step": 185, "utility_loss": 2.2311954498291016 }, { "cosine_similarity": 0, "epoch": 0.1733457595526561, "grad_norm": 2.7149207546925824, "learning_rate": 2.8881987577639753e-05, "loss": 2.0476, "reason_loss": 0.5785356760025024, "step": 186, "utility_loss": 1.4690651893615723 }, { "cosine_similarity": 0, "epoch": 0.17427772600186392, "grad_norm": 2.029368692220658, "learning_rate": 2.9037267080745344e-05, "loss": 2.5809, "reason_loss": 0.5788195133209229, "step": 187, "utility_loss": 2.002035617828369 }, { "cosine_similarity": 0, "epoch": 0.17520969245107176, "grad_norm": 1.7610746566325093, "learning_rate": 2.919254658385093e-05, "loss": 2.5241, "reason_loss": 0.563827395439148, "step": 188, "utility_loss": 1.9603031873703003 }, { "cosine_similarity": 0, "epoch": 0.17614165890027958, "grad_norm": 1.6519790733051745, "learning_rate": 2.9347826086956526e-05, "loss": 2.3314, "reason_loss": 0.6276938915252686, "step": 189, "utility_loss": 1.7037192583084106 }, { "cosine_similarity": 0, "epoch": 0.17707362534948742, "grad_norm": 1.5537361967391576, "learning_rate": 2.9503105590062114e-05, "loss": 2.308, "reason_loss": 0.5952551960945129, "step": 190, "utility_loss": 1.7127695083618164 }, { "cosine_similarity": 0, "epoch": 0.17800559179869524, "grad_norm": 1.824641679044539, "learning_rate": 2.96583850931677e-05, "loss": 2.7142, "reason_loss": 0.5851694345474243, "step": 191, "utility_loss": 2.1290526390075684 }, { "cosine_similarity": 0, "epoch": 0.17893755824790308, "grad_norm": 1.7522952166735635, "learning_rate": 2.9813664596273296e-05, "loss": 2.3007, "reason_loss": 0.5461270809173584, "step": 192, "utility_loss": 1.7545418739318848 }, { "cosine_similarity": 0, "epoch": 0.1798695246971109, "grad_norm": 1.9660891549954183, "learning_rate": 2.9968944099378883e-05, "loss": 2.3575, "reason_loss": 0.5746995210647583, "step": 193, "utility_loss": 1.7828021049499512 }, { "cosine_similarity": 0, "epoch": 0.18080149114631874, "grad_norm": 2.128738332354536, "learning_rate": 3.0124223602484474e-05, "loss": 2.4204, "reason_loss": 0.5963000655174255, "step": 194, "utility_loss": 1.8241472244262695 }, { "cosine_similarity": 0, "epoch": 0.18173345759552656, "grad_norm": 1.9406131620091716, "learning_rate": 3.0279503105590062e-05, "loss": 2.7802, "reason_loss": 0.5854953527450562, "step": 195, "utility_loss": 2.194706916809082 }, { "cosine_similarity": 0, "epoch": 0.18266542404473438, "grad_norm": 1.8353050630876797, "learning_rate": 3.0434782608695656e-05, "loss": 2.3486, "reason_loss": 0.5813924074172974, "step": 196, "utility_loss": 1.7671642303466797 }, { "cosine_similarity": 0, "epoch": 0.18359739049394222, "grad_norm": 1.7888746628786911, "learning_rate": 3.059006211180124e-05, "loss": 2.3473, "reason_loss": 0.6038748025894165, "step": 197, "utility_loss": 1.7433987855911255 }, { "cosine_similarity": 0, "epoch": 0.18452935694315004, "grad_norm": 2.0598291014640178, "learning_rate": 3.074534161490684e-05, "loss": 2.7145, "reason_loss": 0.5672619342803955, "step": 198, "utility_loss": 2.147250175476074 }, { "cosine_similarity": 0, "epoch": 0.18546132339235788, "grad_norm": 2.0754389074117854, "learning_rate": 3.090062111801242e-05, "loss": 2.6179, "reason_loss": 0.5944061279296875, "step": 199, "utility_loss": 2.0234954357147217 }, { "cosine_similarity": 0, "epoch": 0.1863932898415657, "grad_norm": 2.43368851594392, "learning_rate": 3.1055900621118014e-05, "loss": 2.8947, "reason_loss": 0.5538617372512817, "step": 200, "utility_loss": 2.340871810913086 }, { "cosine_similarity": 0, "epoch": 0.18732525629077354, "grad_norm": 1.8990587268865775, "learning_rate": 3.1211180124223605e-05, "loss": 2.5769, "reason_loss": 0.5679610371589661, "step": 201, "utility_loss": 2.008889675140381 }, { "cosine_similarity": 0, "epoch": 0.18825722273998136, "grad_norm": 1.6852649081438158, "learning_rate": 3.136645962732919e-05, "loss": 2.4176, "reason_loss": 0.5537108182907104, "step": 202, "utility_loss": 1.8638737201690674 }, { "cosine_similarity": 0, "epoch": 0.1891891891891892, "grad_norm": 2.337071581255283, "learning_rate": 3.152173913043479e-05, "loss": 2.5872, "reason_loss": 0.5489261150360107, "step": 203, "utility_loss": 2.038250207901001 }, { "cosine_similarity": 0, "epoch": 0.19012115563839702, "grad_norm": 2.3610616145056267, "learning_rate": 3.167701863354037e-05, "loss": 2.7771, "reason_loss": 0.5832510590553284, "step": 204, "utility_loss": 2.193861961364746 }, { "cosine_similarity": 0, "epoch": 0.19105312208760486, "grad_norm": 2.263340886654048, "learning_rate": 3.183229813664597e-05, "loss": 2.436, "reason_loss": 0.5648998618125916, "step": 205, "utility_loss": 1.8711445331573486 }, { "cosine_similarity": 0, "epoch": 0.19198508853681268, "grad_norm": 1.898927643585863, "learning_rate": 3.198757763975155e-05, "loss": 2.713, "reason_loss": 0.6160843372344971, "step": 206, "utility_loss": 2.096902370452881 }, { "cosine_similarity": 0, "epoch": 0.1929170549860205, "grad_norm": 2.340331867205867, "learning_rate": 3.2142857142857144e-05, "loss": 2.5765, "reason_loss": 0.5785426497459412, "step": 207, "utility_loss": 1.9979078769683838 }, { "cosine_similarity": 0, "epoch": 0.19384902143522834, "grad_norm": 1.9550322053086762, "learning_rate": 3.2298136645962735e-05, "loss": 2.7134, "reason_loss": 0.5717428922653198, "step": 208, "utility_loss": 2.141623020172119 }, { "cosine_similarity": 0, "epoch": 0.19478098788443615, "grad_norm": 1.8889978891634964, "learning_rate": 3.245341614906832e-05, "loss": 1.8165, "reason_loss": 0.5469029545783997, "step": 209, "utility_loss": 1.2696316242218018 }, { "cosine_similarity": 0, "epoch": 0.195712954333644, "grad_norm": 2.047637415678827, "learning_rate": 3.260869565217392e-05, "loss": 2.512, "reason_loss": 0.5909518003463745, "step": 210, "utility_loss": 1.921044111251831 }, { "cosine_similarity": 0, "epoch": 0.1966449207828518, "grad_norm": 1.4277720914784342, "learning_rate": 3.27639751552795e-05, "loss": 2.0757, "reason_loss": 0.5982572436332703, "step": 211, "utility_loss": 1.477405309677124 }, { "cosine_similarity": 0, "epoch": 0.19757688723205966, "grad_norm": 1.5981622979368109, "learning_rate": 3.29192546583851e-05, "loss": 2.5735, "reason_loss": 0.584470272064209, "step": 212, "utility_loss": 1.989072322845459 }, { "cosine_similarity": 0, "epoch": 0.19850885368126747, "grad_norm": 2.438221354255468, "learning_rate": 3.307453416149068e-05, "loss": 2.4177, "reason_loss": 0.5490233898162842, "step": 213, "utility_loss": 1.8686637878417969 }, { "cosine_similarity": 0, "epoch": 0.19944082013047532, "grad_norm": 2.799223949384493, "learning_rate": 3.3229813664596274e-05, "loss": 2.8725, "reason_loss": 0.5635131597518921, "step": 214, "utility_loss": 2.3089842796325684 }, { "cosine_similarity": 0, "epoch": 0.20037278657968313, "grad_norm": 1.9506520365648805, "learning_rate": 3.3385093167701865e-05, "loss": 2.1994, "reason_loss": 0.5202204585075378, "step": 215, "utility_loss": 1.6792024374008179 }, { "cosine_similarity": 0, "epoch": 0.20130475302889095, "grad_norm": 2.0719436136222082, "learning_rate": 3.3540372670807456e-05, "loss": 2.2731, "reason_loss": 0.5438313484191895, "step": 216, "utility_loss": 1.7293105125427246 }, { "cosine_similarity": 0, "epoch": 0.2022367194780988, "grad_norm": 2.1448629721452037, "learning_rate": 3.369565217391305e-05, "loss": 2.43, "reason_loss": 0.5418318510055542, "step": 217, "utility_loss": 1.8881853818893433 }, { "cosine_similarity": 0, "epoch": 0.2031686859273066, "grad_norm": 1.4051147267915327, "learning_rate": 3.385093167701863e-05, "loss": 2.4465, "reason_loss": 0.6075325012207031, "step": 218, "utility_loss": 1.8389281034469604 }, { "cosine_similarity": 0, "epoch": 0.20410065237651445, "grad_norm": 2.1291664012231792, "learning_rate": 3.400621118012422e-05, "loss": 2.5944, "reason_loss": 0.5448473691940308, "step": 219, "utility_loss": 2.0495431423187256 }, { "cosine_similarity": 0, "epoch": 0.20503261882572227, "grad_norm": 2.6030950469827423, "learning_rate": 3.4161490683229814e-05, "loss": 2.5878, "reason_loss": 0.5840301513671875, "step": 220, "utility_loss": 2.0038106441497803 }, { "cosine_similarity": 0, "epoch": 0.2059645852749301, "grad_norm": 1.8721876415851924, "learning_rate": 3.4316770186335405e-05, "loss": 2.5974, "reason_loss": 0.5449261665344238, "step": 221, "utility_loss": 2.0524425506591797 }, { "cosine_similarity": 0, "epoch": 0.20689655172413793, "grad_norm": 1.9494610178688179, "learning_rate": 3.4472049689440996e-05, "loss": 2.4882, "reason_loss": 0.5450698137283325, "step": 222, "utility_loss": 1.943140983581543 }, { "cosine_similarity": 0, "epoch": 0.20782851817334577, "grad_norm": 2.081143254778967, "learning_rate": 3.462732919254659e-05, "loss": 2.586, "reason_loss": 0.541050136089325, "step": 223, "utility_loss": 2.044976234436035 }, { "cosine_similarity": 0, "epoch": 0.2087604846225536, "grad_norm": 2.194917124554097, "learning_rate": 3.478260869565218e-05, "loss": 2.6487, "reason_loss": 0.5286643505096436, "step": 224, "utility_loss": 2.120084285736084 }, { "cosine_similarity": 0, "epoch": 0.2096924510717614, "grad_norm": 2.892398201950919, "learning_rate": 3.493788819875777e-05, "loss": 2.6901, "reason_loss": 0.5641381144523621, "step": 225, "utility_loss": 2.125915050506592 }, { "cosine_similarity": 0, "epoch": 0.21062441752096925, "grad_norm": 2.277789736775707, "learning_rate": 3.509316770186335e-05, "loss": 2.5773, "reason_loss": 0.5397845506668091, "step": 226, "utility_loss": 2.037510395050049 }, { "cosine_similarity": 0, "epoch": 0.21155638397017706, "grad_norm": 2.0541628598012953, "learning_rate": 3.524844720496895e-05, "loss": 2.5268, "reason_loss": 0.5806760787963867, "step": 227, "utility_loss": 1.9461472034454346 }, { "cosine_similarity": 0, "epoch": 0.2124883504193849, "grad_norm": 1.8708391322053637, "learning_rate": 3.5403726708074535e-05, "loss": 2.6274, "reason_loss": 0.5369601249694824, "step": 228, "utility_loss": 2.09041166305542 }, { "cosine_similarity": 0, "epoch": 0.21342031686859272, "grad_norm": 2.1887154863340537, "learning_rate": 3.5559006211180126e-05, "loss": 2.7875, "reason_loss": 0.5835192203521729, "step": 229, "utility_loss": 2.2039577960968018 }, { "cosine_similarity": 0, "epoch": 0.21435228331780057, "grad_norm": 1.8481165363291066, "learning_rate": 3.571428571428572e-05, "loss": 2.6249, "reason_loss": 0.560674786567688, "step": 230, "utility_loss": 2.0642552375793457 }, { "cosine_similarity": 0, "epoch": 0.21528424976700838, "grad_norm": 1.6790729163763498, "learning_rate": 3.58695652173913e-05, "loss": 2.0727, "reason_loss": 0.5508855581283569, "step": 231, "utility_loss": 1.5217704772949219 }, { "cosine_similarity": 0, "epoch": 0.21621621621621623, "grad_norm": 2.241898008250472, "learning_rate": 3.60248447204969e-05, "loss": 2.4013, "reason_loss": 0.5752378702163696, "step": 232, "utility_loss": 1.8260990381240845 }, { "cosine_similarity": 0, "epoch": 0.21714818266542404, "grad_norm": 1.8856982641859756, "learning_rate": 3.618012422360248e-05, "loss": 2.8392, "reason_loss": 0.5832222700119019, "step": 233, "utility_loss": 2.2559986114501953 }, { "cosine_similarity": 0, "epoch": 0.21808014911463186, "grad_norm": 1.8413302898765254, "learning_rate": 3.633540372670808e-05, "loss": 2.2766, "reason_loss": 0.5522726774215698, "step": 234, "utility_loss": 1.7243645191192627 }, { "cosine_similarity": 0, "epoch": 0.2190121155638397, "grad_norm": 1.6645527488052398, "learning_rate": 3.6490683229813665e-05, "loss": 2.1546, "reason_loss": 0.5285754203796387, "step": 235, "utility_loss": 1.626017689704895 }, { "cosine_similarity": 0, "epoch": 0.21994408201304752, "grad_norm": 1.56945503391033, "learning_rate": 3.6645962732919256e-05, "loss": 2.0629, "reason_loss": 0.563045084476471, "step": 236, "utility_loss": 1.4998345375061035 }, { "cosine_similarity": 0, "epoch": 0.22087604846225536, "grad_norm": 1.8948308510526064, "learning_rate": 3.680124223602485e-05, "loss": 2.1818, "reason_loss": 0.5596588253974915, "step": 237, "utility_loss": 1.6221158504486084 }, { "cosine_similarity": 0, "epoch": 0.22180801491146318, "grad_norm": 1.7365825922726752, "learning_rate": 3.695652173913043e-05, "loss": 2.243, "reason_loss": 0.5744996666908264, "step": 238, "utility_loss": 1.66849684715271 }, { "cosine_similarity": 0, "epoch": 0.22273998136067102, "grad_norm": 2.121057420159703, "learning_rate": 3.711180124223603e-05, "loss": 2.2804, "reason_loss": 0.522409200668335, "step": 239, "utility_loss": 1.7580382823944092 }, { "cosine_similarity": 0, "epoch": 0.22367194780987884, "grad_norm": 1.9612588213485205, "learning_rate": 3.7267080745341614e-05, "loss": 2.6149, "reason_loss": 0.549566924571991, "step": 240, "utility_loss": 2.0653209686279297 }, { "cosine_similarity": 0, "epoch": 0.22460391425908668, "grad_norm": 2.205222071545244, "learning_rate": 3.742236024844721e-05, "loss": 2.7046, "reason_loss": 0.5658124685287476, "step": 241, "utility_loss": 2.138823986053467 }, { "cosine_similarity": 0, "epoch": 0.2255358807082945, "grad_norm": 1.641806450815362, "learning_rate": 3.7577639751552796e-05, "loss": 2.5333, "reason_loss": 0.548174262046814, "step": 242, "utility_loss": 1.9851725101470947 }, { "cosine_similarity": 0, "epoch": 0.22646784715750232, "grad_norm": 1.9122349015332147, "learning_rate": 3.773291925465839e-05, "loss": 2.4244, "reason_loss": 0.572616457939148, "step": 243, "utility_loss": 1.8518311977386475 }, { "cosine_similarity": 0, "epoch": 0.22739981360671016, "grad_norm": 1.7513861160513304, "learning_rate": 3.788819875776398e-05, "loss": 2.5303, "reason_loss": 0.5450221300125122, "step": 244, "utility_loss": 1.9852527379989624 }, { "cosine_similarity": 0, "epoch": 0.22833178005591798, "grad_norm": 2.114491480367599, "learning_rate": 3.804347826086957e-05, "loss": 2.6172, "reason_loss": 0.5681341886520386, "step": 245, "utility_loss": 2.0490341186523438 }, { "cosine_similarity": 0, "epoch": 0.22926374650512582, "grad_norm": 1.4891543256609703, "learning_rate": 3.819875776397516e-05, "loss": 2.6677, "reason_loss": 0.5340662002563477, "step": 246, "utility_loss": 2.133589744567871 }, { "cosine_similarity": 0, "epoch": 0.23019571295433364, "grad_norm": 3.2611171898382056, "learning_rate": 3.8354037267080744e-05, "loss": 2.8246, "reason_loss": 0.5337564945220947, "step": 247, "utility_loss": 2.290802001953125 }, { "cosine_similarity": 0, "epoch": 0.23112767940354148, "grad_norm": 1.998833507328311, "learning_rate": 3.8509316770186335e-05, "loss": 2.4215, "reason_loss": 0.5832949876785278, "step": 248, "utility_loss": 1.8381567001342773 }, { "cosine_similarity": 0, "epoch": 0.2320596458527493, "grad_norm": 1.5941349364383663, "learning_rate": 3.8664596273291926e-05, "loss": 2.4139, "reason_loss": 0.5618233680725098, "step": 249, "utility_loss": 1.852095365524292 }, { "cosine_similarity": 0, "epoch": 0.23299161230195714, "grad_norm": 1.6440087431909358, "learning_rate": 3.881987577639752e-05, "loss": 2.2518, "reason_loss": 0.5471892356872559, "step": 250, "utility_loss": 1.7046245336532593 }, { "cosine_similarity": 0, "epoch": 0.23392357875116496, "grad_norm": 1.708180659259751, "learning_rate": 3.897515527950311e-05, "loss": 2.5763, "reason_loss": 0.5836954712867737, "step": 251, "utility_loss": 1.9926435947418213 }, { "cosine_similarity": 0, "epoch": 0.23485554520037277, "grad_norm": 1.654036994207959, "learning_rate": 3.91304347826087e-05, "loss": 2.4886, "reason_loss": 0.5397282838821411, "step": 252, "utility_loss": 1.9489190578460693 }, { "cosine_similarity": 0, "epoch": 0.23578751164958062, "grad_norm": 2.02779204751326, "learning_rate": 3.928571428571429e-05, "loss": 2.4631, "reason_loss": 0.5341030359268188, "step": 253, "utility_loss": 1.9290462732315063 }, { "cosine_similarity": 0, "epoch": 0.23671947809878843, "grad_norm": 1.6359098987331004, "learning_rate": 3.944099378881988e-05, "loss": 2.5468, "reason_loss": 0.5855284929275513, "step": 254, "utility_loss": 1.9612650871276855 }, { "cosine_similarity": 0, "epoch": 0.23765144454799628, "grad_norm": 1.5818208838476986, "learning_rate": 3.9596273291925465e-05, "loss": 2.2225, "reason_loss": 0.571771502494812, "step": 255, "utility_loss": 1.6507784128189087 }, { "cosine_similarity": 0, "epoch": 0.2385834109972041, "grad_norm": 1.8167680583071941, "learning_rate": 3.9751552795031056e-05, "loss": 2.0479, "reason_loss": 0.5409564971923828, "step": 256, "utility_loss": 1.5068947076797485 }, { "cosine_similarity": 0, "epoch": 0.23951537744641194, "grad_norm": 2.255512255218169, "learning_rate": 3.990683229813665e-05, "loss": 2.6957, "reason_loss": 0.5530112385749817, "step": 257, "utility_loss": 2.1426637172698975 }, { "cosine_similarity": 0, "epoch": 0.24044734389561975, "grad_norm": 1.895941438572667, "learning_rate": 4.006211180124224e-05, "loss": 2.4607, "reason_loss": 0.5512063503265381, "step": 258, "utility_loss": 1.9094921350479126 }, { "cosine_similarity": 0, "epoch": 0.2413793103448276, "grad_norm": 1.4956023098752596, "learning_rate": 4.021739130434783e-05, "loss": 2.3921, "reason_loss": 0.5469622611999512, "step": 259, "utility_loss": 1.845167875289917 }, { "cosine_similarity": 0, "epoch": 0.2423112767940354, "grad_norm": 1.7437887035080115, "learning_rate": 4.0372670807453414e-05, "loss": 2.443, "reason_loss": 0.5950831174850464, "step": 260, "utility_loss": 1.8478834629058838 }, { "cosine_similarity": 0, "epoch": 0.24324324324324326, "grad_norm": 2.173398107455125, "learning_rate": 4.052795031055901e-05, "loss": 2.39, "reason_loss": 0.5614568591117859, "step": 261, "utility_loss": 1.8285527229309082 }, { "cosine_similarity": 0, "epoch": 0.24417520969245107, "grad_norm": 1.943486954977124, "learning_rate": 4.0683229813664596e-05, "loss": 2.2765, "reason_loss": 0.603929877281189, "step": 262, "utility_loss": 1.672577142715454 }, { "cosine_similarity": 0, "epoch": 0.2451071761416589, "grad_norm": 1.777845542883601, "learning_rate": 4.0838509316770193e-05, "loss": 2.4192, "reason_loss": 0.5527471303939819, "step": 263, "utility_loss": 1.8664608001708984 }, { "cosine_similarity": 0, "epoch": 0.24603914259086673, "grad_norm": 1.643166876161188, "learning_rate": 4.099378881987578e-05, "loss": 2.4522, "reason_loss": 0.5070778131484985, "step": 264, "utility_loss": 1.9450914859771729 }, { "cosine_similarity": 0, "epoch": 0.24697110904007455, "grad_norm": 2.1020131146413257, "learning_rate": 4.114906832298137e-05, "loss": 2.4118, "reason_loss": 0.537281334400177, "step": 265, "utility_loss": 1.874503254890442 }, { "cosine_similarity": 0, "epoch": 0.2479030754892824, "grad_norm": 1.652647168398533, "learning_rate": 4.130434782608696e-05, "loss": 2.3513, "reason_loss": 0.5599501132965088, "step": 266, "utility_loss": 1.7913339138031006 }, { "cosine_similarity": 0, "epoch": 0.2488350419384902, "grad_norm": 1.5293530385775005, "learning_rate": 4.1459627329192544e-05, "loss": 2.7058, "reason_loss": 0.5672691464424133, "step": 267, "utility_loss": 2.138519287109375 }, { "cosine_similarity": 0, "epoch": 0.24976700838769805, "grad_norm": 1.3859236689084111, "learning_rate": 4.161490683229814e-05, "loss": 1.9097, "reason_loss": 0.5624668598175049, "step": 268, "utility_loss": 1.347196102142334 }, { "cosine_similarity": 0, "epoch": 0.2506989748369059, "grad_norm": 1.973935505453008, "learning_rate": 4.1770186335403726e-05, "loss": 2.5412, "reason_loss": 0.5454761385917664, "step": 269, "utility_loss": 1.9957540035247803 }, { "cosine_similarity": 0, "epoch": 0.2516309412861137, "grad_norm": 2.558207807650575, "learning_rate": 4.192546583850932e-05, "loss": 2.3954, "reason_loss": 0.5325770974159241, "step": 270, "utility_loss": 1.8628618717193604 }, { "cosine_similarity": 0, "epoch": 0.25256290773532153, "grad_norm": 1.9543442549015948, "learning_rate": 4.208074534161491e-05, "loss": 2.282, "reason_loss": 0.5137364864349365, "step": 271, "utility_loss": 1.768294095993042 }, { "cosine_similarity": 0, "epoch": 0.2534948741845294, "grad_norm": 1.8903528223387769, "learning_rate": 4.22360248447205e-05, "loss": 2.5372, "reason_loss": 0.577646791934967, "step": 272, "utility_loss": 1.959543228149414 }, { "cosine_similarity": 0, "epoch": 0.25442684063373716, "grad_norm": 1.519915087350751, "learning_rate": 4.239130434782609e-05, "loss": 2.0332, "reason_loss": 0.5720705389976501, "step": 273, "utility_loss": 1.461092472076416 }, { "cosine_similarity": 0, "epoch": 0.255358807082945, "grad_norm": 1.661706624801028, "learning_rate": 4.254658385093168e-05, "loss": 2.5458, "reason_loss": 0.5605642795562744, "step": 274, "utility_loss": 1.9852097034454346 }, { "cosine_similarity": 0, "epoch": 0.25629077353215285, "grad_norm": 1.6614384364008437, "learning_rate": 4.270186335403727e-05, "loss": 2.7355, "reason_loss": 0.5717644691467285, "step": 275, "utility_loss": 2.163769483566284 }, { "cosine_similarity": 0, "epoch": 0.2572227399813607, "grad_norm": 1.9269194501309979, "learning_rate": 4.2857142857142856e-05, "loss": 2.5387, "reason_loss": 0.5949586033821106, "step": 276, "utility_loss": 1.943702220916748 }, { "cosine_similarity": 0, "epoch": 0.2581547064305685, "grad_norm": 2.245819698149013, "learning_rate": 4.301242236024845e-05, "loss": 2.1565, "reason_loss": 0.5316360592842102, "step": 277, "utility_loss": 1.6248643398284912 }, { "cosine_similarity": 0, "epoch": 0.2590866728797763, "grad_norm": 1.7238524529769248, "learning_rate": 4.316770186335404e-05, "loss": 2.4463, "reason_loss": 0.53597491979599, "step": 278, "utility_loss": 1.9102838039398193 }, { "cosine_similarity": 0, "epoch": 0.26001863932898417, "grad_norm": 1.7961380684523767, "learning_rate": 4.332298136645963e-05, "loss": 2.1022, "reason_loss": 0.5557235479354858, "step": 279, "utility_loss": 1.5465052127838135 }, { "cosine_similarity": 0, "epoch": 0.26095060577819196, "grad_norm": 1.7191326607377715, "learning_rate": 4.347826086956522e-05, "loss": 2.6714, "reason_loss": 0.529171347618103, "step": 280, "utility_loss": 2.1422252655029297 }, { "cosine_similarity": 0, "epoch": 0.2618825722273998, "grad_norm": 2.0373837585135615, "learning_rate": 4.363354037267081e-05, "loss": 2.5393, "reason_loss": 0.5629432797431946, "step": 281, "utility_loss": 1.9763355255126953 }, { "cosine_similarity": 0, "epoch": 0.26281453867660765, "grad_norm": 1.5198325328366575, "learning_rate": 4.3788819875776396e-05, "loss": 1.9082, "reason_loss": 0.5881476402282715, "step": 282, "utility_loss": 1.3201005458831787 }, { "cosine_similarity": 0, "epoch": 0.2637465051258155, "grad_norm": 1.789116378235906, "learning_rate": 4.3944099378881993e-05, "loss": 2.1983, "reason_loss": 0.539150595664978, "step": 283, "utility_loss": 1.6591416597366333 }, { "cosine_similarity": 0, "epoch": 0.2646784715750233, "grad_norm": 1.4209629045270749, "learning_rate": 4.409937888198758e-05, "loss": 2.0238, "reason_loss": 0.5368297100067139, "step": 284, "utility_loss": 1.487018346786499 }, { "cosine_similarity": 0, "epoch": 0.2656104380242311, "grad_norm": 1.7741916697441744, "learning_rate": 4.425465838509317e-05, "loss": 2.6112, "reason_loss": 0.5442548990249634, "step": 285, "utility_loss": 2.066964864730835 }, { "cosine_similarity": 0, "epoch": 0.26654240447343897, "grad_norm": 1.6595270901087136, "learning_rate": 4.440993788819876e-05, "loss": 2.5052, "reason_loss": 0.5684736371040344, "step": 286, "utility_loss": 1.9367552995681763 }, { "cosine_similarity": 0, "epoch": 0.2674743709226468, "grad_norm": 1.7794240426139158, "learning_rate": 4.456521739130435e-05, "loss": 2.6827, "reason_loss": 0.5417989492416382, "step": 287, "utility_loss": 2.1408751010894775 }, { "cosine_similarity": 0, "epoch": 0.2684063373718546, "grad_norm": 1.7136029460254838, "learning_rate": 4.472049689440994e-05, "loss": 2.5491, "reason_loss": 0.555923342704773, "step": 288, "utility_loss": 1.9931912422180176 }, { "cosine_similarity": 0, "epoch": 0.26933830382106244, "grad_norm": 1.8182486767720534, "learning_rate": 4.4875776397515526e-05, "loss": 2.8133, "reason_loss": 0.5715357661247253, "step": 289, "utility_loss": 2.241804838180542 }, { "cosine_similarity": 0, "epoch": 0.2702702702702703, "grad_norm": 1.5688476539616991, "learning_rate": 4.5031055900621124e-05, "loss": 2.4103, "reason_loss": 0.5429417490959167, "step": 290, "utility_loss": 1.867318868637085 }, { "cosine_similarity": 0, "epoch": 0.2712022367194781, "grad_norm": 1.751923126532157, "learning_rate": 4.518633540372671e-05, "loss": 1.8584, "reason_loss": 0.5606403350830078, "step": 291, "utility_loss": 1.2977890968322754 }, { "cosine_similarity": 0, "epoch": 0.2721342031686859, "grad_norm": 1.6846128777657396, "learning_rate": 4.5341614906832306e-05, "loss": 2.2344, "reason_loss": 0.554923951625824, "step": 292, "utility_loss": 1.6794655323028564 }, { "cosine_similarity": 0, "epoch": 0.27306616961789376, "grad_norm": 1.7363117151333185, "learning_rate": 4.549689440993789e-05, "loss": 2.6574, "reason_loss": 0.5440471172332764, "step": 293, "utility_loss": 2.113306999206543 }, { "cosine_similarity": 0, "epoch": 0.2739981360671016, "grad_norm": 1.6340143445845077, "learning_rate": 4.565217391304348e-05, "loss": 2.5316, "reason_loss": 0.562835693359375, "step": 294, "utility_loss": 1.9687769412994385 }, { "cosine_similarity": 0, "epoch": 0.2749301025163094, "grad_norm": 1.7222729283786558, "learning_rate": 4.580745341614907e-05, "loss": 2.6354, "reason_loss": 0.5796807408332825, "step": 295, "utility_loss": 2.0557258129119873 }, { "cosine_similarity": 0, "epoch": 0.27586206896551724, "grad_norm": 1.7673678849411332, "learning_rate": 4.5962732919254656e-05, "loss": 2.2025, "reason_loss": 0.5311561822891235, "step": 296, "utility_loss": 1.67131769657135 }, { "cosine_similarity": 0, "epoch": 0.2767940354147251, "grad_norm": 1.5602747939853796, "learning_rate": 4.6118012422360254e-05, "loss": 2.1363, "reason_loss": 0.5517641305923462, "step": 297, "utility_loss": 1.5845203399658203 }, { "cosine_similarity": 0, "epoch": 0.2777260018639329, "grad_norm": 1.8087597220932052, "learning_rate": 4.627329192546584e-05, "loss": 2.5721, "reason_loss": 0.5751932859420776, "step": 298, "utility_loss": 1.9969148635864258 }, { "cosine_similarity": 0, "epoch": 0.2786579683131407, "grad_norm": 1.6997725722307047, "learning_rate": 4.642857142857143e-05, "loss": 2.4071, "reason_loss": 0.5462980270385742, "step": 299, "utility_loss": 1.8607600927352905 }, { "cosine_similarity": 0, "epoch": 0.27958993476234856, "grad_norm": 1.7243875558971025, "learning_rate": 4.658385093167702e-05, "loss": 2.6525, "reason_loss": 0.6014015674591064, "step": 300, "utility_loss": 2.051136016845703 }, { "cosine_similarity": 0, "epoch": 0.2805219012115564, "grad_norm": 1.3563928275119106, "learning_rate": 4.673913043478261e-05, "loss": 1.6961, "reason_loss": 0.5497259497642517, "step": 301, "utility_loss": 1.1463414430618286 }, { "cosine_similarity": 0, "epoch": 0.2814538676607642, "grad_norm": 1.4138960967108296, "learning_rate": 4.68944099378882e-05, "loss": 2.007, "reason_loss": 0.5457945466041565, "step": 302, "utility_loss": 1.461173415184021 }, { "cosine_similarity": 0, "epoch": 0.28238583410997203, "grad_norm": 3.251413652708668, "learning_rate": 4.7049689440993793e-05, "loss": 2.512, "reason_loss": 0.5295814275741577, "step": 303, "utility_loss": 1.9824347496032715 }, { "cosine_similarity": 0, "epoch": 0.2833178005591799, "grad_norm": 1.841576312448804, "learning_rate": 4.7204968944099384e-05, "loss": 2.4598, "reason_loss": 0.5696566104888916, "step": 304, "utility_loss": 1.8901238441467285 }, { "cosine_similarity": 0, "epoch": 0.2842497670083877, "grad_norm": 1.806589580057016, "learning_rate": 4.736024844720497e-05, "loss": 2.3905, "reason_loss": 0.5502476692199707, "step": 305, "utility_loss": 1.8402799367904663 }, { "cosine_similarity": 0, "epoch": 0.2851817334575955, "grad_norm": 2.202212714673509, "learning_rate": 4.751552795031056e-05, "loss": 2.6396, "reason_loss": 0.5227651596069336, "step": 306, "utility_loss": 2.116795539855957 }, { "cosine_similarity": 0, "epoch": 0.28611369990680335, "grad_norm": 1.6904552102768176, "learning_rate": 4.767080745341615e-05, "loss": 2.4168, "reason_loss": 0.5345472097396851, "step": 307, "utility_loss": 1.882286787033081 }, { "cosine_similarity": 0, "epoch": 0.2870456663560112, "grad_norm": 1.7735239273826289, "learning_rate": 4.782608695652174e-05, "loss": 2.5439, "reason_loss": 0.5334944725036621, "step": 308, "utility_loss": 2.010429620742798 }, { "cosine_similarity": 0, "epoch": 0.287977632805219, "grad_norm": 1.7051926815506764, "learning_rate": 4.798136645962733e-05, "loss": 2.3482, "reason_loss": 0.535731852054596, "step": 309, "utility_loss": 1.8124425411224365 }, { "cosine_similarity": 0, "epoch": 0.28890959925442683, "grad_norm": 1.47210437211225, "learning_rate": 4.8136645962732924e-05, "loss": 1.9385, "reason_loss": 0.568031907081604, "step": 310, "utility_loss": 1.3704532384872437 }, { "cosine_similarity": 0, "epoch": 0.2898415657036347, "grad_norm": 2.2737051795245464, "learning_rate": 4.829192546583851e-05, "loss": 2.6106, "reason_loss": 0.5728371143341064, "step": 311, "utility_loss": 2.037719249725342 }, { "cosine_similarity": 0, "epoch": 0.2907735321528425, "grad_norm": 1.8279207367219417, "learning_rate": 4.8447204968944106e-05, "loss": 2.3636, "reason_loss": 0.538894534111023, "step": 312, "utility_loss": 1.8247464895248413 }, { "cosine_similarity": 0, "epoch": 0.2917054986020503, "grad_norm": 1.3396753804199868, "learning_rate": 4.860248447204969e-05, "loss": 1.9981, "reason_loss": 0.5596071481704712, "step": 313, "utility_loss": 1.4384658336639404 }, { "cosine_similarity": 0, "epoch": 0.29263746505125815, "grad_norm": 2.075419931729126, "learning_rate": 4.875776397515528e-05, "loss": 2.5052, "reason_loss": 0.52135169506073, "step": 314, "utility_loss": 1.9837992191314697 }, { "cosine_similarity": 0, "epoch": 0.293569431500466, "grad_norm": 1.7675758075611165, "learning_rate": 4.891304347826087e-05, "loss": 2.6069, "reason_loss": 0.5702043771743774, "step": 315, "utility_loss": 2.036731243133545 }, { "cosine_similarity": 0, "epoch": 0.29450139794967384, "grad_norm": 1.5155509615649627, "learning_rate": 4.906832298136646e-05, "loss": 2.3486, "reason_loss": 0.5151800513267517, "step": 316, "utility_loss": 1.8333828449249268 }, { "cosine_similarity": 0, "epoch": 0.2954333643988816, "grad_norm": 1.8709713735924391, "learning_rate": 4.9223602484472054e-05, "loss": 2.3742, "reason_loss": 0.5217232704162598, "step": 317, "utility_loss": 1.8524610996246338 }, { "cosine_similarity": 0, "epoch": 0.29636533084808947, "grad_norm": 1.5104080975076148, "learning_rate": 4.937888198757764e-05, "loss": 2.3133, "reason_loss": 0.5427089929580688, "step": 318, "utility_loss": 1.7706063985824585 }, { "cosine_similarity": 0, "epoch": 0.2972972972972973, "grad_norm": 1.5726895121692845, "learning_rate": 4.9534161490683236e-05, "loss": 2.4839, "reason_loss": 0.526587963104248, "step": 319, "utility_loss": 1.9573516845703125 }, { "cosine_similarity": 0, "epoch": 0.2982292637465051, "grad_norm": 1.8936504916339836, "learning_rate": 4.968944099378882e-05, "loss": 2.4268, "reason_loss": 0.5539220571517944, "step": 320, "utility_loss": 1.8728773593902588 }, { "cosine_similarity": 0, "epoch": 0.29916123019571295, "grad_norm": 1.6135468491170366, "learning_rate": 4.984472049689442e-05, "loss": 2.4356, "reason_loss": 0.5174787044525146, "step": 321, "utility_loss": 1.9181034564971924 }, { "cosine_similarity": 0, "epoch": 0.3000931966449208, "grad_norm": 1.378396240353653, "learning_rate": 5e-05, "loss": 2.0827, "reason_loss": 0.5341855883598328, "step": 322, "utility_loss": 1.5485141277313232 }, { "cosine_similarity": 0, "epoch": 0.30102516309412863, "grad_norm": 1.945657164238801, "learning_rate": 4.998274076630998e-05, "loss": 2.6722, "reason_loss": 0.531230092048645, "step": 323, "utility_loss": 2.140986680984497 }, { "cosine_similarity": 0, "epoch": 0.3019571295433364, "grad_norm": 1.5764150645104034, "learning_rate": 4.996548153261996e-05, "loss": 1.6962, "reason_loss": 0.5324915051460266, "step": 324, "utility_loss": 1.1637232303619385 }, { "cosine_similarity": 0, "epoch": 0.30288909599254427, "grad_norm": 1.5480963135198482, "learning_rate": 4.994822229892993e-05, "loss": 2.5044, "reason_loss": 0.5286862850189209, "step": 325, "utility_loss": 1.9757561683654785 }, { "cosine_similarity": 0, "epoch": 0.3038210624417521, "grad_norm": 1.6263078547598788, "learning_rate": 4.9930963065239906e-05, "loss": 2.1723, "reason_loss": 0.5336238145828247, "step": 326, "utility_loss": 1.6387007236480713 }, { "cosine_similarity": 0, "epoch": 0.3047530288909599, "grad_norm": 1.625020965470677, "learning_rate": 4.991370383154988e-05, "loss": 2.2191, "reason_loss": 0.5440060496330261, "step": 327, "utility_loss": 1.675080418586731 }, { "cosine_similarity": 0, "epoch": 0.30568499534016774, "grad_norm": 1.5274578025553138, "learning_rate": 4.989644459785986e-05, "loss": 2.4247, "reason_loss": 0.5363049507141113, "step": 328, "utility_loss": 1.8883678913116455 }, { "cosine_similarity": 0, "epoch": 0.3066169617893756, "grad_norm": 2.1420809783788295, "learning_rate": 4.9879185364169836e-05, "loss": 2.4459, "reason_loss": 0.5667109489440918, "step": 329, "utility_loss": 1.8791828155517578 }, { "cosine_similarity": 0, "epoch": 0.30754892823858343, "grad_norm": 1.4451911385664598, "learning_rate": 4.986192613047981e-05, "loss": 2.2901, "reason_loss": 0.5349295139312744, "step": 330, "utility_loss": 1.755149006843567 }, { "cosine_similarity": 0, "epoch": 0.3084808946877912, "grad_norm": 1.49489279581298, "learning_rate": 4.9844666896789785e-05, "loss": 2.0401, "reason_loss": 0.5345085859298706, "step": 331, "utility_loss": 1.5056061744689941 }, { "cosine_similarity": 0, "epoch": 0.30941286113699906, "grad_norm": 1.3910593924178576, "learning_rate": 4.982740766309976e-05, "loss": 1.8537, "reason_loss": 0.5366315841674805, "step": 332, "utility_loss": 1.3170592784881592 }, { "cosine_similarity": 0, "epoch": 0.3103448275862069, "grad_norm": 1.8401201547081005, "learning_rate": 4.981014842940973e-05, "loss": 2.1268, "reason_loss": 0.5390446186065674, "step": 333, "utility_loss": 1.5877864360809326 }, { "cosine_similarity": 0, "epoch": 0.31127679403541475, "grad_norm": 1.9756028553799752, "learning_rate": 4.979288919571971e-05, "loss": 2.4401, "reason_loss": 0.547577977180481, "step": 334, "utility_loss": 1.89251708984375 }, { "cosine_similarity": 0, "epoch": 0.31220876048462254, "grad_norm": 1.4957423233450189, "learning_rate": 4.977562996202969e-05, "loss": 2.0318, "reason_loss": 0.5392239093780518, "step": 335, "utility_loss": 1.4926072359085083 }, { "cosine_similarity": 0, "epoch": 0.3131407269338304, "grad_norm": 1.7391139767008725, "learning_rate": 4.975837072833966e-05, "loss": 2.4519, "reason_loss": 0.5718519687652588, "step": 336, "utility_loss": 1.880024790763855 }, { "cosine_similarity": 0, "epoch": 0.3140726933830382, "grad_norm": 1.651733632373874, "learning_rate": 4.974111149464964e-05, "loss": 2.2237, "reason_loss": 0.536705732345581, "step": 337, "utility_loss": 1.687007188796997 }, { "cosine_similarity": 0, "epoch": 0.315004659832246, "grad_norm": 1.6010269415086043, "learning_rate": 4.972385226095961e-05, "loss": 2.0832, "reason_loss": 0.5452485084533691, "step": 338, "utility_loss": 1.5379828214645386 }, { "cosine_similarity": 0, "epoch": 0.31593662628145386, "grad_norm": 2.028259915640563, "learning_rate": 4.970659302726959e-05, "loss": 2.328, "reason_loss": 0.5193299055099487, "step": 339, "utility_loss": 1.8086512088775635 }, { "cosine_similarity": 0, "epoch": 0.3168685927306617, "grad_norm": 1.6921085317015696, "learning_rate": 4.968933379357957e-05, "loss": 2.5847, "reason_loss": 0.5483134984970093, "step": 340, "utility_loss": 2.0363903045654297 }, { "cosine_similarity": 0, "epoch": 0.31780055917986955, "grad_norm": 1.5371154978589257, "learning_rate": 4.967207455988954e-05, "loss": 2.451, "reason_loss": 0.5357315540313721, "step": 341, "utility_loss": 1.9152687788009644 }, { "cosine_similarity": 0, "epoch": 0.31873252562907733, "grad_norm": 2.3741197559258227, "learning_rate": 4.9654815326199515e-05, "loss": 2.2524, "reason_loss": 0.5584676265716553, "step": 342, "utility_loss": 1.6939234733581543 }, { "cosine_similarity": 0, "epoch": 0.3196644920782852, "grad_norm": 1.6141896621101632, "learning_rate": 4.9637556092509496e-05, "loss": 2.2832, "reason_loss": 0.5249395370483398, "step": 343, "utility_loss": 1.7582972049713135 }, { "cosine_similarity": 0, "epoch": 0.320596458527493, "grad_norm": 1.9087909410743304, "learning_rate": 4.962029685881947e-05, "loss": 2.4468, "reason_loss": 0.5619872808456421, "step": 344, "utility_loss": 1.8848413228988647 }, { "cosine_similarity": 0, "epoch": 0.32152842497670087, "grad_norm": 1.9580483120682177, "learning_rate": 4.9603037625129445e-05, "loss": 2.6067, "reason_loss": 0.5072221755981445, "step": 345, "utility_loss": 2.0995161533355713 }, { "cosine_similarity": 0, "epoch": 0.32246039142590865, "grad_norm": 1.4904451560847516, "learning_rate": 4.9585778391439426e-05, "loss": 1.9595, "reason_loss": 0.5432315468788147, "step": 346, "utility_loss": 1.4162466526031494 }, { "cosine_similarity": 0, "epoch": 0.3233923578751165, "grad_norm": 1.5718306249648837, "learning_rate": 4.95685191577494e-05, "loss": 2.0785, "reason_loss": 0.5869742631912231, "step": 347, "utility_loss": 1.4915016889572144 }, { "cosine_similarity": 0, "epoch": 0.32432432432432434, "grad_norm": 1.3672700905011443, "learning_rate": 4.9551259924059375e-05, "loss": 2.0553, "reason_loss": 0.554318368434906, "step": 348, "utility_loss": 1.5010144710540771 }, { "cosine_similarity": 0, "epoch": 0.32525629077353213, "grad_norm": 2.039907043755388, "learning_rate": 4.953400069036935e-05, "loss": 2.3177, "reason_loss": 0.5454809665679932, "step": 349, "utility_loss": 1.7722305059432983 }, { "cosine_similarity": 0, "epoch": 0.32618825722274, "grad_norm": 1.6216428017572964, "learning_rate": 4.951674145667933e-05, "loss": 2.5225, "reason_loss": 0.5384238362312317, "step": 350, "utility_loss": 1.9840962886810303 }, { "cosine_similarity": 0, "epoch": 0.3271202236719478, "grad_norm": 1.6342349696233736, "learning_rate": 4.9499482222989304e-05, "loss": 2.2789, "reason_loss": 0.5429508686065674, "step": 351, "utility_loss": 1.7359744310379028 }, { "cosine_similarity": 0, "epoch": 0.32805219012115566, "grad_norm": 1.4815171117200543, "learning_rate": 4.948222298929928e-05, "loss": 2.4124, "reason_loss": 0.5348432064056396, "step": 352, "utility_loss": 1.8775877952575684 }, { "cosine_similarity": 0, "epoch": 0.32898415657036345, "grad_norm": 2.2166077942097893, "learning_rate": 4.946496375560925e-05, "loss": 2.7281, "reason_loss": 0.52386873960495, "step": 353, "utility_loss": 2.2042288780212402 }, { "cosine_similarity": 0, "epoch": 0.3299161230195713, "grad_norm": 2.1270607997467947, "learning_rate": 4.944770452191923e-05, "loss": 2.3486, "reason_loss": 0.5292650461196899, "step": 354, "utility_loss": 1.8193378448486328 }, { "cosine_similarity": 0, "epoch": 0.33084808946877914, "grad_norm": 1.4472731181325407, "learning_rate": 4.94304452882292e-05, "loss": 2.3509, "reason_loss": 0.5556771755218506, "step": 355, "utility_loss": 1.7951738834381104 }, { "cosine_similarity": 0, "epoch": 0.3317800559179869, "grad_norm": 1.6677068035424447, "learning_rate": 4.9413186054539176e-05, "loss": 2.6162, "reason_loss": 0.5068609118461609, "step": 356, "utility_loss": 2.109342575073242 }, { "cosine_similarity": 0, "epoch": 0.33271202236719477, "grad_norm": 1.4766362102481772, "learning_rate": 4.939592682084916e-05, "loss": 1.8582, "reason_loss": 0.538421094417572, "step": 357, "utility_loss": 1.3197845220565796 }, { "cosine_similarity": 0, "epoch": 0.3336439888164026, "grad_norm": 1.8394307165684487, "learning_rate": 4.937866758715913e-05, "loss": 2.3258, "reason_loss": 0.5441458225250244, "step": 358, "utility_loss": 1.7816412448883057 }, { "cosine_similarity": 0, "epoch": 0.33457595526561046, "grad_norm": 1.4274065708162043, "learning_rate": 4.9361408353469105e-05, "loss": 2.2776, "reason_loss": 0.5668412446975708, "step": 359, "utility_loss": 1.7108081579208374 }, { "cosine_similarity": 0, "epoch": 0.33550792171481825, "grad_norm": 1.4356745785406304, "learning_rate": 4.934414911977908e-05, "loss": 2.6037, "reason_loss": 0.5140258073806763, "step": 360, "utility_loss": 2.0897059440612793 }, { "cosine_similarity": 0, "epoch": 0.3364398881640261, "grad_norm": 1.7827190366046535, "learning_rate": 4.932688988608906e-05, "loss": 2.3481, "reason_loss": 0.5179823637008667, "step": 361, "utility_loss": 1.830165147781372 }, { "cosine_similarity": 0, "epoch": 0.33737185461323393, "grad_norm": 1.5201958996324556, "learning_rate": 4.9309630652399035e-05, "loss": 1.7631, "reason_loss": 0.5167993307113647, "step": 362, "utility_loss": 1.246347427368164 }, { "cosine_similarity": 0, "epoch": 0.3383038210624418, "grad_norm": 1.562806855064279, "learning_rate": 4.929237141870901e-05, "loss": 2.3059, "reason_loss": 0.531794011592865, "step": 363, "utility_loss": 1.7741520404815674 }, { "cosine_similarity": 0, "epoch": 0.33923578751164957, "grad_norm": 1.600925861672034, "learning_rate": 4.927511218501899e-05, "loss": 2.6596, "reason_loss": 0.5633617639541626, "step": 364, "utility_loss": 2.0962533950805664 }, { "cosine_similarity": 0, "epoch": 0.3401677539608574, "grad_norm": 1.4169517911873655, "learning_rate": 4.9257852951328965e-05, "loss": 2.1602, "reason_loss": 0.5227824449539185, "step": 365, "utility_loss": 1.6373710632324219 }, { "cosine_similarity": 0, "epoch": 0.34109972041006525, "grad_norm": 1.8202588926769627, "learning_rate": 4.924059371763894e-05, "loss": 2.6013, "reason_loss": 0.5327925682067871, "step": 366, "utility_loss": 2.0684988498687744 }, { "cosine_similarity": 0, "epoch": 0.34203168685927304, "grad_norm": 1.9729368650173607, "learning_rate": 4.922333448394891e-05, "loss": 2.6133, "reason_loss": 0.5906494855880737, "step": 367, "utility_loss": 2.0226259231567383 }, { "cosine_similarity": 0, "epoch": 0.3429636533084809, "grad_norm": 1.439678527637217, "learning_rate": 4.9206075250258894e-05, "loss": 1.9303, "reason_loss": 0.5386235117912292, "step": 368, "utility_loss": 1.3916444778442383 }, { "cosine_similarity": 0, "epoch": 0.34389561975768873, "grad_norm": 1.702169715901407, "learning_rate": 4.918881601656887e-05, "loss": 2.2493, "reason_loss": 0.5130845308303833, "step": 369, "utility_loss": 1.7361915111541748 }, { "cosine_similarity": 0, "epoch": 0.3448275862068966, "grad_norm": 2.0771600847955987, "learning_rate": 4.917155678287884e-05, "loss": 2.5428, "reason_loss": 0.5503107905387878, "step": 370, "utility_loss": 1.992455244064331 }, { "cosine_similarity": 0, "epoch": 0.34575955265610436, "grad_norm": 1.8650562584605583, "learning_rate": 4.915429754918882e-05, "loss": 2.421, "reason_loss": 0.5203589200973511, "step": 371, "utility_loss": 1.900661587715149 }, { "cosine_similarity": 0, "epoch": 0.3466915191053122, "grad_norm": 1.6163092767527565, "learning_rate": 4.91370383154988e-05, "loss": 2.3986, "reason_loss": 0.5238813161849976, "step": 372, "utility_loss": 1.8746857643127441 }, { "cosine_similarity": 0, "epoch": 0.34762348555452005, "grad_norm": 1.6635065543894065, "learning_rate": 4.911977908180877e-05, "loss": 2.0009, "reason_loss": 0.5379317998886108, "step": 373, "utility_loss": 1.463014841079712 }, { "cosine_similarity": 0, "epoch": 0.34855545200372784, "grad_norm": 1.821598543923346, "learning_rate": 4.910251984811875e-05, "loss": 2.2665, "reason_loss": 0.5056220293045044, "step": 374, "utility_loss": 1.7608994245529175 }, { "cosine_similarity": 0, "epoch": 0.3494874184529357, "grad_norm": 1.6693652056228516, "learning_rate": 4.908526061442872e-05, "loss": 2.4639, "reason_loss": 0.5512155294418335, "step": 375, "utility_loss": 1.9127264022827148 }, { "cosine_similarity": 0, "epoch": 0.3504193849021435, "grad_norm": 1.9091249071990855, "learning_rate": 4.9068001380738695e-05, "loss": 2.4413, "reason_loss": 0.5374916791915894, "step": 376, "utility_loss": 1.903761863708496 }, { "cosine_similarity": 0, "epoch": 0.35135135135135137, "grad_norm": 1.3528951201716304, "learning_rate": 4.905074214704867e-05, "loss": 1.9472, "reason_loss": 0.5703850984573364, "step": 377, "utility_loss": 1.3768045902252197 }, { "cosine_similarity": 0, "epoch": 0.35228331780055916, "grad_norm": 1.9474427057657107, "learning_rate": 4.9033482913358644e-05, "loss": 2.3406, "reason_loss": 0.5310807228088379, "step": 378, "utility_loss": 1.8094847202301025 }, { "cosine_similarity": 0, "epoch": 0.353215284249767, "grad_norm": 1.5551350950996572, "learning_rate": 4.9016223679668625e-05, "loss": 2.1327, "reason_loss": 0.5571799278259277, "step": 379, "utility_loss": 1.5755467414855957 }, { "cosine_similarity": 0, "epoch": 0.35414725069897485, "grad_norm": 1.4311347031490993, "learning_rate": 4.89989644459786e-05, "loss": 2.485, "reason_loss": 0.539016604423523, "step": 380, "utility_loss": 1.945988416671753 }, { "cosine_similarity": 0, "epoch": 0.3550792171481827, "grad_norm": 4.531937832440232, "learning_rate": 4.8981705212288574e-05, "loss": 2.1542, "reason_loss": 0.5431143045425415, "step": 381, "utility_loss": 1.6111207008361816 }, { "cosine_similarity": 0, "epoch": 0.3560111835973905, "grad_norm": 1.3607387670199935, "learning_rate": 4.896444597859855e-05, "loss": 2.1415, "reason_loss": 0.5286798477172852, "step": 382, "utility_loss": 1.6128120422363281 }, { "cosine_similarity": 0, "epoch": 0.3569431500465983, "grad_norm": 1.8016210000307107, "learning_rate": 4.894718674490853e-05, "loss": 2.5053, "reason_loss": 0.5743646621704102, "step": 383, "utility_loss": 1.9309406280517578 }, { "cosine_similarity": 0, "epoch": 0.35787511649580617, "grad_norm": 1.197966354246824, "learning_rate": 4.89299275112185e-05, "loss": 1.9387, "reason_loss": 0.5563223361968994, "step": 384, "utility_loss": 1.3824023008346558 }, { "cosine_similarity": 0, "epoch": 0.35880708294501396, "grad_norm": 1.7270521800488243, "learning_rate": 4.891266827752848e-05, "loss": 2.4896, "reason_loss": 0.541811466217041, "step": 385, "utility_loss": 1.9477756023406982 }, { "cosine_similarity": 0, "epoch": 0.3597390493942218, "grad_norm": 1.573002826244502, "learning_rate": 4.889540904383846e-05, "loss": 2.3589, "reason_loss": 0.521989107131958, "step": 386, "utility_loss": 1.8368749618530273 }, { "cosine_similarity": 0, "epoch": 0.36067101584342964, "grad_norm": 1.376847382627604, "learning_rate": 4.887814981014843e-05, "loss": 2.4802, "reason_loss": 0.5132481455802917, "step": 387, "utility_loss": 1.9669023752212524 }, { "cosine_similarity": 0, "epoch": 0.3616029822926375, "grad_norm": 1.5917933773495716, "learning_rate": 4.886089057645841e-05, "loss": 2.5828, "reason_loss": 0.5376812815666199, "step": 388, "utility_loss": 2.045138359069824 }, { "cosine_similarity": 0, "epoch": 0.3625349487418453, "grad_norm": 1.1889747196446085, "learning_rate": 4.884363134276838e-05, "loss": 2.2839, "reason_loss": 0.5446523427963257, "step": 389, "utility_loss": 1.739241123199463 }, { "cosine_similarity": 0, "epoch": 0.3634669151910531, "grad_norm": 1.394403673666168, "learning_rate": 4.882637210907836e-05, "loss": 1.9475, "reason_loss": 0.5372833013534546, "step": 390, "utility_loss": 1.4102418422698975 }, { "cosine_similarity": 0, "epoch": 0.36439888164026096, "grad_norm": 1.8294670938765916, "learning_rate": 4.880911287538834e-05, "loss": 2.3376, "reason_loss": 0.5697647333145142, "step": 391, "utility_loss": 1.767785668373108 }, { "cosine_similarity": 0, "epoch": 0.36533084808946875, "grad_norm": 3.0312898298934825, "learning_rate": 4.879185364169831e-05, "loss": 2.9339, "reason_loss": 0.49105602502822876, "step": 392, "utility_loss": 2.442822217941284 }, { "cosine_similarity": 0, "epoch": 0.3662628145386766, "grad_norm": 1.6642269227376625, "learning_rate": 4.877459440800829e-05, "loss": 2.3699, "reason_loss": 0.5239763259887695, "step": 393, "utility_loss": 1.8459367752075195 }, { "cosine_similarity": 0, "epoch": 0.36719478098788444, "grad_norm": 1.490082381849791, "learning_rate": 4.8757335174318266e-05, "loss": 2.2563, "reason_loss": 0.5462613701820374, "step": 394, "utility_loss": 1.7100366353988647 }, { "cosine_similarity": 0, "epoch": 0.3681267474370923, "grad_norm": 1.7707773863933731, "learning_rate": 4.874007594062824e-05, "loss": 1.971, "reason_loss": 0.5609350800514221, "step": 395, "utility_loss": 1.4100747108459473 }, { "cosine_similarity": 0, "epoch": 0.36905871388630007, "grad_norm": 1.7715397608820043, "learning_rate": 4.8722816706938215e-05, "loss": 2.6498, "reason_loss": 0.521235466003418, "step": 396, "utility_loss": 2.1285862922668457 }, { "cosine_similarity": 0, "epoch": 0.3699906803355079, "grad_norm": 1.6940848286598664, "learning_rate": 4.870555747324819e-05, "loss": 1.9525, "reason_loss": 0.4998700022697449, "step": 397, "utility_loss": 1.4525809288024902 }, { "cosine_similarity": 0, "epoch": 0.37092264678471576, "grad_norm": 1.5057641357176548, "learning_rate": 4.8688298239558164e-05, "loss": 2.2639, "reason_loss": 0.547905445098877, "step": 398, "utility_loss": 1.715946912765503 }, { "cosine_similarity": 0, "epoch": 0.3718546132339236, "grad_norm": 1.3460971393156538, "learning_rate": 4.867103900586814e-05, "loss": 2.4137, "reason_loss": 0.522821307182312, "step": 399, "utility_loss": 1.8908412456512451 }, { "cosine_similarity": 0, "epoch": 0.3727865796831314, "grad_norm": 1.9153131808891783, "learning_rate": 4.865377977217811e-05, "loss": 2.2264, "reason_loss": 0.5105240345001221, "step": 400, "utility_loss": 1.7158265113830566 }, { "cosine_similarity": 0, "epoch": 0.37371854613233924, "grad_norm": 1.9888808894586358, "learning_rate": 4.863652053848809e-05, "loss": 2.4898, "reason_loss": 0.5355267524719238, "step": 401, "utility_loss": 1.9543037414550781 }, { "cosine_similarity": 0, "epoch": 0.3746505125815471, "grad_norm": 2.103674361230901, "learning_rate": 4.861926130479807e-05, "loss": 2.4152, "reason_loss": 0.5670748353004456, "step": 402, "utility_loss": 1.8481383323669434 }, { "cosine_similarity": 0, "epoch": 0.37558247903075487, "grad_norm": 1.3988347879428917, "learning_rate": 4.860200207110804e-05, "loss": 2.4175, "reason_loss": 0.5304399132728577, "step": 403, "utility_loss": 1.8871053457260132 }, { "cosine_similarity": 0, "epoch": 0.3765144454799627, "grad_norm": 1.2781723359361623, "learning_rate": 4.858474283741802e-05, "loss": 2.2358, "reason_loss": 0.544795036315918, "step": 404, "utility_loss": 1.6909947395324707 }, { "cosine_similarity": 0, "epoch": 0.37744641192917056, "grad_norm": 1.6534242827345325, "learning_rate": 4.8567483603728e-05, "loss": 2.014, "reason_loss": 0.5321586728096008, "step": 405, "utility_loss": 1.4818789958953857 }, { "cosine_similarity": 0, "epoch": 0.3783783783783784, "grad_norm": 1.6874978853359297, "learning_rate": 4.855022437003797e-05, "loss": 2.145, "reason_loss": 0.5319384336471558, "step": 406, "utility_loss": 1.61307954788208 }, { "cosine_similarity": 0, "epoch": 0.3793103448275862, "grad_norm": 1.5747881719896522, "learning_rate": 4.8532965136347946e-05, "loss": 2.2138, "reason_loss": 0.537218451499939, "step": 407, "utility_loss": 1.6765737533569336 }, { "cosine_similarity": 0, "epoch": 0.38024231127679403, "grad_norm": 1.482204127255537, "learning_rate": 4.851570590265793e-05, "loss": 2.0619, "reason_loss": 0.5342835187911987, "step": 408, "utility_loss": 1.5276579856872559 }, { "cosine_similarity": 0, "epoch": 0.3811742777260019, "grad_norm": 1.8032843589415264, "learning_rate": 4.84984466689679e-05, "loss": 1.9957, "reason_loss": 0.5420255661010742, "step": 409, "utility_loss": 1.4536521434783936 }, { "cosine_similarity": 0, "epoch": 0.3821062441752097, "grad_norm": 2.35407492870453, "learning_rate": 4.8481187435277875e-05, "loss": 2.411, "reason_loss": 0.5391433238983154, "step": 410, "utility_loss": 1.871858835220337 }, { "cosine_similarity": 0, "epoch": 0.3830382106244175, "grad_norm": 2.4463970094252283, "learning_rate": 4.846392820158785e-05, "loss": 2.083, "reason_loss": 0.537238359451294, "step": 411, "utility_loss": 1.545798659324646 }, { "cosine_similarity": 0, "epoch": 0.38397017707362535, "grad_norm": 1.6613854201304055, "learning_rate": 4.844666896789783e-05, "loss": 2.6866, "reason_loss": 0.5588686466217041, "step": 412, "utility_loss": 2.127762794494629 }, { "cosine_similarity": 0, "epoch": 0.3849021435228332, "grad_norm": 2.2552350005126134, "learning_rate": 4.8429409734207805e-05, "loss": 2.5345, "reason_loss": 0.49675801396369934, "step": 413, "utility_loss": 2.0377116203308105 }, { "cosine_similarity": 0, "epoch": 0.385834109972041, "grad_norm": 1.3180855007065377, "learning_rate": 4.841215050051778e-05, "loss": 1.9251, "reason_loss": 0.4776773154735565, "step": 414, "utility_loss": 1.4474284648895264 }, { "cosine_similarity": 0, "epoch": 0.38676607642124883, "grad_norm": 1.6468034801265445, "learning_rate": 4.839489126682776e-05, "loss": 2.0684, "reason_loss": 0.5338543057441711, "step": 415, "utility_loss": 1.5344984531402588 }, { "cosine_similarity": 0, "epoch": 0.38769804287045667, "grad_norm": 1.357318989468679, "learning_rate": 4.8377632033137735e-05, "loss": 2.1382, "reason_loss": 0.5810497999191284, "step": 416, "utility_loss": 1.5571093559265137 }, { "cosine_similarity": 0, "epoch": 0.3886300093196645, "grad_norm": 1.4781963840626837, "learning_rate": 4.836037279944771e-05, "loss": 2.216, "reason_loss": 0.5453343391418457, "step": 417, "utility_loss": 1.670680046081543 }, { "cosine_similarity": 0, "epoch": 0.3895619757688723, "grad_norm": 1.4359563851658772, "learning_rate": 4.834311356575768e-05, "loss": 2.2697, "reason_loss": 0.5303051471710205, "step": 418, "utility_loss": 1.739378571510315 }, { "cosine_similarity": 0, "epoch": 0.39049394221808015, "grad_norm": 1.5367843911446943, "learning_rate": 4.832585433206766e-05, "loss": 2.3939, "reason_loss": 0.5548171401023865, "step": 419, "utility_loss": 1.8391027450561523 }, { "cosine_similarity": 0, "epoch": 0.391425908667288, "grad_norm": 1.3714663437321484, "learning_rate": 4.830859509837763e-05, "loss": 2.4173, "reason_loss": 0.5110491514205933, "step": 420, "utility_loss": 1.906287670135498 }, { "cosine_similarity": 0, "epoch": 0.3923578751164958, "grad_norm": 1.3218177045672448, "learning_rate": 4.8291335864687606e-05, "loss": 2.2366, "reason_loss": 0.5382453203201294, "step": 421, "utility_loss": 1.698348879814148 }, { "cosine_similarity": 0, "epoch": 0.3932898415657036, "grad_norm": 1.5364682508606544, "learning_rate": 4.827407663099758e-05, "loss": 2.2175, "reason_loss": 0.5395327806472778, "step": 422, "utility_loss": 1.677984595298767 }, { "cosine_similarity": 0, "epoch": 0.39422180801491147, "grad_norm": 2.048181668939588, "learning_rate": 4.825681739730756e-05, "loss": 2.1016, "reason_loss": 0.5733353495597839, "step": 423, "utility_loss": 1.5282573699951172 }, { "cosine_similarity": 0, "epoch": 0.3951537744641193, "grad_norm": 2.022528411986489, "learning_rate": 4.8239558163617536e-05, "loss": 2.5467, "reason_loss": 0.5270355939865112, "step": 424, "utility_loss": 2.0196189880371094 }, { "cosine_similarity": 0, "epoch": 0.3960857409133271, "grad_norm": 1.2732640922469896, "learning_rate": 4.822229892992751e-05, "loss": 1.9081, "reason_loss": 0.5292565822601318, "step": 425, "utility_loss": 1.3788530826568604 }, { "cosine_similarity": 0, "epoch": 0.39701770736253494, "grad_norm": 1.3311379230947886, "learning_rate": 4.820503969623749e-05, "loss": 1.7499, "reason_loss": 0.5558258295059204, "step": 426, "utility_loss": 1.1941118240356445 }, { "cosine_similarity": 0, "epoch": 0.3979496738117428, "grad_norm": 1.2788748749998446, "learning_rate": 4.8187780462547465e-05, "loss": 2.1144, "reason_loss": 0.5315130949020386, "step": 427, "utility_loss": 1.582848072052002 }, { "cosine_similarity": 0, "epoch": 0.39888164026095063, "grad_norm": 1.6644897397137586, "learning_rate": 4.817052122885744e-05, "loss": 2.2405, "reason_loss": 0.506519079208374, "step": 428, "utility_loss": 1.7339577674865723 }, { "cosine_similarity": 0, "epoch": 0.3998136067101584, "grad_norm": 1.3205929551785311, "learning_rate": 4.8153261995167414e-05, "loss": 2.3256, "reason_loss": 0.5013392567634583, "step": 429, "utility_loss": 1.8242709636688232 }, { "cosine_similarity": 0, "epoch": 0.40074557315936626, "grad_norm": 1.3376348125974746, "learning_rate": 4.8136002761477395e-05, "loss": 2.247, "reason_loss": 0.5351072549819946, "step": 430, "utility_loss": 1.7119367122650146 }, { "cosine_similarity": 0, "epoch": 0.4016775396085741, "grad_norm": 1.553563226244349, "learning_rate": 4.811874352778737e-05, "loss": 2.5573, "reason_loss": 0.540227472782135, "step": 431, "utility_loss": 2.017117500305176 }, { "cosine_similarity": 0, "epoch": 0.4026095060577819, "grad_norm": 1.3210512006802086, "learning_rate": 4.8101484294097344e-05, "loss": 1.9038, "reason_loss": 0.5491035580635071, "step": 432, "utility_loss": 1.3546879291534424 }, { "cosine_similarity": 0, "epoch": 0.40354147250698974, "grad_norm": 1.5518317545755438, "learning_rate": 4.808422506040732e-05, "loss": 2.6514, "reason_loss": 0.5294361114501953, "step": 433, "utility_loss": 2.121915340423584 }, { "cosine_similarity": 0, "epoch": 0.4044734389561976, "grad_norm": 1.3941268189767204, "learning_rate": 4.80669658267173e-05, "loss": 2.0892, "reason_loss": 0.544837474822998, "step": 434, "utility_loss": 1.5443992614746094 }, { "cosine_similarity": 0, "epoch": 0.40540540540540543, "grad_norm": 1.3083814164731082, "learning_rate": 4.804970659302727e-05, "loss": 2.0395, "reason_loss": 0.5380094051361084, "step": 435, "utility_loss": 1.5015250444412231 }, { "cosine_similarity": 0, "epoch": 0.4063373718546132, "grad_norm": 1.6712840768665305, "learning_rate": 4.803244735933725e-05, "loss": 2.5326, "reason_loss": 0.548356294631958, "step": 436, "utility_loss": 1.984228491783142 }, { "cosine_similarity": 0, "epoch": 0.40726933830382106, "grad_norm": 1.6974386620584712, "learning_rate": 4.801518812564723e-05, "loss": 2.3589, "reason_loss": 0.5471886396408081, "step": 437, "utility_loss": 1.8116943836212158 }, { "cosine_similarity": 0, "epoch": 0.4082013047530289, "grad_norm": 1.6813369016844175, "learning_rate": 4.79979288919572e-05, "loss": 2.2958, "reason_loss": 0.5282523036003113, "step": 438, "utility_loss": 1.7675079107284546 }, { "cosine_similarity": 0, "epoch": 0.4091332712022367, "grad_norm": 1.2743969603876268, "learning_rate": 4.798066965826718e-05, "loss": 2.0831, "reason_loss": 0.5223993062973022, "step": 439, "utility_loss": 1.5607155561447144 }, { "cosine_similarity": 0, "epoch": 0.41006523765144454, "grad_norm": 2.215851101154157, "learning_rate": 4.796341042457715e-05, "loss": 2.2959, "reason_loss": 0.530653715133667, "step": 440, "utility_loss": 1.7652394771575928 }, { "cosine_similarity": 0, "epoch": 0.4109972041006524, "grad_norm": 1.4404214994934308, "learning_rate": 4.7946151190887126e-05, "loss": 2.301, "reason_loss": 0.5285555124282837, "step": 441, "utility_loss": 1.7724663019180298 }, { "cosine_similarity": 0, "epoch": 0.4119291705498602, "grad_norm": 1.7487295401822822, "learning_rate": 4.79288919571971e-05, "loss": 2.2804, "reason_loss": 0.5366430282592773, "step": 442, "utility_loss": 1.743793249130249 }, { "cosine_similarity": 0, "epoch": 0.412861136999068, "grad_norm": 1.4582473192854397, "learning_rate": 4.7911632723507074e-05, "loss": 2.1405, "reason_loss": 0.5200144052505493, "step": 443, "utility_loss": 1.6205196380615234 }, { "cosine_similarity": 0, "epoch": 0.41379310344827586, "grad_norm": 1.6005970748528429, "learning_rate": 4.7894373489817056e-05, "loss": 2.404, "reason_loss": 0.5088359713554382, "step": 444, "utility_loss": 1.8951232433319092 }, { "cosine_similarity": 0, "epoch": 0.4147250698974837, "grad_norm": 1.5898451368806439, "learning_rate": 4.787711425612703e-05, "loss": 2.3427, "reason_loss": 0.5489920377731323, "step": 445, "utility_loss": 1.7936766147613525 }, { "cosine_similarity": 0, "epoch": 0.41565703634669154, "grad_norm": 1.3413750439859544, "learning_rate": 4.7859855022437004e-05, "loss": 2.4443, "reason_loss": 0.5326647162437439, "step": 446, "utility_loss": 1.9116843938827515 }, { "cosine_similarity": 0, "epoch": 0.41658900279589933, "grad_norm": 1.8244822795277253, "learning_rate": 4.784259578874698e-05, "loss": 2.2878, "reason_loss": 0.511304497718811, "step": 447, "utility_loss": 1.7764976024627686 }, { "cosine_similarity": 0, "epoch": 0.4175209692451072, "grad_norm": 1.2840438872578206, "learning_rate": 4.782533655505696e-05, "loss": 1.9886, "reason_loss": 0.5148393511772156, "step": 448, "utility_loss": 1.4737911224365234 }, { "cosine_similarity": 0, "epoch": 0.418452935694315, "grad_norm": 1.5760420831198652, "learning_rate": 4.7808077321366934e-05, "loss": 2.6172, "reason_loss": 0.527788519859314, "step": 449, "utility_loss": 2.089362144470215 }, { "cosine_similarity": 0, "epoch": 0.4193849021435228, "grad_norm": 1.8274832320293748, "learning_rate": 4.779081808767691e-05, "loss": 2.3551, "reason_loss": 0.5052622556686401, "step": 450, "utility_loss": 1.849868655204773 }, { "cosine_similarity": 0, "epoch": 0.42031686859273065, "grad_norm": 1.4216662673440668, "learning_rate": 4.777355885398688e-05, "loss": 2.2832, "reason_loss": 0.5213606953620911, "step": 451, "utility_loss": 1.7618589401245117 }, { "cosine_similarity": 0, "epoch": 0.4212488350419385, "grad_norm": 1.569136978129567, "learning_rate": 4.7756299620296863e-05, "loss": 2.3665, "reason_loss": 0.531318187713623, "step": 452, "utility_loss": 1.8352149724960327 }, { "cosine_similarity": 0, "epoch": 0.42218080149114634, "grad_norm": 1.4531125173337045, "learning_rate": 4.773904038660684e-05, "loss": 2.5686, "reason_loss": 0.5330137014389038, "step": 453, "utility_loss": 2.035623073577881 }, { "cosine_similarity": 0, "epoch": 0.42311276794035413, "grad_norm": 1.2319062184979337, "learning_rate": 4.772178115291681e-05, "loss": 2.076, "reason_loss": 0.5273346900939941, "step": 454, "utility_loss": 1.5486446619033813 }, { "cosine_similarity": 0, "epoch": 0.424044734389562, "grad_norm": 1.3054011745938403, "learning_rate": 4.770452191922679e-05, "loss": 2.2248, "reason_loss": 0.512397289276123, "step": 455, "utility_loss": 1.7123714685440063 }, { "cosine_similarity": 0, "epoch": 0.4249767008387698, "grad_norm": 1.4147330724357918, "learning_rate": 4.768726268553677e-05, "loss": 2.2781, "reason_loss": 0.4941183626651764, "step": 456, "utility_loss": 1.7839707136154175 }, { "cosine_similarity": 0, "epoch": 0.42590866728797766, "grad_norm": 1.4596523975912692, "learning_rate": 4.767000345184674e-05, "loss": 2.3201, "reason_loss": 0.5308957099914551, "step": 457, "utility_loss": 1.7892166376113892 }, { "cosine_similarity": 0, "epoch": 0.42684063373718545, "grad_norm": 1.4114743960183604, "learning_rate": 4.7652744218156716e-05, "loss": 2.1187, "reason_loss": 0.5349090099334717, "step": 458, "utility_loss": 1.583765983581543 }, { "cosine_similarity": 0, "epoch": 0.4277726001863933, "grad_norm": 1.8534028427500802, "learning_rate": 4.76354849844667e-05, "loss": 2.3669, "reason_loss": 0.5139614939689636, "step": 459, "utility_loss": 1.85292387008667 }, { "cosine_similarity": 0, "epoch": 0.42870456663560114, "grad_norm": 1.771319553566132, "learning_rate": 4.761822575077667e-05, "loss": 2.317, "reason_loss": 0.536064088344574, "step": 460, "utility_loss": 1.7809069156646729 }, { "cosine_similarity": 0, "epoch": 0.4296365330848089, "grad_norm": 1.3914369407836886, "learning_rate": 4.7600966517086646e-05, "loss": 2.1718, "reason_loss": 0.5245531797409058, "step": 461, "utility_loss": 1.6472601890563965 }, { "cosine_similarity": 0, "epoch": 0.43056849953401677, "grad_norm": 1.6834454066837568, "learning_rate": 4.758370728339662e-05, "loss": 2.2673, "reason_loss": 0.49932292103767395, "step": 462, "utility_loss": 1.76802396774292 }, { "cosine_similarity": 0, "epoch": 0.4315004659832246, "grad_norm": 1.2085772017058605, "learning_rate": 4.7566448049706594e-05, "loss": 1.8892, "reason_loss": 0.511427104473114, "step": 463, "utility_loss": 1.3778128623962402 }, { "cosine_similarity": 0, "epoch": 0.43243243243243246, "grad_norm": 1.4464698198544068, "learning_rate": 4.754918881601657e-05, "loss": 2.3338, "reason_loss": 0.49391523003578186, "step": 464, "utility_loss": 1.8398590087890625 }, { "cosine_similarity": 0, "epoch": 0.43336439888164024, "grad_norm": 1.4431766136552304, "learning_rate": 4.753192958232654e-05, "loss": 2.5061, "reason_loss": 0.5664722919464111, "step": 465, "utility_loss": 1.9396512508392334 }, { "cosine_similarity": 0, "epoch": 0.4342963653308481, "grad_norm": 1.3314243218800896, "learning_rate": 4.7514670348636524e-05, "loss": 2.059, "reason_loss": 0.5118895769119263, "step": 466, "utility_loss": 1.5470702648162842 }, { "cosine_similarity": 0, "epoch": 0.43522833178005593, "grad_norm": 1.6799407278665621, "learning_rate": 4.74974111149465e-05, "loss": 2.127, "reason_loss": 0.5645487308502197, "step": 467, "utility_loss": 1.5624339580535889 }, { "cosine_similarity": 0, "epoch": 0.4361602982292637, "grad_norm": 2.3373853989153717, "learning_rate": 4.748015188125647e-05, "loss": 2.4739, "reason_loss": 0.5429278612136841, "step": 468, "utility_loss": 1.930942177772522 }, { "cosine_similarity": 0, "epoch": 0.43709226467847156, "grad_norm": 1.345569581421901, "learning_rate": 4.746289264756645e-05, "loss": 2.1859, "reason_loss": 0.535282552242279, "step": 469, "utility_loss": 1.6505742073059082 }, { "cosine_similarity": 0, "epoch": 0.4380242311276794, "grad_norm": 1.7337399395494555, "learning_rate": 4.744563341387643e-05, "loss": 2.3687, "reason_loss": 0.5212289094924927, "step": 470, "utility_loss": 1.847474217414856 }, { "cosine_similarity": 0, "epoch": 0.43895619757688725, "grad_norm": 3.11705126830192, "learning_rate": 4.74283741801864e-05, "loss": 2.5173, "reason_loss": 0.5097020268440247, "step": 471, "utility_loss": 2.0075719356536865 }, { "cosine_similarity": 0, "epoch": 0.43988816402609504, "grad_norm": 1.397653682780546, "learning_rate": 4.7411114946496376e-05, "loss": 2.3518, "reason_loss": 0.5553497076034546, "step": 472, "utility_loss": 1.796454906463623 }, { "cosine_similarity": 0, "epoch": 0.4408201304753029, "grad_norm": 1.324782118247553, "learning_rate": 4.739385571280635e-05, "loss": 2.5466, "reason_loss": 0.5342787504196167, "step": 473, "utility_loss": 2.0123181343078613 }, { "cosine_similarity": 0, "epoch": 0.44175209692451073, "grad_norm": 1.2323065683905772, "learning_rate": 4.737659647911633e-05, "loss": 1.812, "reason_loss": 0.5004421472549438, "step": 474, "utility_loss": 1.311544418334961 }, { "cosine_similarity": 0, "epoch": 0.4426840633737186, "grad_norm": 1.606897680711316, "learning_rate": 4.7359337245426306e-05, "loss": 2.5533, "reason_loss": 0.5129261016845703, "step": 475, "utility_loss": 2.040344476699829 }, { "cosine_similarity": 0, "epoch": 0.44361602982292636, "grad_norm": 1.5201744897514557, "learning_rate": 4.734207801173628e-05, "loss": 1.8857, "reason_loss": 0.5603766441345215, "step": 476, "utility_loss": 1.3252943754196167 }, { "cosine_similarity": 0, "epoch": 0.4445479962721342, "grad_norm": 1.3588155834165982, "learning_rate": 4.732481877804626e-05, "loss": 2.1621, "reason_loss": 0.5225673913955688, "step": 477, "utility_loss": 1.639565348625183 }, { "cosine_similarity": 0, "epoch": 0.44547996272134205, "grad_norm": 2.458646891186301, "learning_rate": 4.7307559544356236e-05, "loss": 2.2328, "reason_loss": 0.548778772354126, "step": 478, "utility_loss": 1.6839752197265625 }, { "cosine_similarity": 0, "epoch": 0.44641192917054984, "grad_norm": 1.3380187092925424, "learning_rate": 4.729030031066621e-05, "loss": 2.1398, "reason_loss": 0.5171201229095459, "step": 479, "utility_loss": 1.622666835784912 }, { "cosine_similarity": 0, "epoch": 0.4473438956197577, "grad_norm": 2.303485879849764, "learning_rate": 4.7273041076976184e-05, "loss": 1.9965, "reason_loss": 0.5341800451278687, "step": 480, "utility_loss": 1.4623528718948364 }, { "cosine_similarity": 0, "epoch": 0.4482758620689655, "grad_norm": 1.1792684793234711, "learning_rate": 4.7255781843286165e-05, "loss": 1.9539, "reason_loss": 0.5436171293258667, "step": 481, "utility_loss": 1.4103202819824219 }, { "cosine_similarity": 0, "epoch": 0.44920782851817337, "grad_norm": 1.4428018959602433, "learning_rate": 4.723852260959614e-05, "loss": 1.9504, "reason_loss": 0.5432002544403076, "step": 482, "utility_loss": 1.4071862697601318 }, { "cosine_similarity": 0, "epoch": 0.45013979496738116, "grad_norm": 1.4439039343003535, "learning_rate": 4.7221263375906114e-05, "loss": 2.5841, "reason_loss": 0.5257207155227661, "step": 483, "utility_loss": 2.05841326713562 }, { "cosine_similarity": 0, "epoch": 0.451071761416589, "grad_norm": 1.4519889204591554, "learning_rate": 4.720400414221609e-05, "loss": 2.0912, "reason_loss": 0.49394774436950684, "step": 484, "utility_loss": 1.5972551107406616 }, { "cosine_similarity": 0, "epoch": 0.45200372786579684, "grad_norm": 1.4314398315545889, "learning_rate": 4.718674490852606e-05, "loss": 2.1689, "reason_loss": 0.5262106657028198, "step": 485, "utility_loss": 1.6427031755447388 }, { "cosine_similarity": 0, "epoch": 0.45293569431500463, "grad_norm": 1.6512135551521712, "learning_rate": 4.716948567483604e-05, "loss": 2.2449, "reason_loss": 0.5529636740684509, "step": 486, "utility_loss": 1.691914439201355 }, { "cosine_similarity": 0, "epoch": 0.4538676607642125, "grad_norm": 1.4255035011582702, "learning_rate": 4.715222644114601e-05, "loss": 2.3005, "reason_loss": 0.5180765986442566, "step": 487, "utility_loss": 1.7824420928955078 }, { "cosine_similarity": 0, "epoch": 0.4547996272134203, "grad_norm": 1.431058753279079, "learning_rate": 4.713496720745599e-05, "loss": 2.2127, "reason_loss": 0.5257208347320557, "step": 488, "utility_loss": 1.6870038509368896 }, { "cosine_similarity": 0, "epoch": 0.45573159366262816, "grad_norm": 1.2313227923703363, "learning_rate": 4.7117707973765966e-05, "loss": 1.6796, "reason_loss": 0.5179042220115662, "step": 489, "utility_loss": 1.1617021560668945 }, { "cosine_similarity": 0, "epoch": 0.45666356011183595, "grad_norm": 1.7365933614223106, "learning_rate": 4.710044874007594e-05, "loss": 2.2611, "reason_loss": 0.48962873220443726, "step": 490, "utility_loss": 1.7714483737945557 }, { "cosine_similarity": 0, "epoch": 0.4575955265610438, "grad_norm": 1.2210753105489844, "learning_rate": 4.7083189506385915e-05, "loss": 2.103, "reason_loss": 0.5001730918884277, "step": 491, "utility_loss": 1.6028298139572144 }, { "cosine_similarity": 0, "epoch": 0.45852749301025164, "grad_norm": 1.4592043327399167, "learning_rate": 4.7065930272695896e-05, "loss": 2.4264, "reason_loss": 0.5535148978233337, "step": 492, "utility_loss": 1.8728803396224976 }, { "cosine_similarity": 0, "epoch": 0.4594594594594595, "grad_norm": 1.493495187903074, "learning_rate": 4.704867103900587e-05, "loss": 2.4674, "reason_loss": 0.4980936050415039, "step": 493, "utility_loss": 1.9692678451538086 }, { "cosine_similarity": 0, "epoch": 0.4603914259086673, "grad_norm": 1.3133172480789244, "learning_rate": 4.7031411805315845e-05, "loss": 2.4478, "reason_loss": 0.5223960876464844, "step": 494, "utility_loss": 1.9254462718963623 }, { "cosine_similarity": 0, "epoch": 0.4613233923578751, "grad_norm": 1.4352484821652378, "learning_rate": 4.7014152571625826e-05, "loss": 2.2914, "reason_loss": 0.5612344741821289, "step": 495, "utility_loss": 1.7302002906799316 }, { "cosine_similarity": 0, "epoch": 0.46225535880708296, "grad_norm": 1.4026226661343553, "learning_rate": 4.69968933379358e-05, "loss": 2.271, "reason_loss": 0.532017707824707, "step": 496, "utility_loss": 1.7389981746673584 }, { "cosine_similarity": 0, "epoch": 0.46318732525629075, "grad_norm": 1.4493276817413678, "learning_rate": 4.6979634104245774e-05, "loss": 2.3465, "reason_loss": 0.5420234203338623, "step": 497, "utility_loss": 1.8045148849487305 }, { "cosine_similarity": 0, "epoch": 0.4641192917054986, "grad_norm": 1.2620356335700922, "learning_rate": 4.696237487055575e-05, "loss": 1.6191, "reason_loss": 0.5157098770141602, "step": 498, "utility_loss": 1.103377103805542 }, { "cosine_similarity": 0, "epoch": 0.46505125815470644, "grad_norm": 1.3421655455211465, "learning_rate": 4.694511563686573e-05, "loss": 2.2279, "reason_loss": 0.5301005244255066, "step": 499, "utility_loss": 1.6978365182876587 }, { "cosine_similarity": 0, "epoch": 0.4659832246039143, "grad_norm": 1.4770168708532605, "learning_rate": 4.6927856403175704e-05, "loss": 2.2401, "reason_loss": 0.5368696451187134, "step": 500, "utility_loss": 1.7032221555709839 }, { "cosine_similarity": 0, "epoch": 0.46691519105312207, "grad_norm": 1.5613234078117786, "learning_rate": 4.691059716948568e-05, "loss": 2.3752, "reason_loss": 0.5377540588378906, "step": 501, "utility_loss": 1.837418794631958 }, { "cosine_similarity": 0, "epoch": 0.4678471575023299, "grad_norm": 1.3398883928497234, "learning_rate": 4.689333793579565e-05, "loss": 2.4746, "reason_loss": 0.5227473974227905, "step": 502, "utility_loss": 1.9518826007843018 }, { "cosine_similarity": 0, "epoch": 0.46877912395153776, "grad_norm": 1.9085706528338489, "learning_rate": 4.6876078702105634e-05, "loss": 2.1032, "reason_loss": 0.498954176902771, "step": 503, "utility_loss": 1.6042636632919312 }, { "cosine_similarity": 0, "epoch": 0.46971109040074555, "grad_norm": 1.2855793213597368, "learning_rate": 4.685881946841561e-05, "loss": 2.107, "reason_loss": 0.5074001550674438, "step": 504, "utility_loss": 1.599576711654663 }, { "cosine_similarity": 0, "epoch": 0.4706430568499534, "grad_norm": 1.3478876207162924, "learning_rate": 4.684156023472558e-05, "loss": 1.9816, "reason_loss": 0.5113623738288879, "step": 505, "utility_loss": 1.4702385663986206 }, { "cosine_similarity": 0, "epoch": 0.47157502329916123, "grad_norm": 1.3373765687781467, "learning_rate": 4.6824301001035556e-05, "loss": 1.9353, "reason_loss": 0.5282237529754639, "step": 506, "utility_loss": 1.4070866107940674 }, { "cosine_similarity": 0, "epoch": 0.4725069897483691, "grad_norm": 1.659858543568998, "learning_rate": 4.680704176734553e-05, "loss": 2.4119, "reason_loss": 0.5344817638397217, "step": 507, "utility_loss": 1.8774089813232422 }, { "cosine_similarity": 0, "epoch": 0.47343895619757687, "grad_norm": 1.2631745649827413, "learning_rate": 4.6789782533655505e-05, "loss": 2.2143, "reason_loss": 0.5306894779205322, "step": 508, "utility_loss": 1.6836276054382324 }, { "cosine_similarity": 0, "epoch": 0.4743709226467847, "grad_norm": 1.3429374885248033, "learning_rate": 4.677252329996548e-05, "loss": 2.1382, "reason_loss": 0.5485779643058777, "step": 509, "utility_loss": 1.5895919799804688 }, { "cosine_similarity": 0, "epoch": 0.47530288909599255, "grad_norm": 1.2843030227531833, "learning_rate": 4.675526406627546e-05, "loss": 1.9662, "reason_loss": 0.49043509364128113, "step": 510, "utility_loss": 1.4757325649261475 }, { "cosine_similarity": 0, "epoch": 0.4762348555452004, "grad_norm": 1.5527157789769233, "learning_rate": 4.6738004832585435e-05, "loss": 1.9993, "reason_loss": 0.5164056420326233, "step": 511, "utility_loss": 1.4829127788543701 }, { "cosine_similarity": 0, "epoch": 0.4771668219944082, "grad_norm": 1.270402812936506, "learning_rate": 4.672074559889541e-05, "loss": 2.0284, "reason_loss": 0.5127511620521545, "step": 512, "utility_loss": 1.5156859159469604 }, { "cosine_similarity": 0, "epoch": 0.47809878844361603, "grad_norm": 1.302149303735094, "learning_rate": 4.670348636520538e-05, "loss": 1.9942, "reason_loss": 0.5531581044197083, "step": 513, "utility_loss": 1.4410006999969482 }, { "cosine_similarity": 0, "epoch": 0.4790307548928239, "grad_norm": 1.2298916841821557, "learning_rate": 4.6686227131515364e-05, "loss": 2.1474, "reason_loss": 0.5182042121887207, "step": 514, "utility_loss": 1.6291544437408447 }, { "cosine_similarity": 0, "epoch": 0.47996272134203166, "grad_norm": 1.4557428386439675, "learning_rate": 4.666896789782534e-05, "loss": 2.1735, "reason_loss": 0.5186647176742554, "step": 515, "utility_loss": 1.6547889709472656 }, { "cosine_similarity": 0, "epoch": 0.4808946877912395, "grad_norm": 1.2831595064468642, "learning_rate": 4.665170866413531e-05, "loss": 2.0695, "reason_loss": 0.5239193439483643, "step": 516, "utility_loss": 1.5455677509307861 }, { "cosine_similarity": 0, "epoch": 0.48182665424044735, "grad_norm": 1.453825421203861, "learning_rate": 4.6634449430445294e-05, "loss": 2.175, "reason_loss": 0.568766713142395, "step": 517, "utility_loss": 1.606223225593567 }, { "cosine_similarity": 0, "epoch": 0.4827586206896552, "grad_norm": 1.5738698903339967, "learning_rate": 4.661719019675527e-05, "loss": 2.0645, "reason_loss": 0.5076650977134705, "step": 518, "utility_loss": 1.5568790435791016 }, { "cosine_similarity": 0, "epoch": 0.483690587138863, "grad_norm": 1.1362873647514073, "learning_rate": 4.659993096306524e-05, "loss": 2.2685, "reason_loss": 0.51921546459198, "step": 519, "utility_loss": 1.7492377758026123 }, { "cosine_similarity": 0, "epoch": 0.4846225535880708, "grad_norm": 1.7213229345800451, "learning_rate": 4.658267172937522e-05, "loss": 2.2907, "reason_loss": 0.5255305767059326, "step": 520, "utility_loss": 1.7651493549346924 }, { "cosine_similarity": 0, "epoch": 0.48555452003727867, "grad_norm": 1.3105025392970637, "learning_rate": 4.65654124956852e-05, "loss": 2.0363, "reason_loss": 0.5001587271690369, "step": 521, "utility_loss": 1.536112666130066 }, { "cosine_similarity": 0, "epoch": 0.4864864864864865, "grad_norm": 1.1950069499706035, "learning_rate": 4.654815326199517e-05, "loss": 2.3342, "reason_loss": 0.5294613242149353, "step": 522, "utility_loss": 1.8047587871551514 }, { "cosine_similarity": 0, "epoch": 0.4874184529356943, "grad_norm": 1.3527653479994128, "learning_rate": 4.6530894028305146e-05, "loss": 2.2127, "reason_loss": 0.5232475996017456, "step": 523, "utility_loss": 1.6894282102584839 }, { "cosine_similarity": 0, "epoch": 0.48835041938490215, "grad_norm": 1.372375600368904, "learning_rate": 4.651363479461512e-05, "loss": 2.1458, "reason_loss": 0.525826096534729, "step": 524, "utility_loss": 1.619999885559082 }, { "cosine_similarity": 0, "epoch": 0.48928238583411, "grad_norm": 1.517028777331515, "learning_rate": 4.64963755609251e-05, "loss": 2.218, "reason_loss": 0.5610690116882324, "step": 525, "utility_loss": 1.6569377183914185 }, { "cosine_similarity": 0, "epoch": 0.4902143522833178, "grad_norm": 2.5065649224547855, "learning_rate": 4.6479116327235076e-05, "loss": 2.1368, "reason_loss": 0.5439552664756775, "step": 526, "utility_loss": 1.5928611755371094 }, { "cosine_similarity": 0, "epoch": 0.4911463187325256, "grad_norm": 1.3169699516553677, "learning_rate": 4.646185709354505e-05, "loss": 2.4686, "reason_loss": 0.5165522694587708, "step": 527, "utility_loss": 1.9520503282546997 }, { "cosine_similarity": 0, "epoch": 0.49207828518173347, "grad_norm": 1.5608609723541969, "learning_rate": 4.6444597859855025e-05, "loss": 1.9551, "reason_loss": 0.5328677892684937, "step": 528, "utility_loss": 1.4222416877746582 }, { "cosine_similarity": 0, "epoch": 0.4930102516309413, "grad_norm": 1.696909407071704, "learning_rate": 4.6427338626165e-05, "loss": 2.4369, "reason_loss": 0.4961630403995514, "step": 529, "utility_loss": 1.9407832622528076 }, { "cosine_similarity": 0, "epoch": 0.4939422180801491, "grad_norm": 1.3520729261382392, "learning_rate": 4.641007939247497e-05, "loss": 2.4669, "reason_loss": 0.5329133868217468, "step": 530, "utility_loss": 1.934004306793213 }, { "cosine_similarity": 0, "epoch": 0.49487418452935694, "grad_norm": 1.8432242488346946, "learning_rate": 4.639282015878495e-05, "loss": 2.319, "reason_loss": 0.5297527313232422, "step": 531, "utility_loss": 1.7892428636550903 }, { "cosine_similarity": 0, "epoch": 0.4958061509785648, "grad_norm": 1.2973702756403642, "learning_rate": 4.637556092509493e-05, "loss": 1.9866, "reason_loss": 0.5077484846115112, "step": 532, "utility_loss": 1.4788753986358643 }, { "cosine_similarity": 0, "epoch": 0.4967381174277726, "grad_norm": 1.4835131438502005, "learning_rate": 4.63583016914049e-05, "loss": 2.0394, "reason_loss": 0.5443150997161865, "step": 533, "utility_loss": 1.4951188564300537 }, { "cosine_similarity": 0, "epoch": 0.4976700838769804, "grad_norm": 1.2004098537737442, "learning_rate": 4.634104245771488e-05, "loss": 2.4631, "reason_loss": 0.541542649269104, "step": 534, "utility_loss": 1.9215130805969238 }, { "cosine_similarity": 0, "epoch": 0.49860205032618826, "grad_norm": 2.36971748821837, "learning_rate": 4.632378322402486e-05, "loss": 1.8476, "reason_loss": 0.5016872882843018, "step": 535, "utility_loss": 1.3459348678588867 }, { "cosine_similarity": 0, "epoch": 0.4995340167753961, "grad_norm": 1.3278555043414288, "learning_rate": 4.630652399033483e-05, "loss": 2.0274, "reason_loss": 0.5174908638000488, "step": 536, "utility_loss": 1.509925127029419 }, { "cosine_similarity": 0, "epoch": 0.5004659832246039, "grad_norm": 1.238361614880836, "learning_rate": 4.628926475664481e-05, "loss": 1.9937, "reason_loss": 0.4846946597099304, "step": 537, "utility_loss": 1.5089728832244873 }, { "cosine_similarity": 0, "epoch": 0.5013979496738118, "grad_norm": 1.341374100903493, "learning_rate": 4.627200552295478e-05, "loss": 1.9292, "reason_loss": 0.5275198817253113, "step": 538, "utility_loss": 1.4016779661178589 }, { "cosine_similarity": 0, "epoch": 0.5023299161230196, "grad_norm": 1.3645359556881287, "learning_rate": 4.625474628926476e-05, "loss": 2.2066, "reason_loss": 0.5319817066192627, "step": 539, "utility_loss": 1.6746528148651123 }, { "cosine_similarity": 0, "epoch": 0.5032618825722274, "grad_norm": 1.3947355417614953, "learning_rate": 4.6237487055574736e-05, "loss": 2.395, "reason_loss": 0.5032578110694885, "step": 540, "utility_loss": 1.891702651977539 }, { "cosine_similarity": 0, "epoch": 0.5041938490214353, "grad_norm": 1.608816552218464, "learning_rate": 4.622022782188471e-05, "loss": 2.3332, "reason_loss": 0.5144550800323486, "step": 541, "utility_loss": 1.8187485933303833 }, { "cosine_similarity": 0, "epoch": 0.5051258154706431, "grad_norm": 1.7344196945824033, "learning_rate": 4.6202968588194685e-05, "loss": 2.272, "reason_loss": 0.5226659774780273, "step": 542, "utility_loss": 1.7493633031845093 }, { "cosine_similarity": 0, "epoch": 0.5060577819198508, "grad_norm": 2.019595429431125, "learning_rate": 4.6185709354504666e-05, "loss": 2.2577, "reason_loss": 0.5052197575569153, "step": 543, "utility_loss": 1.7525025606155396 }, { "cosine_similarity": 0, "epoch": 0.5069897483690587, "grad_norm": 1.2863197830520003, "learning_rate": 4.616845012081464e-05, "loss": 2.0225, "reason_loss": 0.5358136892318726, "step": 544, "utility_loss": 1.4867249727249146 }, { "cosine_similarity": 0, "epoch": 0.5079217148182665, "grad_norm": 1.4802706731450455, "learning_rate": 4.6151190887124615e-05, "loss": 2.2233, "reason_loss": 0.5193350315093994, "step": 545, "utility_loss": 1.7039620876312256 }, { "cosine_similarity": 0, "epoch": 0.5088536812674743, "grad_norm": 1.6978965384681943, "learning_rate": 4.613393165343459e-05, "loss": 2.6983, "reason_loss": 0.5104342103004456, "step": 546, "utility_loss": 2.1878907680511475 }, { "cosine_similarity": 0, "epoch": 0.5097856477166822, "grad_norm": 1.3791162399595187, "learning_rate": 4.611667241974456e-05, "loss": 2.2984, "reason_loss": 0.5266740322113037, "step": 547, "utility_loss": 1.7716877460479736 }, { "cosine_similarity": 0, "epoch": 0.51071761416589, "grad_norm": 1.1606801091399928, "learning_rate": 4.609941318605454e-05, "loss": 2.0771, "reason_loss": 0.4954060912132263, "step": 548, "utility_loss": 1.5816504955291748 }, { "cosine_similarity": 0, "epoch": 0.5116495806150979, "grad_norm": 1.3425998323905521, "learning_rate": 4.608215395236451e-05, "loss": 2.6672, "reason_loss": 0.5323330163955688, "step": 549, "utility_loss": 2.1348326206207275 }, { "cosine_similarity": 0, "epoch": 0.5125815470643057, "grad_norm": 1.6218604298617973, "learning_rate": 4.606489471867449e-05, "loss": 2.3368, "reason_loss": 0.530220627784729, "step": 550, "utility_loss": 1.8065458536148071 }, { "cosine_similarity": 0, "epoch": 0.5135135135135135, "grad_norm": 1.3477850484047411, "learning_rate": 4.604763548498447e-05, "loss": 2.2188, "reason_loss": 0.5416022539138794, "step": 551, "utility_loss": 1.6772104501724243 }, { "cosine_similarity": 0, "epoch": 0.5144454799627214, "grad_norm": 1.4136089307669968, "learning_rate": 4.603037625129444e-05, "loss": 2.2849, "reason_loss": 0.5004110932350159, "step": 552, "utility_loss": 1.7845172882080078 }, { "cosine_similarity": 0, "epoch": 0.5153774464119292, "grad_norm": 1.767030848338039, "learning_rate": 4.6013117017604416e-05, "loss": 2.1746, "reason_loss": 0.4678601026535034, "step": 553, "utility_loss": 1.706709861755371 }, { "cosine_similarity": 0, "epoch": 0.516309412861137, "grad_norm": 1.4577513731737821, "learning_rate": 4.59958577839144e-05, "loss": 2.1692, "reason_loss": 0.5191971659660339, "step": 554, "utility_loss": 1.6500287055969238 }, { "cosine_similarity": 0, "epoch": 0.5172413793103449, "grad_norm": 1.6823872365389, "learning_rate": 4.597859855022437e-05, "loss": 2.3438, "reason_loss": 0.5404008030891418, "step": 555, "utility_loss": 1.8034182786941528 }, { "cosine_similarity": 0, "epoch": 0.5181733457595527, "grad_norm": 1.4748919329256396, "learning_rate": 4.5961339316534345e-05, "loss": 2.4265, "reason_loss": 0.583427906036377, "step": 556, "utility_loss": 1.8430476188659668 }, { "cosine_similarity": 0, "epoch": 0.5191053122087604, "grad_norm": 1.2224079025011163, "learning_rate": 4.5944080082844326e-05, "loss": 2.2602, "reason_loss": 0.5068169832229614, "step": 557, "utility_loss": 1.7534263134002686 }, { "cosine_similarity": 0, "epoch": 0.5200372786579683, "grad_norm": 1.5563931528800385, "learning_rate": 4.59268208491543e-05, "loss": 2.3016, "reason_loss": 0.5663739442825317, "step": 558, "utility_loss": 1.735219955444336 }, { "cosine_similarity": 0, "epoch": 0.5209692451071761, "grad_norm": 1.464950063915516, "learning_rate": 4.5909561615464275e-05, "loss": 1.9263, "reason_loss": 0.5213688611984253, "step": 559, "utility_loss": 1.4049701690673828 }, { "cosine_similarity": 0, "epoch": 0.5219012115563839, "grad_norm": 1.354577721210817, "learning_rate": 4.589230238177425e-05, "loss": 1.9757, "reason_loss": 0.5090955495834351, "step": 560, "utility_loss": 1.466614007949829 }, { "cosine_similarity": 0, "epoch": 0.5228331780055918, "grad_norm": 1.3165027997258645, "learning_rate": 4.587504314808423e-05, "loss": 2.077, "reason_loss": 0.5280618071556091, "step": 561, "utility_loss": 1.5488959550857544 }, { "cosine_similarity": 0, "epoch": 0.5237651444547996, "grad_norm": 1.3433862769898524, "learning_rate": 4.5857783914394205e-05, "loss": 1.9194, "reason_loss": 0.5238046646118164, "step": 562, "utility_loss": 1.3955732583999634 }, { "cosine_similarity": 0, "epoch": 0.5246971109040075, "grad_norm": 1.243993046604488, "learning_rate": 4.584052468070418e-05, "loss": 2.1458, "reason_loss": 0.5014919638633728, "step": 563, "utility_loss": 1.6442604064941406 }, { "cosine_similarity": 0, "epoch": 0.5256290773532153, "grad_norm": 1.140236478056064, "learning_rate": 4.582326544701415e-05, "loss": 1.8376, "reason_loss": 0.5126690864562988, "step": 564, "utility_loss": 1.3248875141143799 }, { "cosine_similarity": 0, "epoch": 0.5265610438024231, "grad_norm": 1.23831887799479, "learning_rate": 4.5806006213324134e-05, "loss": 1.9622, "reason_loss": 0.48942357301712036, "step": 565, "utility_loss": 1.4727518558502197 }, { "cosine_similarity": 0, "epoch": 0.527493010251631, "grad_norm": 1.6979747921657793, "learning_rate": 4.578874697963411e-05, "loss": 2.2403, "reason_loss": 0.5227857828140259, "step": 566, "utility_loss": 1.7175049781799316 }, { "cosine_similarity": 0, "epoch": 0.5284249767008388, "grad_norm": 1.4017135555695786, "learning_rate": 4.577148774594408e-05, "loss": 2.1064, "reason_loss": 0.5442205667495728, "step": 567, "utility_loss": 1.5621391534805298 }, { "cosine_similarity": 0, "epoch": 0.5293569431500466, "grad_norm": 1.2198706628389933, "learning_rate": 4.575422851225406e-05, "loss": 2.0268, "reason_loss": 0.4815240502357483, "step": 568, "utility_loss": 1.545296311378479 }, { "cosine_similarity": 0, "epoch": 0.5302889095992545, "grad_norm": 1.6391129198031276, "learning_rate": 4.573696927856403e-05, "loss": 2.4285, "reason_loss": 0.5054231286048889, "step": 569, "utility_loss": 1.9230732917785645 }, { "cosine_similarity": 0, "epoch": 0.5312208760484622, "grad_norm": 1.6293909190258855, "learning_rate": 4.5719710044874006e-05, "loss": 2.3097, "reason_loss": 0.5049304962158203, "step": 570, "utility_loss": 1.804729700088501 }, { "cosine_similarity": 0, "epoch": 0.53215284249767, "grad_norm": 1.2731509019991287, "learning_rate": 4.570245081118398e-05, "loss": 1.8563, "reason_loss": 0.4900267720222473, "step": 571, "utility_loss": 1.366288423538208 }, { "cosine_similarity": 0, "epoch": 0.5330848089468779, "grad_norm": 1.2907686391255044, "learning_rate": 4.568519157749396e-05, "loss": 2.2296, "reason_loss": 0.5319018363952637, "step": 572, "utility_loss": 1.6976909637451172 }, { "cosine_similarity": 0, "epoch": 0.5340167753960857, "grad_norm": 1.3422462012252188, "learning_rate": 4.5667932343803935e-05, "loss": 2.3938, "reason_loss": 0.537865400314331, "step": 573, "utility_loss": 1.855973482131958 }, { "cosine_similarity": 0, "epoch": 0.5349487418452936, "grad_norm": 3.303520232960189, "learning_rate": 4.565067311011391e-05, "loss": 2.0444, "reason_loss": 0.5219470858573914, "step": 574, "utility_loss": 1.5224294662475586 }, { "cosine_similarity": 0, "epoch": 0.5358807082945014, "grad_norm": 1.4883539967171777, "learning_rate": 4.563341387642389e-05, "loss": 2.2704, "reason_loss": 0.5119320154190063, "step": 575, "utility_loss": 1.758468747138977 }, { "cosine_similarity": 0, "epoch": 0.5368126747437092, "grad_norm": 1.370079819405049, "learning_rate": 4.5616154642733865e-05, "loss": 2.0371, "reason_loss": 0.49924561381340027, "step": 576, "utility_loss": 1.537859559059143 }, { "cosine_similarity": 0, "epoch": 0.5377446411929171, "grad_norm": 1.2461003616594062, "learning_rate": 4.559889540904384e-05, "loss": 2.2052, "reason_loss": 0.5109868049621582, "step": 577, "utility_loss": 1.694230556488037 }, { "cosine_similarity": 0, "epoch": 0.5386766076421249, "grad_norm": 1.4566344419198682, "learning_rate": 4.5581636175353814e-05, "loss": 2.2959, "reason_loss": 0.5112543106079102, "step": 578, "utility_loss": 1.7846893072128296 }, { "cosine_similarity": 0, "epoch": 0.5396085740913327, "grad_norm": 1.2649697222838245, "learning_rate": 4.5564376941663795e-05, "loss": 2.1897, "reason_loss": 0.5277584195137024, "step": 579, "utility_loss": 1.6619343757629395 }, { "cosine_similarity": 0, "epoch": 0.5405405405405406, "grad_norm": 1.3980882736283395, "learning_rate": 4.554711770797377e-05, "loss": 2.3708, "reason_loss": 0.5405548810958862, "step": 580, "utility_loss": 1.8302884101867676 }, { "cosine_similarity": 0, "epoch": 0.5414725069897484, "grad_norm": 1.1500146893179204, "learning_rate": 4.552985847428374e-05, "loss": 2.3551, "reason_loss": 0.5282512903213501, "step": 581, "utility_loss": 1.8268550634384155 }, { "cosine_similarity": 0, "epoch": 0.5424044734389561, "grad_norm": 1.2185834731000653, "learning_rate": 4.551259924059372e-05, "loss": 2.1013, "reason_loss": 0.5314258337020874, "step": 582, "utility_loss": 1.569908618927002 }, { "cosine_similarity": 0, "epoch": 0.543336439888164, "grad_norm": 1.1890018732487593, "learning_rate": 4.54953400069037e-05, "loss": 1.8903, "reason_loss": 0.48237377405166626, "step": 583, "utility_loss": 1.407899022102356 }, { "cosine_similarity": 0, "epoch": 0.5442684063373718, "grad_norm": 1.193389533092556, "learning_rate": 4.547808077321367e-05, "loss": 2.1034, "reason_loss": 0.5280337333679199, "step": 584, "utility_loss": 1.5753700733184814 }, { "cosine_similarity": 0, "epoch": 0.5452003727865797, "grad_norm": 1.2656853315462862, "learning_rate": 4.546082153952365e-05, "loss": 2.0729, "reason_loss": 0.5071909427642822, "step": 585, "utility_loss": 1.5657310485839844 }, { "cosine_similarity": 0, "epoch": 0.5461323392357875, "grad_norm": 1.3850163325670766, "learning_rate": 4.544356230583363e-05, "loss": 1.8437, "reason_loss": 0.5037398338317871, "step": 586, "utility_loss": 1.3399887084960938 }, { "cosine_similarity": 0, "epoch": 0.5470643056849953, "grad_norm": 1.5282857023113532, "learning_rate": 4.54263030721436e-05, "loss": 2.114, "reason_loss": 0.469740092754364, "step": 587, "utility_loss": 1.6442506313323975 }, { "cosine_similarity": 0, "epoch": 0.5479962721342032, "grad_norm": 7.903092789857441, "learning_rate": 4.540904383845358e-05, "loss": 2.4563, "reason_loss": 0.5022357702255249, "step": 588, "utility_loss": 1.9540410041809082 }, { "cosine_similarity": 0, "epoch": 0.548928238583411, "grad_norm": 1.1771797896647795, "learning_rate": 4.539178460476355e-05, "loss": 2.2215, "reason_loss": 0.5734290480613708, "step": 589, "utility_loss": 1.6480615139007568 }, { "cosine_similarity": 0, "epoch": 0.5498602050326188, "grad_norm": 1.138861711798034, "learning_rate": 4.5374525371073526e-05, "loss": 2.1356, "reason_loss": 0.5137660503387451, "step": 590, "utility_loss": 1.621843695640564 }, { "cosine_similarity": 0, "epoch": 0.5507921714818267, "grad_norm": 1.2801223332162395, "learning_rate": 4.53572661373835e-05, "loss": 2.43, "reason_loss": 0.5190893411636353, "step": 591, "utility_loss": 1.9109166860580444 }, { "cosine_similarity": 0, "epoch": 0.5517241379310345, "grad_norm": 1.1278609832699034, "learning_rate": 4.5340006903693474e-05, "loss": 1.9984, "reason_loss": 0.5014307498931885, "step": 592, "utility_loss": 1.496928095817566 }, { "cosine_similarity": 0, "epoch": 0.5526561043802423, "grad_norm": 1.505463722545293, "learning_rate": 4.532274767000345e-05, "loss": 2.1462, "reason_loss": 0.4961804151535034, "step": 593, "utility_loss": 1.6499993801116943 }, { "cosine_similarity": 0, "epoch": 0.5535880708294502, "grad_norm": 1.3499786647180878, "learning_rate": 4.530548843631343e-05, "loss": 2.4397, "reason_loss": 0.5363426208496094, "step": 594, "utility_loss": 1.9033241271972656 }, { "cosine_similarity": 0, "epoch": 0.554520037278658, "grad_norm": 1.3495372030165296, "learning_rate": 4.5288229202623404e-05, "loss": 2.0935, "reason_loss": 0.5212032794952393, "step": 595, "utility_loss": 1.572320818901062 }, { "cosine_similarity": 0, "epoch": 0.5554520037278659, "grad_norm": 1.4066247043155495, "learning_rate": 4.527096996893338e-05, "loss": 2.1618, "reason_loss": 0.5055177211761475, "step": 596, "utility_loss": 1.6562464237213135 }, { "cosine_similarity": 0, "epoch": 0.5563839701770736, "grad_norm": 1.5080125879588033, "learning_rate": 4.525371073524336e-05, "loss": 2.3328, "reason_loss": 0.5004564523696899, "step": 597, "utility_loss": 1.8323063850402832 }, { "cosine_similarity": 0, "epoch": 0.5573159366262814, "grad_norm": 1.411497010923548, "learning_rate": 4.523645150155333e-05, "loss": 2.0994, "reason_loss": 0.5175091028213501, "step": 598, "utility_loss": 1.5818699598312378 }, { "cosine_similarity": 0, "epoch": 0.5582479030754893, "grad_norm": 1.4787432938481138, "learning_rate": 4.521919226786331e-05, "loss": 2.4314, "reason_loss": 0.5274102091789246, "step": 599, "utility_loss": 1.9039666652679443 }, { "cosine_similarity": 0, "epoch": 0.5591798695246971, "grad_norm": 1.323334110932187, "learning_rate": 4.520193303417328e-05, "loss": 2.2326, "reason_loss": 0.5108534097671509, "step": 600, "utility_loss": 1.72176194190979 }, { "cosine_similarity": 0, "epoch": 0.5601118359739049, "grad_norm": 1.6293034096922716, "learning_rate": 4.518467380048326e-05, "loss": 2.0933, "reason_loss": 0.5105456113815308, "step": 601, "utility_loss": 1.5827438831329346 }, { "cosine_similarity": 0, "epoch": 0.5610438024231128, "grad_norm": 1.0801026054036593, "learning_rate": 4.516741456679324e-05, "loss": 1.8767, "reason_loss": 0.521892786026001, "step": 602, "utility_loss": 1.3547614812850952 }, { "cosine_similarity": 0, "epoch": 0.5619757688723206, "grad_norm": 1.4178722952741711, "learning_rate": 4.515015533310321e-05, "loss": 2.069, "reason_loss": 0.5240997672080994, "step": 603, "utility_loss": 1.5448592901229858 }, { "cosine_similarity": 0, "epoch": 0.5629077353215284, "grad_norm": 2.041531829299044, "learning_rate": 4.5132896099413186e-05, "loss": 2.4196, "reason_loss": 0.5161885619163513, "step": 604, "utility_loss": 1.903381586074829 }, { "cosine_similarity": 0, "epoch": 0.5638397017707363, "grad_norm": 1.2716444849856958, "learning_rate": 4.511563686572317e-05, "loss": 2.2513, "reason_loss": 0.48640477657318115, "step": 605, "utility_loss": 1.764845371246338 }, { "cosine_similarity": 0, "epoch": 0.5647716682199441, "grad_norm": 1.3571023547631287, "learning_rate": 4.509837763203314e-05, "loss": 2.0922, "reason_loss": 0.5524231791496277, "step": 606, "utility_loss": 1.5398035049438477 }, { "cosine_similarity": 0, "epoch": 0.5657036346691519, "grad_norm": 1.343698044788331, "learning_rate": 4.5081118398343116e-05, "loss": 2.3147, "reason_loss": 0.5121947526931763, "step": 607, "utility_loss": 1.8024811744689941 }, { "cosine_similarity": 0, "epoch": 0.5666356011183598, "grad_norm": 1.392164650715359, "learning_rate": 4.5063859164653097e-05, "loss": 2.0147, "reason_loss": 0.5420544147491455, "step": 608, "utility_loss": 1.4726163148880005 }, { "cosine_similarity": 0, "epoch": 0.5675675675675675, "grad_norm": 1.3133107810088973, "learning_rate": 4.504659993096307e-05, "loss": 2.1624, "reason_loss": 0.5186713337898254, "step": 609, "utility_loss": 1.6437551975250244 }, { "cosine_similarity": 0, "epoch": 0.5684995340167754, "grad_norm": 1.6928754536720658, "learning_rate": 4.5029340697273045e-05, "loss": 2.0639, "reason_loss": 0.5008201599121094, "step": 610, "utility_loss": 1.5630406141281128 }, { "cosine_similarity": 0, "epoch": 0.5694315004659832, "grad_norm": 1.3443169324694522, "learning_rate": 4.501208146358302e-05, "loss": 2.262, "reason_loss": 0.49782925844192505, "step": 611, "utility_loss": 1.7641749382019043 }, { "cosine_similarity": 0, "epoch": 0.570363466915191, "grad_norm": 1.4088211527775705, "learning_rate": 4.4994822229892994e-05, "loss": 2.1294, "reason_loss": 0.4892703890800476, "step": 612, "utility_loss": 1.6401667594909668 }, { "cosine_similarity": 0, "epoch": 0.5712954333643989, "grad_norm": 1.6273897134849042, "learning_rate": 4.497756299620297e-05, "loss": 2.2558, "reason_loss": 0.5402896404266357, "step": 613, "utility_loss": 1.7155036926269531 }, { "cosine_similarity": 0, "epoch": 0.5722273998136067, "grad_norm": 1.3511391678527498, "learning_rate": 4.496030376251294e-05, "loss": 2.3218, "reason_loss": 0.5223166346549988, "step": 614, "utility_loss": 1.7995164394378662 }, { "cosine_similarity": 0, "epoch": 0.5731593662628145, "grad_norm": 1.178498084707057, "learning_rate": 4.4943044528822923e-05, "loss": 1.9358, "reason_loss": 0.5024049282073975, "step": 615, "utility_loss": 1.4333480596542358 }, { "cosine_similarity": 0, "epoch": 0.5740913327120224, "grad_norm": 1.4968388900003164, "learning_rate": 4.49257852951329e-05, "loss": 2.4132, "reason_loss": 0.523444414138794, "step": 616, "utility_loss": 1.889795184135437 }, { "cosine_similarity": 0, "epoch": 0.5750232991612302, "grad_norm": 1.2440584031394235, "learning_rate": 4.490852606144287e-05, "loss": 2.3806, "reason_loss": 0.4926230311393738, "step": 617, "utility_loss": 1.8879668712615967 }, { "cosine_similarity": 0, "epoch": 0.575955265610438, "grad_norm": 1.2524788782253689, "learning_rate": 4.4891266827752846e-05, "loss": 2.3024, "reason_loss": 0.516285240650177, "step": 618, "utility_loss": 1.7860745191574097 }, { "cosine_similarity": 0, "epoch": 0.5768872320596459, "grad_norm": 1.285704926400398, "learning_rate": 4.487400759406283e-05, "loss": 2.0198, "reason_loss": 0.5016677379608154, "step": 619, "utility_loss": 1.51810622215271 }, { "cosine_similarity": 0, "epoch": 0.5778191985088537, "grad_norm": 1.3876483895634104, "learning_rate": 4.48567483603728e-05, "loss": 2.0199, "reason_loss": 0.4939366579055786, "step": 620, "utility_loss": 1.525919795036316 }, { "cosine_similarity": 0, "epoch": 0.5787511649580616, "grad_norm": 1.0803814625060857, "learning_rate": 4.4839489126682776e-05, "loss": 1.8555, "reason_loss": 0.5062704086303711, "step": 621, "utility_loss": 1.3492777347564697 }, { "cosine_similarity": 0, "epoch": 0.5796831314072693, "grad_norm": 1.2729988950947282, "learning_rate": 4.482222989299275e-05, "loss": 2.274, "reason_loss": 0.5509371757507324, "step": 622, "utility_loss": 1.7230273485183716 }, { "cosine_similarity": 0, "epoch": 0.5806150978564771, "grad_norm": 1.3729185586472552, "learning_rate": 4.480497065930273e-05, "loss": 1.8365, "reason_loss": 0.5235907435417175, "step": 623, "utility_loss": 1.3129229545593262 }, { "cosine_similarity": 0, "epoch": 0.581547064305685, "grad_norm": 1.576016055662189, "learning_rate": 4.4787711425612706e-05, "loss": 2.2207, "reason_loss": 0.5355901718139648, "step": 624, "utility_loss": 1.6851091384887695 }, { "cosine_similarity": 0, "epoch": 0.5824790307548928, "grad_norm": 1.2073696762281572, "learning_rate": 4.477045219192268e-05, "loss": 2.3153, "reason_loss": 0.5502339005470276, "step": 625, "utility_loss": 1.7650892734527588 }, { "cosine_similarity": 0, "epoch": 0.5834109972041006, "grad_norm": 1.3819200883890752, "learning_rate": 4.475319295823266e-05, "loss": 2.4379, "reason_loss": 0.5357012748718262, "step": 626, "utility_loss": 1.9021904468536377 }, { "cosine_similarity": 0, "epoch": 0.5843429636533085, "grad_norm": 1.4000409215037937, "learning_rate": 4.4735933724542635e-05, "loss": 2.1243, "reason_loss": 0.5548183917999268, "step": 627, "utility_loss": 1.5694795846939087 }, { "cosine_similarity": 0, "epoch": 0.5852749301025163, "grad_norm": 1.4198986100944042, "learning_rate": 4.471867449085261e-05, "loss": 2.2186, "reason_loss": 0.5252997875213623, "step": 628, "utility_loss": 1.693303108215332 }, { "cosine_similarity": 0, "epoch": 0.5862068965517241, "grad_norm": 1.4483828272180748, "learning_rate": 4.4701415257162584e-05, "loss": 2.1916, "reason_loss": 0.5030131936073303, "step": 629, "utility_loss": 1.6886322498321533 }, { "cosine_similarity": 0, "epoch": 0.587138863000932, "grad_norm": 1.6793553620793455, "learning_rate": 4.4684156023472565e-05, "loss": 2.2512, "reason_loss": 0.502565324306488, "step": 630, "utility_loss": 1.7486389875411987 }, { "cosine_similarity": 0, "epoch": 0.5880708294501398, "grad_norm": 1.413566102019781, "learning_rate": 4.466689678978254e-05, "loss": 2.2439, "reason_loss": 0.5075943470001221, "step": 631, "utility_loss": 1.7363226413726807 }, { "cosine_similarity": 0, "epoch": 0.5890027958993477, "grad_norm": 1.0865638613602424, "learning_rate": 4.4649637556092513e-05, "loss": 2.0608, "reason_loss": 0.5152700543403625, "step": 632, "utility_loss": 1.5455601215362549 }, { "cosine_similarity": 0, "epoch": 0.5899347623485555, "grad_norm": 1.6998610298479522, "learning_rate": 4.463237832240249e-05, "loss": 2.2745, "reason_loss": 0.5196971893310547, "step": 633, "utility_loss": 1.7547528743743896 }, { "cosine_similarity": 0, "epoch": 0.5908667287977633, "grad_norm": 1.1048960010926592, "learning_rate": 4.461511908871246e-05, "loss": 2.1516, "reason_loss": 0.5488042235374451, "step": 634, "utility_loss": 1.6027584075927734 }, { "cosine_similarity": 0, "epoch": 0.5917986952469712, "grad_norm": 1.3017521198451218, "learning_rate": 4.4597859855022436e-05, "loss": 2.2701, "reason_loss": 0.5406550168991089, "step": 635, "utility_loss": 1.7294703722000122 }, { "cosine_similarity": 0, "epoch": 0.5927306616961789, "grad_norm": 1.3397041452210114, "learning_rate": 4.458060062133241e-05, "loss": 2.0922, "reason_loss": 0.516606867313385, "step": 636, "utility_loss": 1.5755434036254883 }, { "cosine_similarity": 0, "epoch": 0.5936626281453867, "grad_norm": 1.236239211855064, "learning_rate": 4.456334138764239e-05, "loss": 2.3269, "reason_loss": 0.5447230339050293, "step": 637, "utility_loss": 1.7821804285049438 }, { "cosine_similarity": 0, "epoch": 0.5945945945945946, "grad_norm": 1.6486713582064874, "learning_rate": 4.4546082153952366e-05, "loss": 2.2758, "reason_loss": 0.5094864964485168, "step": 638, "utility_loss": 1.7663581371307373 }, { "cosine_similarity": 0, "epoch": 0.5955265610438024, "grad_norm": 1.4827076223680642, "learning_rate": 4.452882292026234e-05, "loss": 2.2457, "reason_loss": 0.5217673778533936, "step": 639, "utility_loss": 1.7239818572998047 }, { "cosine_similarity": 0, "epoch": 0.5964585274930102, "grad_norm": 1.5236573866514331, "learning_rate": 4.4511563686572315e-05, "loss": 1.9917, "reason_loss": 0.495290070772171, "step": 640, "utility_loss": 1.4964303970336914 }, { "cosine_similarity": 0, "epoch": 0.5973904939422181, "grad_norm": 1.274045620744166, "learning_rate": 4.4494304452882296e-05, "loss": 2.0431, "reason_loss": 0.5091649889945984, "step": 641, "utility_loss": 1.533982753753662 }, { "cosine_similarity": 0, "epoch": 0.5983224603914259, "grad_norm": 1.1550369293656417, "learning_rate": 4.447704521919227e-05, "loss": 2.082, "reason_loss": 0.5417892336845398, "step": 642, "utility_loss": 1.5401703119277954 }, { "cosine_similarity": 0, "epoch": 0.5992544268406338, "grad_norm": 1.4171228281697872, "learning_rate": 4.4459785985502244e-05, "loss": 2.1053, "reason_loss": 0.5203596353530884, "step": 643, "utility_loss": 1.584938406944275 }, { "cosine_similarity": 0, "epoch": 0.6001863932898416, "grad_norm": 1.4156238509533952, "learning_rate": 4.444252675181222e-05, "loss": 2.0941, "reason_loss": 0.5081818103790283, "step": 644, "utility_loss": 1.5859506130218506 }, { "cosine_similarity": 0, "epoch": 0.6011183597390494, "grad_norm": 1.3686489123612362, "learning_rate": 4.44252675181222e-05, "loss": 2.0955, "reason_loss": 0.50408935546875, "step": 645, "utility_loss": 1.5913963317871094 }, { "cosine_similarity": 0, "epoch": 0.6020503261882573, "grad_norm": 1.2838179022783567, "learning_rate": 4.4408008284432174e-05, "loss": 1.7744, "reason_loss": 0.5270247459411621, "step": 646, "utility_loss": 1.2474133968353271 }, { "cosine_similarity": 0, "epoch": 0.6029822926374651, "grad_norm": 1.6392537807647578, "learning_rate": 4.439074905074215e-05, "loss": 2.1183, "reason_loss": 0.5414897799491882, "step": 647, "utility_loss": 1.576810598373413 }, { "cosine_similarity": 0, "epoch": 0.6039142590866728, "grad_norm": 1.488744186218734, "learning_rate": 4.437348981705213e-05, "loss": 2.0093, "reason_loss": 0.539315938949585, "step": 648, "utility_loss": 1.470030426979065 }, { "cosine_similarity": 0, "epoch": 0.6048462255358807, "grad_norm": 1.5516687220907182, "learning_rate": 4.4356230583362103e-05, "loss": 1.9179, "reason_loss": 0.5060023069381714, "step": 649, "utility_loss": 1.411872386932373 }, { "cosine_similarity": 0, "epoch": 0.6057781919850885, "grad_norm": 1.1833315173452341, "learning_rate": 4.433897134967208e-05, "loss": 1.8125, "reason_loss": 0.5161495208740234, "step": 650, "utility_loss": 1.2963107824325562 }, { "cosine_similarity": 0, "epoch": 0.6067101584342963, "grad_norm": 1.458964788482795, "learning_rate": 4.432171211598205e-05, "loss": 2.0631, "reason_loss": 0.5381922125816345, "step": 651, "utility_loss": 1.5248692035675049 }, { "cosine_similarity": 0, "epoch": 0.6076421248835042, "grad_norm": 1.5468523117590076, "learning_rate": 4.430445288229203e-05, "loss": 1.9667, "reason_loss": 0.5258277058601379, "step": 652, "utility_loss": 1.4408966302871704 }, { "cosine_similarity": 0, "epoch": 0.608574091332712, "grad_norm": 1.1738495719992668, "learning_rate": 4.428719364860201e-05, "loss": 2.1832, "reason_loss": 0.5030214786529541, "step": 653, "utility_loss": 1.6801741123199463 }, { "cosine_similarity": 0, "epoch": 0.6095060577819198, "grad_norm": 1.0840619481452, "learning_rate": 4.426993441491198e-05, "loss": 2.1903, "reason_loss": 0.5021182298660278, "step": 654, "utility_loss": 1.688172698020935 }, { "cosine_similarity": 0, "epoch": 0.6104380242311277, "grad_norm": 1.4331647752637058, "learning_rate": 4.4252675181221956e-05, "loss": 2.1983, "reason_loss": 0.5436684489250183, "step": 655, "utility_loss": 1.6545850038528442 }, { "cosine_similarity": 0, "epoch": 0.6113699906803355, "grad_norm": 1.6294648151691824, "learning_rate": 4.423541594753193e-05, "loss": 2.1262, "reason_loss": 0.5146447420120239, "step": 656, "utility_loss": 1.6115931272506714 }, { "cosine_similarity": 0, "epoch": 0.6123019571295434, "grad_norm": 1.3324965497853969, "learning_rate": 4.4218156713841905e-05, "loss": 2.2558, "reason_loss": 0.5186060667037964, "step": 657, "utility_loss": 1.7372387647628784 }, { "cosine_similarity": 0, "epoch": 0.6132339235787512, "grad_norm": 1.2599322225109248, "learning_rate": 4.420089748015188e-05, "loss": 2.1266, "reason_loss": 0.47852471470832825, "step": 658, "utility_loss": 1.6481053829193115 }, { "cosine_similarity": 0, "epoch": 0.614165890027959, "grad_norm": 1.023425025146838, "learning_rate": 4.418363824646186e-05, "loss": 1.8988, "reason_loss": 0.4736686944961548, "step": 659, "utility_loss": 1.4251664876937866 }, { "cosine_similarity": 0, "epoch": 0.6150978564771669, "grad_norm": 1.1755698766755704, "learning_rate": 4.4166379012771834e-05, "loss": 1.8713, "reason_loss": 0.5163425207138062, "step": 660, "utility_loss": 1.3549879789352417 }, { "cosine_similarity": 0, "epoch": 0.6160298229263746, "grad_norm": 1.3379880767353858, "learning_rate": 4.414911977908181e-05, "loss": 1.8653, "reason_loss": 0.48346447944641113, "step": 661, "utility_loss": 1.3818234205245972 }, { "cosine_similarity": 0, "epoch": 0.6169617893755824, "grad_norm": 1.119917742626786, "learning_rate": 4.413186054539178e-05, "loss": 1.806, "reason_loss": 0.4886779189109802, "step": 662, "utility_loss": 1.3172852993011475 }, { "cosine_similarity": 0, "epoch": 0.6178937558247903, "grad_norm": 1.126900404190736, "learning_rate": 4.4114601311701764e-05, "loss": 1.8795, "reason_loss": 0.5289723873138428, "step": 663, "utility_loss": 1.350572109222412 }, { "cosine_similarity": 0, "epoch": 0.6188257222739981, "grad_norm": 1.5160198090589292, "learning_rate": 4.409734207801174e-05, "loss": 2.0815, "reason_loss": 0.5144928693771362, "step": 664, "utility_loss": 1.5669703483581543 }, { "cosine_similarity": 0, "epoch": 0.6197576887232059, "grad_norm": 1.2089004116135187, "learning_rate": 4.408008284432171e-05, "loss": 2.2648, "reason_loss": 0.5076634883880615, "step": 665, "utility_loss": 1.7571511268615723 }, { "cosine_similarity": 0, "epoch": 0.6206896551724138, "grad_norm": 1.285380785311221, "learning_rate": 4.4062823610631694e-05, "loss": 2.1729, "reason_loss": 0.5182581543922424, "step": 666, "utility_loss": 1.654592752456665 }, { "cosine_similarity": 0, "epoch": 0.6216216216216216, "grad_norm": 1.17114139614502, "learning_rate": 4.404556437694167e-05, "loss": 2.4007, "reason_loss": 0.5022516250610352, "step": 667, "utility_loss": 1.8984887599945068 }, { "cosine_similarity": 0, "epoch": 0.6225535880708295, "grad_norm": 1.1651897223353131, "learning_rate": 4.402830514325164e-05, "loss": 2.0622, "reason_loss": 0.48108425736427307, "step": 668, "utility_loss": 1.581076979637146 }, { "cosine_similarity": 0, "epoch": 0.6234855545200373, "grad_norm": 1.41767541132224, "learning_rate": 4.4011045909561616e-05, "loss": 2.2139, "reason_loss": 0.5192701816558838, "step": 669, "utility_loss": 1.6946032047271729 }, { "cosine_similarity": 0, "epoch": 0.6244175209692451, "grad_norm": 1.7121574829335473, "learning_rate": 4.39937866758716e-05, "loss": 2.1712, "reason_loss": 0.4901948571205139, "step": 670, "utility_loss": 1.6809828281402588 }, { "cosine_similarity": 0, "epoch": 0.625349487418453, "grad_norm": 1.2354153163450794, "learning_rate": 4.397652744218157e-05, "loss": 2.0515, "reason_loss": 0.5290859937667847, "step": 671, "utility_loss": 1.5224032402038574 }, { "cosine_similarity": 0, "epoch": 0.6262814538676608, "grad_norm": 1.1312984048941253, "learning_rate": 4.3959268208491546e-05, "loss": 2.1237, "reason_loss": 0.5298888087272644, "step": 672, "utility_loss": 1.5937639474868774 }, { "cosine_similarity": 0, "epoch": 0.6272134203168686, "grad_norm": 1.1450594579789355, "learning_rate": 4.394200897480152e-05, "loss": 2.0606, "reason_loss": 0.5116117000579834, "step": 673, "utility_loss": 1.5489482879638672 }, { "cosine_similarity": 0, "epoch": 0.6281453867660765, "grad_norm": 1.7297720935260985, "learning_rate": 4.39247497411115e-05, "loss": 2.08, "reason_loss": 0.5035473108291626, "step": 674, "utility_loss": 1.5764100551605225 }, { "cosine_similarity": 0, "epoch": 0.6290773532152842, "grad_norm": 1.2530214621872755, "learning_rate": 4.3907490507421476e-05, "loss": 1.8947, "reason_loss": 0.4890802502632141, "step": 675, "utility_loss": 1.4056079387664795 }, { "cosine_similarity": 0, "epoch": 0.630009319664492, "grad_norm": 1.4120379313506666, "learning_rate": 4.389023127373145e-05, "loss": 2.4644, "reason_loss": 0.5211763381958008, "step": 676, "utility_loss": 1.9432594776153564 }, { "cosine_similarity": 0, "epoch": 0.6309412861136999, "grad_norm": 1.4190889592118618, "learning_rate": 4.3872972040041424e-05, "loss": 2.2891, "reason_loss": 0.49446621537208557, "step": 677, "utility_loss": 1.7946523427963257 }, { "cosine_similarity": 0, "epoch": 0.6318732525629077, "grad_norm": 1.3498454281980337, "learning_rate": 4.38557128063514e-05, "loss": 2.4529, "reason_loss": 0.4861226975917816, "step": 678, "utility_loss": 1.9667428731918335 }, { "cosine_similarity": 0, "epoch": 0.6328052190121156, "grad_norm": 1.481068037367187, "learning_rate": 4.383845357266137e-05, "loss": 2.3428, "reason_loss": 0.5219088792800903, "step": 679, "utility_loss": 1.820939064025879 }, { "cosine_similarity": 0, "epoch": 0.6337371854613234, "grad_norm": 1.3880895579274266, "learning_rate": 4.382119433897135e-05, "loss": 2.2472, "reason_loss": 0.5481805801391602, "step": 680, "utility_loss": 1.6990578174591064 }, { "cosine_similarity": 0, "epoch": 0.6346691519105312, "grad_norm": 1.3199928848294429, "learning_rate": 4.380393510528133e-05, "loss": 2.2829, "reason_loss": 0.5454773902893066, "step": 681, "utility_loss": 1.73745596408844 }, { "cosine_similarity": 0, "epoch": 0.6356011183597391, "grad_norm": 1.1755653435185593, "learning_rate": 4.37866758715913e-05, "loss": 2.194, "reason_loss": 0.5181068778038025, "step": 682, "utility_loss": 1.675851583480835 }, { "cosine_similarity": 0, "epoch": 0.6365330848089469, "grad_norm": 1.1455162001322072, "learning_rate": 4.376941663790128e-05, "loss": 2.1501, "reason_loss": 0.5276926755905151, "step": 683, "utility_loss": 1.6224254369735718 }, { "cosine_similarity": 0, "epoch": 0.6374650512581547, "grad_norm": 1.286167319346364, "learning_rate": 4.375215740421125e-05, "loss": 2.0938, "reason_loss": 0.5348339080810547, "step": 684, "utility_loss": 1.5589818954467773 }, { "cosine_similarity": 0, "epoch": 0.6383970177073626, "grad_norm": 1.1829404996532866, "learning_rate": 4.373489817052123e-05, "loss": 2.2786, "reason_loss": 0.5072623491287231, "step": 685, "utility_loss": 1.7713077068328857 }, { "cosine_similarity": 0, "epoch": 0.6393289841565704, "grad_norm": 1.3529453797763629, "learning_rate": 4.3717638936831206e-05, "loss": 2.0092, "reason_loss": 0.5089508891105652, "step": 686, "utility_loss": 1.500248908996582 }, { "cosine_similarity": 0, "epoch": 0.6402609506057781, "grad_norm": 1.4073341759718303, "learning_rate": 4.370037970314118e-05, "loss": 1.8813, "reason_loss": 0.5266621112823486, "step": 687, "utility_loss": 1.3546779155731201 }, { "cosine_similarity": 0, "epoch": 0.641192917054986, "grad_norm": 1.2878120482071083, "learning_rate": 4.368312046945116e-05, "loss": 2.0526, "reason_loss": 0.4987032115459442, "step": 688, "utility_loss": 1.5539405345916748 }, { "cosine_similarity": 0, "epoch": 0.6421248835041938, "grad_norm": 1.2662565964689474, "learning_rate": 4.3665861235761136e-05, "loss": 2.3041, "reason_loss": 0.5122442245483398, "step": 689, "utility_loss": 1.791872501373291 }, { "cosine_similarity": 0, "epoch": 0.6430568499534017, "grad_norm": 1.3457614249311354, "learning_rate": 4.364860200207111e-05, "loss": 2.0705, "reason_loss": 0.48314473032951355, "step": 690, "utility_loss": 1.5873512029647827 }, { "cosine_similarity": 0, "epoch": 0.6439888164026095, "grad_norm": 1.46326409333404, "learning_rate": 4.3631342768381085e-05, "loss": 1.8498, "reason_loss": 0.5191049575805664, "step": 691, "utility_loss": 1.3307045698165894 }, { "cosine_similarity": 0, "epoch": 0.6449207828518173, "grad_norm": 1.2917173261994912, "learning_rate": 4.3614083534691066e-05, "loss": 1.9482, "reason_loss": 0.49857401847839355, "step": 692, "utility_loss": 1.4496688842773438 }, { "cosine_similarity": 0, "epoch": 0.6458527493010252, "grad_norm": 1.304915497536585, "learning_rate": 4.359682430100104e-05, "loss": 2.3093, "reason_loss": 0.5066743493080139, "step": 693, "utility_loss": 1.8026530742645264 }, { "cosine_similarity": 0, "epoch": 0.646784715750233, "grad_norm": 1.217489866832762, "learning_rate": 4.3579565067311014e-05, "loss": 1.8297, "reason_loss": 0.5255012512207031, "step": 694, "utility_loss": 1.3041528463363647 }, { "cosine_similarity": 0, "epoch": 0.6477166821994408, "grad_norm": 1.2715416269350799, "learning_rate": 4.356230583362099e-05, "loss": 2.1343, "reason_loss": 0.5004305243492126, "step": 695, "utility_loss": 1.6339030265808105 }, { "cosine_similarity": 0, "epoch": 0.6486486486486487, "grad_norm": 1.3135358120003549, "learning_rate": 4.354504659993097e-05, "loss": 2.1442, "reason_loss": 0.5473692417144775, "step": 696, "utility_loss": 1.5968201160430908 }, { "cosine_similarity": 0, "epoch": 0.6495806150978565, "grad_norm": 1.3604460894429014, "learning_rate": 4.3527787366240944e-05, "loss": 1.9068, "reason_loss": 0.5235795974731445, "step": 697, "utility_loss": 1.3832064867019653 }, { "cosine_similarity": 0, "epoch": 0.6505125815470643, "grad_norm": 1.5192181671114342, "learning_rate": 4.351052813255092e-05, "loss": 2.4049, "reason_loss": 0.5189770460128784, "step": 698, "utility_loss": 1.8858758211135864 }, { "cosine_similarity": 0, "epoch": 0.6514445479962722, "grad_norm": 1.5898143598539598, "learning_rate": 4.349326889886089e-05, "loss": 2.3224, "reason_loss": 0.5205299258232117, "step": 699, "utility_loss": 1.8019160032272339 }, { "cosine_similarity": 0, "epoch": 0.65237651444548, "grad_norm": 1.3658822160779571, "learning_rate": 4.347600966517087e-05, "loss": 2.1617, "reason_loss": 0.4941839873790741, "step": 700, "utility_loss": 1.6675612926483154 }, { "cosine_similarity": 0, "epoch": 0.6533084808946877, "grad_norm": 1.2388359911041908, "learning_rate": 4.345875043148084e-05, "loss": 2.1708, "reason_loss": 0.4784400463104248, "step": 701, "utility_loss": 1.6923245191574097 }, { "cosine_similarity": 0, "epoch": 0.6542404473438956, "grad_norm": 1.5629671730044172, "learning_rate": 4.3441491197790815e-05, "loss": 2.41, "reason_loss": 0.5559320449829102, "step": 702, "utility_loss": 1.8540570735931396 }, { "cosine_similarity": 0, "epoch": 0.6551724137931034, "grad_norm": 1.2089649229875725, "learning_rate": 4.3424231964100796e-05, "loss": 2.3681, "reason_loss": 0.5461908578872681, "step": 703, "utility_loss": 1.8219258785247803 }, { "cosine_similarity": 0, "epoch": 0.6561043802423113, "grad_norm": 1.3698161801115183, "learning_rate": 4.340697273041077e-05, "loss": 2.1829, "reason_loss": 0.5135022401809692, "step": 704, "utility_loss": 1.6694163084030151 }, { "cosine_similarity": 0, "epoch": 0.6570363466915191, "grad_norm": 1.1116189520884876, "learning_rate": 4.3389713496720745e-05, "loss": 2.0835, "reason_loss": 0.4815235435962677, "step": 705, "utility_loss": 1.601976990699768 }, { "cosine_similarity": 0, "epoch": 0.6579683131407269, "grad_norm": 1.2839421104769952, "learning_rate": 4.3372454263030726e-05, "loss": 2.0081, "reason_loss": 0.48509901762008667, "step": 706, "utility_loss": 1.5230093002319336 }, { "cosine_similarity": 0, "epoch": 0.6589002795899348, "grad_norm": 1.3238742978311944, "learning_rate": 4.33551950293407e-05, "loss": 2.4307, "reason_loss": 0.5370229482650757, "step": 707, "utility_loss": 1.8936628103256226 }, { "cosine_similarity": 0, "epoch": 0.6598322460391426, "grad_norm": 1.2828786382027209, "learning_rate": 4.3337935795650675e-05, "loss": 1.7981, "reason_loss": 0.5094563364982605, "step": 708, "utility_loss": 1.2886812686920166 }, { "cosine_similarity": 0, "epoch": 0.6607642124883504, "grad_norm": 1.1596463063031572, "learning_rate": 4.332067656196065e-05, "loss": 2.4512, "reason_loss": 0.5194700360298157, "step": 709, "utility_loss": 1.9317705631256104 }, { "cosine_similarity": 0, "epoch": 0.6616961789375583, "grad_norm": 1.1818141762942893, "learning_rate": 4.330341732827063e-05, "loss": 2.1069, "reason_loss": 0.526385486125946, "step": 710, "utility_loss": 1.5805165767669678 }, { "cosine_similarity": 0, "epoch": 0.6626281453867661, "grad_norm": 1.3469626708142788, "learning_rate": 4.3286158094580604e-05, "loss": 2.139, "reason_loss": 0.5127900838851929, "step": 711, "utility_loss": 1.626173496246338 }, { "cosine_similarity": 0, "epoch": 0.6635601118359739, "grad_norm": 1.1138827089713172, "learning_rate": 4.326889886089058e-05, "loss": 2.3168, "reason_loss": 0.4933985471725464, "step": 712, "utility_loss": 1.8233946561813354 }, { "cosine_similarity": 0, "epoch": 0.6644920782851818, "grad_norm": 1.2710442691955928, "learning_rate": 4.325163962720055e-05, "loss": 1.9335, "reason_loss": 0.5065023899078369, "step": 713, "utility_loss": 1.4269680976867676 }, { "cosine_similarity": 0, "epoch": 0.6654240447343895, "grad_norm": 1.1544961434052035, "learning_rate": 4.3234380393510534e-05, "loss": 2.2219, "reason_loss": 0.488813579082489, "step": 714, "utility_loss": 1.7331092357635498 }, { "cosine_similarity": 0, "epoch": 0.6663560111835974, "grad_norm": 1.1879600347972084, "learning_rate": 4.321712115982051e-05, "loss": 2.0892, "reason_loss": 0.5078752040863037, "step": 715, "utility_loss": 1.5813560485839844 }, { "cosine_similarity": 0, "epoch": 0.6672879776328052, "grad_norm": 1.3326969167403229, "learning_rate": 4.319986192613048e-05, "loss": 1.9139, "reason_loss": 0.5071653127670288, "step": 716, "utility_loss": 1.406773328781128 }, { "cosine_similarity": 0, "epoch": 0.668219944082013, "grad_norm": 1.1872795391930615, "learning_rate": 4.3182602692440464e-05, "loss": 2.2645, "reason_loss": 0.5085234642028809, "step": 717, "utility_loss": 1.756020188331604 }, { "cosine_similarity": 0, "epoch": 0.6691519105312209, "grad_norm": 1.0958883588400261, "learning_rate": 4.316534345875044e-05, "loss": 2.1305, "reason_loss": 0.533103346824646, "step": 718, "utility_loss": 1.5974342823028564 }, { "cosine_similarity": 0, "epoch": 0.6700838769804287, "grad_norm": 1.2992794900323066, "learning_rate": 4.314808422506041e-05, "loss": 2.2233, "reason_loss": 0.4996377229690552, "step": 719, "utility_loss": 1.72367525100708 }, { "cosine_similarity": 0, "epoch": 0.6710158434296365, "grad_norm": 1.283108450633052, "learning_rate": 4.3130824991370387e-05, "loss": 2.2347, "reason_loss": 0.5276464819908142, "step": 720, "utility_loss": 1.7070945501327515 }, { "cosine_similarity": 0, "epoch": 0.6719478098788444, "grad_norm": 1.4599396871044032, "learning_rate": 4.311356575768036e-05, "loss": 2.0349, "reason_loss": 0.5395498275756836, "step": 721, "utility_loss": 1.4953217506408691 }, { "cosine_similarity": 0, "epoch": 0.6728797763280522, "grad_norm": 1.2444067769393508, "learning_rate": 4.3096306523990335e-05, "loss": 2.1809, "reason_loss": 0.48176902532577515, "step": 722, "utility_loss": 1.699151635169983 }, { "cosine_similarity": 0, "epoch": 0.67381174277726, "grad_norm": 1.186616482014689, "learning_rate": 4.307904729030031e-05, "loss": 1.9723, "reason_loss": 0.5266585350036621, "step": 723, "utility_loss": 1.4456900358200073 }, { "cosine_similarity": 0, "epoch": 0.6747437092264679, "grad_norm": 1.7712242782921177, "learning_rate": 4.3061788056610284e-05, "loss": 2.2173, "reason_loss": 0.5160083770751953, "step": 724, "utility_loss": 1.7013120651245117 }, { "cosine_similarity": 0, "epoch": 0.6756756756756757, "grad_norm": 1.2983473260246783, "learning_rate": 4.3044528822920265e-05, "loss": 2.3968, "reason_loss": 0.5042428374290466, "step": 725, "utility_loss": 1.8925704956054688 }, { "cosine_similarity": 0, "epoch": 0.6766076421248836, "grad_norm": 2.437872343907488, "learning_rate": 4.302726958923024e-05, "loss": 2.1333, "reason_loss": 0.5078662633895874, "step": 726, "utility_loss": 1.6254019737243652 }, { "cosine_similarity": 0, "epoch": 0.6775396085740913, "grad_norm": 1.2920040973129303, "learning_rate": 4.301001035554021e-05, "loss": 1.9666, "reason_loss": 0.5147291421890259, "step": 727, "utility_loss": 1.4519157409667969 }, { "cosine_similarity": 0, "epoch": 0.6784715750232991, "grad_norm": 1.1617873656078392, "learning_rate": 4.2992751121850194e-05, "loss": 2.0698, "reason_loss": 0.4946753978729248, "step": 728, "utility_loss": 1.5751535892486572 }, { "cosine_similarity": 0, "epoch": 0.679403541472507, "grad_norm": 1.4785222843347952, "learning_rate": 4.297549188816017e-05, "loss": 1.9399, "reason_loss": 0.49467670917510986, "step": 729, "utility_loss": 1.4452292919158936 }, { "cosine_similarity": 0, "epoch": 0.6803355079217148, "grad_norm": 1.296604228586063, "learning_rate": 4.295823265447014e-05, "loss": 1.985, "reason_loss": 0.5184454321861267, "step": 730, "utility_loss": 1.4665145874023438 }, { "cosine_similarity": 0, "epoch": 0.6812674743709226, "grad_norm": 1.1016623562944556, "learning_rate": 4.294097342078012e-05, "loss": 2.0971, "reason_loss": 0.49696090817451477, "step": 731, "utility_loss": 1.6000943183898926 }, { "cosine_similarity": 0, "epoch": 0.6821994408201305, "grad_norm": 1.1939010069639229, "learning_rate": 4.29237141870901e-05, "loss": 2.0393, "reason_loss": 0.5095247030258179, "step": 732, "utility_loss": 1.5298182964324951 }, { "cosine_similarity": 0, "epoch": 0.6831314072693383, "grad_norm": 1.3081973880275077, "learning_rate": 4.290645495340007e-05, "loss": 2.0864, "reason_loss": 0.5190695524215698, "step": 733, "utility_loss": 1.5673065185546875 }, { "cosine_similarity": 0, "epoch": 0.6840633737185461, "grad_norm": 1.1548538298766948, "learning_rate": 4.288919571971005e-05, "loss": 2.26, "reason_loss": 0.5146658420562744, "step": 734, "utility_loss": 1.7453551292419434 }, { "cosine_similarity": 0, "epoch": 0.684995340167754, "grad_norm": 1.291230828169703, "learning_rate": 4.287193648602002e-05, "loss": 1.9728, "reason_loss": 0.5190690159797668, "step": 735, "utility_loss": 1.4537595510482788 }, { "cosine_similarity": 0, "epoch": 0.6859273066169618, "grad_norm": 1.2075977404062737, "learning_rate": 4.285467725233e-05, "loss": 2.101, "reason_loss": 0.5165177583694458, "step": 736, "utility_loss": 1.584445595741272 }, { "cosine_similarity": 0, "epoch": 0.6868592730661697, "grad_norm": 1.7758395628463588, "learning_rate": 4.2837418018639977e-05, "loss": 2.0884, "reason_loss": 0.5178109407424927, "step": 737, "utility_loss": 1.5706186294555664 }, { "cosine_similarity": 0, "epoch": 0.6877912395153775, "grad_norm": 1.6728807990955339, "learning_rate": 4.282015878494995e-05, "loss": 2.1846, "reason_loss": 0.4838274121284485, "step": 738, "utility_loss": 1.7008057832717896 }, { "cosine_similarity": 0, "epoch": 0.6887232059645852, "grad_norm": 1.2407362930058266, "learning_rate": 4.2802899551259925e-05, "loss": 2.241, "reason_loss": 0.5227676033973694, "step": 739, "utility_loss": 1.7182165384292603 }, { "cosine_similarity": 0, "epoch": 0.6896551724137931, "grad_norm": 1.100771701890082, "learning_rate": 4.27856403175699e-05, "loss": 1.8497, "reason_loss": 0.4808853268623352, "step": 740, "utility_loss": 1.3688358068466187 }, { "cosine_similarity": 0, "epoch": 0.6905871388630009, "grad_norm": 1.0945280634992878, "learning_rate": 4.2768381083879874e-05, "loss": 1.8978, "reason_loss": 0.49048683047294617, "step": 741, "utility_loss": 1.4073103666305542 }, { "cosine_similarity": 0, "epoch": 0.6915191053122087, "grad_norm": 1.2376395000056402, "learning_rate": 4.2751121850189855e-05, "loss": 1.859, "reason_loss": 0.49134573340415955, "step": 742, "utility_loss": 1.3676602840423584 }, { "cosine_similarity": 0, "epoch": 0.6924510717614166, "grad_norm": 1.09764901022484, "learning_rate": 4.273386261649983e-05, "loss": 2.1005, "reason_loss": 0.4797595143318176, "step": 743, "utility_loss": 1.6207818984985352 }, { "cosine_similarity": 0, "epoch": 0.6933830382106244, "grad_norm": 1.3598889801142062, "learning_rate": 4.27166033828098e-05, "loss": 2.0374, "reason_loss": 0.5138674378395081, "step": 744, "utility_loss": 1.5235110521316528 }, { "cosine_similarity": 0, "epoch": 0.6943150046598322, "grad_norm": 1.2347480400401118, "learning_rate": 4.269934414911978e-05, "loss": 2.2514, "reason_loss": 0.5166036486625671, "step": 745, "utility_loss": 1.7347586154937744 }, { "cosine_similarity": 0, "epoch": 0.6952469711090401, "grad_norm": 1.3726550765380057, "learning_rate": 4.268208491542976e-05, "loss": 2.1234, "reason_loss": 0.4771210551261902, "step": 746, "utility_loss": 1.6462591886520386 }, { "cosine_similarity": 0, "epoch": 0.6961789375582479, "grad_norm": 1.1602441342041032, "learning_rate": 4.266482568173973e-05, "loss": 1.866, "reason_loss": 0.5158463716506958, "step": 747, "utility_loss": 1.350163459777832 }, { "cosine_similarity": 0, "epoch": 0.6971109040074557, "grad_norm": 1.2101688896826823, "learning_rate": 4.264756644804971e-05, "loss": 2.0281, "reason_loss": 0.5006923675537109, "step": 748, "utility_loss": 1.527419090270996 }, { "cosine_similarity": 0, "epoch": 0.6980428704566636, "grad_norm": 1.1847780349687842, "learning_rate": 4.263030721435968e-05, "loss": 2.1031, "reason_loss": 0.5223158597946167, "step": 749, "utility_loss": 1.5808049440383911 }, { "cosine_similarity": 0, "epoch": 0.6989748369058714, "grad_norm": 3.4513955727479266, "learning_rate": 4.261304798066966e-05, "loss": 1.8613, "reason_loss": 0.5002682209014893, "step": 750, "utility_loss": 1.361079454421997 }, { "cosine_similarity": 0, "epoch": 0.6999068033550793, "grad_norm": 1.0685080285020672, "learning_rate": 4.259578874697964e-05, "loss": 1.8928, "reason_loss": 0.5201910734176636, "step": 751, "utility_loss": 1.3726471662521362 }, { "cosine_similarity": 0, "epoch": 0.700838769804287, "grad_norm": 1.4047568510406587, "learning_rate": 4.257852951328961e-05, "loss": 2.041, "reason_loss": 0.4906357526779175, "step": 752, "utility_loss": 1.5503692626953125 }, { "cosine_similarity": 0, "epoch": 0.7017707362534948, "grad_norm": 1.2821302007073732, "learning_rate": 4.2561270279599586e-05, "loss": 2.3241, "reason_loss": 0.5026756525039673, "step": 753, "utility_loss": 1.8214187622070312 }, { "cosine_similarity": 0, "epoch": 0.7027027027027027, "grad_norm": 1.277802482059349, "learning_rate": 4.2544011045909567e-05, "loss": 2.2933, "reason_loss": 0.4981810450553894, "step": 754, "utility_loss": 1.795127034187317 }, { "cosine_similarity": 0, "epoch": 0.7036346691519105, "grad_norm": 1.115226947603361, "learning_rate": 4.252675181221954e-05, "loss": 2.1627, "reason_loss": 0.5191624164581299, "step": 755, "utility_loss": 1.6435680389404297 }, { "cosine_similarity": 0, "epoch": 0.7045666356011183, "grad_norm": 1.253445978290375, "learning_rate": 4.2509492578529515e-05, "loss": 1.8602, "reason_loss": 0.49962982535362244, "step": 756, "utility_loss": 1.3605751991271973 }, { "cosine_similarity": 0, "epoch": 0.7054986020503262, "grad_norm": 1.2613706653897647, "learning_rate": 4.2492233344839496e-05, "loss": 1.8589, "reason_loss": 0.5054641962051392, "step": 757, "utility_loss": 1.3534212112426758 }, { "cosine_similarity": 0, "epoch": 0.706430568499534, "grad_norm": 1.641094940653569, "learning_rate": 4.247497411114947e-05, "loss": 2.2478, "reason_loss": 0.5341111421585083, "step": 758, "utility_loss": 1.7136642932891846 }, { "cosine_similarity": 0, "epoch": 0.7073625349487418, "grad_norm": 1.3010936397073123, "learning_rate": 4.2457714877459445e-05, "loss": 2.2117, "reason_loss": 0.5180829763412476, "step": 759, "utility_loss": 1.6936464309692383 }, { "cosine_similarity": 0, "epoch": 0.7082945013979497, "grad_norm": 1.1953466092213065, "learning_rate": 4.244045564376942e-05, "loss": 1.9667, "reason_loss": 0.5007414817810059, "step": 760, "utility_loss": 1.4659347534179688 }, { "cosine_similarity": 0, "epoch": 0.7092264678471575, "grad_norm": 1.427847565384237, "learning_rate": 4.242319641007939e-05, "loss": 1.957, "reason_loss": 0.5000611543655396, "step": 761, "utility_loss": 1.4569721221923828 }, { "cosine_similarity": 0, "epoch": 0.7101584342963654, "grad_norm": 1.2518676852195258, "learning_rate": 4.240593717638937e-05, "loss": 2.7673, "reason_loss": 0.5134106278419495, "step": 762, "utility_loss": 2.253897190093994 }, { "cosine_similarity": 0, "epoch": 0.7110904007455732, "grad_norm": 1.256867505175878, "learning_rate": 4.238867794269934e-05, "loss": 2.2582, "reason_loss": 0.4635140895843506, "step": 763, "utility_loss": 1.794687271118164 }, { "cosine_similarity": 0, "epoch": 0.712022367194781, "grad_norm": 1.2269902934288528, "learning_rate": 4.2371418709009316e-05, "loss": 2.1695, "reason_loss": 0.4885486364364624, "step": 764, "utility_loss": 1.6809020042419434 }, { "cosine_similarity": 0, "epoch": 0.7129543336439889, "grad_norm": 1.1440295737092483, "learning_rate": 4.23541594753193e-05, "loss": 2.015, "reason_loss": 0.49043506383895874, "step": 765, "utility_loss": 1.5245535373687744 }, { "cosine_similarity": 0, "epoch": 0.7138863000931966, "grad_norm": 1.5957811097902317, "learning_rate": 4.233690024162927e-05, "loss": 2.236, "reason_loss": 0.5211875438690186, "step": 766, "utility_loss": 1.7147748470306396 }, { "cosine_similarity": 0, "epoch": 0.7148182665424044, "grad_norm": 1.2275276686414032, "learning_rate": 4.2319641007939246e-05, "loss": 2.0438, "reason_loss": 0.4858793020248413, "step": 767, "utility_loss": 1.5579032897949219 }, { "cosine_similarity": 0, "epoch": 0.7157502329916123, "grad_norm": 1.1101544586233818, "learning_rate": 4.230238177424923e-05, "loss": 2.1081, "reason_loss": 0.4978412687778473, "step": 768, "utility_loss": 1.6102840900421143 }, { "cosine_similarity": 0, "epoch": 0.7166821994408201, "grad_norm": 1.0279594823392044, "learning_rate": 4.22851225405592e-05, "loss": 2.0069, "reason_loss": 0.5125325918197632, "step": 769, "utility_loss": 1.49433171749115 }, { "cosine_similarity": 0, "epoch": 0.7176141658900279, "grad_norm": 1.325864684497613, "learning_rate": 4.2267863306869176e-05, "loss": 1.8916, "reason_loss": 0.4897858500480652, "step": 770, "utility_loss": 1.4018325805664062 }, { "cosine_similarity": 0, "epoch": 0.7185461323392358, "grad_norm": 1.5191977648622692, "learning_rate": 4.225060407317915e-05, "loss": 1.9785, "reason_loss": 0.5159389972686768, "step": 771, "utility_loss": 1.462555170059204 }, { "cosine_similarity": 0, "epoch": 0.7194780987884436, "grad_norm": 1.234219522644618, "learning_rate": 4.223334483948913e-05, "loss": 2.1733, "reason_loss": 0.5433069467544556, "step": 772, "utility_loss": 1.630021095275879 }, { "cosine_similarity": 0, "epoch": 0.7204100652376515, "grad_norm": 1.1029320687900819, "learning_rate": 4.2216085605799105e-05, "loss": 2.0145, "reason_loss": 0.5057668685913086, "step": 773, "utility_loss": 1.508744239807129 }, { "cosine_similarity": 0, "epoch": 0.7213420316868593, "grad_norm": 1.1180287783198641, "learning_rate": 4.219882637210908e-05, "loss": 1.9777, "reason_loss": 0.524655818939209, "step": 774, "utility_loss": 1.4530086517333984 }, { "cosine_similarity": 0, "epoch": 0.7222739981360671, "grad_norm": 1.3044515830069872, "learning_rate": 4.2181567138419054e-05, "loss": 2.4286, "reason_loss": 0.5095747709274292, "step": 775, "utility_loss": 1.9190161228179932 }, { "cosine_similarity": 0, "epoch": 0.723205964585275, "grad_norm": 1.3191285509012904, "learning_rate": 4.2164307904729035e-05, "loss": 2.0285, "reason_loss": 0.5185830593109131, "step": 776, "utility_loss": 1.5099173784255981 }, { "cosine_similarity": 0, "epoch": 0.7241379310344828, "grad_norm": 1.1441894548051814, "learning_rate": 4.214704867103901e-05, "loss": 1.7544, "reason_loss": 0.490003764629364, "step": 777, "utility_loss": 1.2643468379974365 }, { "cosine_similarity": 0, "epoch": 0.7250698974836906, "grad_norm": 1.5530312065813257, "learning_rate": 4.2129789437348983e-05, "loss": 1.8826, "reason_loss": 0.5188625454902649, "step": 778, "utility_loss": 1.3637776374816895 }, { "cosine_similarity": 0, "epoch": 0.7260018639328985, "grad_norm": 1.8127228196089777, "learning_rate": 4.2112530203658964e-05, "loss": 2.2416, "reason_loss": 0.5022637248039246, "step": 779, "utility_loss": 1.7393122911453247 }, { "cosine_similarity": 0, "epoch": 0.7269338303821062, "grad_norm": 1.2134996596449303, "learning_rate": 4.209527096996894e-05, "loss": 1.6781, "reason_loss": 0.47108978033065796, "step": 780, "utility_loss": 1.2069776058197021 }, { "cosine_similarity": 0, "epoch": 0.727865796831314, "grad_norm": 1.2617377612944183, "learning_rate": 4.207801173627891e-05, "loss": 2.2374, "reason_loss": 0.5072668790817261, "step": 781, "utility_loss": 1.7301323413848877 }, { "cosine_similarity": 0, "epoch": 0.7287977632805219, "grad_norm": 1.1803139333566635, "learning_rate": 4.206075250258889e-05, "loss": 2.0764, "reason_loss": 0.5081530809402466, "step": 782, "utility_loss": 1.5682872533798218 }, { "cosine_similarity": 0, "epoch": 0.7297297297297297, "grad_norm": 1.0899152768166496, "learning_rate": 4.204349326889886e-05, "loss": 1.7045, "reason_loss": 0.5070327520370483, "step": 783, "utility_loss": 1.1974458694458008 }, { "cosine_similarity": 0, "epoch": 0.7306616961789375, "grad_norm": 1.3143511163808708, "learning_rate": 4.2026234035208836e-05, "loss": 1.9086, "reason_loss": 0.5098279714584351, "step": 784, "utility_loss": 1.3988075256347656 }, { "cosine_similarity": 0, "epoch": 0.7315936626281454, "grad_norm": 1.722386205550392, "learning_rate": 4.200897480151881e-05, "loss": 2.4533, "reason_loss": 0.5104396343231201, "step": 785, "utility_loss": 1.9428412914276123 }, { "cosine_similarity": 0, "epoch": 0.7325256290773532, "grad_norm": 1.501153189183196, "learning_rate": 4.1991715567828785e-05, "loss": 2.3719, "reason_loss": 0.5100609064102173, "step": 786, "utility_loss": 1.8618803024291992 }, { "cosine_similarity": 0, "epoch": 0.7334575955265611, "grad_norm": 1.3888802628467667, "learning_rate": 4.1974456334138766e-05, "loss": 2.0113, "reason_loss": 0.4946002960205078, "step": 787, "utility_loss": 1.5166603326797485 }, { "cosine_similarity": 0, "epoch": 0.7343895619757689, "grad_norm": 1.7037462641825183, "learning_rate": 4.195719710044874e-05, "loss": 2.2556, "reason_loss": 0.5170273780822754, "step": 788, "utility_loss": 1.738579273223877 }, { "cosine_similarity": 0, "epoch": 0.7353215284249767, "grad_norm": 1.7248093249908412, "learning_rate": 4.1939937866758714e-05, "loss": 2.0763, "reason_loss": 0.532079815864563, "step": 789, "utility_loss": 1.5442618131637573 }, { "cosine_similarity": 0, "epoch": 0.7362534948741846, "grad_norm": 1.3442126548017475, "learning_rate": 4.1922678633068695e-05, "loss": 2.2798, "reason_loss": 0.517012357711792, "step": 790, "utility_loss": 1.7628166675567627 }, { "cosine_similarity": 0, "epoch": 0.7371854613233924, "grad_norm": 1.0651714959127705, "learning_rate": 4.190541939937867e-05, "loss": 1.7844, "reason_loss": 0.5143442153930664, "step": 791, "utility_loss": 1.270082950592041 }, { "cosine_similarity": 0, "epoch": 0.7381174277726001, "grad_norm": 1.6131048614644161, "learning_rate": 4.1888160165688644e-05, "loss": 1.9172, "reason_loss": 0.5270212888717651, "step": 792, "utility_loss": 1.390138030052185 }, { "cosine_similarity": 0, "epoch": 0.739049394221808, "grad_norm": 1.3796408208701205, "learning_rate": 4.187090093199862e-05, "loss": 2.1984, "reason_loss": 0.5115718245506287, "step": 793, "utility_loss": 1.6867847442626953 }, { "cosine_similarity": 0, "epoch": 0.7399813606710158, "grad_norm": 1.2797691363700683, "learning_rate": 4.18536416983086e-05, "loss": 1.922, "reason_loss": 0.48495781421661377, "step": 794, "utility_loss": 1.4370059967041016 }, { "cosine_similarity": 0, "epoch": 0.7409133271202236, "grad_norm": 1.3069978303158718, "learning_rate": 4.1836382464618573e-05, "loss": 2.0862, "reason_loss": 0.4910961389541626, "step": 795, "utility_loss": 1.5951220989227295 }, { "cosine_similarity": 0, "epoch": 0.7418452935694315, "grad_norm": 1.0751911786070256, "learning_rate": 4.181912323092855e-05, "loss": 1.7111, "reason_loss": 0.4901933968067169, "step": 796, "utility_loss": 1.2208999395370483 }, { "cosine_similarity": 0, "epoch": 0.7427772600186393, "grad_norm": 1.2039036537195615, "learning_rate": 4.180186399723853e-05, "loss": 2.2902, "reason_loss": 0.5237537622451782, "step": 797, "utility_loss": 1.7664635181427002 }, { "cosine_similarity": 0, "epoch": 0.7437092264678472, "grad_norm": 1.2389516230385553, "learning_rate": 4.17846047635485e-05, "loss": 2.0063, "reason_loss": 0.503221869468689, "step": 798, "utility_loss": 1.503109335899353 }, { "cosine_similarity": 0, "epoch": 0.744641192917055, "grad_norm": 1.973866003531923, "learning_rate": 4.176734552985848e-05, "loss": 2.0643, "reason_loss": 0.5043301582336426, "step": 799, "utility_loss": 1.559938669204712 }, { "cosine_similarity": 0, "epoch": 0.7455731593662628, "grad_norm": 1.2345692991417665, "learning_rate": 4.175008629616845e-05, "loss": 1.9652, "reason_loss": 0.4908900856971741, "step": 800, "utility_loss": 1.4743049144744873 }, { "cosine_similarity": 0, "epoch": 0.7465051258154707, "grad_norm": 1.2434919429891262, "learning_rate": 4.173282706247843e-05, "loss": 1.9715, "reason_loss": 0.5068848729133606, "step": 801, "utility_loss": 1.4646453857421875 }, { "cosine_similarity": 0, "epoch": 0.7474370922646785, "grad_norm": 1.1094388254550955, "learning_rate": 4.171556782878841e-05, "loss": 1.9729, "reason_loss": 0.4965156018733978, "step": 802, "utility_loss": 1.4763853549957275 }, { "cosine_similarity": 0, "epoch": 0.7483690587138863, "grad_norm": 1.2230214478545587, "learning_rate": 4.169830859509838e-05, "loss": 1.8177, "reason_loss": 0.5156282186508179, "step": 803, "utility_loss": 1.302121877670288 }, { "cosine_similarity": 0, "epoch": 0.7493010251630942, "grad_norm": 1.3191129195774884, "learning_rate": 4.1681049361408356e-05, "loss": 1.8621, "reason_loss": 0.5008691549301147, "step": 804, "utility_loss": 1.3612253665924072 }, { "cosine_similarity": 0, "epoch": 0.750232991612302, "grad_norm": 1.138367782716212, "learning_rate": 4.166379012771833e-05, "loss": 1.992, "reason_loss": 0.4959714710712433, "step": 805, "utility_loss": 1.4959797859191895 }, { "cosine_similarity": 0, "epoch": 0.7511649580615097, "grad_norm": 1.4897149796796492, "learning_rate": 4.1646530894028304e-05, "loss": 1.8967, "reason_loss": 0.5257166028022766, "step": 806, "utility_loss": 1.3710105419158936 }, { "cosine_similarity": 0, "epoch": 0.7520969245107176, "grad_norm": 1.3270478028960437, "learning_rate": 4.162927166033828e-05, "loss": 2.3469, "reason_loss": 0.5173791646957397, "step": 807, "utility_loss": 1.8295161724090576 }, { "cosine_similarity": 0, "epoch": 0.7530288909599254, "grad_norm": 1.2479804469642966, "learning_rate": 4.161201242664826e-05, "loss": 2.4071, "reason_loss": 0.5375333428382874, "step": 808, "utility_loss": 1.869537115097046 }, { "cosine_similarity": 0, "epoch": 0.7539608574091333, "grad_norm": 1.1800095683967318, "learning_rate": 4.1594753192958234e-05, "loss": 2.222, "reason_loss": 0.519174337387085, "step": 809, "utility_loss": 1.702871561050415 }, { "cosine_similarity": 0, "epoch": 0.7548928238583411, "grad_norm": 1.6388577005935372, "learning_rate": 4.157749395926821e-05, "loss": 1.9097, "reason_loss": 0.5365058183670044, "step": 810, "utility_loss": 1.3732078075408936 }, { "cosine_similarity": 0, "epoch": 0.7558247903075489, "grad_norm": 1.0806922051357903, "learning_rate": 4.156023472557818e-05, "loss": 2.0825, "reason_loss": 0.5160138010978699, "step": 811, "utility_loss": 1.5664865970611572 }, { "cosine_similarity": 0, "epoch": 0.7567567567567568, "grad_norm": 1.3789435238366479, "learning_rate": 4.1542975491888163e-05, "loss": 2.1811, "reason_loss": 0.493459016084671, "step": 812, "utility_loss": 1.6876575946807861 }, { "cosine_similarity": 0, "epoch": 0.7576887232059646, "grad_norm": 1.2802343688171656, "learning_rate": 4.152571625819814e-05, "loss": 2.2014, "reason_loss": 0.507390022277832, "step": 813, "utility_loss": 1.6940076351165771 }, { "cosine_similarity": 0, "epoch": 0.7586206896551724, "grad_norm": 1.1868353444190833, "learning_rate": 4.150845702450811e-05, "loss": 2.2267, "reason_loss": 0.5062446594238281, "step": 814, "utility_loss": 1.720470905303955 }, { "cosine_similarity": 0, "epoch": 0.7595526561043803, "grad_norm": 1.0918767925025916, "learning_rate": 4.1491197790818086e-05, "loss": 1.7754, "reason_loss": 0.4504263997077942, "step": 815, "utility_loss": 1.3250107765197754 }, { "cosine_similarity": 0, "epoch": 0.7604846225535881, "grad_norm": 1.0756709175588568, "learning_rate": 4.147393855712807e-05, "loss": 1.6196, "reason_loss": 0.49767956137657166, "step": 816, "utility_loss": 1.1219009160995483 }, { "cosine_similarity": 0, "epoch": 0.7614165890027959, "grad_norm": 1.1850395077750724, "learning_rate": 4.145667932343804e-05, "loss": 2.1312, "reason_loss": 0.5268201231956482, "step": 817, "utility_loss": 1.604355812072754 }, { "cosine_similarity": 0, "epoch": 0.7623485554520038, "grad_norm": 1.0453355382290557, "learning_rate": 4.1439420089748016e-05, "loss": 1.6708, "reason_loss": 0.5244029760360718, "step": 818, "utility_loss": 1.1464120149612427 }, { "cosine_similarity": 0, "epoch": 0.7632805219012115, "grad_norm": 1.0335754981506036, "learning_rate": 4.1422160856058e-05, "loss": 1.8354, "reason_loss": 0.4900023937225342, "step": 819, "utility_loss": 1.3453913927078247 }, { "cosine_similarity": 0, "epoch": 0.7642124883504194, "grad_norm": 1.0571798052143242, "learning_rate": 4.140490162236797e-05, "loss": 1.8167, "reason_loss": 0.5319384336471558, "step": 820, "utility_loss": 1.2847200632095337 }, { "cosine_similarity": 0, "epoch": 0.7651444547996272, "grad_norm": 1.1729306756311018, "learning_rate": 4.1387642388677946e-05, "loss": 2.0083, "reason_loss": 0.48557406663894653, "step": 821, "utility_loss": 1.5226938724517822 }, { "cosine_similarity": 0, "epoch": 0.766076421248835, "grad_norm": 1.37442628283818, "learning_rate": 4.137038315498792e-05, "loss": 1.9872, "reason_loss": 0.5122336149215698, "step": 822, "utility_loss": 1.4750101566314697 }, { "cosine_similarity": 0, "epoch": 0.7670083876980429, "grad_norm": 1.37713007332719, "learning_rate": 4.13531239212979e-05, "loss": 2.1757, "reason_loss": 0.48824506998062134, "step": 823, "utility_loss": 1.6874279975891113 }, { "cosine_similarity": 0, "epoch": 0.7679403541472507, "grad_norm": 1.3389901544385086, "learning_rate": 4.1335864687607875e-05, "loss": 2.4781, "reason_loss": 0.5243354439735413, "step": 824, "utility_loss": 1.9537395238876343 }, { "cosine_similarity": 0, "epoch": 0.7688723205964585, "grad_norm": 1.63628992303459, "learning_rate": 4.131860545391785e-05, "loss": 2.3226, "reason_loss": 0.5111551284790039, "step": 825, "utility_loss": 1.8114323616027832 }, { "cosine_similarity": 0, "epoch": 0.7698042870456664, "grad_norm": 0.9726629624821147, "learning_rate": 4.1301346220227824e-05, "loss": 1.8458, "reason_loss": 0.49699467420578003, "step": 826, "utility_loss": 1.3487708568572998 }, { "cosine_similarity": 0, "epoch": 0.7707362534948742, "grad_norm": 1.7664521615191429, "learning_rate": 4.12840869865378e-05, "loss": 2.1605, "reason_loss": 0.4743490219116211, "step": 827, "utility_loss": 1.6861172914505005 }, { "cosine_similarity": 0, "epoch": 0.771668219944082, "grad_norm": 1.2138595860157735, "learning_rate": 4.126682775284777e-05, "loss": 2.0563, "reason_loss": 0.5133187770843506, "step": 828, "utility_loss": 1.5429413318634033 }, { "cosine_similarity": 0, "epoch": 0.7726001863932899, "grad_norm": 1.356480496822241, "learning_rate": 4.124956851915775e-05, "loss": 1.8263, "reason_loss": 0.4971897602081299, "step": 829, "utility_loss": 1.3291196823120117 }, { "cosine_similarity": 0, "epoch": 0.7735321528424977, "grad_norm": 1.3565359987939045, "learning_rate": 4.123230928546773e-05, "loss": 2.3439, "reason_loss": 0.5081772208213806, "step": 830, "utility_loss": 1.8356833457946777 }, { "cosine_similarity": 0, "epoch": 0.7744641192917054, "grad_norm": 1.0648820878029315, "learning_rate": 4.12150500517777e-05, "loss": 1.8835, "reason_loss": 0.522675096988678, "step": 831, "utility_loss": 1.360785961151123 }, { "cosine_similarity": 0, "epoch": 0.7753960857409133, "grad_norm": 1.2356394131539685, "learning_rate": 4.1197790818087676e-05, "loss": 1.8464, "reason_loss": 0.4993290603160858, "step": 832, "utility_loss": 1.3470604419708252 }, { "cosine_similarity": 0, "epoch": 0.7763280521901211, "grad_norm": 1.06230313522667, "learning_rate": 4.118053158439765e-05, "loss": 2.129, "reason_loss": 0.47934669256210327, "step": 833, "utility_loss": 1.6496238708496094 }, { "cosine_similarity": 0, "epoch": 0.777260018639329, "grad_norm": 1.0583312142233516, "learning_rate": 4.116327235070763e-05, "loss": 2.0063, "reason_loss": 0.5043536424636841, "step": 834, "utility_loss": 1.5019948482513428 }, { "cosine_similarity": 0, "epoch": 0.7781919850885368, "grad_norm": 1.3993908440836238, "learning_rate": 4.1146013117017606e-05, "loss": 2.1037, "reason_loss": 0.5230122208595276, "step": 835, "utility_loss": 1.5806515216827393 }, { "cosine_similarity": 0, "epoch": 0.7791239515377446, "grad_norm": 1.7526501755654171, "learning_rate": 4.112875388332758e-05, "loss": 2.0756, "reason_loss": 0.4954591691493988, "step": 836, "utility_loss": 1.5801329612731934 }, { "cosine_similarity": 0, "epoch": 0.7800559179869525, "grad_norm": 1.1880473479683877, "learning_rate": 4.111149464963756e-05, "loss": 2.2401, "reason_loss": 0.5023292303085327, "step": 837, "utility_loss": 1.7378013134002686 }, { "cosine_similarity": 0, "epoch": 0.7809878844361603, "grad_norm": 1.3587845672580685, "learning_rate": 4.1094235415947536e-05, "loss": 1.9342, "reason_loss": 0.5189201831817627, "step": 838, "utility_loss": 1.4152381420135498 }, { "cosine_similarity": 0, "epoch": 0.7819198508853681, "grad_norm": 1.263485409885884, "learning_rate": 4.107697618225751e-05, "loss": 2.2836, "reason_loss": 0.49132251739501953, "step": 839, "utility_loss": 1.7922651767730713 }, { "cosine_similarity": 0, "epoch": 0.782851817334576, "grad_norm": 1.1546322346241866, "learning_rate": 4.1059716948567484e-05, "loss": 2.1385, "reason_loss": 0.5469542741775513, "step": 840, "utility_loss": 1.5915641784667969 }, { "cosine_similarity": 0, "epoch": 0.7837837837837838, "grad_norm": 1.3192667526832322, "learning_rate": 4.1042457714877465e-05, "loss": 2.062, "reason_loss": 0.5356332063674927, "step": 841, "utility_loss": 1.5263453722000122 }, { "cosine_similarity": 0, "epoch": 0.7847157502329916, "grad_norm": 0.9946273897390521, "learning_rate": 4.102519848118744e-05, "loss": 1.8302, "reason_loss": 0.517972469329834, "step": 842, "utility_loss": 1.312196135520935 }, { "cosine_similarity": 0, "epoch": 0.7856477166821995, "grad_norm": 1.3064628660557696, "learning_rate": 4.1007939247497414e-05, "loss": 2.1575, "reason_loss": 0.5190471410751343, "step": 843, "utility_loss": 1.6384961605072021 }, { "cosine_similarity": 0, "epoch": 0.7865796831314072, "grad_norm": 1.1608122761480655, "learning_rate": 4.099068001380739e-05, "loss": 1.8254, "reason_loss": 0.5186717510223389, "step": 844, "utility_loss": 1.3067634105682373 }, { "cosine_similarity": 0, "epoch": 0.7875116495806151, "grad_norm": 1.3405119643876862, "learning_rate": 4.097342078011737e-05, "loss": 2.0307, "reason_loss": 0.5155631303787231, "step": 845, "utility_loss": 1.5151501893997192 }, { "cosine_similarity": 0, "epoch": 0.7884436160298229, "grad_norm": 1.3187507408587984, "learning_rate": 4.0956161546427344e-05, "loss": 2.2128, "reason_loss": 0.5297638177871704, "step": 846, "utility_loss": 1.6830530166625977 }, { "cosine_similarity": 0, "epoch": 0.7893755824790307, "grad_norm": 1.124330355853132, "learning_rate": 4.093890231273732e-05, "loss": 1.7676, "reason_loss": 0.49349093437194824, "step": 847, "utility_loss": 1.274060606956482 }, { "cosine_similarity": 0, "epoch": 0.7903075489282386, "grad_norm": 1.1390490240461413, "learning_rate": 4.092164307904729e-05, "loss": 1.8186, "reason_loss": 0.5059475898742676, "step": 848, "utility_loss": 1.312684416770935 }, { "cosine_similarity": 0, "epoch": 0.7912395153774464, "grad_norm": 1.1305200605209884, "learning_rate": 4.0904383845357266e-05, "loss": 1.76, "reason_loss": 0.48887699842453003, "step": 849, "utility_loss": 1.27115797996521 }, { "cosine_similarity": 0, "epoch": 0.7921714818266542, "grad_norm": 1.105759513242291, "learning_rate": 4.088712461166724e-05, "loss": 2.0, "reason_loss": 0.516809344291687, "step": 850, "utility_loss": 1.4832122325897217 }, { "cosine_similarity": 0, "epoch": 0.7931034482758621, "grad_norm": 1.1076844794768872, "learning_rate": 4.0869865377977215e-05, "loss": 2.3139, "reason_loss": 0.5396866202354431, "step": 851, "utility_loss": 1.7741811275482178 }, { "cosine_similarity": 0, "epoch": 0.7940354147250699, "grad_norm": 1.6922158387525503, "learning_rate": 4.0852606144287196e-05, "loss": 2.5528, "reason_loss": 0.4995615482330322, "step": 852, "utility_loss": 2.0532121658325195 }, { "cosine_similarity": 0, "epoch": 0.7949673811742777, "grad_norm": 1.2915471879779108, "learning_rate": 4.083534691059717e-05, "loss": 2.0263, "reason_loss": 0.4840278625488281, "step": 853, "utility_loss": 1.5422499179840088 }, { "cosine_similarity": 0, "epoch": 0.7958993476234856, "grad_norm": 1.3213302692839795, "learning_rate": 4.0818087676907145e-05, "loss": 2.4181, "reason_loss": 0.5018784999847412, "step": 854, "utility_loss": 1.9161837100982666 }, { "cosine_similarity": 0, "epoch": 0.7968313140726934, "grad_norm": 2.032279303154793, "learning_rate": 4.080082844321712e-05, "loss": 2.0824, "reason_loss": 0.52651047706604, "step": 855, "utility_loss": 1.5558862686157227 }, { "cosine_similarity": 0, "epoch": 0.7977632805219013, "grad_norm": 1.4834191073180998, "learning_rate": 4.07835692095271e-05, "loss": 2.3517, "reason_loss": 0.5275239944458008, "step": 856, "utility_loss": 1.8241770267486572 }, { "cosine_similarity": 0, "epoch": 0.798695246971109, "grad_norm": 0.9295117285699642, "learning_rate": 4.0766309975837074e-05, "loss": 1.603, "reason_loss": 0.5058144330978394, "step": 857, "utility_loss": 1.097204327583313 }, { "cosine_similarity": 0, "epoch": 0.7996272134203168, "grad_norm": 1.0810430970679676, "learning_rate": 4.074905074214705e-05, "loss": 1.7709, "reason_loss": 0.49420690536499023, "step": 858, "utility_loss": 1.2766571044921875 }, { "cosine_similarity": 0, "epoch": 0.8005591798695247, "grad_norm": 1.8428569714499135, "learning_rate": 4.073179150845703e-05, "loss": 2.4064, "reason_loss": 0.5689071416854858, "step": 859, "utility_loss": 1.8375394344329834 }, { "cosine_similarity": 0, "epoch": 0.8014911463187325, "grad_norm": 1.1859090197865942, "learning_rate": 4.0714532274767004e-05, "loss": 1.9739, "reason_loss": 0.5039807558059692, "step": 860, "utility_loss": 1.469937801361084 }, { "cosine_similarity": 0, "epoch": 0.8024231127679403, "grad_norm": 1.18340224176597, "learning_rate": 4.069727304107698e-05, "loss": 2.0136, "reason_loss": 0.47960081696510315, "step": 861, "utility_loss": 1.5339744091033936 }, { "cosine_similarity": 0, "epoch": 0.8033550792171482, "grad_norm": 1.0056435012375737, "learning_rate": 4.068001380738695e-05, "loss": 1.8351, "reason_loss": 0.5249745845794678, "step": 862, "utility_loss": 1.3101613521575928 }, { "cosine_similarity": 0, "epoch": 0.804287045666356, "grad_norm": 1.109601608040777, "learning_rate": 4.0662754573696934e-05, "loss": 1.9225, "reason_loss": 0.5136537551879883, "step": 863, "utility_loss": 1.4088270664215088 }, { "cosine_similarity": 0, "epoch": 0.8052190121155638, "grad_norm": 1.248892835913684, "learning_rate": 4.064549534000691e-05, "loss": 2.0795, "reason_loss": 0.45663851499557495, "step": 864, "utility_loss": 1.6228678226470947 }, { "cosine_similarity": 0, "epoch": 0.8061509785647717, "grad_norm": 1.8635646102427474, "learning_rate": 4.062823610631688e-05, "loss": 2.1224, "reason_loss": 0.5002682209014893, "step": 865, "utility_loss": 1.6221423149108887 }, { "cosine_similarity": 0, "epoch": 0.8070829450139795, "grad_norm": 1.2456840940982719, "learning_rate": 4.0610976872626856e-05, "loss": 2.2377, "reason_loss": 0.5122939348220825, "step": 866, "utility_loss": 1.7254090309143066 }, { "cosine_similarity": 0, "epoch": 0.8080149114631874, "grad_norm": 1.087904521978006, "learning_rate": 4.059371763893684e-05, "loss": 1.9792, "reason_loss": 0.49591493606567383, "step": 867, "utility_loss": 1.483290195465088 }, { "cosine_similarity": 0, "epoch": 0.8089468779123952, "grad_norm": 1.2479884217718378, "learning_rate": 4.057645840524681e-05, "loss": 2.2693, "reason_loss": 0.5298877954483032, "step": 868, "utility_loss": 1.739437460899353 }, { "cosine_similarity": 0, "epoch": 0.809878844361603, "grad_norm": 1.549285783783026, "learning_rate": 4.0559199171556786e-05, "loss": 2.2414, "reason_loss": 0.5228057503700256, "step": 869, "utility_loss": 1.7185533046722412 }, { "cosine_similarity": 0, "epoch": 0.8108108108108109, "grad_norm": 1.1942145478066737, "learning_rate": 4.054193993786676e-05, "loss": 2.1064, "reason_loss": 0.5092583298683167, "step": 870, "utility_loss": 1.597111463546753 }, { "cosine_similarity": 0, "epoch": 0.8117427772600186, "grad_norm": 1.435043117361527, "learning_rate": 4.0524680704176735e-05, "loss": 1.8798, "reason_loss": 0.5292835831642151, "step": 871, "utility_loss": 1.350520133972168 }, { "cosine_similarity": 0, "epoch": 0.8126747437092264, "grad_norm": 1.243352601330252, "learning_rate": 4.050742147048671e-05, "loss": 2.0405, "reason_loss": 0.503909707069397, "step": 872, "utility_loss": 1.5365673303604126 }, { "cosine_similarity": 0, "epoch": 0.8136067101584343, "grad_norm": 1.6961656719500093, "learning_rate": 4.049016223679668e-05, "loss": 2.2, "reason_loss": 0.48745694756507874, "step": 873, "utility_loss": 1.7125067710876465 }, { "cosine_similarity": 0, "epoch": 0.8145386766076421, "grad_norm": 1.0249416771534066, "learning_rate": 4.0472903003106664e-05, "loss": 1.7867, "reason_loss": 0.5143905282020569, "step": 874, "utility_loss": 1.2722604274749756 }, { "cosine_similarity": 0, "epoch": 0.8154706430568499, "grad_norm": 1.238832011901741, "learning_rate": 4.045564376941664e-05, "loss": 2.1881, "reason_loss": 0.5052376985549927, "step": 875, "utility_loss": 1.6828820705413818 }, { "cosine_similarity": 0, "epoch": 0.8164026095060578, "grad_norm": 1.1883238863197974, "learning_rate": 4.043838453572661e-05, "loss": 2.0526, "reason_loss": 0.5028994083404541, "step": 876, "utility_loss": 1.5496547222137451 }, { "cosine_similarity": 0, "epoch": 0.8173345759552656, "grad_norm": 1.18052186709254, "learning_rate": 4.042112530203659e-05, "loss": 1.8461, "reason_loss": 0.5207793712615967, "step": 877, "utility_loss": 1.3253240585327148 }, { "cosine_similarity": 0, "epoch": 0.8182665424044734, "grad_norm": 1.2535629162534372, "learning_rate": 4.040386606834657e-05, "loss": 1.9648, "reason_loss": 0.4692766070365906, "step": 878, "utility_loss": 1.495538353919983 }, { "cosine_similarity": 0, "epoch": 0.8191985088536813, "grad_norm": 0.9581162563676103, "learning_rate": 4.038660683465654e-05, "loss": 1.8177, "reason_loss": 0.5024210810661316, "step": 879, "utility_loss": 1.3153003454208374 }, { "cosine_similarity": 0, "epoch": 0.8201304753028891, "grad_norm": 1.1584772935642595, "learning_rate": 4.036934760096652e-05, "loss": 1.7581, "reason_loss": 0.48371464014053345, "step": 880, "utility_loss": 1.2743909358978271 }, { "cosine_similarity": 0, "epoch": 0.821062441752097, "grad_norm": 1.437755020257512, "learning_rate": 4.03520883672765e-05, "loss": 2.1565, "reason_loss": 0.5156491994857788, "step": 881, "utility_loss": 1.6408140659332275 }, { "cosine_similarity": 0, "epoch": 0.8219944082013048, "grad_norm": 1.2527792499154278, "learning_rate": 4.033482913358647e-05, "loss": 2.1204, "reason_loss": 0.5105090737342834, "step": 882, "utility_loss": 1.6099143028259277 }, { "cosine_similarity": 0, "epoch": 0.8229263746505125, "grad_norm": 1.1945558113516146, "learning_rate": 4.0317569899896447e-05, "loss": 1.977, "reason_loss": 0.5284585952758789, "step": 883, "utility_loss": 1.4485673904418945 }, { "cosine_similarity": 0, "epoch": 0.8238583410997204, "grad_norm": 1.020693956403362, "learning_rate": 4.030031066620642e-05, "loss": 1.9153, "reason_loss": 0.511307954788208, "step": 884, "utility_loss": 1.4039835929870605 }, { "cosine_similarity": 0, "epoch": 0.8247903075489282, "grad_norm": 1.2753927762143453, "learning_rate": 4.02830514325164e-05, "loss": 2.1103, "reason_loss": 0.508277416229248, "step": 885, "utility_loss": 1.6020253896713257 }, { "cosine_similarity": 0, "epoch": 0.825722273998136, "grad_norm": 1.2540707800073496, "learning_rate": 4.0265792198826376e-05, "loss": 1.8423, "reason_loss": 0.49412232637405396, "step": 886, "utility_loss": 1.3481528759002686 }, { "cosine_similarity": 0, "epoch": 0.8266542404473439, "grad_norm": 1.2732969763555135, "learning_rate": 4.024853296513635e-05, "loss": 1.8425, "reason_loss": 0.4935147166252136, "step": 887, "utility_loss": 1.3490345478057861 }, { "cosine_similarity": 0, "epoch": 0.8275862068965517, "grad_norm": 1.4605287599057424, "learning_rate": 4.023127373144633e-05, "loss": 1.9843, "reason_loss": 0.4973187744617462, "step": 888, "utility_loss": 1.486969232559204 }, { "cosine_similarity": 0, "epoch": 0.8285181733457595, "grad_norm": 1.1057927632740125, "learning_rate": 4.0214014497756306e-05, "loss": 1.9393, "reason_loss": 0.49620938301086426, "step": 889, "utility_loss": 1.443125605583191 }, { "cosine_similarity": 0, "epoch": 0.8294501397949674, "grad_norm": 1.1826142006442346, "learning_rate": 4.019675526406628e-05, "loss": 1.9637, "reason_loss": 0.5130578875541687, "step": 890, "utility_loss": 1.4506542682647705 }, { "cosine_similarity": 0, "epoch": 0.8303821062441752, "grad_norm": 1.178366112875472, "learning_rate": 4.0179496030376254e-05, "loss": 1.9036, "reason_loss": 0.5227656364440918, "step": 891, "utility_loss": 1.3808631896972656 }, { "cosine_similarity": 0, "epoch": 0.8313140726933831, "grad_norm": 1.0611366180162802, "learning_rate": 4.016223679668623e-05, "loss": 1.628, "reason_loss": 0.5666618347167969, "step": 892, "utility_loss": 1.0613157749176025 }, { "cosine_similarity": 0, "epoch": 0.8322460391425909, "grad_norm": 1.0509995190292427, "learning_rate": 4.01449775629962e-05, "loss": 1.8454, "reason_loss": 0.5011955499649048, "step": 893, "utility_loss": 1.344160556793213 }, { "cosine_similarity": 0, "epoch": 0.8331780055917987, "grad_norm": 1.0738743548541236, "learning_rate": 4.012771832930618e-05, "loss": 1.9452, "reason_loss": 0.5048657059669495, "step": 894, "utility_loss": 1.440286636352539 }, { "cosine_similarity": 0, "epoch": 0.8341099720410066, "grad_norm": 1.2090426798819902, "learning_rate": 4.011045909561615e-05, "loss": 2.112, "reason_loss": 0.5194175243377686, "step": 895, "utility_loss": 1.5925946235656738 }, { "cosine_similarity": 0, "epoch": 0.8350419384902144, "grad_norm": 1.3989802914834584, "learning_rate": 4.009319986192613e-05, "loss": 2.35, "reason_loss": 0.5117252469062805, "step": 896, "utility_loss": 1.8382986783981323 }, { "cosine_similarity": 0, "epoch": 0.8359739049394221, "grad_norm": 1.1699713918559986, "learning_rate": 4.007594062823611e-05, "loss": 1.6834, "reason_loss": 0.48197171092033386, "step": 897, "utility_loss": 1.2014381885528564 }, { "cosine_similarity": 0, "epoch": 0.83690587138863, "grad_norm": 1.0298027696612704, "learning_rate": 4.005868139454608e-05, "loss": 1.6706, "reason_loss": 0.5186983346939087, "step": 898, "utility_loss": 1.151932716369629 }, { "cosine_similarity": 0, "epoch": 0.8378378378378378, "grad_norm": 1.1999047490802228, "learning_rate": 4.004142216085606e-05, "loss": 2.1849, "reason_loss": 0.4951364994049072, "step": 899, "utility_loss": 1.6897192001342773 }, { "cosine_similarity": 0, "epoch": 0.8387698042870456, "grad_norm": 1.4942381225616053, "learning_rate": 4.0024162927166037e-05, "loss": 2.2302, "reason_loss": 0.4987063705921173, "step": 900, "utility_loss": 1.7314988374710083 }, { "cosine_similarity": 0, "epoch": 0.8397017707362535, "grad_norm": 1.3222512876285846, "learning_rate": 4.000690369347601e-05, "loss": 2.1693, "reason_loss": 0.5213974714279175, "step": 901, "utility_loss": 1.6479520797729492 }, { "cosine_similarity": 0, "epoch": 0.8406337371854613, "grad_norm": 1.120617192670916, "learning_rate": 3.9989644459785985e-05, "loss": 1.9229, "reason_loss": 0.49841365218162537, "step": 902, "utility_loss": 1.4244836568832397 }, { "cosine_similarity": 0, "epoch": 0.8415657036346692, "grad_norm": 1.0872844272410949, "learning_rate": 3.9972385226095966e-05, "loss": 1.7174, "reason_loss": 0.5110815763473511, "step": 903, "utility_loss": 1.2063016891479492 }, { "cosine_similarity": 0, "epoch": 0.842497670083877, "grad_norm": 1.0319369992752785, "learning_rate": 3.995512599240594e-05, "loss": 1.9973, "reason_loss": 0.5057991147041321, "step": 904, "utility_loss": 1.49148428440094 }, { "cosine_similarity": 0, "epoch": 0.8434296365330848, "grad_norm": 1.2903135709861937, "learning_rate": 3.9937866758715915e-05, "loss": 1.9689, "reason_loss": 0.47442400455474854, "step": 905, "utility_loss": 1.4945039749145508 }, { "cosine_similarity": 0, "epoch": 0.8443616029822927, "grad_norm": 1.1291277238803727, "learning_rate": 3.992060752502589e-05, "loss": 2.0638, "reason_loss": 0.5379607677459717, "step": 906, "utility_loss": 1.5258336067199707 }, { "cosine_similarity": 0, "epoch": 0.8452935694315005, "grad_norm": 1.2965549017040472, "learning_rate": 3.990334829133587e-05, "loss": 2.2976, "reason_loss": 0.530792236328125, "step": 907, "utility_loss": 1.7667983770370483 }, { "cosine_similarity": 0, "epoch": 0.8462255358807083, "grad_norm": 1.165320513512281, "learning_rate": 3.9886089057645844e-05, "loss": 2.0656, "reason_loss": 0.5022261142730713, "step": 908, "utility_loss": 1.563399314880371 }, { "cosine_similarity": 0, "epoch": 0.8471575023299162, "grad_norm": 1.1256669705191875, "learning_rate": 3.986882982395582e-05, "loss": 1.7502, "reason_loss": 0.4817016124725342, "step": 909, "utility_loss": 1.2684593200683594 }, { "cosine_similarity": 0, "epoch": 0.848089468779124, "grad_norm": 2.7487428185097817, "learning_rate": 3.98515705902658e-05, "loss": 1.8861, "reason_loss": 0.5029974579811096, "step": 910, "utility_loss": 1.3831300735473633 }, { "cosine_similarity": 0, "epoch": 0.8490214352283317, "grad_norm": 1.0817406408274177, "learning_rate": 3.9834311356575774e-05, "loss": 1.9894, "reason_loss": 0.5205941796302795, "step": 911, "utility_loss": 1.4688043594360352 }, { "cosine_similarity": 0, "epoch": 0.8499534016775396, "grad_norm": 1.0653393964655091, "learning_rate": 3.981705212288575e-05, "loss": 2.06, "reason_loss": 0.47165971994400024, "step": 912, "utility_loss": 1.5883066654205322 }, { "cosine_similarity": 0, "epoch": 0.8508853681267474, "grad_norm": 1.1357065329029707, "learning_rate": 3.979979288919572e-05, "loss": 2.0369, "reason_loss": 0.501076340675354, "step": 913, "utility_loss": 1.5358545780181885 }, { "cosine_similarity": 0, "epoch": 0.8518173345759553, "grad_norm": 1.212177827044767, "learning_rate": 3.97825336555057e-05, "loss": 1.9206, "reason_loss": 0.49502843618392944, "step": 914, "utility_loss": 1.42555570602417 }, { "cosine_similarity": 0, "epoch": 0.8527493010251631, "grad_norm": 3.0007262568870314, "learning_rate": 3.976527442181567e-05, "loss": 2.0465, "reason_loss": 0.5156441926956177, "step": 915, "utility_loss": 1.530814290046692 }, { "cosine_similarity": 0, "epoch": 0.8536812674743709, "grad_norm": 1.2026796085864626, "learning_rate": 3.9748015188125646e-05, "loss": 2.3611, "reason_loss": 0.5037523508071899, "step": 916, "utility_loss": 1.8573192358016968 }, { "cosine_similarity": 0, "epoch": 0.8546132339235788, "grad_norm": 1.2975947866745536, "learning_rate": 3.973075595443562e-05, "loss": 2.2278, "reason_loss": 0.5164936780929565, "step": 917, "utility_loss": 1.7113499641418457 }, { "cosine_similarity": 0, "epoch": 0.8555452003727866, "grad_norm": 1.0748945324601913, "learning_rate": 3.97134967207456e-05, "loss": 1.7806, "reason_loss": 0.49634093046188354, "step": 918, "utility_loss": 1.284233808517456 }, { "cosine_similarity": 0, "epoch": 0.8564771668219944, "grad_norm": 1.1436771718538796, "learning_rate": 3.9696237487055575e-05, "loss": 2.2725, "reason_loss": 0.5200469493865967, "step": 919, "utility_loss": 1.752498745918274 }, { "cosine_similarity": 0, "epoch": 0.8574091332712023, "grad_norm": 1.0926894112598433, "learning_rate": 3.967897825336555e-05, "loss": 1.9115, "reason_loss": 0.5193696022033691, "step": 920, "utility_loss": 1.3921217918395996 }, { "cosine_similarity": 0, "epoch": 0.8583410997204101, "grad_norm": 1.1504225319731145, "learning_rate": 3.966171901967553e-05, "loss": 1.7768, "reason_loss": 0.4761299788951874, "step": 921, "utility_loss": 1.3006256818771362 }, { "cosine_similarity": 0, "epoch": 0.8592730661696178, "grad_norm": 1.098925221242001, "learning_rate": 3.9644459785985505e-05, "loss": 2.2479, "reason_loss": 0.48399415612220764, "step": 922, "utility_loss": 1.7638986110687256 }, { "cosine_similarity": 0, "epoch": 0.8602050326188257, "grad_norm": 1.3972668749907156, "learning_rate": 3.962720055229548e-05, "loss": 1.9479, "reason_loss": 0.510692298412323, "step": 923, "utility_loss": 1.4371827840805054 }, { "cosine_similarity": 0, "epoch": 0.8611369990680335, "grad_norm": 1.0639858969157838, "learning_rate": 3.9609941318605453e-05, "loss": 1.8996, "reason_loss": 0.4993259012699127, "step": 924, "utility_loss": 1.400240182876587 }, { "cosine_similarity": 0, "epoch": 0.8620689655172413, "grad_norm": 1.8755791139877436, "learning_rate": 3.9592682084915434e-05, "loss": 2.0284, "reason_loss": 0.5026329159736633, "step": 925, "utility_loss": 1.5257484912872314 }, { "cosine_similarity": 0, "epoch": 0.8630009319664492, "grad_norm": 1.1244430304344517, "learning_rate": 3.957542285122541e-05, "loss": 2.1323, "reason_loss": 0.4834645688533783, "step": 926, "utility_loss": 1.648825764656067 }, { "cosine_similarity": 0, "epoch": 0.863932898415657, "grad_norm": 0.9664453246334044, "learning_rate": 3.955816361753538e-05, "loss": 1.7776, "reason_loss": 0.5089116096496582, "step": 927, "utility_loss": 1.2686705589294434 }, { "cosine_similarity": 0, "epoch": 0.8648648648648649, "grad_norm": 1.1954916755915244, "learning_rate": 3.9540904383845364e-05, "loss": 2.2445, "reason_loss": 0.4870454668998718, "step": 928, "utility_loss": 1.7574920654296875 }, { "cosine_similarity": 0, "epoch": 0.8657968313140727, "grad_norm": 1.5841017563906745, "learning_rate": 3.952364515015534e-05, "loss": 1.9701, "reason_loss": 0.4872762858867645, "step": 929, "utility_loss": 1.4828119277954102 }, { "cosine_similarity": 0, "epoch": 0.8667287977632805, "grad_norm": 1.222794993647101, "learning_rate": 3.950638591646531e-05, "loss": 1.876, "reason_loss": 0.4866977334022522, "step": 930, "utility_loss": 1.389284372329712 }, { "cosine_similarity": 0, "epoch": 0.8676607642124884, "grad_norm": 1.142487058858149, "learning_rate": 3.948912668277529e-05, "loss": 2.2599, "reason_loss": 0.5082333087921143, "step": 931, "utility_loss": 1.7516313791275024 }, { "cosine_similarity": 0, "epoch": 0.8685927306616962, "grad_norm": 2.704047758264977, "learning_rate": 3.947186744908527e-05, "loss": 2.0917, "reason_loss": 0.5054240226745605, "step": 932, "utility_loss": 1.5862901210784912 }, { "cosine_similarity": 0, "epoch": 0.869524697110904, "grad_norm": 1.0801378470890726, "learning_rate": 3.945460821539524e-05, "loss": 1.8538, "reason_loss": 0.5225661993026733, "step": 933, "utility_loss": 1.3312517404556274 }, { "cosine_similarity": 0, "epoch": 0.8704566635601119, "grad_norm": 1.0518241347927277, "learning_rate": 3.943734898170522e-05, "loss": 1.9405, "reason_loss": 0.49043008685112, "step": 934, "utility_loss": 1.4500629901885986 }, { "cosine_similarity": 0, "epoch": 0.8713886300093197, "grad_norm": 1.0184267324367762, "learning_rate": 3.942008974801519e-05, "loss": 1.9552, "reason_loss": 0.508507251739502, "step": 935, "utility_loss": 1.446692943572998 }, { "cosine_similarity": 0, "epoch": 0.8723205964585274, "grad_norm": 1.3246673839500118, "learning_rate": 3.9402830514325165e-05, "loss": 2.1845, "reason_loss": 0.513114333152771, "step": 936, "utility_loss": 1.6714115142822266 }, { "cosine_similarity": 0, "epoch": 0.8732525629077353, "grad_norm": 1.2023738721131545, "learning_rate": 3.938557128063514e-05, "loss": 2.0137, "reason_loss": 0.49528810381889343, "step": 937, "utility_loss": 1.5184494256973267 }, { "cosine_similarity": 0, "epoch": 0.8741845293569431, "grad_norm": 1.1359981137180526, "learning_rate": 3.9368312046945114e-05, "loss": 1.7288, "reason_loss": 0.4861273169517517, "step": 938, "utility_loss": 1.242654800415039 }, { "cosine_similarity": 0, "epoch": 0.875116495806151, "grad_norm": 1.0763309635073401, "learning_rate": 3.9351052813255095e-05, "loss": 1.9983, "reason_loss": 0.5007020235061646, "step": 939, "utility_loss": 1.4975841045379639 }, { "cosine_similarity": 0, "epoch": 0.8760484622553588, "grad_norm": 1.2306864456205338, "learning_rate": 3.933379357956507e-05, "loss": 2.1387, "reason_loss": 0.5108064413070679, "step": 940, "utility_loss": 1.627920150756836 }, { "cosine_similarity": 0, "epoch": 0.8769804287045666, "grad_norm": 1.4410580871529832, "learning_rate": 3.9316534345875043e-05, "loss": 2.1744, "reason_loss": 0.4856778085231781, "step": 941, "utility_loss": 1.6887013912200928 }, { "cosine_similarity": 0, "epoch": 0.8779123951537745, "grad_norm": 1.0880496714366579, "learning_rate": 3.929927511218502e-05, "loss": 2.0065, "reason_loss": 0.5091671347618103, "step": 942, "utility_loss": 1.4972938299179077 }, { "cosine_similarity": 0, "epoch": 0.8788443616029823, "grad_norm": 1.0827125890808396, "learning_rate": 3.9282015878495e-05, "loss": 1.9975, "reason_loss": 0.5037862658500671, "step": 943, "utility_loss": 1.4937291145324707 }, { "cosine_similarity": 0, "epoch": 0.8797763280521901, "grad_norm": 1.2422716049824787, "learning_rate": 3.926475664480497e-05, "loss": 2.0678, "reason_loss": 0.5397534370422363, "step": 944, "utility_loss": 1.528032660484314 }, { "cosine_similarity": 0, "epoch": 0.880708294501398, "grad_norm": 1.2033930550046024, "learning_rate": 3.924749741111495e-05, "loss": 1.6386, "reason_loss": 0.49648618698120117, "step": 945, "utility_loss": 1.1421605348587036 }, { "cosine_similarity": 0, "epoch": 0.8816402609506058, "grad_norm": 1.367160950513988, "learning_rate": 3.923023817742492e-05, "loss": 2.0704, "reason_loss": 0.5191246867179871, "step": 946, "utility_loss": 1.5512733459472656 }, { "cosine_similarity": 0, "epoch": 0.8825722273998136, "grad_norm": 0.9676790865919653, "learning_rate": 3.92129789437349e-05, "loss": 2.0688, "reason_loss": 0.5134854912757874, "step": 947, "utility_loss": 1.5553535223007202 }, { "cosine_similarity": 0, "epoch": 0.8835041938490215, "grad_norm": 1.8884341761082002, "learning_rate": 3.919571971004488e-05, "loss": 2.3204, "reason_loss": 0.5043224096298218, "step": 948, "utility_loss": 1.816070318222046 }, { "cosine_similarity": 0, "epoch": 0.8844361602982292, "grad_norm": 1.0130865355159615, "learning_rate": 3.917846047635485e-05, "loss": 1.8311, "reason_loss": 0.5290892124176025, "step": 949, "utility_loss": 1.3019981384277344 }, { "cosine_similarity": 0, "epoch": 0.8853681267474371, "grad_norm": 1.4568033276066463, "learning_rate": 3.916120124266483e-05, "loss": 2.2332, "reason_loss": 0.5040397047996521, "step": 950, "utility_loss": 1.729111909866333 }, { "cosine_similarity": 0, "epoch": 0.8863000931966449, "grad_norm": 1.3194419753271587, "learning_rate": 3.914394200897481e-05, "loss": 1.9301, "reason_loss": 0.4978778064250946, "step": 951, "utility_loss": 1.432235598564148 }, { "cosine_similarity": 0, "epoch": 0.8872320596458527, "grad_norm": 1.4653757955368967, "learning_rate": 3.912668277528478e-05, "loss": 2.1369, "reason_loss": 0.4858909845352173, "step": 952, "utility_loss": 1.6510024070739746 }, { "cosine_similarity": 0, "epoch": 0.8881640260950606, "grad_norm": 1.095019026627078, "learning_rate": 3.9109423541594755e-05, "loss": 2.0165, "reason_loss": 0.5462761521339417, "step": 953, "utility_loss": 1.4701820611953735 }, { "cosine_similarity": 0, "epoch": 0.8890959925442684, "grad_norm": 1.2441938931134917, "learning_rate": 3.909216430790473e-05, "loss": 1.9753, "reason_loss": 0.5128841996192932, "step": 954, "utility_loss": 1.4624114036560059 }, { "cosine_similarity": 0, "epoch": 0.8900279589934762, "grad_norm": 1.1506294899912952, "learning_rate": 3.9074905074214704e-05, "loss": 1.9039, "reason_loss": 0.5358723402023315, "step": 955, "utility_loss": 1.3679883480072021 }, { "cosine_similarity": 0, "epoch": 0.8909599254426841, "grad_norm": 1.2994429675078043, "learning_rate": 3.905764584052468e-05, "loss": 1.958, "reason_loss": 0.5118588209152222, "step": 956, "utility_loss": 1.4461848735809326 }, { "cosine_similarity": 0, "epoch": 0.8918918918918919, "grad_norm": 1.387639395907661, "learning_rate": 3.904038660683465e-05, "loss": 2.205, "reason_loss": 0.5427026748657227, "step": 957, "utility_loss": 1.662260890007019 }, { "cosine_similarity": 0, "epoch": 0.8928238583410997, "grad_norm": 1.1139241127466866, "learning_rate": 3.9023127373144633e-05, "loss": 2.1988, "reason_loss": 0.5034048557281494, "step": 958, "utility_loss": 1.695425033569336 }, { "cosine_similarity": 0, "epoch": 0.8937558247903076, "grad_norm": 1.005986677211045, "learning_rate": 3.900586813945461e-05, "loss": 1.6451, "reason_loss": 0.4754495620727539, "step": 959, "utility_loss": 1.1696170568466187 }, { "cosine_similarity": 0, "epoch": 0.8946877912395154, "grad_norm": 1.1113285724289108, "learning_rate": 3.898860890576458e-05, "loss": 2.3745, "reason_loss": 0.4991533160209656, "step": 960, "utility_loss": 1.8753795623779297 }, { "cosine_similarity": 0, "epoch": 0.8956197576887233, "grad_norm": 1.4661344329838193, "learning_rate": 3.897134967207456e-05, "loss": 2.1517, "reason_loss": 0.5186445713043213, "step": 961, "utility_loss": 1.633028268814087 }, { "cosine_similarity": 0, "epoch": 0.896551724137931, "grad_norm": 1.0745431108543058, "learning_rate": 3.895409043838454e-05, "loss": 2.077, "reason_loss": 0.4813672602176666, "step": 962, "utility_loss": 1.5956419706344604 }, { "cosine_similarity": 0, "epoch": 0.8974836905871388, "grad_norm": 1.1467249987161043, "learning_rate": 3.893683120469451e-05, "loss": 1.8377, "reason_loss": 0.5039821863174438, "step": 963, "utility_loss": 1.3336803913116455 }, { "cosine_similarity": 0, "epoch": 0.8984156570363467, "grad_norm": 1.1475963700891705, "learning_rate": 3.8919571971004486e-05, "loss": 1.6459, "reason_loss": 0.47367650270462036, "step": 964, "utility_loss": 1.1722073554992676 }, { "cosine_similarity": 0, "epoch": 0.8993476234855545, "grad_norm": 1.1946917539447748, "learning_rate": 3.890231273731447e-05, "loss": 1.9417, "reason_loss": 0.5098747611045837, "step": 965, "utility_loss": 1.4317827224731445 }, { "cosine_similarity": 0, "epoch": 0.9002795899347623, "grad_norm": 1.02025352628711, "learning_rate": 3.888505350362444e-05, "loss": 1.7412, "reason_loss": 0.49223804473876953, "step": 966, "utility_loss": 1.248990535736084 }, { "cosine_similarity": 0, "epoch": 0.9012115563839702, "grad_norm": 0.9823645933450794, "learning_rate": 3.8867794269934416e-05, "loss": 1.9688, "reason_loss": 0.5058702230453491, "step": 967, "utility_loss": 1.4629151821136475 }, { "cosine_similarity": 0, "epoch": 0.902143522833178, "grad_norm": 1.2707220377343127, "learning_rate": 3.885053503624439e-05, "loss": 1.9306, "reason_loss": 0.49706438183784485, "step": 968, "utility_loss": 1.4335649013519287 }, { "cosine_similarity": 0, "epoch": 0.9030754892823858, "grad_norm": 1.3481743360777125, "learning_rate": 3.883327580255437e-05, "loss": 2.2058, "reason_loss": 0.5014156103134155, "step": 969, "utility_loss": 1.7044254541397095 }, { "cosine_similarity": 0, "epoch": 0.9040074557315937, "grad_norm": 1.278883888109623, "learning_rate": 3.8816016568864345e-05, "loss": 2.1541, "reason_loss": 0.47489863634109497, "step": 970, "utility_loss": 1.679236888885498 }, { "cosine_similarity": 0, "epoch": 0.9049394221808015, "grad_norm": 1.470301298578172, "learning_rate": 3.879875733517432e-05, "loss": 1.9501, "reason_loss": 0.5008561611175537, "step": 971, "utility_loss": 1.4492099285125732 }, { "cosine_similarity": 0, "epoch": 0.9058713886300093, "grad_norm": 1.3349993329306016, "learning_rate": 3.87814981014843e-05, "loss": 2.121, "reason_loss": 0.5107606649398804, "step": 972, "utility_loss": 1.6102180480957031 }, { "cosine_similarity": 0, "epoch": 0.9068033550792172, "grad_norm": 1.08867258312727, "learning_rate": 3.8764238867794275e-05, "loss": 1.7486, "reason_loss": 0.5319422483444214, "step": 973, "utility_loss": 1.2166894674301147 }, { "cosine_similarity": 0, "epoch": 0.907735321528425, "grad_norm": 1.3216839615795435, "learning_rate": 3.874697963410425e-05, "loss": 1.8226, "reason_loss": 0.525170087814331, "step": 974, "utility_loss": 1.2973954677581787 }, { "cosine_similarity": 0, "epoch": 0.9086672879776329, "grad_norm": 1.4888128397519824, "learning_rate": 3.8729720400414224e-05, "loss": 2.1806, "reason_loss": 0.5168616771697998, "step": 975, "utility_loss": 1.6637632846832275 }, { "cosine_similarity": 0, "epoch": 0.9095992544268406, "grad_norm": 1.090519406692125, "learning_rate": 3.87124611667242e-05, "loss": 2.1, "reason_loss": 0.48900943994522095, "step": 976, "utility_loss": 1.6109521389007568 }, { "cosine_similarity": 0, "epoch": 0.9105312208760484, "grad_norm": 0.9736265331252059, "learning_rate": 3.869520193303417e-05, "loss": 1.9734, "reason_loss": 0.49982762336730957, "step": 977, "utility_loss": 1.4735866785049438 }, { "cosine_similarity": 0, "epoch": 0.9114631873252563, "grad_norm": 1.1586088894778401, "learning_rate": 3.8677942699344146e-05, "loss": 1.4412, "reason_loss": 0.4934805631637573, "step": 978, "utility_loss": 0.9476945400238037 }, { "cosine_similarity": 0, "epoch": 0.9123951537744641, "grad_norm": 1.335966178896859, "learning_rate": 3.866068346565413e-05, "loss": 1.7573, "reason_loss": 0.5078773498535156, "step": 979, "utility_loss": 1.2493774890899658 }, { "cosine_similarity": 0, "epoch": 0.9133271202236719, "grad_norm": 1.2129749644109329, "learning_rate": 3.86434242319641e-05, "loss": 1.9226, "reason_loss": 0.5215517282485962, "step": 980, "utility_loss": 1.4010567665100098 }, { "cosine_similarity": 0, "epoch": 0.9142590866728798, "grad_norm": 1.205010185522061, "learning_rate": 3.8626164998274076e-05, "loss": 2.0658, "reason_loss": 0.501057505607605, "step": 981, "utility_loss": 1.5646947622299194 }, { "cosine_similarity": 0, "epoch": 0.9151910531220876, "grad_norm": 1.0597958149162663, "learning_rate": 3.860890576458405e-05, "loss": 2.2252, "reason_loss": 0.48562633991241455, "step": 982, "utility_loss": 1.7395517826080322 }, { "cosine_similarity": 0, "epoch": 0.9161230195712954, "grad_norm": 1.3808566066723094, "learning_rate": 3.859164653089403e-05, "loss": 1.9796, "reason_loss": 0.5108687877655029, "step": 983, "utility_loss": 1.4687752723693848 }, { "cosine_similarity": 0, "epoch": 0.9170549860205033, "grad_norm": 1.1287933046253622, "learning_rate": 3.8574387297204006e-05, "loss": 1.9119, "reason_loss": 0.49852919578552246, "step": 984, "utility_loss": 1.4133856296539307 }, { "cosine_similarity": 0, "epoch": 0.9179869524697111, "grad_norm": 1.1966497466183903, "learning_rate": 3.855712806351398e-05, "loss": 1.958, "reason_loss": 0.5194096565246582, "step": 985, "utility_loss": 1.4385535717010498 }, { "cosine_similarity": 0, "epoch": 0.918918918918919, "grad_norm": 1.4234765516487824, "learning_rate": 3.8539868829823954e-05, "loss": 2.3894, "reason_loss": 0.4824707508087158, "step": 986, "utility_loss": 1.906950831413269 }, { "cosine_similarity": 0, "epoch": 0.9198508853681268, "grad_norm": 1.0998096944483418, "learning_rate": 3.8522609596133935e-05, "loss": 1.7456, "reason_loss": 0.5049363374710083, "step": 987, "utility_loss": 1.2406840324401855 }, { "cosine_similarity": 0, "epoch": 0.9207828518173345, "grad_norm": 0.9724215101448139, "learning_rate": 3.850535036244391e-05, "loss": 2.1243, "reason_loss": 0.5268092751502991, "step": 988, "utility_loss": 1.5974411964416504 }, { "cosine_similarity": 0, "epoch": 0.9217148182665424, "grad_norm": 1.1674662579641601, "learning_rate": 3.8488091128753884e-05, "loss": 2.018, "reason_loss": 0.4843485951423645, "step": 989, "utility_loss": 1.5336766242980957 }, { "cosine_similarity": 0, "epoch": 0.9226467847157502, "grad_norm": 1.0705927924008796, "learning_rate": 3.8470831895063865e-05, "loss": 1.7926, "reason_loss": 0.504626989364624, "step": 990, "utility_loss": 1.2879257202148438 }, { "cosine_similarity": 0, "epoch": 0.923578751164958, "grad_norm": 1.2770171527342817, "learning_rate": 3.845357266137384e-05, "loss": 2.161, "reason_loss": 0.48773622512817383, "step": 991, "utility_loss": 1.6732892990112305 }, { "cosine_similarity": 0, "epoch": 0.9245107176141659, "grad_norm": 1.1759567001543703, "learning_rate": 3.8436313427683814e-05, "loss": 2.1421, "reason_loss": 0.49529778957366943, "step": 992, "utility_loss": 1.646775484085083 }, { "cosine_similarity": 0, "epoch": 0.9254426840633737, "grad_norm": 1.6407240914134142, "learning_rate": 3.841905419399379e-05, "loss": 2.2521, "reason_loss": 0.4638071358203888, "step": 993, "utility_loss": 1.7883094549179077 }, { "cosine_similarity": 0, "epoch": 0.9263746505125815, "grad_norm": 1.0558816637907897, "learning_rate": 3.840179496030377e-05, "loss": 2.1157, "reason_loss": 0.5324944257736206, "step": 994, "utility_loss": 1.5831725597381592 }, { "cosine_similarity": 0, "epoch": 0.9273066169617894, "grad_norm": 1.3458488360214147, "learning_rate": 3.838453572661374e-05, "loss": 2.008, "reason_loss": 0.49519577622413635, "step": 995, "utility_loss": 1.512829065322876 }, { "cosine_similarity": 0, "epoch": 0.9282385834109972, "grad_norm": 1.3148613459407728, "learning_rate": 3.836727649292372e-05, "loss": 2.0522, "reason_loss": 0.5055964589118958, "step": 996, "utility_loss": 1.5466086864471436 }, { "cosine_similarity": 0, "epoch": 0.9291705498602051, "grad_norm": 1.6492149179101305, "learning_rate": 3.835001725923369e-05, "loss": 2.0774, "reason_loss": 0.49092742800712585, "step": 997, "utility_loss": 1.586470365524292 }, { "cosine_similarity": 0, "epoch": 0.9301025163094129, "grad_norm": 1.3502111417556346, "learning_rate": 3.8332758025543666e-05, "loss": 1.9934, "reason_loss": 0.49056532979011536, "step": 998, "utility_loss": 1.502830982208252 }, { "cosine_similarity": 0, "epoch": 0.9310344827586207, "grad_norm": 1.116509654946442, "learning_rate": 3.831549879185364e-05, "loss": 1.7468, "reason_loss": 0.507625162601471, "step": 999, "utility_loss": 1.2391586303710938 }, { "cosine_similarity": 0, "epoch": 0.9319664492078286, "grad_norm": 1.2530232008145654, "learning_rate": 3.8298239558163615e-05, "loss": 2.2793, "reason_loss": 0.4968072772026062, "step": 1000, "utility_loss": 1.7825300693511963 }, { "cosine_similarity": 0, "epoch": 0.9328984156570364, "grad_norm": 1.0826313823988662, "learning_rate": 3.8280980324473596e-05, "loss": 1.9818, "reason_loss": 0.4878356456756592, "step": 1001, "utility_loss": 1.4939262866973877 }, { "cosine_similarity": 0, "epoch": 0.9338303821062441, "grad_norm": 1.1475214205912145, "learning_rate": 3.826372109078357e-05, "loss": 2.0013, "reason_loss": 0.505653440952301, "step": 1002, "utility_loss": 1.4956802129745483 }, { "cosine_similarity": 0, "epoch": 0.934762348555452, "grad_norm": 0.9933408532338082, "learning_rate": 3.8246461857093544e-05, "loss": 1.9016, "reason_loss": 0.5042070150375366, "step": 1003, "utility_loss": 1.3973872661590576 }, { "cosine_similarity": 0, "epoch": 0.9356943150046598, "grad_norm": 1.0547491133282543, "learning_rate": 3.822920262340352e-05, "loss": 2.2754, "reason_loss": 0.5136334896087646, "step": 1004, "utility_loss": 1.7617988586425781 }, { "cosine_similarity": 0, "epoch": 0.9366262814538676, "grad_norm": 1.1561817127557659, "learning_rate": 3.82119433897135e-05, "loss": 2.0642, "reason_loss": 0.509408175945282, "step": 1005, "utility_loss": 1.554814100265503 }, { "cosine_similarity": 0, "epoch": 0.9375582479030755, "grad_norm": 1.8071762622174694, "learning_rate": 3.8194684156023474e-05, "loss": 2.167, "reason_loss": 0.5062904357910156, "step": 1006, "utility_loss": 1.6607472896575928 }, { "cosine_similarity": 0, "epoch": 0.9384902143522833, "grad_norm": 1.12016245434109, "learning_rate": 3.817742492233345e-05, "loss": 2.0592, "reason_loss": 0.49281665682792664, "step": 1007, "utility_loss": 1.5664252042770386 }, { "cosine_similarity": 0, "epoch": 0.9394221808014911, "grad_norm": 1.0687824641858057, "learning_rate": 3.816016568864342e-05, "loss": 2.3175, "reason_loss": 0.46422216296195984, "step": 1008, "utility_loss": 1.8533039093017578 }, { "cosine_similarity": 0, "epoch": 0.940354147250699, "grad_norm": 0.9853937410133065, "learning_rate": 3.8142906454953404e-05, "loss": 1.6957, "reason_loss": 0.5224463939666748, "step": 1009, "utility_loss": 1.1732215881347656 }, { "cosine_similarity": 0, "epoch": 0.9412861136999068, "grad_norm": 0.9541911033889704, "learning_rate": 3.812564722126338e-05, "loss": 1.6248, "reason_loss": 0.5040477514266968, "step": 1010, "utility_loss": 1.1207224130630493 }, { "cosine_similarity": 0, "epoch": 0.9422180801491147, "grad_norm": 1.1060389822554968, "learning_rate": 3.810838798757335e-05, "loss": 1.6358, "reason_loss": 0.5207394361495972, "step": 1011, "utility_loss": 1.1150356531143188 }, { "cosine_similarity": 0, "epoch": 0.9431500465983225, "grad_norm": 1.0718883796678953, "learning_rate": 3.809112875388333e-05, "loss": 2.4293, "reason_loss": 0.5077242851257324, "step": 1012, "utility_loss": 1.921537160873413 }, { "cosine_similarity": 0, "epoch": 0.9440820130475303, "grad_norm": 0.8627093615836726, "learning_rate": 3.807386952019331e-05, "loss": 1.5701, "reason_loss": 0.4965894818305969, "step": 1013, "utility_loss": 1.0734667778015137 }, { "cosine_similarity": 0, "epoch": 0.9450139794967382, "grad_norm": 1.548152844224652, "learning_rate": 3.805661028650328e-05, "loss": 2.0539, "reason_loss": 0.4926927089691162, "step": 1014, "utility_loss": 1.5611966848373413 }, { "cosine_similarity": 0, "epoch": 0.9459459459459459, "grad_norm": 1.1307271446136002, "learning_rate": 3.8039351052813256e-05, "loss": 2.1078, "reason_loss": 0.5204763412475586, "step": 1015, "utility_loss": 1.5872849225997925 }, { "cosine_similarity": 0, "epoch": 0.9468779123951537, "grad_norm": 1.1638262248870457, "learning_rate": 3.802209181912324e-05, "loss": 1.8678, "reason_loss": 0.49627792835235596, "step": 1016, "utility_loss": 1.371571660041809 }, { "cosine_similarity": 0, "epoch": 0.9478098788443616, "grad_norm": 1.263656733705239, "learning_rate": 3.800483258543321e-05, "loss": 1.9557, "reason_loss": 0.49124816060066223, "step": 1017, "utility_loss": 1.4644238948822021 }, { "cosine_similarity": 0, "epoch": 0.9487418452935694, "grad_norm": 1.129949367175358, "learning_rate": 3.7987573351743186e-05, "loss": 1.8661, "reason_loss": 0.5119277238845825, "step": 1018, "utility_loss": 1.354217290878296 }, { "cosine_similarity": 0, "epoch": 0.9496738117427772, "grad_norm": 1.3033529987034602, "learning_rate": 3.797031411805316e-05, "loss": 2.0649, "reason_loss": 0.5103816986083984, "step": 1019, "utility_loss": 1.5545321702957153 }, { "cosine_similarity": 0, "epoch": 0.9506057781919851, "grad_norm": 1.2390021596757836, "learning_rate": 3.7953054884363134e-05, "loss": 1.9849, "reason_loss": 0.4828445315361023, "step": 1020, "utility_loss": 1.5020558834075928 }, { "cosine_similarity": 0, "epoch": 0.9515377446411929, "grad_norm": 1.4838786253374008, "learning_rate": 3.793579565067311e-05, "loss": 2.0538, "reason_loss": 0.5030763149261475, "step": 1021, "utility_loss": 1.5507664680480957 }, { "cosine_similarity": 0, "epoch": 0.9524697110904008, "grad_norm": 1.1957452732828373, "learning_rate": 3.791853641698308e-05, "loss": 2.0706, "reason_loss": 0.5069243907928467, "step": 1022, "utility_loss": 1.5637179613113403 }, { "cosine_similarity": 0, "epoch": 0.9534016775396086, "grad_norm": 1.124059752722466, "learning_rate": 3.7901277183293064e-05, "loss": 2.0907, "reason_loss": 0.4909021854400635, "step": 1023, "utility_loss": 1.5997618436813354 }, { "cosine_similarity": 0, "epoch": 0.9543336439888164, "grad_norm": 1.3053204413343669, "learning_rate": 3.788401794960304e-05, "loss": 2.055, "reason_loss": 0.5067988634109497, "step": 1024, "utility_loss": 1.5482028722763062 }, { "cosine_similarity": 0, "epoch": 0.9552656104380243, "grad_norm": 1.1079485565803449, "learning_rate": 3.786675871591301e-05, "loss": 2.148, "reason_loss": 0.5123288631439209, "step": 1025, "utility_loss": 1.6357109546661377 }, { "cosine_similarity": 0, "epoch": 0.9561975768872321, "grad_norm": 1.036851831601762, "learning_rate": 3.784949948222299e-05, "loss": 1.9633, "reason_loss": 0.505463719367981, "step": 1026, "utility_loss": 1.4578008651733398 }, { "cosine_similarity": 0, "epoch": 0.9571295433364398, "grad_norm": 1.1483679285956259, "learning_rate": 3.783224024853297e-05, "loss": 2.0967, "reason_loss": 0.509510338306427, "step": 1027, "utility_loss": 1.5872352123260498 }, { "cosine_similarity": 0, "epoch": 0.9580615097856477, "grad_norm": 1.138140989822015, "learning_rate": 3.781498101484294e-05, "loss": 2.0774, "reason_loss": 0.5488554239273071, "step": 1028, "utility_loss": 1.5285639762878418 }, { "cosine_similarity": 0, "epoch": 0.9589934762348555, "grad_norm": 0.9729160386469079, "learning_rate": 3.7797721781152916e-05, "loss": 1.8703, "reason_loss": 0.5104951858520508, "step": 1029, "utility_loss": 1.3598291873931885 }, { "cosine_similarity": 0, "epoch": 0.9599254426840633, "grad_norm": 1.1570972056482318, "learning_rate": 3.77804625474629e-05, "loss": 1.9406, "reason_loss": 0.49243083596229553, "step": 1030, "utility_loss": 1.4481618404388428 }, { "cosine_similarity": 0, "epoch": 0.9608574091332712, "grad_norm": 1.140530059870414, "learning_rate": 3.776320331377287e-05, "loss": 2.0705, "reason_loss": 0.5265704989433289, "step": 1031, "utility_loss": 1.5438814163208008 }, { "cosine_similarity": 0, "epoch": 0.961789375582479, "grad_norm": 1.1256417830567604, "learning_rate": 3.7745944080082846e-05, "loss": 2.0139, "reason_loss": 0.4896913170814514, "step": 1032, "utility_loss": 1.5241715908050537 }, { "cosine_similarity": 0, "epoch": 0.9627213420316869, "grad_norm": 1.2042171393564294, "learning_rate": 3.772868484639282e-05, "loss": 2.0118, "reason_loss": 0.5109682083129883, "step": 1033, "utility_loss": 1.5008502006530762 }, { "cosine_similarity": 0, "epoch": 0.9636533084808947, "grad_norm": 1.6220094138919454, "learning_rate": 3.77114256127028e-05, "loss": 1.9886, "reason_loss": 0.4742352366447449, "step": 1034, "utility_loss": 1.5143308639526367 }, { "cosine_similarity": 0, "epoch": 0.9645852749301025, "grad_norm": 1.2230498018541576, "learning_rate": 3.7694166379012776e-05, "loss": 2.268, "reason_loss": 0.49544596672058105, "step": 1035, "utility_loss": 1.7725894451141357 }, { "cosine_similarity": 0, "epoch": 0.9655172413793104, "grad_norm": 1.767964871660349, "learning_rate": 3.767690714532275e-05, "loss": 1.9994, "reason_loss": 0.48626017570495605, "step": 1036, "utility_loss": 1.513094425201416 }, { "cosine_similarity": 0, "epoch": 0.9664492078285182, "grad_norm": 1.1124988617859592, "learning_rate": 3.7659647911632724e-05, "loss": 1.6524, "reason_loss": 0.5001864433288574, "step": 1037, "utility_loss": 1.1522144079208374 }, { "cosine_similarity": 0, "epoch": 0.967381174277726, "grad_norm": 0.9506217208573404, "learning_rate": 3.7642388677942705e-05, "loss": 1.7489, "reason_loss": 0.497324675321579, "step": 1038, "utility_loss": 1.2516123056411743 }, { "cosine_similarity": 0, "epoch": 0.9683131407269339, "grad_norm": 1.3051437109870323, "learning_rate": 3.762512944425268e-05, "loss": 2.0477, "reason_loss": 0.5348929762840271, "step": 1039, "utility_loss": 1.5127946138381958 }, { "cosine_similarity": 0, "epoch": 0.9692451071761417, "grad_norm": 1.3273592322518626, "learning_rate": 3.7607870210562654e-05, "loss": 1.6053, "reason_loss": 0.4722653031349182, "step": 1040, "utility_loss": 1.1330796480178833 }, { "cosine_similarity": 0, "epoch": 0.9701770736253494, "grad_norm": 1.1243131255733005, "learning_rate": 3.759061097687263e-05, "loss": 1.8081, "reason_loss": 0.4863489866256714, "step": 1041, "utility_loss": 1.3217883110046387 }, { "cosine_similarity": 0, "epoch": 0.9711090400745573, "grad_norm": 1.7465577321004266, "learning_rate": 3.75733517431826e-05, "loss": 2.1798, "reason_loss": 0.49429333209991455, "step": 1042, "utility_loss": 1.685546875 }, { "cosine_similarity": 0, "epoch": 0.9720410065237651, "grad_norm": 1.4443040262400137, "learning_rate": 3.755609250949258e-05, "loss": 2.0998, "reason_loss": 0.47467851638793945, "step": 1043, "utility_loss": 1.625131368637085 }, { "cosine_similarity": 0, "epoch": 0.972972972972973, "grad_norm": 1.339774750757046, "learning_rate": 3.753883327580255e-05, "loss": 1.8755, "reason_loss": 0.48020803928375244, "step": 1044, "utility_loss": 1.3952676057815552 }, { "cosine_similarity": 0, "epoch": 0.9739049394221808, "grad_norm": 1.078396432410239, "learning_rate": 3.752157404211253e-05, "loss": 2.1665, "reason_loss": 0.5003958940505981, "step": 1045, "utility_loss": 1.6660830974578857 }, { "cosine_similarity": 0, "epoch": 0.9748369058713886, "grad_norm": 1.1244229033708977, "learning_rate": 3.7504314808422507e-05, "loss": 2.1011, "reason_loss": 0.532585859298706, "step": 1046, "utility_loss": 1.5685093402862549 }, { "cosine_similarity": 0, "epoch": 0.9757688723205965, "grad_norm": 1.2943865225452547, "learning_rate": 3.748705557473248e-05, "loss": 2.4959, "reason_loss": 0.5426889061927795, "step": 1047, "utility_loss": 1.9532116651535034 }, { "cosine_similarity": 0, "epoch": 0.9767008387698043, "grad_norm": 1.0303802729142901, "learning_rate": 3.7469796341042455e-05, "loss": 1.9403, "reason_loss": 0.49299973249435425, "step": 1048, "utility_loss": 1.447281837463379 }, { "cosine_similarity": 0, "epoch": 0.9776328052190121, "grad_norm": 1.1887470466214327, "learning_rate": 3.7452537107352436e-05, "loss": 2.2782, "reason_loss": 0.5221799612045288, "step": 1049, "utility_loss": 1.7560315132141113 }, { "cosine_similarity": 0, "epoch": 0.97856477166822, "grad_norm": 1.3083116819558678, "learning_rate": 3.743527787366241e-05, "loss": 1.9933, "reason_loss": 0.531501829624176, "step": 1050, "utility_loss": 1.461784839630127 }, { "cosine_similarity": 0, "epoch": 0.9794967381174278, "grad_norm": 1.3287966759129792, "learning_rate": 3.7418018639972385e-05, "loss": 1.9515, "reason_loss": 0.47097769379615784, "step": 1051, "utility_loss": 1.480496883392334 }, { "cosine_similarity": 0, "epoch": 0.9804287045666356, "grad_norm": 1.1783653048000893, "learning_rate": 3.7400759406282366e-05, "loss": 1.7923, "reason_loss": 0.5086819529533386, "step": 1052, "utility_loss": 1.2836031913757324 }, { "cosine_similarity": 0, "epoch": 0.9813606710158435, "grad_norm": 1.3173994417779409, "learning_rate": 3.738350017259234e-05, "loss": 2.079, "reason_loss": 0.4951108992099762, "step": 1053, "utility_loss": 1.5838849544525146 }, { "cosine_similarity": 0, "epoch": 0.9822926374650512, "grad_norm": 1.0112185475475601, "learning_rate": 3.7366240938902314e-05, "loss": 2.018, "reason_loss": 0.4893002510070801, "step": 1054, "utility_loss": 1.5287448167800903 }, { "cosine_similarity": 0, "epoch": 0.983224603914259, "grad_norm": 1.0133062207885166, "learning_rate": 3.734898170521229e-05, "loss": 2.1584, "reason_loss": 0.5224630832672119, "step": 1055, "utility_loss": 1.6359412670135498 }, { "cosine_similarity": 0, "epoch": 0.9841565703634669, "grad_norm": 0.9171477887626082, "learning_rate": 3.733172247152227e-05, "loss": 1.7286, "reason_loss": 0.5021859407424927, "step": 1056, "utility_loss": 1.226367712020874 }, { "cosine_similarity": 0, "epoch": 0.9850885368126747, "grad_norm": 1.355935126048519, "learning_rate": 3.7314463237832244e-05, "loss": 2.3527, "reason_loss": 0.5089027285575867, "step": 1057, "utility_loss": 1.8437930345535278 }, { "cosine_similarity": 0, "epoch": 0.9860205032618826, "grad_norm": 1.4004364975928998, "learning_rate": 3.729720400414222e-05, "loss": 1.998, "reason_loss": 0.488754540681839, "step": 1058, "utility_loss": 1.5092047452926636 }, { "cosine_similarity": 0, "epoch": 0.9869524697110904, "grad_norm": 1.00838888242953, "learning_rate": 3.72799447704522e-05, "loss": 2.3442, "reason_loss": 0.5079697370529175, "step": 1059, "utility_loss": 1.8362793922424316 }, { "cosine_similarity": 0, "epoch": 0.9878844361602982, "grad_norm": 1.3122131109535295, "learning_rate": 3.7262685536762174e-05, "loss": 1.8443, "reason_loss": 0.4709116220474243, "step": 1060, "utility_loss": 1.373358964920044 }, { "cosine_similarity": 0, "epoch": 0.9888164026095061, "grad_norm": 1.1665288642496967, "learning_rate": 3.724542630307215e-05, "loss": 2.0631, "reason_loss": 0.499590128660202, "step": 1061, "utility_loss": 1.5635488033294678 }, { "cosine_similarity": 0, "epoch": 0.9897483690587139, "grad_norm": 1.3178764661465243, "learning_rate": 3.722816706938212e-05, "loss": 1.9796, "reason_loss": 0.4906330704689026, "step": 1062, "utility_loss": 1.4889613389968872 }, { "cosine_similarity": 0, "epoch": 0.9906803355079217, "grad_norm": 0.990620618012273, "learning_rate": 3.7210907835692097e-05, "loss": 1.7237, "reason_loss": 0.5132319331169128, "step": 1063, "utility_loss": 1.2104336023330688 }, { "cosine_similarity": 0, "epoch": 0.9916123019571296, "grad_norm": 1.281816958099777, "learning_rate": 3.719364860200207e-05, "loss": 2.1657, "reason_loss": 0.4878380298614502, "step": 1064, "utility_loss": 1.6778924465179443 }, { "cosine_similarity": 0, "epoch": 0.9925442684063374, "grad_norm": 1.0063062528656015, "learning_rate": 3.7176389368312045e-05, "loss": 1.5776, "reason_loss": 0.5077890157699585, "step": 1065, "utility_loss": 1.0698555707931519 }, { "cosine_similarity": 0, "epoch": 0.9934762348555451, "grad_norm": 1.0729227804576233, "learning_rate": 3.715913013462202e-05, "loss": 1.9796, "reason_loss": 0.5316936373710632, "step": 1066, "utility_loss": 1.447871208190918 }, { "cosine_similarity": 0, "epoch": 0.994408201304753, "grad_norm": 1.12634697914765, "learning_rate": 3.7141870900932e-05, "loss": 1.7228, "reason_loss": 0.47916966676712036, "step": 1067, "utility_loss": 1.2435874938964844 }, { "cosine_similarity": 0, "epoch": 0.9953401677539608, "grad_norm": 1.2628136605231932, "learning_rate": 3.7124611667241975e-05, "loss": 2.1517, "reason_loss": 0.5240598917007446, "step": 1068, "utility_loss": 1.6276490688323975 }, { "cosine_similarity": 0, "epoch": 0.9962721342031687, "grad_norm": 1.1669691663727433, "learning_rate": 3.710735243355195e-05, "loss": 2.2861, "reason_loss": 0.4815957844257355, "step": 1069, "utility_loss": 1.8045424222946167 }, { "cosine_similarity": 0, "epoch": 0.9972041006523765, "grad_norm": 1.0411645524982072, "learning_rate": 3.709009319986193e-05, "loss": 1.8655, "reason_loss": 0.5034663081169128, "step": 1070, "utility_loss": 1.3620015382766724 }, { "cosine_similarity": 0, "epoch": 0.9981360671015843, "grad_norm": 1.1230576582619185, "learning_rate": 3.7072833966171904e-05, "loss": 1.8457, "reason_loss": 0.47859787940979004, "step": 1071, "utility_loss": 1.3670775890350342 }, { "cosine_similarity": 0, "epoch": 0.9990680335507922, "grad_norm": 1.200854604145529, "learning_rate": 3.705557473248188e-05, "loss": 2.3023, "reason_loss": 0.49758949875831604, "step": 1072, "utility_loss": 1.8046643733978271 }, { "cosine_similarity": 0, "epoch": 1.0, "grad_norm": 0.991592867171971, "learning_rate": 3.703831549879185e-05, "loss": 1.8998, "reason_loss": 0.4827273488044739, "step": 1073, "utility_loss": 1.4170286655426025 }, { "cosine_similarity": 0, "epoch": 1.0009319664492078, "grad_norm": 1.2226435807330311, "learning_rate": 3.7021056265101834e-05, "loss": 1.7724, "reason_loss": 0.44817572832107544, "step": 1074, "utility_loss": 1.324232816696167 }, { "cosine_similarity": 0, "epoch": 1.0018639328984156, "grad_norm": 1.275722965589367, "learning_rate": 3.700379703141181e-05, "loss": 1.786, "reason_loss": 0.4749443531036377, "step": 1075, "utility_loss": 1.311055302619934 }, { "cosine_similarity": 0, "epoch": 1.0027958993476236, "grad_norm": 0.9458556605381206, "learning_rate": 3.698653779772178e-05, "loss": 1.4499, "reason_loss": 0.4483465552330017, "step": 1076, "utility_loss": 1.001543641090393 }, { "cosine_similarity": 0, "epoch": 1.0037278657968314, "grad_norm": 1.059045040367483, "learning_rate": 3.696927856403176e-05, "loss": 1.4667, "reason_loss": 0.49791014194488525, "step": 1077, "utility_loss": 0.9688114523887634 }, { "cosine_similarity": 0, "epoch": 1.0046598322460392, "grad_norm": 0.9832928328248803, "learning_rate": 3.695201933034174e-05, "loss": 1.7226, "reason_loss": 0.49860963225364685, "step": 1078, "utility_loss": 1.2240145206451416 }, { "cosine_similarity": 0, "epoch": 1.005591798695247, "grad_norm": 0.9579167141349623, "learning_rate": 3.693476009665171e-05, "loss": 1.396, "reason_loss": 0.4839644432067871, "step": 1079, "utility_loss": 0.9120457768440247 }, { "cosine_similarity": 0, "epoch": 1.0065237651444547, "grad_norm": 1.0824573007578864, "learning_rate": 3.6917500862961687e-05, "loss": 1.2756, "reason_loss": 0.4929547905921936, "step": 1080, "utility_loss": 0.7826399803161621 }, { "cosine_similarity": 0, "epoch": 1.0074557315936625, "grad_norm": 1.1500366556569332, "learning_rate": 3.690024162927167e-05, "loss": 1.7899, "reason_loss": 0.445381224155426, "step": 1081, "utility_loss": 1.344500184059143 }, { "cosine_similarity": 0, "epoch": 1.0083876980428705, "grad_norm": 1.0432788228127994, "learning_rate": 3.688298239558164e-05, "loss": 1.4874, "reason_loss": 0.4881603717803955, "step": 1082, "utility_loss": 0.9991957545280457 }, { "cosine_similarity": 0, "epoch": 1.0093196644920783, "grad_norm": 1.155160835647265, "learning_rate": 3.6865723161891616e-05, "loss": 1.6287, "reason_loss": 0.4955388903617859, "step": 1083, "utility_loss": 1.1331219673156738 }, { "cosine_similarity": 0, "epoch": 1.0102516309412861, "grad_norm": 0.9824707723387044, "learning_rate": 3.684846392820159e-05, "loss": 1.4112, "reason_loss": 0.4860122501850128, "step": 1084, "utility_loss": 0.9251775741577148 }, { "cosine_similarity": 0, "epoch": 1.011183597390494, "grad_norm": 0.9488318270626891, "learning_rate": 3.6831204694511565e-05, "loss": 1.6416, "reason_loss": 0.522650420665741, "step": 1085, "utility_loss": 1.1189202070236206 }, { "cosine_similarity": 0, "epoch": 1.0121155638397017, "grad_norm": 0.9590952585833898, "learning_rate": 3.681394546082154e-05, "loss": 1.4678, "reason_loss": 0.4611983001232147, "step": 1086, "utility_loss": 1.0066297054290771 }, { "cosine_similarity": 0, "epoch": 1.0130475302889097, "grad_norm": 0.8049182549616172, "learning_rate": 3.6796686227131513e-05, "loss": 1.438, "reason_loss": 0.5188623666763306, "step": 1087, "utility_loss": 0.9191573858261108 }, { "cosine_similarity": 0, "epoch": 1.0139794967381175, "grad_norm": 1.1234298050953802, "learning_rate": 3.677942699344149e-05, "loss": 1.5011, "reason_loss": 0.5259297490119934, "step": 1088, "utility_loss": 0.9751354455947876 }, { "cosine_similarity": 0, "epoch": 1.0149114631873253, "grad_norm": 1.2820112054687285, "learning_rate": 3.676216775975147e-05, "loss": 1.6434, "reason_loss": 0.5263086557388306, "step": 1089, "utility_loss": 1.1171244382858276 }, { "cosine_similarity": 0, "epoch": 1.015843429636533, "grad_norm": 1.378363606511475, "learning_rate": 3.674490852606144e-05, "loss": 1.6456, "reason_loss": 0.4968600273132324, "step": 1090, "utility_loss": 1.1487562656402588 }, { "cosine_similarity": 0, "epoch": 1.0167753960857409, "grad_norm": 1.026600030006894, "learning_rate": 3.672764929237142e-05, "loss": 1.5102, "reason_loss": 0.46634483337402344, "step": 1091, "utility_loss": 1.0438289642333984 }, { "cosine_similarity": 0, "epoch": 1.0177073625349486, "grad_norm": 1.1206952450437357, "learning_rate": 3.67103900586814e-05, "loss": 1.6908, "reason_loss": 0.4950718879699707, "step": 1092, "utility_loss": 1.195727825164795 }, { "cosine_similarity": 0, "epoch": 1.0186393289841567, "grad_norm": 0.9956895958972743, "learning_rate": 3.669313082499137e-05, "loss": 1.7807, "reason_loss": 0.4857572019100189, "step": 1093, "utility_loss": 1.2949731349945068 }, { "cosine_similarity": 0, "epoch": 1.0195712954333644, "grad_norm": 1.338514301204468, "learning_rate": 3.667587159130135e-05, "loss": 1.6537, "reason_loss": 0.49216386675834656, "step": 1094, "utility_loss": 1.1615586280822754 }, { "cosine_similarity": 0, "epoch": 1.0205032618825722, "grad_norm": 0.9513137908783329, "learning_rate": 3.665861235761132e-05, "loss": 1.3027, "reason_loss": 0.4791445732116699, "step": 1095, "utility_loss": 0.8235465288162231 }, { "cosine_similarity": 0, "epoch": 1.02143522833178, "grad_norm": 0.9649969893115616, "learning_rate": 3.66413531239213e-05, "loss": 1.4259, "reason_loss": 0.45931288599967957, "step": 1096, "utility_loss": 0.9666125774383545 }, { "cosine_similarity": 0, "epoch": 1.0223671947809878, "grad_norm": 1.007001359435952, "learning_rate": 3.662409389023128e-05, "loss": 1.4077, "reason_loss": 0.4524427652359009, "step": 1097, "utility_loss": 0.9552205801010132 }, { "cosine_similarity": 0, "epoch": 1.0232991612301958, "grad_norm": 1.026626807831666, "learning_rate": 3.660683465654125e-05, "loss": 1.3081, "reason_loss": 0.4849950671195984, "step": 1098, "utility_loss": 0.8231246471405029 }, { "cosine_similarity": 0, "epoch": 1.0242311276794036, "grad_norm": 1.2203162864846264, "learning_rate": 3.6589575422851225e-05, "loss": 1.7404, "reason_loss": 0.46900713443756104, "step": 1099, "utility_loss": 1.2713773250579834 }, { "cosine_similarity": 0, "epoch": 1.0251630941286114, "grad_norm": 1.064893354591642, "learning_rate": 3.6572316189161206e-05, "loss": 1.5243, "reason_loss": 0.4886234998703003, "step": 1100, "utility_loss": 1.0357017517089844 }, { "cosine_similarity": 0, "epoch": 1.0260950605778192, "grad_norm": 0.9484541849381002, "learning_rate": 3.655505695547118e-05, "loss": 1.4346, "reason_loss": 0.47338032722473145, "step": 1101, "utility_loss": 0.9611794948577881 }, { "cosine_similarity": 0, "epoch": 1.027027027027027, "grad_norm": 1.1033323126248145, "learning_rate": 3.6537797721781155e-05, "loss": 1.687, "reason_loss": 0.48137933015823364, "step": 1102, "utility_loss": 1.205594778060913 }, { "cosine_similarity": 0, "epoch": 1.0279589934762348, "grad_norm": 1.1257576620974934, "learning_rate": 3.6520538488091136e-05, "loss": 1.4583, "reason_loss": 0.5033287405967712, "step": 1103, "utility_loss": 0.9549962282180786 }, { "cosine_similarity": 0, "epoch": 1.0288909599254428, "grad_norm": 0.9386778007578898, "learning_rate": 3.650327925440111e-05, "loss": 1.6375, "reason_loss": 0.47082632780075073, "step": 1104, "utility_loss": 1.1666569709777832 }, { "cosine_similarity": 0, "epoch": 1.0298229263746506, "grad_norm": 1.2421696061493976, "learning_rate": 3.6486020020711085e-05, "loss": 1.5506, "reason_loss": 0.4774731397628784, "step": 1105, "utility_loss": 1.073127269744873 }, { "cosine_similarity": 0, "epoch": 1.0307548928238583, "grad_norm": 1.0409922365430082, "learning_rate": 3.646876078702106e-05, "loss": 1.4616, "reason_loss": 0.49117693305015564, "step": 1106, "utility_loss": 0.9703838229179382 }, { "cosine_similarity": 0, "epoch": 1.0316868592730661, "grad_norm": 1.2314840809150962, "learning_rate": 3.645150155333103e-05, "loss": 1.5282, "reason_loss": 0.46293795108795166, "step": 1107, "utility_loss": 1.0652885437011719 }, { "cosine_similarity": 0, "epoch": 1.032618825722274, "grad_norm": 1.0341197747933457, "learning_rate": 3.643424231964101e-05, "loss": 1.6612, "reason_loss": 0.4890589118003845, "step": 1108, "utility_loss": 1.1721699237823486 }, { "cosine_similarity": 0, "epoch": 1.0335507921714817, "grad_norm": 1.1719026004033124, "learning_rate": 3.641698308595098e-05, "loss": 1.4837, "reason_loss": 0.4758601188659668, "step": 1109, "utility_loss": 1.0078010559082031 }, { "cosine_similarity": 0, "epoch": 1.0344827586206897, "grad_norm": 1.0274670356437194, "learning_rate": 3.639972385226096e-05, "loss": 1.4923, "reason_loss": 0.4698888957500458, "step": 1110, "utility_loss": 1.0224199295043945 }, { "cosine_similarity": 0, "epoch": 1.0354147250698975, "grad_norm": 1.275373346109067, "learning_rate": 3.638246461857094e-05, "loss": 1.3422, "reason_loss": 0.4806619882583618, "step": 1111, "utility_loss": 0.8615292906761169 }, { "cosine_similarity": 0, "epoch": 1.0363466915191053, "grad_norm": 0.9565394687757537, "learning_rate": 3.636520538488091e-05, "loss": 1.5133, "reason_loss": 0.463681161403656, "step": 1112, "utility_loss": 1.0496642589569092 }, { "cosine_similarity": 0, "epoch": 1.037278657968313, "grad_norm": 1.0963675106528417, "learning_rate": 3.6347946151190886e-05, "loss": 1.4992, "reason_loss": 0.4899528920650482, "step": 1113, "utility_loss": 1.009252667427063 }, { "cosine_similarity": 0, "epoch": 1.0382106244175209, "grad_norm": 1.0903888637729533, "learning_rate": 3.633068691750087e-05, "loss": 1.672, "reason_loss": 0.48214054107666016, "step": 1114, "utility_loss": 1.1898891925811768 }, { "cosine_similarity": 0, "epoch": 1.0391425908667289, "grad_norm": 1.1665229292619879, "learning_rate": 3.631342768381084e-05, "loss": 1.794, "reason_loss": 0.5006712675094604, "step": 1115, "utility_loss": 1.2933077812194824 }, { "cosine_similarity": 0, "epoch": 1.0400745573159367, "grad_norm": 1.0433351620146964, "learning_rate": 3.6296168450120815e-05, "loss": 1.3859, "reason_loss": 0.4547792673110962, "step": 1116, "utility_loss": 0.931127667427063 }, { "cosine_similarity": 0, "epoch": 1.0410065237651445, "grad_norm": 1.139487186573443, "learning_rate": 3.627890921643079e-05, "loss": 1.6655, "reason_loss": 0.5013740062713623, "step": 1117, "utility_loss": 1.1641483306884766 }, { "cosine_similarity": 0, "epoch": 1.0419384902143523, "grad_norm": 1.1974109298645503, "learning_rate": 3.626164998274077e-05, "loss": 1.7331, "reason_loss": 0.4661283493041992, "step": 1118, "utility_loss": 1.2669250965118408 }, { "cosine_similarity": 0, "epoch": 1.04287045666356, "grad_norm": 1.1223337912698077, "learning_rate": 3.6244390749050745e-05, "loss": 1.9511, "reason_loss": 0.506209135055542, "step": 1119, "utility_loss": 1.4449386596679688 }, { "cosine_similarity": 0, "epoch": 1.0438024231127678, "grad_norm": 1.0832541071543402, "learning_rate": 3.622713151536072e-05, "loss": 1.7032, "reason_loss": 0.48168909549713135, "step": 1120, "utility_loss": 1.2214796543121338 }, { "cosine_similarity": 0, "epoch": 1.0447343895619758, "grad_norm": 0.9075292397071985, "learning_rate": 3.62098722816707e-05, "loss": 1.4302, "reason_loss": 0.47270646691322327, "step": 1121, "utility_loss": 0.9574465751647949 }, { "cosine_similarity": 0, "epoch": 1.0456663560111836, "grad_norm": 0.96864166283832, "learning_rate": 3.6192613047980675e-05, "loss": 1.4624, "reason_loss": 0.4929375946521759, "step": 1122, "utility_loss": 0.9694465398788452 }, { "cosine_similarity": 0, "epoch": 1.0465983224603914, "grad_norm": 1.1590442090088395, "learning_rate": 3.617535381429065e-05, "loss": 1.8364, "reason_loss": 0.4773634672164917, "step": 1123, "utility_loss": 1.358997106552124 }, { "cosine_similarity": 0, "epoch": 1.0475302889095992, "grad_norm": 1.0560864262501704, "learning_rate": 3.615809458060062e-05, "loss": 1.7968, "reason_loss": 0.5093202590942383, "step": 1124, "utility_loss": 1.2874794006347656 }, { "cosine_similarity": 0, "epoch": 1.048462255358807, "grad_norm": 0.9826591973028718, "learning_rate": 3.6140835346910604e-05, "loss": 1.7123, "reason_loss": 0.4732476472854614, "step": 1125, "utility_loss": 1.2390633821487427 }, { "cosine_similarity": 0, "epoch": 1.049394221808015, "grad_norm": 1.1039018637314904, "learning_rate": 3.612357611322058e-05, "loss": 1.7517, "reason_loss": 0.47917649149894714, "step": 1126, "utility_loss": 1.2725589275360107 }, { "cosine_similarity": 0, "epoch": 1.0503261882572228, "grad_norm": 0.9416622413191725, "learning_rate": 3.610631687953055e-05, "loss": 1.4076, "reason_loss": 0.514458417892456, "step": 1127, "utility_loss": 0.8931698799133301 }, { "cosine_similarity": 0, "epoch": 1.0512581547064306, "grad_norm": 1.03071968550853, "learning_rate": 3.608905764584053e-05, "loss": 1.4591, "reason_loss": 0.47866663336753845, "step": 1128, "utility_loss": 0.9804147481918335 }, { "cosine_similarity": 0, "epoch": 1.0521901211556384, "grad_norm": 1.0651755317069271, "learning_rate": 3.60717984121505e-05, "loss": 1.816, "reason_loss": 0.476035475730896, "step": 1129, "utility_loss": 1.3399534225463867 }, { "cosine_similarity": 0, "epoch": 1.0531220876048462, "grad_norm": 1.252762696040186, "learning_rate": 3.6054539178460476e-05, "loss": 1.539, "reason_loss": 0.4748075008392334, "step": 1130, "utility_loss": 1.064173936843872 }, { "cosine_similarity": 0, "epoch": 1.054054054054054, "grad_norm": 1.2802501990504713, "learning_rate": 3.603727994477045e-05, "loss": 1.8212, "reason_loss": 0.5032017230987549, "step": 1131, "utility_loss": 1.3179712295532227 }, { "cosine_similarity": 0, "epoch": 1.054986020503262, "grad_norm": 1.186384003129759, "learning_rate": 3.602002071108043e-05, "loss": 1.2522, "reason_loss": 0.48477625846862793, "step": 1132, "utility_loss": 0.7674564123153687 }, { "cosine_similarity": 0, "epoch": 1.0559179869524697, "grad_norm": 0.9713791260856938, "learning_rate": 3.6002761477390405e-05, "loss": 1.6928, "reason_loss": 0.47759363055229187, "step": 1133, "utility_loss": 1.215244174003601 }, { "cosine_similarity": 0, "epoch": 1.0568499534016775, "grad_norm": 1.0709346527385943, "learning_rate": 3.598550224370038e-05, "loss": 1.7741, "reason_loss": 0.5085422992706299, "step": 1134, "utility_loss": 1.2655267715454102 }, { "cosine_similarity": 0, "epoch": 1.0577819198508853, "grad_norm": 1.2159414749979358, "learning_rate": 3.5968243010010354e-05, "loss": 1.6554, "reason_loss": 0.5184907913208008, "step": 1135, "utility_loss": 1.136861801147461 }, { "cosine_similarity": 0, "epoch": 1.058713886300093, "grad_norm": 1.0419589665096203, "learning_rate": 3.5950983776320335e-05, "loss": 1.5166, "reason_loss": 0.4764789342880249, "step": 1136, "utility_loss": 1.040095329284668 }, { "cosine_similarity": 0, "epoch": 1.0596458527493011, "grad_norm": 0.8741048734879845, "learning_rate": 3.593372454263031e-05, "loss": 1.7677, "reason_loss": 0.510779857635498, "step": 1137, "utility_loss": 1.2568851709365845 }, { "cosine_similarity": 0, "epoch": 1.060577819198509, "grad_norm": 1.0780219867738883, "learning_rate": 3.5916465308940284e-05, "loss": 1.6274, "reason_loss": 0.4862695336341858, "step": 1138, "utility_loss": 1.1411757469177246 }, { "cosine_similarity": 0, "epoch": 1.0615097856477167, "grad_norm": 0.9492921339957757, "learning_rate": 3.589920607525026e-05, "loss": 1.554, "reason_loss": 0.48565107583999634, "step": 1139, "utility_loss": 1.0683698654174805 }, { "cosine_similarity": 0, "epoch": 1.0624417520969245, "grad_norm": 1.9508973981186293, "learning_rate": 3.588194684156024e-05, "loss": 2.03, "reason_loss": 0.49941587448120117, "step": 1140, "utility_loss": 1.5305521488189697 }, { "cosine_similarity": 0, "epoch": 1.0633737185461323, "grad_norm": 1.0119507968783972, "learning_rate": 3.586468760787021e-05, "loss": 1.6032, "reason_loss": 0.4660715162754059, "step": 1141, "utility_loss": 1.1371409893035889 }, { "cosine_similarity": 0, "epoch": 1.06430568499534, "grad_norm": 0.8442040391110315, "learning_rate": 3.584742837418019e-05, "loss": 1.1417, "reason_loss": 0.46429622173309326, "step": 1142, "utility_loss": 0.6774177551269531 }, { "cosine_similarity": 0, "epoch": 1.065237651444548, "grad_norm": 0.901368544803398, "learning_rate": 3.583016914049017e-05, "loss": 1.4962, "reason_loss": 0.5060018301010132, "step": 1143, "utility_loss": 0.9902164340019226 }, { "cosine_similarity": 0, "epoch": 1.0661696178937559, "grad_norm": 1.166933833433553, "learning_rate": 3.581290990680014e-05, "loss": 1.7101, "reason_loss": 0.4769766926765442, "step": 1144, "utility_loss": 1.2331011295318604 }, { "cosine_similarity": 0, "epoch": 1.0671015843429636, "grad_norm": 1.1957297763041241, "learning_rate": 3.579565067311012e-05, "loss": 1.5055, "reason_loss": 0.4953693747520447, "step": 1145, "utility_loss": 1.0101054906845093 }, { "cosine_similarity": 0, "epoch": 1.0680335507921714, "grad_norm": 1.1313021466952133, "learning_rate": 3.577839143942009e-05, "loss": 1.6356, "reason_loss": 0.48524585366249084, "step": 1146, "utility_loss": 1.1503968238830566 }, { "cosine_similarity": 0, "epoch": 1.0689655172413792, "grad_norm": 0.9662390054631402, "learning_rate": 3.5761132205730066e-05, "loss": 1.6371, "reason_loss": 0.48156866431236267, "step": 1147, "utility_loss": 1.1555010080337524 }, { "cosine_similarity": 0, "epoch": 1.0698974836905872, "grad_norm": 1.0560464153810971, "learning_rate": 3.574387297204005e-05, "loss": 1.5739, "reason_loss": 0.4677554965019226, "step": 1148, "utility_loss": 1.106115460395813 }, { "cosine_similarity": 0, "epoch": 1.070829450139795, "grad_norm": 1.3132124790179076, "learning_rate": 3.572661373835002e-05, "loss": 1.6846, "reason_loss": 0.4663993716239929, "step": 1149, "utility_loss": 1.2182426452636719 }, { "cosine_similarity": 0, "epoch": 1.0717614165890028, "grad_norm": 1.018722140324637, "learning_rate": 3.5709354504659995e-05, "loss": 1.5303, "reason_loss": 0.48847007751464844, "step": 1150, "utility_loss": 1.0418390035629272 }, { "cosine_similarity": 0, "epoch": 1.0726933830382106, "grad_norm": 1.313238245572836, "learning_rate": 3.569209527096997e-05, "loss": 1.8225, "reason_loss": 0.46512359380722046, "step": 1151, "utility_loss": 1.3573805093765259 }, { "cosine_similarity": 0, "epoch": 1.0736253494874184, "grad_norm": 1.168520129469928, "learning_rate": 3.5674836037279944e-05, "loss": 1.6095, "reason_loss": 0.493302583694458, "step": 1152, "utility_loss": 1.116220474243164 }, { "cosine_similarity": 0, "epoch": 1.0745573159366262, "grad_norm": 1.0799476019577972, "learning_rate": 3.565757680358992e-05, "loss": 1.5775, "reason_loss": 0.5008120536804199, "step": 1153, "utility_loss": 1.0766935348510742 }, { "cosine_similarity": 0, "epoch": 1.0754892823858342, "grad_norm": 0.9744244330402569, "learning_rate": 3.56403175698999e-05, "loss": 1.5236, "reason_loss": 0.4668892025947571, "step": 1154, "utility_loss": 1.0566762685775757 }, { "cosine_similarity": 0, "epoch": 1.076421248835042, "grad_norm": 1.1273041989522707, "learning_rate": 3.5623058336209874e-05, "loss": 1.8762, "reason_loss": 0.49747511744499207, "step": 1155, "utility_loss": 1.3787336349487305 }, { "cosine_similarity": 0, "epoch": 1.0773532152842498, "grad_norm": 0.9781302419156646, "learning_rate": 3.560579910251985e-05, "loss": 1.6586, "reason_loss": 0.4812636971473694, "step": 1156, "utility_loss": 1.1773655414581299 }, { "cosine_similarity": 0, "epoch": 1.0782851817334576, "grad_norm": 0.95838490026953, "learning_rate": 3.558853986882982e-05, "loss": 1.8301, "reason_loss": 0.47958099842071533, "step": 1157, "utility_loss": 1.3504990339279175 }, { "cosine_similarity": 0, "epoch": 1.0792171481826653, "grad_norm": 1.0842532735241206, "learning_rate": 3.55712806351398e-05, "loss": 1.882, "reason_loss": 0.4815499782562256, "step": 1158, "utility_loss": 1.4004120826721191 }, { "cosine_similarity": 0, "epoch": 1.0801491146318734, "grad_norm": 0.9574199591122712, "learning_rate": 3.555402140144978e-05, "loss": 1.3367, "reason_loss": 0.4809756577014923, "step": 1159, "utility_loss": 0.8557468056678772 }, { "cosine_similarity": 0, "epoch": 1.0810810810810811, "grad_norm": 1.0244223029884256, "learning_rate": 3.553676216775975e-05, "loss": 1.4336, "reason_loss": 0.467735230922699, "step": 1160, "utility_loss": 0.9658949375152588 }, { "cosine_similarity": 0, "epoch": 1.082013047530289, "grad_norm": 1.1518262032419737, "learning_rate": 3.551950293406973e-05, "loss": 1.2742, "reason_loss": 0.4785226583480835, "step": 1161, "utility_loss": 0.7956591248512268 }, { "cosine_similarity": 0, "epoch": 1.0829450139794967, "grad_norm": 0.9303799406165486, "learning_rate": 3.550224370037971e-05, "loss": 1.3522, "reason_loss": 0.5107309222221375, "step": 1162, "utility_loss": 0.8415101766586304 }, { "cosine_similarity": 0, "epoch": 1.0838769804287045, "grad_norm": 1.230770538329208, "learning_rate": 3.548498446668968e-05, "loss": 1.7875, "reason_loss": 0.5173444151878357, "step": 1163, "utility_loss": 1.2701213359832764 }, { "cosine_similarity": 0, "epoch": 1.0848089468779123, "grad_norm": 1.0357423745829744, "learning_rate": 3.5467725232999656e-05, "loss": 1.6261, "reason_loss": 0.47149789333343506, "step": 1164, "utility_loss": 1.1545681953430176 }, { "cosine_similarity": 0, "epoch": 1.0857409133271203, "grad_norm": 1.2326129475543803, "learning_rate": 3.545046599930964e-05, "loss": 1.6416, "reason_loss": 0.4638722240924835, "step": 1165, "utility_loss": 1.177689552307129 }, { "cosine_similarity": 0, "epoch": 1.086672879776328, "grad_norm": 0.8983766552726384, "learning_rate": 3.543320676561961e-05, "loss": 1.4353, "reason_loss": 0.4822249710559845, "step": 1166, "utility_loss": 0.9530258178710938 }, { "cosine_similarity": 0, "epoch": 1.0876048462255359, "grad_norm": 1.161326430544459, "learning_rate": 3.5415947531929585e-05, "loss": 1.6269, "reason_loss": 0.5017516613006592, "step": 1167, "utility_loss": 1.1251263618469238 }, { "cosine_similarity": 0, "epoch": 1.0885368126747437, "grad_norm": 1.0539873375807378, "learning_rate": 3.539868829823956e-05, "loss": 1.5666, "reason_loss": 0.4756678640842438, "step": 1168, "utility_loss": 1.090928077697754 }, { "cosine_similarity": 0, "epoch": 1.0894687791239515, "grad_norm": 1.1425033079203888, "learning_rate": 3.5381429064549534e-05, "loss": 1.8812, "reason_loss": 0.5404528379440308, "step": 1169, "utility_loss": 1.340714693069458 }, { "cosine_similarity": 0, "epoch": 1.0904007455731595, "grad_norm": 0.8937672609472601, "learning_rate": 3.536416983085951e-05, "loss": 1.5781, "reason_loss": 0.4928678274154663, "step": 1170, "utility_loss": 1.085235595703125 }, { "cosine_similarity": 0, "epoch": 1.0913327120223673, "grad_norm": 1.194712528678555, "learning_rate": 3.534691059716948e-05, "loss": 1.5298, "reason_loss": 0.5069544315338135, "step": 1171, "utility_loss": 1.0228625535964966 }, { "cosine_similarity": 0, "epoch": 1.092264678471575, "grad_norm": 0.8996853070084071, "learning_rate": 3.5329651363479464e-05, "loss": 1.4887, "reason_loss": 0.4780324697494507, "step": 1172, "utility_loss": 1.0106394290924072 }, { "cosine_similarity": 0, "epoch": 1.0931966449207828, "grad_norm": 1.3108105851738558, "learning_rate": 3.531239212978944e-05, "loss": 1.3165, "reason_loss": 0.47361937165260315, "step": 1173, "utility_loss": 0.8429044485092163 }, { "cosine_similarity": 0, "epoch": 1.0941286113699906, "grad_norm": 1.5136523279888705, "learning_rate": 3.529513289609941e-05, "loss": 1.3479, "reason_loss": 0.4567307233810425, "step": 1174, "utility_loss": 0.8911780118942261 }, { "cosine_similarity": 0, "epoch": 1.0950605778191984, "grad_norm": 1.0736703887619876, "learning_rate": 3.5277873662409386e-05, "loss": 1.7418, "reason_loss": 0.4867812991142273, "step": 1175, "utility_loss": 1.255035161972046 }, { "cosine_similarity": 0, "epoch": 1.0959925442684064, "grad_norm": 1.1232875783621163, "learning_rate": 3.526061442871937e-05, "loss": 1.6611, "reason_loss": 0.48666590452194214, "step": 1176, "utility_loss": 1.1744816303253174 }, { "cosine_similarity": 0, "epoch": 1.0969245107176142, "grad_norm": 1.1161293565465848, "learning_rate": 3.524335519502934e-05, "loss": 1.9124, "reason_loss": 0.49757957458496094, "step": 1177, "utility_loss": 1.4148205518722534 }, { "cosine_similarity": 0, "epoch": 1.097856477166822, "grad_norm": 1.0972573993721342, "learning_rate": 3.5226095961339316e-05, "loss": 1.413, "reason_loss": 0.4808139503002167, "step": 1178, "utility_loss": 0.9322075247764587 }, { "cosine_similarity": 0, "epoch": 1.0987884436160298, "grad_norm": 0.9711678933611231, "learning_rate": 3.520883672764929e-05, "loss": 1.7393, "reason_loss": 0.474863737821579, "step": 1179, "utility_loss": 1.264478087425232 }, { "cosine_similarity": 0, "epoch": 1.0997204100652376, "grad_norm": 0.9948710087500663, "learning_rate": 3.519157749395927e-05, "loss": 1.3424, "reason_loss": 0.4955328404903412, "step": 1180, "utility_loss": 0.8468209505081177 }, { "cosine_similarity": 0, "epoch": 1.1006523765144456, "grad_norm": 1.209819278673882, "learning_rate": 3.5174318260269246e-05, "loss": 1.4634, "reason_loss": 0.4770762324333191, "step": 1181, "utility_loss": 0.9863094091415405 }, { "cosine_similarity": 0, "epoch": 1.1015843429636534, "grad_norm": 0.880631003853662, "learning_rate": 3.515705902657922e-05, "loss": 1.2775, "reason_loss": 0.48796898126602173, "step": 1182, "utility_loss": 0.7895106077194214 }, { "cosine_similarity": 0, "epoch": 1.1025163094128612, "grad_norm": 1.0557245223658274, "learning_rate": 3.51397997928892e-05, "loss": 1.437, "reason_loss": 0.4595106542110443, "step": 1183, "utility_loss": 0.9774549603462219 }, { "cosine_similarity": 0, "epoch": 1.103448275862069, "grad_norm": 1.0692629584969866, "learning_rate": 3.5122540559199175e-05, "loss": 1.7719, "reason_loss": 0.5004208087921143, "step": 1184, "utility_loss": 1.271497368812561 }, { "cosine_similarity": 0, "epoch": 1.1043802423112767, "grad_norm": 1.0265027156640802, "learning_rate": 3.510528132550915e-05, "loss": 1.461, "reason_loss": 0.4913289546966553, "step": 1185, "utility_loss": 0.9697138667106628 }, { "cosine_similarity": 0, "epoch": 1.1053122087604845, "grad_norm": 1.2158573092015752, "learning_rate": 3.5088022091819124e-05, "loss": 1.7157, "reason_loss": 0.5129992365837097, "step": 1186, "utility_loss": 1.2026721239089966 }, { "cosine_similarity": 0, "epoch": 1.1062441752096925, "grad_norm": 1.0742658779976675, "learning_rate": 3.5070762858129105e-05, "loss": 1.6833, "reason_loss": 0.48660045862197876, "step": 1187, "utility_loss": 1.1967430114746094 }, { "cosine_similarity": 0, "epoch": 1.1071761416589003, "grad_norm": 1.1590927448117925, "learning_rate": 3.505350362443908e-05, "loss": 1.7097, "reason_loss": 0.5008271932601929, "step": 1188, "utility_loss": 1.2089049816131592 }, { "cosine_similarity": 0, "epoch": 1.1081081081081081, "grad_norm": 0.9667481286022619, "learning_rate": 3.5036244390749054e-05, "loss": 1.6392, "reason_loss": 0.46465378999710083, "step": 1189, "utility_loss": 1.1745576858520508 }, { "cosine_similarity": 0, "epoch": 1.109040074557316, "grad_norm": 1.0332219254407258, "learning_rate": 3.501898515705903e-05, "loss": 1.3536, "reason_loss": 0.4711418151855469, "step": 1190, "utility_loss": 0.882439136505127 }, { "cosine_similarity": 0, "epoch": 1.1099720410065237, "grad_norm": 0.9597303677257298, "learning_rate": 3.5001725923369e-05, "loss": 1.6732, "reason_loss": 0.49238553643226624, "step": 1191, "utility_loss": 1.1808240413665771 }, { "cosine_similarity": 0, "epoch": 1.1109040074557317, "grad_norm": 1.0264728724987857, "learning_rate": 3.4984466689678977e-05, "loss": 1.5121, "reason_loss": 0.4854608178138733, "step": 1192, "utility_loss": 1.0266437530517578 }, { "cosine_similarity": 0, "epoch": 1.1118359739049395, "grad_norm": 1.0067343840183471, "learning_rate": 3.496720745598895e-05, "loss": 1.6578, "reason_loss": 0.4834374189376831, "step": 1193, "utility_loss": 1.1743375062942505 }, { "cosine_similarity": 0, "epoch": 1.1127679403541473, "grad_norm": 1.1512779232223127, "learning_rate": 3.494994822229893e-05, "loss": 1.663, "reason_loss": 0.5014508962631226, "step": 1194, "utility_loss": 1.1615933179855347 }, { "cosine_similarity": 0, "epoch": 1.113699906803355, "grad_norm": 1.016230345880356, "learning_rate": 3.4932688988608906e-05, "loss": 1.4911, "reason_loss": 0.5141400098800659, "step": 1195, "utility_loss": 0.9770070910453796 }, { "cosine_similarity": 0, "epoch": 1.1146318732525629, "grad_norm": 1.1927289124846452, "learning_rate": 3.491542975491888e-05, "loss": 1.2656, "reason_loss": 0.4907902777194977, "step": 1196, "utility_loss": 0.7747844457626343 }, { "cosine_similarity": 0, "epoch": 1.1155638397017706, "grad_norm": 0.9479671574448507, "learning_rate": 3.4898170521228855e-05, "loss": 1.893, "reason_loss": 0.4931907653808594, "step": 1197, "utility_loss": 1.399847149848938 }, { "cosine_similarity": 0, "epoch": 1.1164958061509787, "grad_norm": 1.0463051595939519, "learning_rate": 3.4880911287538836e-05, "loss": 1.9269, "reason_loss": 0.4793199896812439, "step": 1198, "utility_loss": 1.447607159614563 }, { "cosine_similarity": 0, "epoch": 1.1174277726001864, "grad_norm": 1.267010980847301, "learning_rate": 3.486365205384881e-05, "loss": 1.7103, "reason_loss": 0.4919610023498535, "step": 1199, "utility_loss": 1.218289852142334 }, { "cosine_similarity": 0, "epoch": 1.1183597390493942, "grad_norm": 1.2976507475985006, "learning_rate": 3.4846392820158784e-05, "loss": 1.4419, "reason_loss": 0.4710879921913147, "step": 1200, "utility_loss": 0.9707885384559631 }, { "cosine_similarity": 0, "epoch": 1.119291705498602, "grad_norm": 1.2808008744755706, "learning_rate": 3.4829133586468765e-05, "loss": 1.7794, "reason_loss": 0.5077762603759766, "step": 1201, "utility_loss": 1.2716093063354492 }, { "cosine_similarity": 0, "epoch": 1.1202236719478098, "grad_norm": 0.9581295255643657, "learning_rate": 3.481187435277874e-05, "loss": 1.9691, "reason_loss": 0.4589517116546631, "step": 1202, "utility_loss": 1.5101959705352783 }, { "cosine_similarity": 0, "epoch": 1.1211556383970178, "grad_norm": 1.1839441699922602, "learning_rate": 3.4794615119088714e-05, "loss": 1.6238, "reason_loss": 0.47086286544799805, "step": 1203, "utility_loss": 1.1529510021209717 }, { "cosine_similarity": 0, "epoch": 1.1220876048462256, "grad_norm": 1.177883061842168, "learning_rate": 3.477735588539869e-05, "loss": 1.5092, "reason_loss": 0.47444984316825867, "step": 1204, "utility_loss": 1.0347063541412354 }, { "cosine_similarity": 0, "epoch": 1.1230195712954334, "grad_norm": 1.1594459300921967, "learning_rate": 3.476009665170867e-05, "loss": 1.633, "reason_loss": 0.49686551094055176, "step": 1205, "utility_loss": 1.1361501216888428 }, { "cosine_similarity": 0, "epoch": 1.1239515377446412, "grad_norm": 1.0274723680389413, "learning_rate": 3.4742837418018644e-05, "loss": 1.775, "reason_loss": 0.465617835521698, "step": 1206, "utility_loss": 1.3093408346176147 }, { "cosine_similarity": 0, "epoch": 1.124883504193849, "grad_norm": 1.0152098790072284, "learning_rate": 3.472557818432862e-05, "loss": 1.536, "reason_loss": 0.4923136532306671, "step": 1207, "utility_loss": 1.0437061786651611 }, { "cosine_similarity": 0, "epoch": 1.1258154706430568, "grad_norm": 1.1262894713113951, "learning_rate": 3.470831895063859e-05, "loss": 1.6716, "reason_loss": 0.48186665773391724, "step": 1208, "utility_loss": 1.1897308826446533 }, { "cosine_similarity": 0, "epoch": 1.1267474370922648, "grad_norm": 1.1400182848112312, "learning_rate": 3.469105971694857e-05, "loss": 1.7422, "reason_loss": 0.46394598484039307, "step": 1209, "utility_loss": 1.278303623199463 }, { "cosine_similarity": 0, "epoch": 1.1276794035414726, "grad_norm": 1.2308982346663306, "learning_rate": 3.467380048325855e-05, "loss": 1.6623, "reason_loss": 0.48003965616226196, "step": 1210, "utility_loss": 1.1822689771652222 }, { "cosine_similarity": 0, "epoch": 1.1286113699906803, "grad_norm": 1.0805151433929348, "learning_rate": 3.465654124956852e-05, "loss": 1.6928, "reason_loss": 0.4885282516479492, "step": 1211, "utility_loss": 1.2042710781097412 }, { "cosine_similarity": 0, "epoch": 1.1295433364398881, "grad_norm": 0.776441854476159, "learning_rate": 3.4639282015878496e-05, "loss": 1.2168, "reason_loss": 0.4766700565814972, "step": 1212, "utility_loss": 0.7401570677757263 }, { "cosine_similarity": 0, "epoch": 1.130475302889096, "grad_norm": 1.2696803794726161, "learning_rate": 3.462202278218847e-05, "loss": 2.0543, "reason_loss": 0.509494423866272, "step": 1213, "utility_loss": 1.5448112487792969 }, { "cosine_similarity": 0, "epoch": 1.131407269338304, "grad_norm": 1.1417642706400812, "learning_rate": 3.4604763548498445e-05, "loss": 1.7184, "reason_loss": 0.4879991114139557, "step": 1214, "utility_loss": 1.2303740978240967 }, { "cosine_similarity": 0, "epoch": 1.1323392357875117, "grad_norm": 0.9335338252435381, "learning_rate": 3.458750431480842e-05, "loss": 1.6085, "reason_loss": 0.47414296865463257, "step": 1215, "utility_loss": 1.1344022750854492 }, { "cosine_similarity": 0, "epoch": 1.1332712022367195, "grad_norm": 1.1307756830955247, "learning_rate": 3.45702450811184e-05, "loss": 1.8472, "reason_loss": 0.4780598282814026, "step": 1216, "utility_loss": 1.3691797256469727 }, { "cosine_similarity": 0, "epoch": 1.1342031686859273, "grad_norm": 1.145940126168395, "learning_rate": 3.4552985847428374e-05, "loss": 1.4243, "reason_loss": 0.4756999611854553, "step": 1217, "utility_loss": 0.9486408233642578 }, { "cosine_similarity": 0, "epoch": 1.135135135135135, "grad_norm": 1.079807257881406, "learning_rate": 3.453572661373835e-05, "loss": 1.7791, "reason_loss": 0.48494598269462585, "step": 1218, "utility_loss": 1.2941491603851318 }, { "cosine_similarity": 0, "epoch": 1.1360671015843429, "grad_norm": 0.8453821123723062, "learning_rate": 3.451846738004832e-05, "loss": 1.5129, "reason_loss": 0.47218024730682373, "step": 1219, "utility_loss": 1.0407278537750244 }, { "cosine_similarity": 0, "epoch": 1.1369990680335509, "grad_norm": 1.0930788229995911, "learning_rate": 3.4501208146358304e-05, "loss": 1.8056, "reason_loss": 0.5083520412445068, "step": 1220, "utility_loss": 1.297279953956604 }, { "cosine_similarity": 0, "epoch": 1.1379310344827587, "grad_norm": 1.0643767735035048, "learning_rate": 3.448394891266828e-05, "loss": 1.5784, "reason_loss": 0.5113887786865234, "step": 1221, "utility_loss": 1.0669625997543335 }, { "cosine_similarity": 0, "epoch": 1.1388630009319665, "grad_norm": 1.0603030617552833, "learning_rate": 3.446668967897825e-05, "loss": 1.3816, "reason_loss": 0.46109241247177124, "step": 1222, "utility_loss": 0.9204914569854736 }, { "cosine_similarity": 0, "epoch": 1.1397949673811743, "grad_norm": 1.0656092991263106, "learning_rate": 3.4449430445288234e-05, "loss": 1.7241, "reason_loss": 0.47398728132247925, "step": 1223, "utility_loss": 1.2501167058944702 }, { "cosine_similarity": 0, "epoch": 1.140726933830382, "grad_norm": 1.210796674195371, "learning_rate": 3.443217121159821e-05, "loss": 1.7914, "reason_loss": 0.46994349360466003, "step": 1224, "utility_loss": 1.3214561939239502 }, { "cosine_similarity": 0, "epoch": 1.14165890027959, "grad_norm": 1.1018092571574745, "learning_rate": 3.441491197790818e-05, "loss": 1.5547, "reason_loss": 0.5028093457221985, "step": 1225, "utility_loss": 1.051855444908142 }, { "cosine_similarity": 0, "epoch": 1.1425908667287978, "grad_norm": 0.8838068065191846, "learning_rate": 3.4397652744218157e-05, "loss": 1.3638, "reason_loss": 0.5168955326080322, "step": 1226, "utility_loss": 0.8469089269638062 }, { "cosine_similarity": 0, "epoch": 1.1435228331780056, "grad_norm": 0.9556518421429526, "learning_rate": 3.438039351052814e-05, "loss": 1.213, "reason_loss": 0.4794648289680481, "step": 1227, "utility_loss": 0.7335119247436523 }, { "cosine_similarity": 0, "epoch": 1.1444547996272134, "grad_norm": 1.686813231201285, "learning_rate": 3.436313427683811e-05, "loss": 2.1553, "reason_loss": 0.5096450448036194, "step": 1228, "utility_loss": 1.6456758975982666 }, { "cosine_similarity": 0, "epoch": 1.1453867660764212, "grad_norm": 1.2180394527495293, "learning_rate": 3.4345875043148086e-05, "loss": 1.2763, "reason_loss": 0.4574582576751709, "step": 1229, "utility_loss": 0.8188812732696533 }, { "cosine_similarity": 0, "epoch": 1.146318732525629, "grad_norm": 1.076880046921347, "learning_rate": 3.432861580945806e-05, "loss": 1.7093, "reason_loss": 0.4817527234554291, "step": 1230, "utility_loss": 1.2275789976119995 }, { "cosine_similarity": 0, "epoch": 1.147250698974837, "grad_norm": 1.085491339897476, "learning_rate": 3.431135657576804e-05, "loss": 1.5135, "reason_loss": 0.5226575136184692, "step": 1231, "utility_loss": 0.9908363223075867 }, { "cosine_similarity": 0, "epoch": 1.1481826654240448, "grad_norm": 1.1067313112403057, "learning_rate": 3.4294097342078016e-05, "loss": 1.6956, "reason_loss": 0.45408034324645996, "step": 1232, "utility_loss": 1.2414817810058594 }, { "cosine_similarity": 0, "epoch": 1.1491146318732526, "grad_norm": 1.060632716997378, "learning_rate": 3.427683810838799e-05, "loss": 1.2787, "reason_loss": 0.47383296489715576, "step": 1233, "utility_loss": 0.8049103021621704 }, { "cosine_similarity": 0, "epoch": 1.1500465983224604, "grad_norm": 1.0340391155166004, "learning_rate": 3.4259578874697964e-05, "loss": 1.4645, "reason_loss": 0.488663911819458, "step": 1234, "utility_loss": 0.9758543968200684 }, { "cosine_similarity": 0, "epoch": 1.1509785647716682, "grad_norm": 1.1712433909983166, "learning_rate": 3.424231964100794e-05, "loss": 1.6543, "reason_loss": 0.48806828260421753, "step": 1235, "utility_loss": 1.1661863327026367 }, { "cosine_similarity": 0, "epoch": 1.1519105312208762, "grad_norm": 0.9102340548487229, "learning_rate": 3.422506040731791e-05, "loss": 1.7539, "reason_loss": 0.47984808683395386, "step": 1236, "utility_loss": 1.2740191221237183 }, { "cosine_similarity": 0, "epoch": 1.152842497670084, "grad_norm": 1.0339053203088047, "learning_rate": 3.420780117362789e-05, "loss": 1.6963, "reason_loss": 0.4753672480583191, "step": 1237, "utility_loss": 1.2208874225616455 }, { "cosine_similarity": 0, "epoch": 1.1537744641192917, "grad_norm": 0.9858178668426648, "learning_rate": 3.419054193993787e-05, "loss": 1.6416, "reason_loss": 0.4843648672103882, "step": 1238, "utility_loss": 1.1572160720825195 }, { "cosine_similarity": 0, "epoch": 1.1547064305684995, "grad_norm": 1.1118498782899815, "learning_rate": 3.417328270624784e-05, "loss": 1.4876, "reason_loss": 0.4828680157661438, "step": 1239, "utility_loss": 1.0047718286514282 }, { "cosine_similarity": 0, "epoch": 1.1556383970177073, "grad_norm": 1.1536431327745347, "learning_rate": 3.415602347255782e-05, "loss": 1.5538, "reason_loss": 0.48774027824401855, "step": 1240, "utility_loss": 1.0660494565963745 }, { "cosine_similarity": 0, "epoch": 1.156570363466915, "grad_norm": 1.038234592549001, "learning_rate": 3.41387642388678e-05, "loss": 1.5292, "reason_loss": 0.5046001672744751, "step": 1241, "utility_loss": 1.0245802402496338 }, { "cosine_similarity": 0, "epoch": 1.157502329916123, "grad_norm": 1.2035671603502718, "learning_rate": 3.412150500517777e-05, "loss": 1.7563, "reason_loss": 0.4817044138908386, "step": 1242, "utility_loss": 1.2745988368988037 }, { "cosine_similarity": 0, "epoch": 1.158434296365331, "grad_norm": 0.9912112690898087, "learning_rate": 3.410424577148775e-05, "loss": 1.435, "reason_loss": 0.5085784196853638, "step": 1243, "utility_loss": 0.9264700412750244 }, { "cosine_similarity": 0, "epoch": 1.1593662628145387, "grad_norm": 1.1467050322943328, "learning_rate": 3.408698653779772e-05, "loss": 1.8273, "reason_loss": 0.5044019222259521, "step": 1244, "utility_loss": 1.3229176998138428 }, { "cosine_similarity": 0, "epoch": 1.1602982292637465, "grad_norm": 1.037344226470189, "learning_rate": 3.40697273041077e-05, "loss": 1.6052, "reason_loss": 0.47489458322525024, "step": 1245, "utility_loss": 1.1302587985992432 }, { "cosine_similarity": 0, "epoch": 1.1612301957129543, "grad_norm": 0.9073789990719349, "learning_rate": 3.4052468070417676e-05, "loss": 1.52, "reason_loss": 0.4846894145011902, "step": 1246, "utility_loss": 1.035282850265503 }, { "cosine_similarity": 0, "epoch": 1.1621621621621623, "grad_norm": 0.9681476188199853, "learning_rate": 3.403520883672765e-05, "loss": 1.6027, "reason_loss": 0.5077744722366333, "step": 1247, "utility_loss": 1.0948864221572876 }, { "cosine_similarity": 0, "epoch": 1.16309412861137, "grad_norm": 1.2085502961722405, "learning_rate": 3.4017949603037625e-05, "loss": 1.4175, "reason_loss": 0.483700156211853, "step": 1248, "utility_loss": 0.933797299861908 }, { "cosine_similarity": 0, "epoch": 1.1640260950605779, "grad_norm": 1.6324882667667464, "learning_rate": 3.4000690369347606e-05, "loss": 2.0572, "reason_loss": 0.4759643077850342, "step": 1249, "utility_loss": 1.5812771320343018 }, { "cosine_similarity": 0, "epoch": 1.1649580615097856, "grad_norm": 1.0565896032047704, "learning_rate": 3.398343113565758e-05, "loss": 1.5746, "reason_loss": 0.4538206458091736, "step": 1250, "utility_loss": 1.120788335800171 }, { "cosine_similarity": 0, "epoch": 1.1658900279589934, "grad_norm": 1.01008327480785, "learning_rate": 3.3966171901967554e-05, "loss": 1.4493, "reason_loss": 0.4557391107082367, "step": 1251, "utility_loss": 0.9935308694839478 }, { "cosine_similarity": 0, "epoch": 1.1668219944082012, "grad_norm": 1.0005298536033018, "learning_rate": 3.3948912668277536e-05, "loss": 1.3109, "reason_loss": 0.45368146896362305, "step": 1252, "utility_loss": 0.857244610786438 }, { "cosine_similarity": 0, "epoch": 1.167753960857409, "grad_norm": 1.1204785586431245, "learning_rate": 3.393165343458751e-05, "loss": 1.5583, "reason_loss": 0.46734321117401123, "step": 1253, "utility_loss": 1.0909085273742676 }, { "cosine_similarity": 0, "epoch": 1.168685927306617, "grad_norm": 1.1060644207115415, "learning_rate": 3.3914394200897484e-05, "loss": 1.6566, "reason_loss": 0.46331048011779785, "step": 1254, "utility_loss": 1.1933043003082275 }, { "cosine_similarity": 0, "epoch": 1.1696178937558248, "grad_norm": 1.1275250087273274, "learning_rate": 3.389713496720746e-05, "loss": 1.6578, "reason_loss": 0.46423816680908203, "step": 1255, "utility_loss": 1.1936044692993164 }, { "cosine_similarity": 0, "epoch": 1.1705498602050326, "grad_norm": 1.2953713372632003, "learning_rate": 3.387987573351743e-05, "loss": 1.7617, "reason_loss": 0.4880441427230835, "step": 1256, "utility_loss": 1.2736879587173462 }, { "cosine_similarity": 0, "epoch": 1.1714818266542404, "grad_norm": 0.9690277974700282, "learning_rate": 3.386261649982741e-05, "loss": 1.7694, "reason_loss": 0.4769287109375, "step": 1257, "utility_loss": 1.292422890663147 }, { "cosine_similarity": 0, "epoch": 1.1724137931034484, "grad_norm": 0.9967186016917438, "learning_rate": 3.384535726613738e-05, "loss": 1.2984, "reason_loss": 0.4569964110851288, "step": 1258, "utility_loss": 0.8414055705070496 }, { "cosine_similarity": 0, "epoch": 1.1733457595526562, "grad_norm": 0.8626513952860625, "learning_rate": 3.3828098032447356e-05, "loss": 1.4668, "reason_loss": 0.5070091485977173, "step": 1259, "utility_loss": 0.959815502166748 }, { "cosine_similarity": 0, "epoch": 1.174277726001864, "grad_norm": 0.9435467538563285, "learning_rate": 3.381083879875734e-05, "loss": 1.5547, "reason_loss": 0.4729577898979187, "step": 1260, "utility_loss": 1.0817875862121582 }, { "cosine_similarity": 0, "epoch": 1.1752096924510718, "grad_norm": 1.1510202235417863, "learning_rate": 3.379357956506731e-05, "loss": 1.4356, "reason_loss": 0.47263866662979126, "step": 1261, "utility_loss": 0.9629477262496948 }, { "cosine_similarity": 0, "epoch": 1.1761416589002796, "grad_norm": 1.334142070577609, "learning_rate": 3.3776320331377285e-05, "loss": 1.6113, "reason_loss": 0.47105872631073, "step": 1262, "utility_loss": 1.1402095556259155 }, { "cosine_similarity": 0, "epoch": 1.1770736253494873, "grad_norm": 0.9664820641101899, "learning_rate": 3.3759061097687266e-05, "loss": 1.5823, "reason_loss": 0.45866215229034424, "step": 1263, "utility_loss": 1.12363600730896 }, { "cosine_similarity": 0, "epoch": 1.1780055917986951, "grad_norm": 1.0062968212052557, "learning_rate": 3.374180186399724e-05, "loss": 1.5888, "reason_loss": 0.49475839734077454, "step": 1264, "utility_loss": 1.0939961671829224 }, { "cosine_similarity": 0, "epoch": 1.1789375582479031, "grad_norm": 0.982626998562165, "learning_rate": 3.3724542630307215e-05, "loss": 1.6804, "reason_loss": 0.4704878032207489, "step": 1265, "utility_loss": 1.209896445274353 }, { "cosine_similarity": 0, "epoch": 1.179869524697111, "grad_norm": 0.9956387284278893, "learning_rate": 3.370728339661719e-05, "loss": 1.5051, "reason_loss": 0.4937763214111328, "step": 1266, "utility_loss": 1.0112874507904053 }, { "cosine_similarity": 0, "epoch": 1.1808014911463187, "grad_norm": 1.0365667548732018, "learning_rate": 3.369002416292717e-05, "loss": 1.6485, "reason_loss": 0.5050100088119507, "step": 1267, "utility_loss": 1.143483281135559 }, { "cosine_similarity": 0, "epoch": 1.1817334575955265, "grad_norm": 1.1523406075537088, "learning_rate": 3.3672764929237145e-05, "loss": 1.6521, "reason_loss": 0.49417006969451904, "step": 1268, "utility_loss": 1.1579649448394775 }, { "cosine_similarity": 0, "epoch": 1.1826654240447343, "grad_norm": 0.8821540965962433, "learning_rate": 3.365550569554712e-05, "loss": 1.5906, "reason_loss": 0.5012308955192566, "step": 1269, "utility_loss": 1.0893341302871704 }, { "cosine_similarity": 0, "epoch": 1.1835973904939423, "grad_norm": 0.9536011634312, "learning_rate": 3.363824646185709e-05, "loss": 1.5733, "reason_loss": 0.46957913041114807, "step": 1270, "utility_loss": 1.1037209033966064 }, { "cosine_similarity": 0, "epoch": 1.18452935694315, "grad_norm": 0.8622869276626624, "learning_rate": 3.3620987228167074e-05, "loss": 1.2926, "reason_loss": 0.5108368396759033, "step": 1271, "utility_loss": 0.7817687392234802 }, { "cosine_similarity": 0, "epoch": 1.1854613233923579, "grad_norm": 0.9450278629201102, "learning_rate": 3.360372799447705e-05, "loss": 1.3349, "reason_loss": 0.48470354080200195, "step": 1272, "utility_loss": 0.8501562476158142 }, { "cosine_similarity": 0, "epoch": 1.1863932898415657, "grad_norm": 1.076141215647299, "learning_rate": 3.358646876078702e-05, "loss": 1.5133, "reason_loss": 0.4484395384788513, "step": 1273, "utility_loss": 1.0648863315582275 }, { "cosine_similarity": 0, "epoch": 1.1873252562907735, "grad_norm": 1.0681200002931983, "learning_rate": 3.3569209527097004e-05, "loss": 1.4374, "reason_loss": 0.46774566173553467, "step": 1274, "utility_loss": 0.9696897864341736 }, { "cosine_similarity": 0, "epoch": 1.1882572227399812, "grad_norm": 1.4038086449572684, "learning_rate": 3.355195029340698e-05, "loss": 1.6194, "reason_loss": 0.4821249544620514, "step": 1275, "utility_loss": 1.1372830867767334 }, { "cosine_similarity": 0, "epoch": 1.1891891891891893, "grad_norm": 0.9749483438847949, "learning_rate": 3.353469105971695e-05, "loss": 1.2412, "reason_loss": 0.496254563331604, "step": 1276, "utility_loss": 0.7449279427528381 }, { "cosine_similarity": 0, "epoch": 1.190121155638397, "grad_norm": 1.005311913639345, "learning_rate": 3.351743182602693e-05, "loss": 1.6178, "reason_loss": 0.5156271457672119, "step": 1277, "utility_loss": 1.1021640300750732 }, { "cosine_similarity": 0, "epoch": 1.1910531220876048, "grad_norm": 0.934614160664514, "learning_rate": 3.35001725923369e-05, "loss": 1.5817, "reason_loss": 0.5098453164100647, "step": 1278, "utility_loss": 1.0718755722045898 }, { "cosine_similarity": 0, "epoch": 1.1919850885368126, "grad_norm": 1.0909290794293436, "learning_rate": 3.3482913358646875e-05, "loss": 1.5407, "reason_loss": 0.4728637933731079, "step": 1279, "utility_loss": 1.0678496360778809 }, { "cosine_similarity": 0, "epoch": 1.1929170549860204, "grad_norm": 0.9531026130033426, "learning_rate": 3.346565412495685e-05, "loss": 1.2334, "reason_loss": 0.4731189012527466, "step": 1280, "utility_loss": 0.7603269219398499 }, { "cosine_similarity": 0, "epoch": 1.1938490214352284, "grad_norm": 1.0632091055572204, "learning_rate": 3.344839489126683e-05, "loss": 1.7338, "reason_loss": 0.4887496829032898, "step": 1281, "utility_loss": 1.2450494766235352 }, { "cosine_similarity": 0, "epoch": 1.1947809878844362, "grad_norm": 1.0925867309136335, "learning_rate": 3.3431135657576805e-05, "loss": 1.6749, "reason_loss": 0.4740608334541321, "step": 1282, "utility_loss": 1.2008124589920044 }, { "cosine_similarity": 0, "epoch": 1.195712954333644, "grad_norm": 0.9433165247188632, "learning_rate": 3.341387642388678e-05, "loss": 1.3432, "reason_loss": 0.5069006681442261, "step": 1283, "utility_loss": 0.8362874388694763 }, { "cosine_similarity": 0, "epoch": 1.1966449207828518, "grad_norm": 1.0522357259558501, "learning_rate": 3.3396617190196753e-05, "loss": 1.6317, "reason_loss": 0.498808890581131, "step": 1284, "utility_loss": 1.132934331893921 }, { "cosine_similarity": 0, "epoch": 1.1975768872320596, "grad_norm": 1.1113375395073588, "learning_rate": 3.3379357956506735e-05, "loss": 1.8051, "reason_loss": 0.49176907539367676, "step": 1285, "utility_loss": 1.313370943069458 }, { "cosine_similarity": 0, "epoch": 1.1985088536812674, "grad_norm": 0.7806163377801999, "learning_rate": 3.336209872281671e-05, "loss": 1.3908, "reason_loss": 0.5137535333633423, "step": 1286, "utility_loss": 0.8770695924758911 }, { "cosine_similarity": 0, "epoch": 1.1994408201304754, "grad_norm": 1.0416570552805124, "learning_rate": 3.334483948912668e-05, "loss": 1.5388, "reason_loss": 0.44798511266708374, "step": 1287, "utility_loss": 1.0907800197601318 }, { "cosine_similarity": 0, "epoch": 1.2003727865796832, "grad_norm": 1.2057770436972426, "learning_rate": 3.332758025543666e-05, "loss": 1.4205, "reason_loss": 0.4749988615512848, "step": 1288, "utility_loss": 0.945528507232666 }, { "cosine_similarity": 0, "epoch": 1.201304753028891, "grad_norm": 0.7568970264033683, "learning_rate": 3.331032102174664e-05, "loss": 0.9687, "reason_loss": 0.46173369884490967, "step": 1289, "utility_loss": 0.5070087909698486 }, { "cosine_similarity": 0, "epoch": 1.2022367194780987, "grad_norm": 1.1729650888220688, "learning_rate": 3.329306178805661e-05, "loss": 1.5977, "reason_loss": 0.4796189069747925, "step": 1290, "utility_loss": 1.1180485486984253 }, { "cosine_similarity": 0, "epoch": 1.2031686859273065, "grad_norm": 1.2610345642805443, "learning_rate": 3.327580255436659e-05, "loss": 1.4113, "reason_loss": 0.49999791383743286, "step": 1291, "utility_loss": 0.9112836718559265 }, { "cosine_similarity": 0, "epoch": 1.2041006523765145, "grad_norm": 2.1951817210711924, "learning_rate": 3.325854332067657e-05, "loss": 1.5979, "reason_loss": 0.44041529297828674, "step": 1292, "utility_loss": 1.1574933528900146 }, { "cosine_similarity": 0, "epoch": 1.2050326188257223, "grad_norm": 1.1650894752745002, "learning_rate": 3.324128408698654e-05, "loss": 1.6725, "reason_loss": 0.4945318400859833, "step": 1293, "utility_loss": 1.1779721975326538 }, { "cosine_similarity": 0, "epoch": 1.2059645852749301, "grad_norm": 1.1257634822708662, "learning_rate": 3.322402485329652e-05, "loss": 2.0876, "reason_loss": 0.5187505483627319, "step": 1294, "utility_loss": 1.5688812732696533 }, { "cosine_similarity": 0, "epoch": 1.206896551724138, "grad_norm": 1.0852178866603144, "learning_rate": 3.320676561960649e-05, "loss": 1.3405, "reason_loss": 0.4651813805103302, "step": 1295, "utility_loss": 0.8753302097320557 }, { "cosine_similarity": 0, "epoch": 1.2078285181733457, "grad_norm": 1.1410520926000487, "learning_rate": 3.318950638591647e-05, "loss": 1.5222, "reason_loss": 0.4984704852104187, "step": 1296, "utility_loss": 1.023771047592163 }, { "cosine_similarity": 0, "epoch": 1.2087604846225535, "grad_norm": 1.0959768904420244, "learning_rate": 3.3172247152226446e-05, "loss": 1.5065, "reason_loss": 0.4848899841308594, "step": 1297, "utility_loss": 1.0216420888900757 }, { "cosine_similarity": 0, "epoch": 1.2096924510717615, "grad_norm": 0.9584890208615698, "learning_rate": 3.315498791853642e-05, "loss": 1.601, "reason_loss": 0.47048139572143555, "step": 1298, "utility_loss": 1.1305522918701172 }, { "cosine_similarity": 0, "epoch": 1.2106244175209693, "grad_norm": 0.9438779212262499, "learning_rate": 3.3137728684846395e-05, "loss": 1.4261, "reason_loss": 0.4933091402053833, "step": 1299, "utility_loss": 0.9327805042266846 }, { "cosine_similarity": 0, "epoch": 1.211556383970177, "grad_norm": 0.800708087126461, "learning_rate": 3.312046945115637e-05, "loss": 1.3384, "reason_loss": 0.5107696056365967, "step": 1300, "utility_loss": 0.8276058435440063 }, { "cosine_similarity": 0, "epoch": 1.2124883504193849, "grad_norm": 1.0340391395574113, "learning_rate": 3.3103210217466344e-05, "loss": 1.7933, "reason_loss": 0.49837788939476013, "step": 1301, "utility_loss": 1.2949308156967163 }, { "cosine_similarity": 0, "epoch": 1.2134203168685926, "grad_norm": 1.1617961633337976, "learning_rate": 3.308595098377632e-05, "loss": 1.6564, "reason_loss": 0.5045011043548584, "step": 1302, "utility_loss": 1.1519235372543335 }, { "cosine_similarity": 0, "epoch": 1.2143522833178007, "grad_norm": 0.9847984212446679, "learning_rate": 3.30686917500863e-05, "loss": 1.4308, "reason_loss": 0.4703410863876343, "step": 1303, "utility_loss": 0.9604921340942383 }, { "cosine_similarity": 0, "epoch": 1.2152842497670084, "grad_norm": 1.0362440989823734, "learning_rate": 3.305143251639627e-05, "loss": 1.6309, "reason_loss": 0.4592973589897156, "step": 1304, "utility_loss": 1.1715540885925293 }, { "cosine_similarity": 0, "epoch": 1.2162162162162162, "grad_norm": 1.0778818173294689, "learning_rate": 3.303417328270625e-05, "loss": 1.6832, "reason_loss": 0.5006740689277649, "step": 1305, "utility_loss": 1.182574987411499 }, { "cosine_similarity": 0, "epoch": 1.217148182665424, "grad_norm": 1.1633256567817487, "learning_rate": 3.301691404901622e-05, "loss": 1.5143, "reason_loss": 0.4884709119796753, "step": 1306, "utility_loss": 1.0257893800735474 }, { "cosine_similarity": 0, "epoch": 1.2180801491146318, "grad_norm": 1.081237853777156, "learning_rate": 3.29996548153262e-05, "loss": 1.7713, "reason_loss": 0.4816493093967438, "step": 1307, "utility_loss": 1.2896389961242676 }, { "cosine_similarity": 0, "epoch": 1.2190121155638396, "grad_norm": 1.281617869018372, "learning_rate": 3.298239558163618e-05, "loss": 1.7491, "reason_loss": 0.4706202447414398, "step": 1308, "utility_loss": 1.278479814529419 }, { "cosine_similarity": 0, "epoch": 1.2199440820130476, "grad_norm": 1.0281628693185565, "learning_rate": 3.296513634794615e-05, "loss": 1.3582, "reason_loss": 0.4767347574234009, "step": 1309, "utility_loss": 0.8814204931259155 }, { "cosine_similarity": 0, "epoch": 1.2208760484622554, "grad_norm": 1.1590508541744553, "learning_rate": 3.2947877114256126e-05, "loss": 1.4943, "reason_loss": 0.4588172435760498, "step": 1310, "utility_loss": 1.0354336500167847 }, { "cosine_similarity": 0, "epoch": 1.2218080149114632, "grad_norm": 1.3348285254677623, "learning_rate": 3.293061788056611e-05, "loss": 1.5893, "reason_loss": 0.49986889958381653, "step": 1311, "utility_loss": 1.0894408226013184 }, { "cosine_similarity": 0, "epoch": 1.222739981360671, "grad_norm": 0.9729787386833026, "learning_rate": 3.291335864687608e-05, "loss": 1.684, "reason_loss": 0.5283535718917847, "step": 1312, "utility_loss": 1.1556196212768555 }, { "cosine_similarity": 0, "epoch": 1.2236719478098788, "grad_norm": 1.0119589439401515, "learning_rate": 3.2896099413186055e-05, "loss": 1.4112, "reason_loss": 0.4476103186607361, "step": 1313, "utility_loss": 0.9636275172233582 }, { "cosine_similarity": 0, "epoch": 1.2246039142590868, "grad_norm": 0.9687134701829252, "learning_rate": 3.2878840179496036e-05, "loss": 1.5365, "reason_loss": 0.5146850347518921, "step": 1314, "utility_loss": 1.0217781066894531 }, { "cosine_similarity": 0, "epoch": 1.2255358807082946, "grad_norm": 1.088992577867359, "learning_rate": 3.286158094580601e-05, "loss": 1.519, "reason_loss": 0.5210968255996704, "step": 1315, "utility_loss": 0.9979074001312256 }, { "cosine_similarity": 0, "epoch": 1.2264678471575023, "grad_norm": 1.0902800372378627, "learning_rate": 3.2844321712115985e-05, "loss": 1.5211, "reason_loss": 0.46749281883239746, "step": 1316, "utility_loss": 1.0535814762115479 }, { "cosine_similarity": 0, "epoch": 1.2273998136067101, "grad_norm": 1.015386605966868, "learning_rate": 3.282706247842596e-05, "loss": 1.5597, "reason_loss": 0.47883808612823486, "step": 1317, "utility_loss": 1.0808476209640503 }, { "cosine_similarity": 0, "epoch": 1.228331780055918, "grad_norm": 0.9399461228678769, "learning_rate": 3.280980324473594e-05, "loss": 1.5306, "reason_loss": 0.47122883796691895, "step": 1318, "utility_loss": 1.0593266487121582 }, { "cosine_similarity": 0, "epoch": 1.2292637465051257, "grad_norm": 1.078122445680302, "learning_rate": 3.2792544011045915e-05, "loss": 1.7423, "reason_loss": 0.48680025339126587, "step": 1319, "utility_loss": 1.2555420398712158 }, { "cosine_similarity": 0, "epoch": 1.2301957129543337, "grad_norm": 1.3312616717884063, "learning_rate": 3.277528477735589e-05, "loss": 1.7367, "reason_loss": 0.48290878534317017, "step": 1320, "utility_loss": 1.2537851333618164 }, { "cosine_similarity": 0, "epoch": 1.2311276794035415, "grad_norm": 1.0851773611996354, "learning_rate": 3.275802554366586e-05, "loss": 1.4451, "reason_loss": 0.5215688943862915, "step": 1321, "utility_loss": 0.9234956502914429 }, { "cosine_similarity": 0, "epoch": 1.2320596458527493, "grad_norm": 0.9640920621961682, "learning_rate": 3.274076630997584e-05, "loss": 1.3339, "reason_loss": 0.4818078279495239, "step": 1322, "utility_loss": 0.8521214723587036 }, { "cosine_similarity": 0, "epoch": 1.232991612301957, "grad_norm": 1.0096437745911873, "learning_rate": 3.272350707628581e-05, "loss": 1.5771, "reason_loss": 0.49150124192237854, "step": 1323, "utility_loss": 1.085582971572876 }, { "cosine_similarity": 0, "epoch": 1.2339235787511649, "grad_norm": 0.9928534442635176, "learning_rate": 3.2706247842595786e-05, "loss": 1.5659, "reason_loss": 0.4995342493057251, "step": 1324, "utility_loss": 1.0663341283798218 }, { "cosine_similarity": 0, "epoch": 1.2348555452003729, "grad_norm": 1.0088633653010644, "learning_rate": 3.268898860890577e-05, "loss": 1.4198, "reason_loss": 0.4746694564819336, "step": 1325, "utility_loss": 0.9451314806938171 }, { "cosine_similarity": 0, "epoch": 1.2357875116495807, "grad_norm": 1.1773223537600068, "learning_rate": 3.267172937521574e-05, "loss": 1.561, "reason_loss": 0.4775056838989258, "step": 1326, "utility_loss": 1.083461046218872 }, { "cosine_similarity": 0, "epoch": 1.2367194780987885, "grad_norm": 0.9801015082294685, "learning_rate": 3.2654470141525716e-05, "loss": 2.0011, "reason_loss": 0.4785725474357605, "step": 1327, "utility_loss": 1.5225698947906494 }, { "cosine_similarity": 0, "epoch": 1.2376514445479962, "grad_norm": 0.9215496292779362, "learning_rate": 3.263721090783569e-05, "loss": 1.2432, "reason_loss": 0.4894052743911743, "step": 1328, "utility_loss": 0.753795862197876 }, { "cosine_similarity": 0, "epoch": 1.238583410997204, "grad_norm": 1.7381342246124805, "learning_rate": 3.261995167414567e-05, "loss": 1.3404, "reason_loss": 0.49141445755958557, "step": 1329, "utility_loss": 0.8490345478057861 }, { "cosine_similarity": 0, "epoch": 1.2395153774464118, "grad_norm": 1.0860095982584657, "learning_rate": 3.2602692440455645e-05, "loss": 1.407, "reason_loss": 0.4710927903652191, "step": 1330, "utility_loss": 0.9359170198440552 }, { "cosine_similarity": 0, "epoch": 1.2404473438956198, "grad_norm": 1.1050001103798148, "learning_rate": 3.258543320676562e-05, "loss": 1.8419, "reason_loss": 0.4856261909008026, "step": 1331, "utility_loss": 1.3562617301940918 }, { "cosine_similarity": 0, "epoch": 1.2413793103448276, "grad_norm": 1.0216189687254889, "learning_rate": 3.25681739730756e-05, "loss": 1.6303, "reason_loss": 0.4644594192504883, "step": 1332, "utility_loss": 1.16581392288208 }, { "cosine_similarity": 0, "epoch": 1.2423112767940354, "grad_norm": 1.0636725195800205, "learning_rate": 3.2550914739385575e-05, "loss": 1.3845, "reason_loss": 0.4655221104621887, "step": 1333, "utility_loss": 0.9189469218254089 }, { "cosine_similarity": 0, "epoch": 1.2432432432432432, "grad_norm": 0.985325917884217, "learning_rate": 3.253365550569555e-05, "loss": 1.5715, "reason_loss": 0.4802960753440857, "step": 1334, "utility_loss": 1.0912078619003296 }, { "cosine_similarity": 0, "epoch": 1.244175209692451, "grad_norm": 1.1175565571164368, "learning_rate": 3.2516396272005524e-05, "loss": 1.7203, "reason_loss": 0.49357643723487854, "step": 1335, "utility_loss": 1.2266894578933716 }, { "cosine_similarity": 0, "epoch": 1.245107176141659, "grad_norm": 0.9415851994000247, "learning_rate": 3.2499137038315505e-05, "loss": 1.8209, "reason_loss": 0.4670798182487488, "step": 1336, "utility_loss": 1.3537784814834595 }, { "cosine_similarity": 0, "epoch": 1.2460391425908668, "grad_norm": 0.9376517740072476, "learning_rate": 3.248187780462548e-05, "loss": 1.4811, "reason_loss": 0.4807179570198059, "step": 1337, "utility_loss": 1.0003408193588257 }, { "cosine_similarity": 0, "epoch": 1.2469711090400746, "grad_norm": 0.9310705220135068, "learning_rate": 3.246461857093545e-05, "loss": 1.7163, "reason_loss": 0.4670220911502838, "step": 1338, "utility_loss": 1.249237060546875 }, { "cosine_similarity": 0, "epoch": 1.2479030754892824, "grad_norm": 1.05377465750187, "learning_rate": 3.244735933724543e-05, "loss": 1.5455, "reason_loss": 0.5153943300247192, "step": 1339, "utility_loss": 1.0300742387771606 }, { "cosine_similarity": 0, "epoch": 1.2488350419384902, "grad_norm": 1.232574441028814, "learning_rate": 3.243010010355541e-05, "loss": 1.804, "reason_loss": 0.4956578016281128, "step": 1340, "utility_loss": 1.3083508014678955 }, { "cosine_similarity": 0, "epoch": 1.249767008387698, "grad_norm": 1.1452655071850217, "learning_rate": 3.241284086986538e-05, "loss": 1.7367, "reason_loss": 0.45021724700927734, "step": 1341, "utility_loss": 1.2864482402801514 }, { "cosine_similarity": 0, "epoch": 1.250698974836906, "grad_norm": 1.2925005042622775, "learning_rate": 3.239558163617536e-05, "loss": 1.6557, "reason_loss": 0.48824262619018555, "step": 1342, "utility_loss": 1.1675063371658325 }, { "cosine_similarity": 0, "epoch": 1.2516309412861137, "grad_norm": 1.1652128560466508, "learning_rate": 3.237832240248533e-05, "loss": 1.4015, "reason_loss": 0.4576036334037781, "step": 1343, "utility_loss": 0.9439071416854858 }, { "cosine_similarity": 0, "epoch": 1.2525629077353215, "grad_norm": 1.3301071330402074, "learning_rate": 3.2361063168795306e-05, "loss": 1.6447, "reason_loss": 0.46285274624824524, "step": 1344, "utility_loss": 1.1818119287490845 }, { "cosine_similarity": 0, "epoch": 1.2534948741845293, "grad_norm": 1.0313654330135171, "learning_rate": 3.234380393510528e-05, "loss": 1.4833, "reason_loss": 0.4755467176437378, "step": 1345, "utility_loss": 1.0077860355377197 }, { "cosine_similarity": 0, "epoch": 1.254426840633737, "grad_norm": 0.9879844784026619, "learning_rate": 3.2326544701415254e-05, "loss": 1.6295, "reason_loss": 0.47321081161499023, "step": 1346, "utility_loss": 1.1562550067901611 }, { "cosine_similarity": 0, "epoch": 1.2553588070829451, "grad_norm": 1.0807279516393802, "learning_rate": 3.2309285467725235e-05, "loss": 1.7883, "reason_loss": 0.4671216607093811, "step": 1347, "utility_loss": 1.321162462234497 }, { "cosine_similarity": 0, "epoch": 1.256290773532153, "grad_norm": 1.1054269357174527, "learning_rate": 3.229202623403521e-05, "loss": 1.621, "reason_loss": 0.4979691505432129, "step": 1348, "utility_loss": 1.1230216026306152 }, { "cosine_similarity": 0, "epoch": 1.2572227399813607, "grad_norm": 0.8370193718130727, "learning_rate": 3.2274767000345184e-05, "loss": 1.592, "reason_loss": 0.49707794189453125, "step": 1349, "utility_loss": 1.0948935747146606 }, { "cosine_similarity": 0, "epoch": 1.2581547064305685, "grad_norm": 0.9381197612121077, "learning_rate": 3.225750776665516e-05, "loss": 1.329, "reason_loss": 0.47211506962776184, "step": 1350, "utility_loss": 0.8568381667137146 }, { "cosine_similarity": 0, "epoch": 1.2590866728797763, "grad_norm": 0.921791472015043, "learning_rate": 3.224024853296514e-05, "loss": 1.2828, "reason_loss": 0.497016578912735, "step": 1351, "utility_loss": 0.7857730388641357 }, { "cosine_similarity": 0, "epoch": 1.260018639328984, "grad_norm": 1.0783078958174437, "learning_rate": 3.2222989299275114e-05, "loss": 1.8568, "reason_loss": 0.4876713752746582, "step": 1352, "utility_loss": 1.3691219091415405 }, { "cosine_similarity": 0, "epoch": 1.2609506057781918, "grad_norm": 1.1382014688720403, "learning_rate": 3.220573006558509e-05, "loss": 1.6169, "reason_loss": 0.5101397037506104, "step": 1353, "utility_loss": 1.1067184209823608 }, { "cosine_similarity": 0, "epoch": 1.2618825722273999, "grad_norm": 1.045375668020112, "learning_rate": 3.218847083189507e-05, "loss": 1.825, "reason_loss": 0.5098273754119873, "step": 1354, "utility_loss": 1.3151938915252686 }, { "cosine_similarity": 0, "epoch": 1.2628145386766076, "grad_norm": 1.2101393712171393, "learning_rate": 3.217121159820504e-05, "loss": 1.5216, "reason_loss": 0.47714826464653015, "step": 1355, "utility_loss": 1.0444705486297607 }, { "cosine_similarity": 0, "epoch": 1.2637465051258154, "grad_norm": 1.0879518223910158, "learning_rate": 3.215395236451502e-05, "loss": 1.6331, "reason_loss": 0.5192886590957642, "step": 1356, "utility_loss": 1.1137712001800537 }, { "cosine_similarity": 0, "epoch": 1.2646784715750232, "grad_norm": 0.992292119175871, "learning_rate": 3.213669313082499e-05, "loss": 1.6504, "reason_loss": 0.47151780128479004, "step": 1357, "utility_loss": 1.1788536310195923 }, { "cosine_similarity": 0, "epoch": 1.2656104380242312, "grad_norm": 1.0087481425993021, "learning_rate": 3.211943389713497e-05, "loss": 1.4129, "reason_loss": 0.488249272108078, "step": 1358, "utility_loss": 0.9246382713317871 }, { "cosine_similarity": 0, "epoch": 1.266542404473439, "grad_norm": 0.9549489921590449, "learning_rate": 3.210217466344495e-05, "loss": 1.179, "reason_loss": 0.4477010667324066, "step": 1359, "utility_loss": 0.731305718421936 }, { "cosine_similarity": 0, "epoch": 1.2674743709226468, "grad_norm": 0.923779130405037, "learning_rate": 3.208491542975492e-05, "loss": 1.5287, "reason_loss": 0.47453573346138, "step": 1360, "utility_loss": 1.0541632175445557 }, { "cosine_similarity": 0, "epoch": 1.2684063373718546, "grad_norm": 0.9922497805959059, "learning_rate": 3.2067656196064896e-05, "loss": 1.6355, "reason_loss": 0.5029383897781372, "step": 1361, "utility_loss": 1.1325621604919434 }, { "cosine_similarity": 0, "epoch": 1.2693383038210624, "grad_norm": 1.0533157411556855, "learning_rate": 3.205039696237487e-05, "loss": 1.6645, "reason_loss": 0.4753550887107849, "step": 1362, "utility_loss": 1.1891143321990967 }, { "cosine_similarity": 0, "epoch": 1.2702702702702702, "grad_norm": 0.9082074647210622, "learning_rate": 3.2033137728684844e-05, "loss": 1.2519, "reason_loss": 0.4597124457359314, "step": 1363, "utility_loss": 0.792210042476654 }, { "cosine_similarity": 0, "epoch": 1.271202236719478, "grad_norm": 1.0385502005666738, "learning_rate": 3.201587849499482e-05, "loss": 1.7637, "reason_loss": 0.47243526577949524, "step": 1364, "utility_loss": 1.2912790775299072 }, { "cosine_similarity": 0, "epoch": 1.272134203168686, "grad_norm": 1.0299714736307377, "learning_rate": 3.19986192613048e-05, "loss": 1.3912, "reason_loss": 0.4832581877708435, "step": 1365, "utility_loss": 0.907931387424469 }, { "cosine_similarity": 0, "epoch": 1.2730661696178938, "grad_norm": 1.055316489660499, "learning_rate": 3.1981360027614774e-05, "loss": 1.5153, "reason_loss": 0.4887140095233917, "step": 1366, "utility_loss": 1.0266227722167969 }, { "cosine_similarity": 0, "epoch": 1.2739981360671015, "grad_norm": 1.1459659826179571, "learning_rate": 3.196410079392475e-05, "loss": 1.896, "reason_loss": 0.4818935990333557, "step": 1367, "utility_loss": 1.4140664339065552 }, { "cosine_similarity": 0, "epoch": 1.2749301025163093, "grad_norm": 1.0071943519210471, "learning_rate": 3.194684156023472e-05, "loss": 1.6625, "reason_loss": 0.5071300268173218, "step": 1368, "utility_loss": 1.1553690433502197 }, { "cosine_similarity": 0, "epoch": 1.2758620689655173, "grad_norm": 1.0045548128033936, "learning_rate": 3.1929582326544704e-05, "loss": 1.7782, "reason_loss": 0.4725492000579834, "step": 1369, "utility_loss": 1.305652141571045 }, { "cosine_similarity": 0, "epoch": 1.2767940354147251, "grad_norm": 1.0426514309963402, "learning_rate": 3.191232309285468e-05, "loss": 1.4861, "reason_loss": 0.4835493564605713, "step": 1370, "utility_loss": 1.0025850534439087 }, { "cosine_similarity": 0, "epoch": 1.277726001863933, "grad_norm": 1.100705200616447, "learning_rate": 3.189506385916465e-05, "loss": 1.5608, "reason_loss": 0.4783424735069275, "step": 1371, "utility_loss": 1.0824681520462036 }, { "cosine_similarity": 0, "epoch": 1.2786579683131407, "grad_norm": 0.9133884864822513, "learning_rate": 3.187780462547463e-05, "loss": 1.7526, "reason_loss": 0.48221975564956665, "step": 1372, "utility_loss": 1.2703900337219238 }, { "cosine_similarity": 0, "epoch": 1.2795899347623485, "grad_norm": 1.041186379573277, "learning_rate": 3.186054539178461e-05, "loss": 1.7721, "reason_loss": 0.455096960067749, "step": 1373, "utility_loss": 1.3170171976089478 }, { "cosine_similarity": 0, "epoch": 1.2805219012115563, "grad_norm": 0.8965120854862276, "learning_rate": 3.184328615809458e-05, "loss": 1.3891, "reason_loss": 0.4879136085510254, "step": 1374, "utility_loss": 0.9011406898498535 }, { "cosine_similarity": 0, "epoch": 1.281453867660764, "grad_norm": 0.8297873000206292, "learning_rate": 3.1826026924404556e-05, "loss": 1.6137, "reason_loss": 0.4558175802230835, "step": 1375, "utility_loss": 1.1578893661499023 }, { "cosine_similarity": 0, "epoch": 1.282385834109972, "grad_norm": 0.9938141735064199, "learning_rate": 3.180876769071454e-05, "loss": 1.3829, "reason_loss": 0.4914016127586365, "step": 1376, "utility_loss": 0.8915148973464966 }, { "cosine_similarity": 0, "epoch": 1.2833178005591799, "grad_norm": 1.160225935196745, "learning_rate": 3.179150845702451e-05, "loss": 1.5977, "reason_loss": 0.45434755086898804, "step": 1377, "utility_loss": 1.1433887481689453 }, { "cosine_similarity": 0, "epoch": 1.2842497670083877, "grad_norm": 1.0306232245542668, "learning_rate": 3.1774249223334486e-05, "loss": 1.456, "reason_loss": 0.4928150177001953, "step": 1378, "utility_loss": 0.9632112979888916 }, { "cosine_similarity": 0, "epoch": 1.2851817334575955, "grad_norm": 1.3168687547533895, "learning_rate": 3.175698998964446e-05, "loss": 1.5049, "reason_loss": 0.48788923025131226, "step": 1379, "utility_loss": 1.0170209407806396 }, { "cosine_similarity": 0, "epoch": 1.2861136999068035, "grad_norm": 1.0938437941250443, "learning_rate": 3.173973075595444e-05, "loss": 1.5807, "reason_loss": 0.5136226415634155, "step": 1380, "utility_loss": 1.0670294761657715 }, { "cosine_similarity": 0, "epoch": 1.2870456663560113, "grad_norm": 1.0535152854001546, "learning_rate": 3.1722471522264415e-05, "loss": 1.7266, "reason_loss": 0.4901650846004486, "step": 1381, "utility_loss": 1.2364373207092285 }, { "cosine_similarity": 0, "epoch": 1.287977632805219, "grad_norm": 0.9572970807935126, "learning_rate": 3.170521228857439e-05, "loss": 1.3326, "reason_loss": 0.48152509331703186, "step": 1382, "utility_loss": 0.8510380983352661 }, { "cosine_similarity": 0, "epoch": 1.2889095992544268, "grad_norm": 1.1774240669744929, "learning_rate": 3.1687953054884364e-05, "loss": 1.5828, "reason_loss": 0.5065891742706299, "step": 1383, "utility_loss": 1.076195478439331 }, { "cosine_similarity": 0, "epoch": 1.2898415657036346, "grad_norm": 1.0326087231063608, "learning_rate": 3.167069382119434e-05, "loss": 1.6284, "reason_loss": 0.49008655548095703, "step": 1384, "utility_loss": 1.13827383518219 }, { "cosine_similarity": 0, "epoch": 1.2907735321528424, "grad_norm": 0.9712693424964829, "learning_rate": 3.165343458750431e-05, "loss": 1.5302, "reason_loss": 0.49340569972991943, "step": 1385, "utility_loss": 1.0367610454559326 }, { "cosine_similarity": 0, "epoch": 1.2917054986020502, "grad_norm": 1.0510397250655996, "learning_rate": 3.163617535381429e-05, "loss": 1.704, "reason_loss": 0.471066951751709, "step": 1386, "utility_loss": 1.2329168319702148 }, { "cosine_similarity": 0, "epoch": 1.2926374650512582, "grad_norm": 1.2993022270185413, "learning_rate": 3.161891612012427e-05, "loss": 1.3645, "reason_loss": 0.47249406576156616, "step": 1387, "utility_loss": 0.8920416831970215 }, { "cosine_similarity": 0, "epoch": 1.293569431500466, "grad_norm": 0.9781043928106347, "learning_rate": 3.160165688643424e-05, "loss": 1.3212, "reason_loss": 0.49604520201683044, "step": 1388, "utility_loss": 0.825160026550293 }, { "cosine_similarity": 0, "epoch": 1.2945013979496738, "grad_norm": 1.1093509030917845, "learning_rate": 3.1584397652744217e-05, "loss": 1.4057, "reason_loss": 0.4835119843482971, "step": 1389, "utility_loss": 0.9221878051757812 }, { "cosine_similarity": 0, "epoch": 1.2954333643988816, "grad_norm": 0.9483498611886907, "learning_rate": 3.156713841905419e-05, "loss": 1.4592, "reason_loss": 0.47387176752090454, "step": 1390, "utility_loss": 0.9853113889694214 }, { "cosine_similarity": 0, "epoch": 1.2963653308480896, "grad_norm": 1.3478280038198696, "learning_rate": 3.154987918536417e-05, "loss": 1.5398, "reason_loss": 0.46723026037216187, "step": 1391, "utility_loss": 1.0725229978561401 }, { "cosine_similarity": 0, "epoch": 1.2972972972972974, "grad_norm": 1.1245623005105199, "learning_rate": 3.1532619951674146e-05, "loss": 1.7477, "reason_loss": 0.48943084478378296, "step": 1392, "utility_loss": 1.2582850456237793 }, { "cosine_similarity": 0, "epoch": 1.2982292637465052, "grad_norm": 1.1278096270838016, "learning_rate": 3.151536071798412e-05, "loss": 1.4555, "reason_loss": 0.4901247024536133, "step": 1393, "utility_loss": 0.9654040336608887 }, { "cosine_similarity": 0, "epoch": 1.299161230195713, "grad_norm": 0.9731279106038642, "learning_rate": 3.14981014842941e-05, "loss": 1.5235, "reason_loss": 0.5032514929771423, "step": 1394, "utility_loss": 1.0202758312225342 }, { "cosine_similarity": 0, "epoch": 1.3000931966449207, "grad_norm": 0.9733545217949426, "learning_rate": 3.1480842250604076e-05, "loss": 1.4637, "reason_loss": 0.48294776678085327, "step": 1395, "utility_loss": 0.9807313084602356 }, { "cosine_similarity": 0, "epoch": 1.3010251630941285, "grad_norm": 1.103442695048664, "learning_rate": 3.146358301691405e-05, "loss": 1.7571, "reason_loss": 0.47718513011932373, "step": 1396, "utility_loss": 1.2799150943756104 }, { "cosine_similarity": 0, "epoch": 1.3019571295433363, "grad_norm": 0.8991687892588794, "learning_rate": 3.1446323783224024e-05, "loss": 1.4075, "reason_loss": 0.4873010218143463, "step": 1397, "utility_loss": 0.9201771020889282 }, { "cosine_similarity": 0, "epoch": 1.3028890959925443, "grad_norm": 0.9128917782554629, "learning_rate": 3.1429064549534006e-05, "loss": 1.0759, "reason_loss": 0.46990180015563965, "step": 1398, "utility_loss": 0.6059651970863342 }, { "cosine_similarity": 0, "epoch": 1.303821062441752, "grad_norm": 1.1362024973675833, "learning_rate": 3.141180531584398e-05, "loss": 1.5468, "reason_loss": 0.47168195247650146, "step": 1399, "utility_loss": 1.0751256942749023 }, { "cosine_similarity": 0, "epoch": 1.30475302889096, "grad_norm": 0.8801693029817437, "learning_rate": 3.1394546082153954e-05, "loss": 1.3658, "reason_loss": 0.4837980270385742, "step": 1400, "utility_loss": 0.8820126056671143 }, { "cosine_similarity": 0, "epoch": 1.3056849953401677, "grad_norm": 1.0987584238134211, "learning_rate": 3.137728684846393e-05, "loss": 1.3595, "reason_loss": 0.49231335520744324, "step": 1401, "utility_loss": 0.8671419024467468 }, { "cosine_similarity": 0, "epoch": 1.3066169617893757, "grad_norm": 0.9554146547866705, "learning_rate": 3.136002761477391e-05, "loss": 1.4243, "reason_loss": 0.5182011723518372, "step": 1402, "utility_loss": 0.9060924649238586 }, { "cosine_similarity": 0, "epoch": 1.3075489282385835, "grad_norm": 1.083494931692815, "learning_rate": 3.1342768381083884e-05, "loss": 1.5082, "reason_loss": 0.4575754702091217, "step": 1403, "utility_loss": 1.0506151914596558 }, { "cosine_similarity": 0, "epoch": 1.3084808946877913, "grad_norm": 1.0037090742699546, "learning_rate": 3.132550914739386e-05, "loss": 1.643, "reason_loss": 0.5030217170715332, "step": 1404, "utility_loss": 1.1400071382522583 }, { "cosine_similarity": 0, "epoch": 1.309412861136999, "grad_norm": 1.17036785783753, "learning_rate": 3.130824991370383e-05, "loss": 1.5398, "reason_loss": 0.47914209961891174, "step": 1405, "utility_loss": 1.0606367588043213 }, { "cosine_similarity": 0, "epoch": 1.3103448275862069, "grad_norm": 1.0747955071716493, "learning_rate": 3.129099068001381e-05, "loss": 1.7603, "reason_loss": 0.47603681683540344, "step": 1406, "utility_loss": 1.284282922744751 }, { "cosine_similarity": 0, "epoch": 1.3112767940354146, "grad_norm": 1.0190918232090052, "learning_rate": 3.127373144632378e-05, "loss": 1.6584, "reason_loss": 0.46332135796546936, "step": 1407, "utility_loss": 1.1950594186782837 }, { "cosine_similarity": 0, "epoch": 1.3122087604846224, "grad_norm": 1.1201921066593272, "learning_rate": 3.1256472212633755e-05, "loss": 1.8016, "reason_loss": 0.47748124599456787, "step": 1408, "utility_loss": 1.3240866661071777 }, { "cosine_similarity": 0, "epoch": 1.3131407269338304, "grad_norm": 1.1521315379073949, "learning_rate": 3.1239212978943736e-05, "loss": 1.525, "reason_loss": 0.47203996777534485, "step": 1409, "utility_loss": 1.0529712438583374 }, { "cosine_similarity": 0, "epoch": 1.3140726933830382, "grad_norm": 1.204729954120199, "learning_rate": 3.122195374525371e-05, "loss": 1.6515, "reason_loss": 0.5019870400428772, "step": 1410, "utility_loss": 1.149538278579712 }, { "cosine_similarity": 0, "epoch": 1.315004659832246, "grad_norm": 1.272251615463511, "learning_rate": 3.1204694511563685e-05, "loss": 1.6339, "reason_loss": 0.4821786880493164, "step": 1411, "utility_loss": 1.1517618894577026 }, { "cosine_similarity": 0, "epoch": 1.3159366262814538, "grad_norm": 1.2514077806931692, "learning_rate": 3.118743527787366e-05, "loss": 1.3819, "reason_loss": 0.4602484107017517, "step": 1412, "utility_loss": 0.921690821647644 }, { "cosine_similarity": 0, "epoch": 1.3168685927306618, "grad_norm": 1.0808003435104616, "learning_rate": 3.117017604418364e-05, "loss": 1.6557, "reason_loss": 0.47317036986351013, "step": 1413, "utility_loss": 1.1825356483459473 }, { "cosine_similarity": 0, "epoch": 1.3178005591798696, "grad_norm": 1.5305834183454838, "learning_rate": 3.1152916810493615e-05, "loss": 1.5821, "reason_loss": 0.5076863765716553, "step": 1414, "utility_loss": 1.074425220489502 }, { "cosine_similarity": 0, "epoch": 1.3187325256290774, "grad_norm": 0.9652189357725346, "learning_rate": 3.113565757680359e-05, "loss": 1.7718, "reason_loss": 0.49134790897369385, "step": 1415, "utility_loss": 1.280428171157837 }, { "cosine_similarity": 0, "epoch": 1.3196644920782852, "grad_norm": 0.9228251856411901, "learning_rate": 3.111839834311357e-05, "loss": 1.562, "reason_loss": 0.46053215861320496, "step": 1416, "utility_loss": 1.1014282703399658 }, { "cosine_similarity": 0, "epoch": 1.320596458527493, "grad_norm": 0.9107402996581432, "learning_rate": 3.1101139109423544e-05, "loss": 1.5545, "reason_loss": 0.4427892565727234, "step": 1417, "utility_loss": 1.1116983890533447 }, { "cosine_similarity": 0, "epoch": 1.3215284249767008, "grad_norm": 1.0202723821224564, "learning_rate": 3.108387987573352e-05, "loss": 1.4947, "reason_loss": 0.4821050763130188, "step": 1418, "utility_loss": 1.0125858783721924 }, { "cosine_similarity": 0, "epoch": 1.3224603914259085, "grad_norm": 0.997345444848568, "learning_rate": 3.106662064204349e-05, "loss": 1.6677, "reason_loss": 0.4642488360404968, "step": 1419, "utility_loss": 1.2034050226211548 }, { "cosine_similarity": 0, "epoch": 1.3233923578751166, "grad_norm": 1.0877300190966168, "learning_rate": 3.1049361408353474e-05, "loss": 1.6768, "reason_loss": 0.47555893659591675, "step": 1420, "utility_loss": 1.2012457847595215 }, { "cosine_similarity": 0, "epoch": 1.3243243243243243, "grad_norm": 1.1647901667547529, "learning_rate": 3.103210217466345e-05, "loss": 1.7902, "reason_loss": 0.454083114862442, "step": 1421, "utility_loss": 1.336119532585144 }, { "cosine_similarity": 0, "epoch": 1.3252562907735321, "grad_norm": 0.8847666559488657, "learning_rate": 3.101484294097342e-05, "loss": 1.3268, "reason_loss": 0.4800747036933899, "step": 1422, "utility_loss": 0.8467104434967041 }, { "cosine_similarity": 0, "epoch": 1.32618825722274, "grad_norm": 1.0871973452764132, "learning_rate": 3.0997583707283403e-05, "loss": 1.6598, "reason_loss": 0.49409258365631104, "step": 1423, "utility_loss": 1.1656863689422607 }, { "cosine_similarity": 0, "epoch": 1.327120223671948, "grad_norm": 1.0822074973060924, "learning_rate": 3.098032447359338e-05, "loss": 1.5158, "reason_loss": 0.47437918186187744, "step": 1424, "utility_loss": 1.0413905382156372 }, { "cosine_similarity": 0, "epoch": 1.3280521901211557, "grad_norm": 0.8522030614776458, "learning_rate": 3.096306523990335e-05, "loss": 1.3464, "reason_loss": 0.48902493715286255, "step": 1425, "utility_loss": 0.8574163317680359 }, { "cosine_similarity": 0, "epoch": 1.3289841565703635, "grad_norm": 1.1140449088762467, "learning_rate": 3.0945806006213326e-05, "loss": 1.6829, "reason_loss": 0.5164638757705688, "step": 1426, "utility_loss": 1.1664084196090698 }, { "cosine_similarity": 0, "epoch": 1.3299161230195713, "grad_norm": 0.9998687926698209, "learning_rate": 3.09285467725233e-05, "loss": 1.5549, "reason_loss": 0.4788668751716614, "step": 1427, "utility_loss": 1.0760141611099243 }, { "cosine_similarity": 0, "epoch": 1.330848089468779, "grad_norm": 0.9603767039403746, "learning_rate": 3.0911287538833275e-05, "loss": 1.6219, "reason_loss": 0.4477187991142273, "step": 1428, "utility_loss": 1.1741970777511597 }, { "cosine_similarity": 0, "epoch": 1.3317800559179869, "grad_norm": 0.9731879938161384, "learning_rate": 3.089402830514325e-05, "loss": 1.7667, "reason_loss": 0.5060592889785767, "step": 1429, "utility_loss": 1.260624647140503 }, { "cosine_similarity": 0, "epoch": 1.3327120223671947, "grad_norm": 1.1717925129596853, "learning_rate": 3.0876769071453223e-05, "loss": 1.6732, "reason_loss": 0.48158887028694153, "step": 1430, "utility_loss": 1.1916242837905884 }, { "cosine_similarity": 0, "epoch": 1.3336439888164027, "grad_norm": 1.0065194598810459, "learning_rate": 3.0859509837763205e-05, "loss": 1.5727, "reason_loss": 0.4734126329421997, "step": 1431, "utility_loss": 1.0992815494537354 }, { "cosine_similarity": 0, "epoch": 1.3345759552656105, "grad_norm": 1.0146430244111257, "learning_rate": 3.084225060407318e-05, "loss": 1.7387, "reason_loss": 0.5133500099182129, "step": 1432, "utility_loss": 1.225330114364624 }, { "cosine_similarity": 0, "epoch": 1.3355079217148182, "grad_norm": 1.006551541171126, "learning_rate": 3.082499137038315e-05, "loss": 1.4144, "reason_loss": 0.49089568853378296, "step": 1433, "utility_loss": 0.9234644770622253 }, { "cosine_similarity": 0, "epoch": 1.336439888164026, "grad_norm": 1.0913209831524662, "learning_rate": 3.0807732136693134e-05, "loss": 1.1545, "reason_loss": 0.5010132789611816, "step": 1434, "utility_loss": 0.6534838080406189 }, { "cosine_similarity": 0, "epoch": 1.337371854613234, "grad_norm": 1.1127485424753307, "learning_rate": 3.079047290300311e-05, "loss": 1.4604, "reason_loss": 0.47617292404174805, "step": 1435, "utility_loss": 0.9841854572296143 }, { "cosine_similarity": 0, "epoch": 1.3383038210624418, "grad_norm": 1.0535632316782624, "learning_rate": 3.077321366931308e-05, "loss": 1.4143, "reason_loss": 0.49327826499938965, "step": 1436, "utility_loss": 0.9210243225097656 }, { "cosine_similarity": 0, "epoch": 1.3392357875116496, "grad_norm": 1.3129082915006922, "learning_rate": 3.075595443562306e-05, "loss": 1.4579, "reason_loss": 0.4747314751148224, "step": 1437, "utility_loss": 0.983196496963501 }, { "cosine_similarity": 0, "epoch": 1.3401677539608574, "grad_norm": 1.1098564550675096, "learning_rate": 3.073869520193304e-05, "loss": 1.5326, "reason_loss": 0.4846741259098053, "step": 1438, "utility_loss": 1.0479143857955933 }, { "cosine_similarity": 0, "epoch": 1.3410997204100652, "grad_norm": 1.1997717562119676, "learning_rate": 3.072143596824301e-05, "loss": 1.9057, "reason_loss": 0.46664559841156006, "step": 1439, "utility_loss": 1.4390110969543457 }, { "cosine_similarity": 0, "epoch": 1.342031686859273, "grad_norm": 1.036598906672786, "learning_rate": 3.070417673455299e-05, "loss": 1.8702, "reason_loss": 0.4885331988334656, "step": 1440, "utility_loss": 1.3816815614700317 }, { "cosine_similarity": 0, "epoch": 1.3429636533084808, "grad_norm": 1.728875944249332, "learning_rate": 3.068691750086296e-05, "loss": 1.7572, "reason_loss": 0.5081793665885925, "step": 1441, "utility_loss": 1.2490407228469849 }, { "cosine_similarity": 0, "epoch": 1.3438956197576888, "grad_norm": 1.1158954479640122, "learning_rate": 3.066965826717294e-05, "loss": 1.587, "reason_loss": 0.46588101983070374, "step": 1442, "utility_loss": 1.1211073398590088 }, { "cosine_similarity": 0, "epoch": 1.3448275862068966, "grad_norm": 0.9049136432511763, "learning_rate": 3.0652399033482916e-05, "loss": 1.3787, "reason_loss": 0.4950183033943176, "step": 1443, "utility_loss": 0.8836658596992493 }, { "cosine_similarity": 0, "epoch": 1.3457595526561044, "grad_norm": 1.0201134722592202, "learning_rate": 3.063513979979289e-05, "loss": 1.6671, "reason_loss": 0.512165904045105, "step": 1444, "utility_loss": 1.1548928022384644 }, { "cosine_similarity": 0, "epoch": 1.3466915191053122, "grad_norm": 0.9823391642849146, "learning_rate": 3.061788056610287e-05, "loss": 1.804, "reason_loss": 0.45828503370285034, "step": 1445, "utility_loss": 1.345754861831665 }, { "cosine_similarity": 0, "epoch": 1.3476234855545202, "grad_norm": 1.0310067303703845, "learning_rate": 3.0600621332412846e-05, "loss": 1.7227, "reason_loss": 0.4765382409095764, "step": 1446, "utility_loss": 1.246201753616333 }, { "cosine_similarity": 0, "epoch": 1.348555452003728, "grad_norm": 1.1496307476829795, "learning_rate": 3.058336209872282e-05, "loss": 1.7047, "reason_loss": 0.45124906301498413, "step": 1447, "utility_loss": 1.2534801959991455 }, { "cosine_similarity": 0, "epoch": 1.3494874184529357, "grad_norm": 1.0342018819168042, "learning_rate": 3.0566102865032795e-05, "loss": 1.3926, "reason_loss": 0.48729920387268066, "step": 1448, "utility_loss": 0.9052759408950806 }, { "cosine_similarity": 0, "epoch": 1.3504193849021435, "grad_norm": 0.8172743394183744, "learning_rate": 3.054884363134277e-05, "loss": 1.4719, "reason_loss": 0.4787558317184448, "step": 1449, "utility_loss": 0.993156909942627 }, { "cosine_similarity": 0, "epoch": 1.3513513513513513, "grad_norm": 0.9431553958956524, "learning_rate": 3.053158439765274e-05, "loss": 1.3732, "reason_loss": 0.49596214294433594, "step": 1450, "utility_loss": 0.8772224187850952 }, { "cosine_similarity": 0, "epoch": 1.352283317800559, "grad_norm": 1.200097667261622, "learning_rate": 3.051432516396272e-05, "loss": 1.5677, "reason_loss": 0.4924415647983551, "step": 1451, "utility_loss": 1.0752698183059692 }, { "cosine_similarity": 0, "epoch": 1.353215284249767, "grad_norm": 1.229249331233002, "learning_rate": 3.0497065930272695e-05, "loss": 1.4373, "reason_loss": 0.5080792903900146, "step": 1452, "utility_loss": 0.9292489290237427 }, { "cosine_similarity": 0, "epoch": 1.354147250698975, "grad_norm": 0.9069964644861299, "learning_rate": 3.0479806696582676e-05, "loss": 1.1692, "reason_loss": 0.49002230167388916, "step": 1453, "utility_loss": 0.679207980632782 }, { "cosine_similarity": 0, "epoch": 1.3550792171481827, "grad_norm": 0.810598007668159, "learning_rate": 3.046254746289265e-05, "loss": 1.2435, "reason_loss": 0.4562338888645172, "step": 1454, "utility_loss": 0.7873070240020752 }, { "cosine_similarity": 0, "epoch": 1.3560111835973905, "grad_norm": 1.0824219284527727, "learning_rate": 3.0445288229202625e-05, "loss": 1.38, "reason_loss": 0.47385480999946594, "step": 1455, "utility_loss": 0.9061685800552368 }, { "cosine_similarity": 0, "epoch": 1.3569431500465983, "grad_norm": 1.0632394933118354, "learning_rate": 3.0428028995512602e-05, "loss": 1.4652, "reason_loss": 0.4749245047569275, "step": 1456, "utility_loss": 0.990235447883606 }, { "cosine_similarity": 0, "epoch": 1.3578751164958063, "grad_norm": 1.2527564236610385, "learning_rate": 3.0410769761822577e-05, "loss": 1.5669, "reason_loss": 0.5102716684341431, "step": 1457, "utility_loss": 1.0566537380218506 }, { "cosine_similarity": 0, "epoch": 1.358807082945014, "grad_norm": 1.0489318416645497, "learning_rate": 3.039351052813255e-05, "loss": 1.4832, "reason_loss": 0.4967684745788574, "step": 1458, "utility_loss": 0.9864645600318909 }, { "cosine_similarity": 0, "epoch": 1.3597390493942219, "grad_norm": 1.2057989771388435, "learning_rate": 3.0376251294442525e-05, "loss": 1.9657, "reason_loss": 0.5126309394836426, "step": 1459, "utility_loss": 1.4530346393585205 }, { "cosine_similarity": 0, "epoch": 1.3606710158434296, "grad_norm": 1.0417768592231276, "learning_rate": 3.0358992060752506e-05, "loss": 1.643, "reason_loss": 0.46014851331710815, "step": 1460, "utility_loss": 1.1828275918960571 }, { "cosine_similarity": 0, "epoch": 1.3616029822926374, "grad_norm": 0.9343420499568413, "learning_rate": 3.034173282706248e-05, "loss": 1.359, "reason_loss": 0.4832601547241211, "step": 1461, "utility_loss": 0.8757134675979614 }, { "cosine_similarity": 0, "epoch": 1.3625349487418452, "grad_norm": 1.07927514034802, "learning_rate": 3.0324473593372455e-05, "loss": 1.5645, "reason_loss": 0.47611767053604126, "step": 1462, "utility_loss": 1.088388442993164 }, { "cosine_similarity": 0, "epoch": 1.363466915191053, "grad_norm": 1.1036387824709293, "learning_rate": 3.0307214359682433e-05, "loss": 1.7477, "reason_loss": 0.49798309803009033, "step": 1463, "utility_loss": 1.2497315406799316 }, { "cosine_similarity": 0, "epoch": 1.364398881640261, "grad_norm": 0.934034915232309, "learning_rate": 3.0289955125992407e-05, "loss": 1.3672, "reason_loss": 0.48030734062194824, "step": 1464, "utility_loss": 0.8868549466133118 }, { "cosine_similarity": 0, "epoch": 1.3653308480894688, "grad_norm": 1.090287865603913, "learning_rate": 3.027269589230238e-05, "loss": 1.432, "reason_loss": 0.49857768416404724, "step": 1465, "utility_loss": 0.9333837032318115 }, { "cosine_similarity": 0, "epoch": 1.3662628145386766, "grad_norm": 1.0867797981264662, "learning_rate": 3.0255436658612356e-05, "loss": 1.7947, "reason_loss": 0.48807668685913086, "step": 1466, "utility_loss": 1.3066288232803345 }, { "cosine_similarity": 0, "epoch": 1.3671947809878844, "grad_norm": 1.0754805845950997, "learning_rate": 3.0238177424922337e-05, "loss": 1.3711, "reason_loss": 0.46689170598983765, "step": 1467, "utility_loss": 0.9041872024536133 }, { "cosine_similarity": 0, "epoch": 1.3681267474370924, "grad_norm": 1.1245778591851237, "learning_rate": 3.022091819123231e-05, "loss": 1.8525, "reason_loss": 0.47005632519721985, "step": 1468, "utility_loss": 1.3824474811553955 }, { "cosine_similarity": 0, "epoch": 1.3690587138863002, "grad_norm": 1.1123990930597616, "learning_rate": 3.0203658957542285e-05, "loss": 1.5731, "reason_loss": 0.4849877953529358, "step": 1469, "utility_loss": 1.0881173610687256 }, { "cosine_similarity": 0, "epoch": 1.369990680335508, "grad_norm": 1.0544005905376748, "learning_rate": 3.018639972385226e-05, "loss": 1.9078, "reason_loss": 0.4871283769607544, "step": 1470, "utility_loss": 1.4206807613372803 }, { "cosine_similarity": 0, "epoch": 1.3709226467847158, "grad_norm": 1.0633734254689493, "learning_rate": 3.016914049016224e-05, "loss": 1.4456, "reason_loss": 0.4691261053085327, "step": 1471, "utility_loss": 0.9764760136604309 }, { "cosine_similarity": 0, "epoch": 1.3718546132339235, "grad_norm": 0.9829771282706254, "learning_rate": 3.0151881256472215e-05, "loss": 1.8522, "reason_loss": 0.4812125563621521, "step": 1472, "utility_loss": 1.3710248470306396 }, { "cosine_similarity": 0, "epoch": 1.3727865796831313, "grad_norm": 0.9384680264255443, "learning_rate": 3.013462202278219e-05, "loss": 1.6448, "reason_loss": 0.4879578649997711, "step": 1473, "utility_loss": 1.1568107604980469 }, { "cosine_similarity": 0, "epoch": 1.3737185461323391, "grad_norm": 0.9777032771404529, "learning_rate": 3.0117362789092167e-05, "loss": 1.5227, "reason_loss": 0.4791642427444458, "step": 1474, "utility_loss": 1.0435373783111572 }, { "cosine_similarity": 0, "epoch": 1.3746505125815471, "grad_norm": 1.0965755920533946, "learning_rate": 3.010010355540214e-05, "loss": 1.5451, "reason_loss": 0.46286848187446594, "step": 1475, "utility_loss": 1.082209587097168 }, { "cosine_similarity": 0, "epoch": 1.375582479030755, "grad_norm": 0.9352712682210785, "learning_rate": 3.0082844321712115e-05, "loss": 1.4334, "reason_loss": 0.46576976776123047, "step": 1476, "utility_loss": 0.9676145911216736 }, { "cosine_similarity": 0, "epoch": 1.3765144454799627, "grad_norm": 0.8105951439631351, "learning_rate": 3.006558508802209e-05, "loss": 1.3259, "reason_loss": 0.4385679066181183, "step": 1477, "utility_loss": 0.8873640298843384 }, { "cosine_similarity": 0, "epoch": 1.3774464119291705, "grad_norm": 0.9571427279171335, "learning_rate": 3.004832585433207e-05, "loss": 1.6224, "reason_loss": 0.4792690575122833, "step": 1478, "utility_loss": 1.1430895328521729 }, { "cosine_similarity": 0, "epoch": 1.3783783783783785, "grad_norm": 0.9332470867752711, "learning_rate": 3.0031066620642045e-05, "loss": 1.2241, "reason_loss": 0.4646660089492798, "step": 1479, "utility_loss": 0.7593966126441956 }, { "cosine_similarity": 0, "epoch": 1.3793103448275863, "grad_norm": 0.9264004885042755, "learning_rate": 3.001380738695202e-05, "loss": 1.6666, "reason_loss": 0.46604007482528687, "step": 1480, "utility_loss": 1.2005393505096436 }, { "cosine_similarity": 0, "epoch": 1.380242311276794, "grad_norm": 1.1951592367963444, "learning_rate": 2.9996548153261994e-05, "loss": 1.5262, "reason_loss": 0.47314849495887756, "step": 1481, "utility_loss": 1.053031086921692 }, { "cosine_similarity": 0, "epoch": 1.3811742777260019, "grad_norm": 1.045135071177155, "learning_rate": 2.9979288919571975e-05, "loss": 1.6808, "reason_loss": 0.47575289011001587, "step": 1482, "utility_loss": 1.2050437927246094 }, { "cosine_similarity": 0, "epoch": 1.3821062441752097, "grad_norm": 1.02448175022263, "learning_rate": 2.996202968588195e-05, "loss": 1.5479, "reason_loss": 0.47203129529953003, "step": 1483, "utility_loss": 1.075852870941162 }, { "cosine_similarity": 0, "epoch": 1.3830382106244175, "grad_norm": 1.026971684304496, "learning_rate": 2.9944770452191923e-05, "loss": 1.7113, "reason_loss": 0.5116301774978638, "step": 1484, "utility_loss": 1.1997143030166626 }, { "cosine_similarity": 0, "epoch": 1.3839701770736252, "grad_norm": 1.2309011724802563, "learning_rate": 2.99275112185019e-05, "loss": 1.7058, "reason_loss": 0.46997231245040894, "step": 1485, "utility_loss": 1.2358529567718506 }, { "cosine_similarity": 0, "epoch": 1.3849021435228333, "grad_norm": 1.0631690453991216, "learning_rate": 2.9910251984811875e-05, "loss": 1.5519, "reason_loss": 0.5042624473571777, "step": 1486, "utility_loss": 1.0476295948028564 }, { "cosine_similarity": 0, "epoch": 1.385834109972041, "grad_norm": 0.9177774815903951, "learning_rate": 2.989299275112185e-05, "loss": 1.1789, "reason_loss": 0.48744258284568787, "step": 1487, "utility_loss": 0.6914696097373962 }, { "cosine_similarity": 0, "epoch": 1.3867660764212488, "grad_norm": 0.9038986963334283, "learning_rate": 2.9875733517431824e-05, "loss": 1.3141, "reason_loss": 0.476068913936615, "step": 1488, "utility_loss": 0.8379952907562256 }, { "cosine_similarity": 0, "epoch": 1.3876980428704566, "grad_norm": 1.0929108100172042, "learning_rate": 2.9858474283741805e-05, "loss": 1.5084, "reason_loss": 0.489496648311615, "step": 1489, "utility_loss": 1.0189387798309326 }, { "cosine_similarity": 0, "epoch": 1.3886300093196646, "grad_norm": 1.066614370426724, "learning_rate": 2.984121505005178e-05, "loss": 1.756, "reason_loss": 0.48542577028274536, "step": 1490, "utility_loss": 1.2705802917480469 }, { "cosine_similarity": 0, "epoch": 1.3895619757688724, "grad_norm": 0.9942705367533771, "learning_rate": 2.9823955816361753e-05, "loss": 1.4211, "reason_loss": 0.4953126609325409, "step": 1491, "utility_loss": 0.9257603287696838 }, { "cosine_similarity": 0, "epoch": 1.3904939422180802, "grad_norm": 1.0851070090222872, "learning_rate": 2.9806696582671728e-05, "loss": 1.6097, "reason_loss": 0.4789917469024658, "step": 1492, "utility_loss": 1.1307538747787476 }, { "cosine_similarity": 0, "epoch": 1.391425908667288, "grad_norm": 1.1203807367435106, "learning_rate": 2.978943734898171e-05, "loss": 1.6707, "reason_loss": 0.4973563849925995, "step": 1493, "utility_loss": 1.1733183860778809 }, { "cosine_similarity": 0, "epoch": 1.3923578751164958, "grad_norm": 1.030033637702031, "learning_rate": 2.9772178115291683e-05, "loss": 1.468, "reason_loss": 0.4937095642089844, "step": 1494, "utility_loss": 0.9742687940597534 }, { "cosine_similarity": 0, "epoch": 1.3932898415657036, "grad_norm": 0.9733664396869655, "learning_rate": 2.9754918881601657e-05, "loss": 1.4801, "reason_loss": 0.5136924982070923, "step": 1495, "utility_loss": 0.9664559364318848 }, { "cosine_similarity": 0, "epoch": 1.3942218080149114, "grad_norm": 1.0150264151018669, "learning_rate": 2.9737659647911635e-05, "loss": 1.5824, "reason_loss": 0.4990622401237488, "step": 1496, "utility_loss": 1.0833185911178589 }, { "cosine_similarity": 0, "epoch": 1.3951537744641194, "grad_norm": 1.104180463787969, "learning_rate": 2.972040041422161e-05, "loss": 1.5747, "reason_loss": 0.49903959035873413, "step": 1497, "utility_loss": 1.0757062435150146 }, { "cosine_similarity": 0, "epoch": 1.3960857409133272, "grad_norm": 1.0461928555413063, "learning_rate": 2.9703141180531584e-05, "loss": 1.3432, "reason_loss": 0.47261935472488403, "step": 1498, "utility_loss": 0.8705449104309082 }, { "cosine_similarity": 0, "epoch": 1.397017707362535, "grad_norm": 0.9411418499752561, "learning_rate": 2.9685881946841558e-05, "loss": 1.439, "reason_loss": 0.49077916145324707, "step": 1499, "utility_loss": 0.9482196569442749 }, { "cosine_similarity": 0, "epoch": 1.3979496738117427, "grad_norm": 1.1641069795027903, "learning_rate": 2.966862271315154e-05, "loss": 1.4737, "reason_loss": 0.529478907585144, "step": 1500, "utility_loss": 0.9442398548126221 }, { "cosine_similarity": 0, "epoch": 1.3988816402609507, "grad_norm": 1.2329608515413675, "learning_rate": 2.9651363479461513e-05, "loss": 1.4744, "reason_loss": 0.49208617210388184, "step": 1501, "utility_loss": 0.9822642803192139 }, { "cosine_similarity": 0, "epoch": 1.3998136067101585, "grad_norm": 1.1559963713536177, "learning_rate": 2.9634104245771488e-05, "loss": 1.5291, "reason_loss": 0.4685600697994232, "step": 1502, "utility_loss": 1.060537576675415 }, { "cosine_similarity": 0, "epoch": 1.4007455731593663, "grad_norm": 0.9775709761112152, "learning_rate": 2.9616845012081462e-05, "loss": 1.4201, "reason_loss": 0.47986817359924316, "step": 1503, "utility_loss": 0.9402745962142944 }, { "cosine_similarity": 0, "epoch": 1.401677539608574, "grad_norm": 1.0251567528809171, "learning_rate": 2.9599585778391443e-05, "loss": 1.4214, "reason_loss": 0.4713616371154785, "step": 1504, "utility_loss": 0.9500223994255066 }, { "cosine_similarity": 0, "epoch": 1.402609506057782, "grad_norm": 1.1904624343880286, "learning_rate": 2.9582326544701417e-05, "loss": 1.8091, "reason_loss": 0.5358643531799316, "step": 1505, "utility_loss": 1.273195743560791 }, { "cosine_similarity": 0, "epoch": 1.4035414725069897, "grad_norm": 0.9903948692987815, "learning_rate": 2.956506731101139e-05, "loss": 1.729, "reason_loss": 0.4870857894420624, "step": 1506, "utility_loss": 1.2419097423553467 }, { "cosine_similarity": 0, "epoch": 1.4044734389561975, "grad_norm": 1.087897152643165, "learning_rate": 2.954780807732137e-05, "loss": 1.7295, "reason_loss": 0.4940906763076782, "step": 1507, "utility_loss": 1.2353627681732178 }, { "cosine_similarity": 0, "epoch": 1.4054054054054055, "grad_norm": 1.1989271624979376, "learning_rate": 2.9530548843631343e-05, "loss": 1.2926, "reason_loss": 0.46766746044158936, "step": 1508, "utility_loss": 0.8249146938323975 }, { "cosine_similarity": 0, "epoch": 1.4063373718546133, "grad_norm": 1.3823715326036439, "learning_rate": 2.9513289609941318e-05, "loss": 2.1172, "reason_loss": 0.48426562547683716, "step": 1509, "utility_loss": 1.6329677104949951 }, { "cosine_similarity": 0, "epoch": 1.407269338303821, "grad_norm": 1.0086087983588092, "learning_rate": 2.9496030376251292e-05, "loss": 1.3251, "reason_loss": 0.46800926327705383, "step": 1510, "utility_loss": 0.8570795059204102 }, { "cosine_similarity": 0, "epoch": 1.4082013047530288, "grad_norm": 1.1092932334963437, "learning_rate": 2.9478771142561273e-05, "loss": 1.4944, "reason_loss": 0.47966113686561584, "step": 1511, "utility_loss": 1.014732003211975 }, { "cosine_similarity": 0, "epoch": 1.4091332712022366, "grad_norm": 0.841224064106865, "learning_rate": 2.9461511908871247e-05, "loss": 1.1745, "reason_loss": 0.45974472165107727, "step": 1512, "utility_loss": 0.7147601246833801 }, { "cosine_similarity": 0, "epoch": 1.4100652376514446, "grad_norm": 1.139074754021095, "learning_rate": 2.944425267518122e-05, "loss": 1.6883, "reason_loss": 0.4843173921108246, "step": 1513, "utility_loss": 1.2039417028427124 }, { "cosine_similarity": 0, "epoch": 1.4109972041006524, "grad_norm": 1.1776000903203865, "learning_rate": 2.9426993441491203e-05, "loss": 1.728, "reason_loss": 0.47754964232444763, "step": 1514, "utility_loss": 1.2504714727401733 }, { "cosine_similarity": 0, "epoch": 1.4119291705498602, "grad_norm": 1.156536499114028, "learning_rate": 2.9409734207801177e-05, "loss": 1.3442, "reason_loss": 0.511644721031189, "step": 1515, "utility_loss": 0.8325586318969727 }, { "cosine_similarity": 0, "epoch": 1.412861136999068, "grad_norm": 1.2805667871271889, "learning_rate": 2.939247497411115e-05, "loss": 1.519, "reason_loss": 0.46576425433158875, "step": 1516, "utility_loss": 1.0532221794128418 }, { "cosine_similarity": 0, "epoch": 1.4137931034482758, "grad_norm": 0.895163474019239, "learning_rate": 2.9375215740421126e-05, "loss": 1.4983, "reason_loss": 0.48881328105926514, "step": 1517, "utility_loss": 1.009442925453186 }, { "cosine_similarity": 0, "epoch": 1.4147250698974836, "grad_norm": 0.9378165758350528, "learning_rate": 2.9357956506731103e-05, "loss": 1.5008, "reason_loss": 0.48780834674835205, "step": 1518, "utility_loss": 1.0130176544189453 }, { "cosine_similarity": 0, "epoch": 1.4156570363466916, "grad_norm": 1.2189337596401546, "learning_rate": 2.9340697273041078e-05, "loss": 1.3779, "reason_loss": 0.48638173937797546, "step": 1519, "utility_loss": 0.8915479183197021 }, { "cosine_similarity": 0, "epoch": 1.4165890027958994, "grad_norm": 1.0200356867939098, "learning_rate": 2.9323438039351052e-05, "loss": 1.4719, "reason_loss": 0.47255629301071167, "step": 1520, "utility_loss": 0.9993020296096802 }, { "cosine_similarity": 0, "epoch": 1.4175209692451072, "grad_norm": 0.913177044170972, "learning_rate": 2.9306178805661026e-05, "loss": 1.6558, "reason_loss": 0.4789031744003296, "step": 1521, "utility_loss": 1.176896333694458 }, { "cosine_similarity": 0, "epoch": 1.418452935694315, "grad_norm": 1.5831931708911988, "learning_rate": 2.9288919571971007e-05, "loss": 1.8728, "reason_loss": 0.4843670725822449, "step": 1522, "utility_loss": 1.3884406089782715 }, { "cosine_similarity": 0, "epoch": 1.4193849021435228, "grad_norm": 0.971088013498449, "learning_rate": 2.927166033828098e-05, "loss": 1.4846, "reason_loss": 0.4607977867126465, "step": 1523, "utility_loss": 1.0237607955932617 }, { "cosine_similarity": 0, "epoch": 1.4203168685927308, "grad_norm": 1.2123679594233057, "learning_rate": 2.9254401104590956e-05, "loss": 1.4316, "reason_loss": 0.4762968420982361, "step": 1524, "utility_loss": 0.9552954435348511 }, { "cosine_similarity": 0, "epoch": 1.4212488350419386, "grad_norm": 1.1961488522855623, "learning_rate": 2.9237141870900937e-05, "loss": 1.5465, "reason_loss": 0.505330502986908, "step": 1525, "utility_loss": 1.0411980152130127 }, { "cosine_similarity": 0, "epoch": 1.4221808014911463, "grad_norm": 1.3088899310159845, "learning_rate": 2.921988263721091e-05, "loss": 1.8907, "reason_loss": 0.4767059087753296, "step": 1526, "utility_loss": 1.4139525890350342 }, { "cosine_similarity": 0, "epoch": 1.4231127679403541, "grad_norm": 1.0160994222764734, "learning_rate": 2.9202623403520885e-05, "loss": 1.5571, "reason_loss": 0.48289233446121216, "step": 1527, "utility_loss": 1.0741726160049438 }, { "cosine_similarity": 0, "epoch": 1.424044734389562, "grad_norm": 1.0350016608583117, "learning_rate": 2.918536416983086e-05, "loss": 1.4997, "reason_loss": 0.47020184993743896, "step": 1528, "utility_loss": 1.0295419692993164 }, { "cosine_similarity": 0, "epoch": 1.4249767008387697, "grad_norm": 1.0476462610466115, "learning_rate": 2.9168104936140837e-05, "loss": 1.3472, "reason_loss": 0.49124640226364136, "step": 1529, "utility_loss": 0.855998694896698 }, { "cosine_similarity": 0, "epoch": 1.4259086672879777, "grad_norm": 1.2206747246922807, "learning_rate": 2.9150845702450812e-05, "loss": 1.5968, "reason_loss": 0.473107248544693, "step": 1530, "utility_loss": 1.123718023300171 }, { "cosine_similarity": 0, "epoch": 1.4268406337371855, "grad_norm": 0.9980172785686962, "learning_rate": 2.9133586468760786e-05, "loss": 1.381, "reason_loss": 0.4697383642196655, "step": 1531, "utility_loss": 0.9112284183502197 }, { "cosine_similarity": 0, "epoch": 1.4277726001863933, "grad_norm": 1.268842559024404, "learning_rate": 2.911632723507076e-05, "loss": 1.8228, "reason_loss": 0.47652876377105713, "step": 1532, "utility_loss": 1.3462927341461182 }, { "cosine_similarity": 0, "epoch": 1.428704566635601, "grad_norm": 1.2100379500333966, "learning_rate": 2.909906800138074e-05, "loss": 1.5724, "reason_loss": 0.5107903480529785, "step": 1533, "utility_loss": 1.0616137981414795 }, { "cosine_similarity": 0, "epoch": 1.4296365330848089, "grad_norm": 1.0490948329905596, "learning_rate": 2.9081808767690716e-05, "loss": 1.5622, "reason_loss": 0.45185455679893494, "step": 1534, "utility_loss": 1.1103875637054443 }, { "cosine_similarity": 0, "epoch": 1.4305684995340169, "grad_norm": 1.0074717113726752, "learning_rate": 2.906454953400069e-05, "loss": 1.532, "reason_loss": 0.5125745534896851, "step": 1535, "utility_loss": 1.0193859338760376 }, { "cosine_similarity": 0, "epoch": 1.4315004659832247, "grad_norm": 1.0557694189478009, "learning_rate": 2.904729030031067e-05, "loss": 1.2849, "reason_loss": 0.4921383857727051, "step": 1536, "utility_loss": 0.7927948236465454 }, { "cosine_similarity": 0, "epoch": 1.4324324324324325, "grad_norm": 1.0778878568026322, "learning_rate": 2.9030031066620645e-05, "loss": 1.7843, "reason_loss": 0.469745934009552, "step": 1537, "utility_loss": 1.3145523071289062 }, { "cosine_similarity": 0, "epoch": 1.4333643988816402, "grad_norm": 0.9064643315000829, "learning_rate": 2.901277183293062e-05, "loss": 1.5262, "reason_loss": 0.5202808976173401, "step": 1538, "utility_loss": 1.005881667137146 }, { "cosine_similarity": 0, "epoch": 1.434296365330848, "grad_norm": 1.0430790347329457, "learning_rate": 2.8995512599240594e-05, "loss": 1.6421, "reason_loss": 0.4581581652164459, "step": 1539, "utility_loss": 1.1839314699172974 }, { "cosine_similarity": 0, "epoch": 1.4352283317800558, "grad_norm": 0.9772080527054516, "learning_rate": 2.897825336555057e-05, "loss": 1.5093, "reason_loss": 0.43750637769699097, "step": 1540, "utility_loss": 1.0718095302581787 }, { "cosine_similarity": 0, "epoch": 1.4361602982292636, "grad_norm": 0.9007029173406997, "learning_rate": 2.8960994131860546e-05, "loss": 1.4353, "reason_loss": 0.4758381247520447, "step": 1541, "utility_loss": 0.9594801068305969 }, { "cosine_similarity": 0, "epoch": 1.4370922646784716, "grad_norm": 1.2357724154131957, "learning_rate": 2.894373489817052e-05, "loss": 1.6626, "reason_loss": 0.4980520009994507, "step": 1542, "utility_loss": 1.1645874977111816 }, { "cosine_similarity": 0, "epoch": 1.4380242311276794, "grad_norm": 2.2708462326800465, "learning_rate": 2.8926475664480494e-05, "loss": 1.8355, "reason_loss": 0.4903166592121124, "step": 1543, "utility_loss": 1.345212459564209 }, { "cosine_similarity": 0, "epoch": 1.4389561975768872, "grad_norm": 0.9551948349204155, "learning_rate": 2.8909216430790476e-05, "loss": 1.4641, "reason_loss": 0.4702228307723999, "step": 1544, "utility_loss": 0.9939260482788086 }, { "cosine_similarity": 0, "epoch": 1.439888164026095, "grad_norm": 1.1071940770498976, "learning_rate": 2.889195719710045e-05, "loss": 1.4323, "reason_loss": 0.4658501148223877, "step": 1545, "utility_loss": 0.9664729833602905 }, { "cosine_similarity": 0, "epoch": 1.440820130475303, "grad_norm": 0.9380179576810411, "learning_rate": 2.8874697963410424e-05, "loss": 1.5435, "reason_loss": 0.4687677323818207, "step": 1546, "utility_loss": 1.0747003555297852 }, { "cosine_similarity": 0, "epoch": 1.4417520969245108, "grad_norm": 0.8581269303680928, "learning_rate": 2.8857438729720405e-05, "loss": 1.3963, "reason_loss": 0.4763891398906708, "step": 1547, "utility_loss": 0.919948160648346 }, { "cosine_similarity": 0, "epoch": 1.4426840633737186, "grad_norm": 1.1854567237658076, "learning_rate": 2.884017949603038e-05, "loss": 1.5191, "reason_loss": 0.4667285084724426, "step": 1548, "utility_loss": 1.0523967742919922 }, { "cosine_similarity": 0, "epoch": 1.4436160298229264, "grad_norm": 1.0280391271699871, "learning_rate": 2.8822920262340354e-05, "loss": 1.6801, "reason_loss": 0.4763365387916565, "step": 1549, "utility_loss": 1.2037160396575928 }, { "cosine_similarity": 0, "epoch": 1.4445479962721341, "grad_norm": 1.0667407021235735, "learning_rate": 2.8805661028650328e-05, "loss": 1.6351, "reason_loss": 0.4813924729824066, "step": 1550, "utility_loss": 1.1537102460861206 }, { "cosine_similarity": 0, "epoch": 1.445479962721342, "grad_norm": 1.3696986431259661, "learning_rate": 2.8788401794960306e-05, "loss": 1.5694, "reason_loss": 0.47998344898223877, "step": 1551, "utility_loss": 1.0894618034362793 }, { "cosine_similarity": 0, "epoch": 1.4464119291705497, "grad_norm": 0.9815504829319385, "learning_rate": 2.877114256127028e-05, "loss": 1.4449, "reason_loss": 0.4761704206466675, "step": 1552, "utility_loss": 0.9687721133232117 }, { "cosine_similarity": 0, "epoch": 1.4473438956197577, "grad_norm": 0.952686229126748, "learning_rate": 2.8753883327580254e-05, "loss": 1.2589, "reason_loss": 0.4578862488269806, "step": 1553, "utility_loss": 0.800981879234314 }, { "cosine_similarity": 0, "epoch": 1.4482758620689655, "grad_norm": 0.9908141366314217, "learning_rate": 2.8736624093890235e-05, "loss": 1.6976, "reason_loss": 0.4744740128517151, "step": 1554, "utility_loss": 1.2231420278549194 }, { "cosine_similarity": 0, "epoch": 1.4492078285181733, "grad_norm": 1.0716135161055569, "learning_rate": 2.871936486020021e-05, "loss": 1.4346, "reason_loss": 0.4902471899986267, "step": 1555, "utility_loss": 0.9443182349205017 }, { "cosine_similarity": 0, "epoch": 1.450139794967381, "grad_norm": 0.9431174033787819, "learning_rate": 2.8702105626510184e-05, "loss": 1.6852, "reason_loss": 0.4860904812812805, "step": 1556, "utility_loss": 1.1990652084350586 }, { "cosine_similarity": 0, "epoch": 1.4510717614165891, "grad_norm": 0.9031744252602836, "learning_rate": 2.8684846392820158e-05, "loss": 1.6876, "reason_loss": 0.4683411419391632, "step": 1557, "utility_loss": 1.2192302942276 }, { "cosine_similarity": 0, "epoch": 1.452003727865797, "grad_norm": 1.0077594133937535, "learning_rate": 2.866758715913014e-05, "loss": 1.6297, "reason_loss": 0.46164172887802124, "step": 1558, "utility_loss": 1.1680338382720947 }, { "cosine_similarity": 0, "epoch": 1.4529356943150047, "grad_norm": 1.1097101389021609, "learning_rate": 2.8650327925440114e-05, "loss": 1.5137, "reason_loss": 0.46563154458999634, "step": 1559, "utility_loss": 1.048088788986206 }, { "cosine_similarity": 0, "epoch": 1.4538676607642125, "grad_norm": 1.0780549669517923, "learning_rate": 2.8633068691750088e-05, "loss": 1.3526, "reason_loss": 0.48827648162841797, "step": 1560, "utility_loss": 0.8643255829811096 }, { "cosine_similarity": 0, "epoch": 1.4547996272134203, "grad_norm": 1.40488358110202, "learning_rate": 2.8615809458060062e-05, "loss": 1.5817, "reason_loss": 0.46548333764076233, "step": 1561, "utility_loss": 1.116190791130066 }, { "cosine_similarity": 0, "epoch": 1.455731593662628, "grad_norm": 1.0314781201839738, "learning_rate": 2.859855022437004e-05, "loss": 1.597, "reason_loss": 0.49053484201431274, "step": 1562, "utility_loss": 1.106441617012024 }, { "cosine_similarity": 0, "epoch": 1.4566635601118358, "grad_norm": 1.4041022262785625, "learning_rate": 2.8581290990680014e-05, "loss": 1.5279, "reason_loss": 0.4734664559364319, "step": 1563, "utility_loss": 1.0544085502624512 }, { "cosine_similarity": 0, "epoch": 1.4575955265610439, "grad_norm": 1.1699691337928948, "learning_rate": 2.856403175698999e-05, "loss": 1.9336, "reason_loss": 0.48666414618492126, "step": 1564, "utility_loss": 1.446904182434082 }, { "cosine_similarity": 0, "epoch": 1.4585274930102516, "grad_norm": 0.9680326092433363, "learning_rate": 2.854677252329997e-05, "loss": 1.4336, "reason_loss": 0.46398571133613586, "step": 1565, "utility_loss": 0.9696619510650635 }, { "cosine_similarity": 0, "epoch": 1.4594594594594594, "grad_norm": 1.0173630310302466, "learning_rate": 2.8529513289609944e-05, "loss": 1.6447, "reason_loss": 0.5165839195251465, "step": 1566, "utility_loss": 1.1280758380889893 }, { "cosine_similarity": 0, "epoch": 1.4603914259086672, "grad_norm": 1.2377847452562432, "learning_rate": 2.8512254055919918e-05, "loss": 1.4417, "reason_loss": 0.4485737085342407, "step": 1567, "utility_loss": 0.9931243062019348 }, { "cosine_similarity": 0, "epoch": 1.4613233923578752, "grad_norm": 1.1290936641624685, "learning_rate": 2.8494994822229892e-05, "loss": 1.6843, "reason_loss": 0.4833833575248718, "step": 1568, "utility_loss": 1.2008793354034424 }, { "cosine_similarity": 0, "epoch": 1.462255358807083, "grad_norm": 1.2201687246159487, "learning_rate": 2.8477735588539873e-05, "loss": 1.835, "reason_loss": 0.4901660680770874, "step": 1569, "utility_loss": 1.3448084592819214 }, { "cosine_similarity": 0, "epoch": 1.4631873252562908, "grad_norm": 1.1339837390167327, "learning_rate": 2.8460476354849848e-05, "loss": 1.7599, "reason_loss": 0.45511388778686523, "step": 1570, "utility_loss": 1.3047491312026978 }, { "cosine_similarity": 0, "epoch": 1.4641192917054986, "grad_norm": 1.0378658849301976, "learning_rate": 2.8443217121159822e-05, "loss": 1.3742, "reason_loss": 0.4897468686103821, "step": 1571, "utility_loss": 0.8844705820083618 }, { "cosine_similarity": 0, "epoch": 1.4650512581547064, "grad_norm": 0.9397156077326587, "learning_rate": 2.8425957887469796e-05, "loss": 1.5824, "reason_loss": 0.4834047555923462, "step": 1572, "utility_loss": 1.0990031957626343 }, { "cosine_similarity": 0, "epoch": 1.4659832246039142, "grad_norm": 0.9700340885594848, "learning_rate": 2.8408698653779774e-05, "loss": 1.4592, "reason_loss": 0.4721222221851349, "step": 1573, "utility_loss": 0.9871053099632263 }, { "cosine_similarity": 0, "epoch": 1.466915191053122, "grad_norm": 1.0152096298751134, "learning_rate": 2.8391439420089748e-05, "loss": 1.5429, "reason_loss": 0.5046443343162537, "step": 1574, "utility_loss": 1.0383046865463257 }, { "cosine_similarity": 0, "epoch": 1.46784715750233, "grad_norm": 1.0744031885179026, "learning_rate": 2.8374180186399723e-05, "loss": 1.49, "reason_loss": 0.4872293174266815, "step": 1575, "utility_loss": 1.0027508735656738 }, { "cosine_similarity": 0, "epoch": 1.4687791239515378, "grad_norm": 1.2361826345968476, "learning_rate": 2.8356920952709704e-05, "loss": 1.4185, "reason_loss": 0.460987389087677, "step": 1576, "utility_loss": 0.9574759602546692 }, { "cosine_similarity": 0, "epoch": 1.4697110904007455, "grad_norm": 1.1730953803797555, "learning_rate": 2.8339661719019678e-05, "loss": 1.6626, "reason_loss": 0.47672122716903687, "step": 1577, "utility_loss": 1.1858479976654053 }, { "cosine_similarity": 0, "epoch": 1.4706430568499533, "grad_norm": 0.9886202260144589, "learning_rate": 2.8322402485329652e-05, "loss": 1.7086, "reason_loss": 0.49447810649871826, "step": 1578, "utility_loss": 1.2141509056091309 }, { "cosine_similarity": 0, "epoch": 1.4715750232991613, "grad_norm": 1.3096496298057145, "learning_rate": 2.8305143251639626e-05, "loss": 1.9021, "reason_loss": 0.4957908093929291, "step": 1579, "utility_loss": 1.4063289165496826 }, { "cosine_similarity": 0, "epoch": 1.4725069897483691, "grad_norm": 1.0524578350125324, "learning_rate": 2.8287884017949608e-05, "loss": 1.5354, "reason_loss": 0.48848652839660645, "step": 1580, "utility_loss": 1.0468864440917969 }, { "cosine_similarity": 0, "epoch": 1.473438956197577, "grad_norm": 1.402454470622421, "learning_rate": 2.8270624784259582e-05, "loss": 1.3496, "reason_loss": 0.4861041307449341, "step": 1581, "utility_loss": 0.8635424971580505 }, { "cosine_similarity": 0, "epoch": 1.4743709226467847, "grad_norm": 1.267001579850831, "learning_rate": 2.8253365550569556e-05, "loss": 1.4455, "reason_loss": 0.4627136290073395, "step": 1582, "utility_loss": 0.9828178882598877 }, { "cosine_similarity": 0, "epoch": 1.4753028890959925, "grad_norm": 1.0355610190490105, "learning_rate": 2.823610631687953e-05, "loss": 1.4029, "reason_loss": 0.4706709086894989, "step": 1583, "utility_loss": 0.9322484731674194 }, { "cosine_similarity": 0, "epoch": 1.4762348555452003, "grad_norm": 1.1243128956814181, "learning_rate": 2.8218847083189508e-05, "loss": 1.9527, "reason_loss": 0.49208030104637146, "step": 1584, "utility_loss": 1.4605906009674072 }, { "cosine_similarity": 0, "epoch": 1.477166821994408, "grad_norm": 0.9010033858844959, "learning_rate": 2.8201587849499482e-05, "loss": 1.5047, "reason_loss": 0.49908313155174255, "step": 1585, "utility_loss": 1.0056084394454956 }, { "cosine_similarity": 0, "epoch": 1.478098788443616, "grad_norm": 1.0921314673009601, "learning_rate": 2.8184328615809457e-05, "loss": 1.6467, "reason_loss": 0.5091891288757324, "step": 1586, "utility_loss": 1.137474536895752 }, { "cosine_similarity": 0, "epoch": 1.4790307548928239, "grad_norm": 0.9537623451549432, "learning_rate": 2.8167069382119438e-05, "loss": 1.3836, "reason_loss": 0.472933292388916, "step": 1587, "utility_loss": 0.9106747508049011 }, { "cosine_similarity": 0, "epoch": 1.4799627213420317, "grad_norm": 1.098467651324632, "learning_rate": 2.8149810148429412e-05, "loss": 1.7377, "reason_loss": 0.474373459815979, "step": 1588, "utility_loss": 1.2633695602416992 }, { "cosine_similarity": 0, "epoch": 1.4808946877912395, "grad_norm": 1.0157029949929082, "learning_rate": 2.8132550914739386e-05, "loss": 1.4842, "reason_loss": 0.5240851640701294, "step": 1589, "utility_loss": 0.9601545929908752 }, { "cosine_similarity": 0, "epoch": 1.4818266542404475, "grad_norm": 0.934393823871324, "learning_rate": 2.811529168104936e-05, "loss": 1.7144, "reason_loss": 0.481222540140152, "step": 1590, "utility_loss": 1.2331621646881104 }, { "cosine_similarity": 0, "epoch": 1.4827586206896552, "grad_norm": 1.0713975281307173, "learning_rate": 2.809803244735934e-05, "loss": 1.5815, "reason_loss": 0.5062912106513977, "step": 1591, "utility_loss": 1.075179100036621 }, { "cosine_similarity": 0, "epoch": 1.483690587138863, "grad_norm": 1.0151599847999768, "learning_rate": 2.8080773213669316e-05, "loss": 1.951, "reason_loss": 0.47281402349472046, "step": 1592, "utility_loss": 1.478186845779419 }, { "cosine_similarity": 0, "epoch": 1.4846225535880708, "grad_norm": 0.8510652959682808, "learning_rate": 2.806351397997929e-05, "loss": 1.2763, "reason_loss": 0.5057499408721924, "step": 1593, "utility_loss": 0.7705552577972412 }, { "cosine_similarity": 0, "epoch": 1.4855545200372786, "grad_norm": 0.977253785177534, "learning_rate": 2.8046254746289268e-05, "loss": 1.7951, "reason_loss": 0.46245795488357544, "step": 1594, "utility_loss": 1.3325964212417603 }, { "cosine_similarity": 0, "epoch": 1.4864864864864864, "grad_norm": 1.036850722451204, "learning_rate": 2.8028995512599242e-05, "loss": 1.5139, "reason_loss": 0.46992427110671997, "step": 1595, "utility_loss": 1.0439807176589966 }, { "cosine_similarity": 0, "epoch": 1.4874184529356942, "grad_norm": 1.0039865044283143, "learning_rate": 2.8011736278909217e-05, "loss": 1.6133, "reason_loss": 0.48515790700912476, "step": 1596, "utility_loss": 1.12812340259552 }, { "cosine_similarity": 0, "epoch": 1.4883504193849022, "grad_norm": 1.0656938258386661, "learning_rate": 2.799447704521919e-05, "loss": 1.5263, "reason_loss": 0.4743536412715912, "step": 1597, "utility_loss": 1.0519541501998901 }, { "cosine_similarity": 0, "epoch": 1.48928238583411, "grad_norm": 1.1640702764744815, "learning_rate": 2.7977217811529172e-05, "loss": 1.5028, "reason_loss": 0.48055657744407654, "step": 1598, "utility_loss": 1.022228479385376 }, { "cosine_similarity": 0, "epoch": 1.4902143522833178, "grad_norm": 1.257530947146516, "learning_rate": 2.7959958577839146e-05, "loss": 1.5509, "reason_loss": 0.4819483757019043, "step": 1599, "utility_loss": 1.0689520835876465 }, { "cosine_similarity": 0, "epoch": 1.4911463187325256, "grad_norm": 0.9355711364015296, "learning_rate": 2.794269934414912e-05, "loss": 1.2753, "reason_loss": 0.4950487017631531, "step": 1600, "utility_loss": 0.7802395224571228 }, { "cosine_similarity": 0, "epoch": 1.4920782851817336, "grad_norm": 0.9659052788132703, "learning_rate": 2.7925440110459095e-05, "loss": 1.8321, "reason_loss": 0.5066348314285278, "step": 1601, "utility_loss": 1.3255062103271484 }, { "cosine_similarity": 0, "epoch": 1.4930102516309414, "grad_norm": 0.96530621577942, "learning_rate": 2.7908180876769076e-05, "loss": 1.5979, "reason_loss": 0.4823590815067291, "step": 1602, "utility_loss": 1.1155657768249512 }, { "cosine_similarity": 0, "epoch": 1.4939422180801492, "grad_norm": 0.8976974694676165, "learning_rate": 2.789092164307905e-05, "loss": 1.3966, "reason_loss": 0.47816041111946106, "step": 1603, "utility_loss": 0.9184828996658325 }, { "cosine_similarity": 0, "epoch": 1.494874184529357, "grad_norm": 1.0891376902952532, "learning_rate": 2.7873662409389024e-05, "loss": 1.4927, "reason_loss": 0.5031235814094543, "step": 1604, "utility_loss": 0.9896035194396973 }, { "cosine_similarity": 0, "epoch": 1.4958061509785647, "grad_norm": 1.0623814515102594, "learning_rate": 2.7856403175699002e-05, "loss": 1.9346, "reason_loss": 0.48752540349960327, "step": 1605, "utility_loss": 1.4470417499542236 }, { "cosine_similarity": 0, "epoch": 1.4967381174277725, "grad_norm": 1.1830067043864916, "learning_rate": 2.7839143942008976e-05, "loss": 1.8957, "reason_loss": 0.4717673361301422, "step": 1606, "utility_loss": 1.4239349365234375 }, { "cosine_similarity": 0, "epoch": 1.4976700838769803, "grad_norm": 1.0092894214182075, "learning_rate": 2.782188470831895e-05, "loss": 1.5683, "reason_loss": 0.4933917820453644, "step": 1607, "utility_loss": 1.0749232769012451 }, { "cosine_similarity": 0, "epoch": 1.4986020503261883, "grad_norm": 1.0189502559303283, "learning_rate": 2.7804625474628925e-05, "loss": 1.7034, "reason_loss": 0.5165378451347351, "step": 1608, "utility_loss": 1.186822533607483 }, { "cosine_similarity": 0, "epoch": 1.499534016775396, "grad_norm": 0.8672791741864694, "learning_rate": 2.7787366240938906e-05, "loss": 1.2592, "reason_loss": 0.4617595672607422, "step": 1609, "utility_loss": 0.7974283695220947 }, { "cosine_similarity": 0, "epoch": 1.500465983224604, "grad_norm": 1.2681595401084071, "learning_rate": 2.777010700724888e-05, "loss": 1.6831, "reason_loss": 0.49008336663246155, "step": 1610, "utility_loss": 1.1930270195007324 }, { "cosine_similarity": 0, "epoch": 1.501397949673812, "grad_norm": 1.0132648552404147, "learning_rate": 2.7752847773558855e-05, "loss": 1.6853, "reason_loss": 0.4499077796936035, "step": 1611, "utility_loss": 1.2354243993759155 }, { "cosine_similarity": 0, "epoch": 1.5023299161230197, "grad_norm": 1.087977414314134, "learning_rate": 2.773558853986883e-05, "loss": 1.5692, "reason_loss": 0.5092194080352783, "step": 1612, "utility_loss": 1.0599844455718994 }, { "cosine_similarity": 0, "epoch": 1.5032618825722275, "grad_norm": 1.0567549881255236, "learning_rate": 2.771832930617881e-05, "loss": 1.4568, "reason_loss": 0.4684787392616272, "step": 1613, "utility_loss": 0.9883129596710205 }, { "cosine_similarity": 0, "epoch": 1.5041938490214353, "grad_norm": 1.1351422320057911, "learning_rate": 2.7701070072488784e-05, "loss": 1.5549, "reason_loss": 0.49383410811424255, "step": 1614, "utility_loss": 1.0610765218734741 }, { "cosine_similarity": 0, "epoch": 1.505125815470643, "grad_norm": 1.1166618332593672, "learning_rate": 2.768381083879876e-05, "loss": 1.2703, "reason_loss": 0.47319936752319336, "step": 1615, "utility_loss": 0.7970725893974304 }, { "cosine_similarity": 0, "epoch": 1.5060577819198508, "grad_norm": 1.0852967332592927, "learning_rate": 2.7666551605108736e-05, "loss": 1.4853, "reason_loss": 0.4757280945777893, "step": 1616, "utility_loss": 1.009581446647644 }, { "cosine_similarity": 0, "epoch": 1.5069897483690586, "grad_norm": 0.8946011426967382, "learning_rate": 2.764929237141871e-05, "loss": 1.2785, "reason_loss": 0.4622705280780792, "step": 1617, "utility_loss": 0.8162084817886353 }, { "cosine_similarity": 0, "epoch": 1.5079217148182664, "grad_norm": 1.3567721584540893, "learning_rate": 2.7632033137728685e-05, "loss": 1.5221, "reason_loss": 0.49851393699645996, "step": 1618, "utility_loss": 1.0236066579818726 }, { "cosine_similarity": 0, "epoch": 1.5088536812674742, "grad_norm": 0.8953547752395027, "learning_rate": 2.761477390403866e-05, "loss": 1.5651, "reason_loss": 0.47822439670562744, "step": 1619, "utility_loss": 1.086918830871582 }, { "cosine_similarity": 0, "epoch": 1.5097856477166822, "grad_norm": 1.045584413267425, "learning_rate": 2.759751467034864e-05, "loss": 1.6138, "reason_loss": 0.48367252945899963, "step": 1620, "utility_loss": 1.1300833225250244 }, { "cosine_similarity": 0, "epoch": 1.51071761416589, "grad_norm": 0.9301852842087587, "learning_rate": 2.7580255436658614e-05, "loss": 1.6601, "reason_loss": 0.45311838388442993, "step": 1621, "utility_loss": 1.20699942111969 }, { "cosine_similarity": 0, "epoch": 1.511649580615098, "grad_norm": 0.9599637574650988, "learning_rate": 2.756299620296859e-05, "loss": 1.493, "reason_loss": 0.46628913283348083, "step": 1622, "utility_loss": 1.0267045497894287 }, { "cosine_similarity": 0, "epoch": 1.5125815470643058, "grad_norm": 1.1119645720088107, "learning_rate": 2.7545736969278563e-05, "loss": 1.5911, "reason_loss": 0.4697704315185547, "step": 1623, "utility_loss": 1.1213643550872803 }, { "cosine_similarity": 0, "epoch": 1.5135135135135136, "grad_norm": 1.0316138238452548, "learning_rate": 2.7528477735588544e-05, "loss": 1.2737, "reason_loss": 0.4834127426147461, "step": 1624, "utility_loss": 0.7903270721435547 }, { "cosine_similarity": 0, "epoch": 1.5144454799627214, "grad_norm": 0.8886876822175728, "learning_rate": 2.751121850189852e-05, "loss": 1.3834, "reason_loss": 0.4520454406738281, "step": 1625, "utility_loss": 0.9313992261886597 }, { "cosine_similarity": 0, "epoch": 1.5153774464119292, "grad_norm": 1.0667075020103467, "learning_rate": 2.7493959268208493e-05, "loss": 1.5023, "reason_loss": 0.45632514357566833, "step": 1626, "utility_loss": 1.045936107635498 }, { "cosine_similarity": 0, "epoch": 1.516309412861137, "grad_norm": 0.8818518616867067, "learning_rate": 2.747670003451847e-05, "loss": 1.192, "reason_loss": 0.4594430923461914, "step": 1627, "utility_loss": 0.7325977087020874 }, { "cosine_similarity": 0, "epoch": 1.5172413793103448, "grad_norm": 0.941598540105582, "learning_rate": 2.7459440800828445e-05, "loss": 1.2994, "reason_loss": 0.46082115173339844, "step": 1628, "utility_loss": 0.8385865688323975 }, { "cosine_similarity": 0, "epoch": 1.5181733457595525, "grad_norm": 0.9535716584804187, "learning_rate": 2.744218156713842e-05, "loss": 1.4421, "reason_loss": 0.4719833433628082, "step": 1629, "utility_loss": 0.9701279401779175 }, { "cosine_similarity": 0, "epoch": 1.5191053122087603, "grad_norm": 1.163718999957045, "learning_rate": 2.7424922333448393e-05, "loss": 1.4505, "reason_loss": 0.4917595088481903, "step": 1630, "utility_loss": 0.958726167678833 }, { "cosine_similarity": 0, "epoch": 1.5200372786579683, "grad_norm": 0.9737001703554152, "learning_rate": 2.7407663099758374e-05, "loss": 2.5813, "reason_loss": 0.4600248336791992, "step": 1631, "utility_loss": 2.1212892532348633 }, { "cosine_similarity": 0, "epoch": 1.5209692451071761, "grad_norm": 1.0404778786321556, "learning_rate": 2.739040386606835e-05, "loss": 1.5308, "reason_loss": 0.4578952193260193, "step": 1632, "utility_loss": 1.0729470252990723 }, { "cosine_similarity": 0, "epoch": 1.521901211556384, "grad_norm": 1.0731262942002098, "learning_rate": 2.7373144632378323e-05, "loss": 1.5401, "reason_loss": 0.506679892539978, "step": 1633, "utility_loss": 1.0333795547485352 }, { "cosine_similarity": 0, "epoch": 1.522833178005592, "grad_norm": 1.1826679986117923, "learning_rate": 2.7355885398688297e-05, "loss": 1.44, "reason_loss": 0.4767533242702484, "step": 1634, "utility_loss": 0.9632560610771179 }, { "cosine_similarity": 0, "epoch": 1.5237651444547997, "grad_norm": 0.8487592909737743, "learning_rate": 2.7338626164998278e-05, "loss": 1.4133, "reason_loss": 0.4437992572784424, "step": 1635, "utility_loss": 0.9695277214050293 }, { "cosine_similarity": 0, "epoch": 1.5246971109040075, "grad_norm": 0.9961284173235436, "learning_rate": 2.7321366931308253e-05, "loss": 1.3979, "reason_loss": 0.48501119017601013, "step": 1636, "utility_loss": 0.9129148721694946 }, { "cosine_similarity": 0, "epoch": 1.5256290773532153, "grad_norm": 0.9968449262124296, "learning_rate": 2.7304107697618227e-05, "loss": 1.4218, "reason_loss": 0.45383140444755554, "step": 1637, "utility_loss": 0.9679815173149109 }, { "cosine_similarity": 0, "epoch": 1.526561043802423, "grad_norm": 0.9822304415802364, "learning_rate": 2.7286848463928204e-05, "loss": 1.2398, "reason_loss": 0.46370241045951843, "step": 1638, "utility_loss": 0.7761209011077881 }, { "cosine_similarity": 0, "epoch": 1.5274930102516309, "grad_norm": 1.1789414551887922, "learning_rate": 2.726958923023818e-05, "loss": 1.6347, "reason_loss": 0.48959559202194214, "step": 1639, "utility_loss": 1.1451423168182373 }, { "cosine_similarity": 0, "epoch": 1.5284249767008387, "grad_norm": 1.0232326621901962, "learning_rate": 2.7252329996548153e-05, "loss": 1.3749, "reason_loss": 0.47864964604377747, "step": 1640, "utility_loss": 0.8962032794952393 }, { "cosine_similarity": 0, "epoch": 1.5293569431500464, "grad_norm": 1.1034650907728, "learning_rate": 2.7235070762858127e-05, "loss": 1.6473, "reason_loss": 0.49583256244659424, "step": 1641, "utility_loss": 1.1514824628829956 }, { "cosine_similarity": 0, "epoch": 1.5302889095992545, "grad_norm": 1.0128048142629316, "learning_rate": 2.721781152916811e-05, "loss": 1.5975, "reason_loss": 0.4836502969264984, "step": 1642, "utility_loss": 1.11384916305542 }, { "cosine_similarity": 0, "epoch": 1.5312208760484622, "grad_norm": 1.0659236570354769, "learning_rate": 2.7200552295478083e-05, "loss": 1.6116, "reason_loss": 0.5173554420471191, "step": 1643, "utility_loss": 1.094202995300293 }, { "cosine_similarity": 0, "epoch": 1.53215284249767, "grad_norm": 1.1155546519142474, "learning_rate": 2.7183293061788057e-05, "loss": 1.6355, "reason_loss": 0.4801981747150421, "step": 1644, "utility_loss": 1.1553466320037842 }, { "cosine_similarity": 0, "epoch": 1.533084808946878, "grad_norm": 0.9480899162481193, "learning_rate": 2.7166033828098038e-05, "loss": 1.5327, "reason_loss": 0.4809308648109436, "step": 1645, "utility_loss": 1.051724910736084 }, { "cosine_similarity": 0, "epoch": 1.5340167753960858, "grad_norm": 0.9195718092428343, "learning_rate": 2.7148774594408012e-05, "loss": 1.4534, "reason_loss": 0.5054928660392761, "step": 1646, "utility_loss": 0.9478583335876465 }, { "cosine_similarity": 0, "epoch": 1.5349487418452936, "grad_norm": 1.141188360329854, "learning_rate": 2.7131515360717987e-05, "loss": 1.6133, "reason_loss": 0.4718969166278839, "step": 1647, "utility_loss": 1.1414462327957153 }, { "cosine_similarity": 0, "epoch": 1.5358807082945014, "grad_norm": 1.067909279126327, "learning_rate": 2.711425612702796e-05, "loss": 1.7092, "reason_loss": 0.48326927423477173, "step": 1648, "utility_loss": 1.2259678840637207 }, { "cosine_similarity": 0, "epoch": 1.5368126747437092, "grad_norm": 1.005673027847296, "learning_rate": 2.709699689333794e-05, "loss": 1.5878, "reason_loss": 0.4891749322414398, "step": 1649, "utility_loss": 1.0986162424087524 }, { "cosine_similarity": 0, "epoch": 1.537744641192917, "grad_norm": 0.9101846207067075, "learning_rate": 2.7079737659647913e-05, "loss": 1.2963, "reason_loss": 0.47792530059814453, "step": 1650, "utility_loss": 0.818355917930603 }, { "cosine_similarity": 0, "epoch": 1.5386766076421248, "grad_norm": 2.3184468727683565, "learning_rate": 2.7062478425957887e-05, "loss": 1.7302, "reason_loss": 0.4966309666633606, "step": 1651, "utility_loss": 1.2336180210113525 }, { "cosine_similarity": 0, "epoch": 1.5396085740913326, "grad_norm": 1.0283629912568528, "learning_rate": 2.704521919226786e-05, "loss": 1.6621, "reason_loss": 0.49199530482292175, "step": 1652, "utility_loss": 1.1700817346572876 }, { "cosine_similarity": 0, "epoch": 1.5405405405405406, "grad_norm": 1.1399581566005512, "learning_rate": 2.7027959958577843e-05, "loss": 1.6795, "reason_loss": 0.5050119161605835, "step": 1653, "utility_loss": 1.174464225769043 }, { "cosine_similarity": 0, "epoch": 1.5414725069897484, "grad_norm": 1.1112809407696236, "learning_rate": 2.7010700724887817e-05, "loss": 1.4296, "reason_loss": 0.4649277925491333, "step": 1654, "utility_loss": 0.9647146463394165 }, { "cosine_similarity": 0, "epoch": 1.5424044734389561, "grad_norm": 8.376690681706076, "learning_rate": 2.699344149119779e-05, "loss": 2.1594, "reason_loss": 0.48434847593307495, "step": 1655, "utility_loss": 1.6750046014785767 }, { "cosine_similarity": 0, "epoch": 1.5433364398881642, "grad_norm": 1.1649003866963539, "learning_rate": 2.6976182257507772e-05, "loss": 1.4358, "reason_loss": 0.4765832722187042, "step": 1656, "utility_loss": 0.9592220187187195 }, { "cosine_similarity": 0, "epoch": 1.544268406337372, "grad_norm": 1.1574254943900393, "learning_rate": 2.6958923023817746e-05, "loss": 1.8708, "reason_loss": 0.46257370710372925, "step": 1657, "utility_loss": 1.4082221984863281 }, { "cosine_similarity": 0, "epoch": 1.5452003727865797, "grad_norm": 1.0417985786694548, "learning_rate": 2.694166379012772e-05, "loss": 1.8012, "reason_loss": 0.4854496121406555, "step": 1658, "utility_loss": 1.3157267570495605 }, { "cosine_similarity": 0, "epoch": 1.5461323392357875, "grad_norm": 1.0246262124196526, "learning_rate": 2.6924404556437695e-05, "loss": 1.4115, "reason_loss": 0.48361238837242126, "step": 1659, "utility_loss": 0.9278564453125 }, { "cosine_similarity": 0, "epoch": 1.5470643056849953, "grad_norm": 0.9536200515633925, "learning_rate": 2.6907145322747673e-05, "loss": 1.4242, "reason_loss": 0.502952516078949, "step": 1660, "utility_loss": 0.9212436676025391 }, { "cosine_similarity": 0, "epoch": 1.547996272134203, "grad_norm": 1.0713154813237773, "learning_rate": 2.6889886089057647e-05, "loss": 1.6052, "reason_loss": 0.47343575954437256, "step": 1661, "utility_loss": 1.1317355632781982 }, { "cosine_similarity": 0, "epoch": 1.5489282385834109, "grad_norm": 1.1670714684875865, "learning_rate": 2.687262685536762e-05, "loss": 1.4783, "reason_loss": 0.4674105644226074, "step": 1662, "utility_loss": 1.0108586549758911 }, { "cosine_similarity": 0, "epoch": 1.5498602050326187, "grad_norm": 1.0184464554313548, "learning_rate": 2.6855367621677596e-05, "loss": 1.5035, "reason_loss": 0.4664543867111206, "step": 1663, "utility_loss": 1.037024974822998 }, { "cosine_similarity": 0, "epoch": 1.5507921714818267, "grad_norm": 1.1635204868960656, "learning_rate": 2.6838108387987577e-05, "loss": 1.5314, "reason_loss": 0.48111066222190857, "step": 1664, "utility_loss": 1.0503323078155518 }, { "cosine_similarity": 0, "epoch": 1.5517241379310345, "grad_norm": 1.0641426985147955, "learning_rate": 2.682084915429755e-05, "loss": 1.5518, "reason_loss": 0.4598180651664734, "step": 1665, "utility_loss": 1.0919628143310547 }, { "cosine_similarity": 0, "epoch": 1.5526561043802423, "grad_norm": 1.0657812789190673, "learning_rate": 2.6803589920607525e-05, "loss": 1.4097, "reason_loss": 0.47121918201446533, "step": 1666, "utility_loss": 0.9384682178497314 }, { "cosine_similarity": 0, "epoch": 1.5535880708294503, "grad_norm": 1.0330284846909514, "learning_rate": 2.6786330686917503e-05, "loss": 1.5467, "reason_loss": 0.48722267150878906, "step": 1667, "utility_loss": 1.0594409704208374 }, { "cosine_similarity": 0, "epoch": 1.554520037278658, "grad_norm": 1.1959644032119576, "learning_rate": 2.6769071453227477e-05, "loss": 1.3548, "reason_loss": 0.4722798466682434, "step": 1668, "utility_loss": 0.8825333118438721 }, { "cosine_similarity": 0, "epoch": 1.5554520037278659, "grad_norm": 0.8723028373508239, "learning_rate": 2.675181221953745e-05, "loss": 1.3985, "reason_loss": 0.47724178433418274, "step": 1669, "utility_loss": 0.9212970733642578 }, { "cosine_similarity": 0, "epoch": 1.5563839701770736, "grad_norm": 1.0402385097580091, "learning_rate": 2.673455298584743e-05, "loss": 1.3514, "reason_loss": 0.4658361077308655, "step": 1670, "utility_loss": 0.8855337500572205 }, { "cosine_similarity": 0, "epoch": 1.5573159366262814, "grad_norm": 1.0097717455591881, "learning_rate": 2.6717293752157407e-05, "loss": 1.5291, "reason_loss": 0.44985562562942505, "step": 1671, "utility_loss": 1.0792160034179688 }, { "cosine_similarity": 0, "epoch": 1.5582479030754892, "grad_norm": 1.0848707845980348, "learning_rate": 2.670003451846738e-05, "loss": 1.5794, "reason_loss": 0.51458740234375, "step": 1672, "utility_loss": 1.0647717714309692 }, { "cosine_similarity": 0, "epoch": 1.559179869524697, "grad_norm": 0.9003078481195756, "learning_rate": 2.6682775284777355e-05, "loss": 1.227, "reason_loss": 0.5016977787017822, "step": 1673, "utility_loss": 0.7252823114395142 }, { "cosine_similarity": 0, "epoch": 1.5601118359739048, "grad_norm": 1.031276239592328, "learning_rate": 2.666551605108733e-05, "loss": 1.3373, "reason_loss": 0.46461722254753113, "step": 1674, "utility_loss": 0.8726683259010315 }, { "cosine_similarity": 0, "epoch": 1.5610438024231128, "grad_norm": 1.2699330900113082, "learning_rate": 2.664825681739731e-05, "loss": 1.5748, "reason_loss": 0.507235050201416, "step": 1675, "utility_loss": 1.0675327777862549 }, { "cosine_similarity": 0, "epoch": 1.5619757688723206, "grad_norm": 1.1341258035045345, "learning_rate": 2.6630997583707285e-05, "loss": 1.4825, "reason_loss": 0.47239741683006287, "step": 1676, "utility_loss": 1.0100562572479248 }, { "cosine_similarity": 0, "epoch": 1.5629077353215284, "grad_norm": 1.0615519498390302, "learning_rate": 2.661373835001726e-05, "loss": 1.3126, "reason_loss": 0.46284306049346924, "step": 1677, "utility_loss": 0.8497273921966553 }, { "cosine_similarity": 0, "epoch": 1.5638397017707364, "grad_norm": 0.9155967632992748, "learning_rate": 2.6596479116327237e-05, "loss": 1.3538, "reason_loss": 0.4746679663658142, "step": 1678, "utility_loss": 0.8791600465774536 }, { "cosine_similarity": 0, "epoch": 1.5647716682199442, "grad_norm": 0.9440842394035237, "learning_rate": 2.657921988263721e-05, "loss": 1.3865, "reason_loss": 0.4566097855567932, "step": 1679, "utility_loss": 0.9298726320266724 }, { "cosine_similarity": 0, "epoch": 1.565703634669152, "grad_norm": 1.032547108381051, "learning_rate": 2.6561960648947186e-05, "loss": 1.81, "reason_loss": 0.5018048286437988, "step": 1680, "utility_loss": 1.3082172870635986 }, { "cosine_similarity": 0, "epoch": 1.5666356011183598, "grad_norm": 0.8795562716955534, "learning_rate": 2.654470141525716e-05, "loss": 1.5395, "reason_loss": 0.466440349817276, "step": 1681, "utility_loss": 1.0730513334274292 }, { "cosine_similarity": 0, "epoch": 1.5675675675675675, "grad_norm": 0.9302497531395603, "learning_rate": 2.652744218156714e-05, "loss": 1.3803, "reason_loss": 0.4514610469341278, "step": 1682, "utility_loss": 0.9288642406463623 }, { "cosine_similarity": 0, "epoch": 1.5684995340167753, "grad_norm": 0.9194235885457601, "learning_rate": 2.6510182947877115e-05, "loss": 1.5839, "reason_loss": 0.4589046239852905, "step": 1683, "utility_loss": 1.1250027418136597 }, { "cosine_similarity": 0, "epoch": 1.5694315004659831, "grad_norm": 0.9401066722496376, "learning_rate": 2.649292371418709e-05, "loss": 1.779, "reason_loss": 0.48803791403770447, "step": 1684, "utility_loss": 1.2909971475601196 }, { "cosine_similarity": 0, "epoch": 1.570363466915191, "grad_norm": 1.0563644970846608, "learning_rate": 2.647566448049707e-05, "loss": 1.7862, "reason_loss": 0.48196613788604736, "step": 1685, "utility_loss": 1.3042654991149902 }, { "cosine_similarity": 0, "epoch": 1.571295433364399, "grad_norm": 0.925795979627938, "learning_rate": 2.6458405246807045e-05, "loss": 1.4802, "reason_loss": 0.47344762086868286, "step": 1686, "utility_loss": 1.0067198276519775 }, { "cosine_similarity": 0, "epoch": 1.5722273998136067, "grad_norm": 1.081352376766096, "learning_rate": 2.644114601311702e-05, "loss": 1.6733, "reason_loss": 0.4762521982192993, "step": 1687, "utility_loss": 1.1970210075378418 }, { "cosine_similarity": 0, "epoch": 1.5731593662628145, "grad_norm": 0.8977102329186301, "learning_rate": 2.6423886779426994e-05, "loss": 1.4283, "reason_loss": 0.49892672896385193, "step": 1688, "utility_loss": 0.9293641448020935 }, { "cosine_similarity": 0, "epoch": 1.5740913327120225, "grad_norm": 1.294148391183252, "learning_rate": 2.640662754573697e-05, "loss": 1.5697, "reason_loss": 0.49348920583724976, "step": 1689, "utility_loss": 1.0762052536010742 }, { "cosine_similarity": 0, "epoch": 1.5750232991612303, "grad_norm": 0.9895972852038493, "learning_rate": 2.6389368312046945e-05, "loss": 1.5324, "reason_loss": 0.48793983459472656, "step": 1690, "utility_loss": 1.0444520711898804 }, { "cosine_similarity": 0, "epoch": 1.575955265610438, "grad_norm": 1.1628443826325727, "learning_rate": 2.637210907835692e-05, "loss": 1.8175, "reason_loss": 0.5034180283546448, "step": 1691, "utility_loss": 1.3141168355941772 }, { "cosine_similarity": 0, "epoch": 1.5768872320596459, "grad_norm": 0.9112318047784316, "learning_rate": 2.6354849844666894e-05, "loss": 1.1614, "reason_loss": 0.4801315367221832, "step": 1692, "utility_loss": 0.6812818050384521 }, { "cosine_similarity": 0, "epoch": 1.5778191985088537, "grad_norm": 0.9986736624631204, "learning_rate": 2.6337590610976875e-05, "loss": 1.7514, "reason_loss": 0.5168294310569763, "step": 1693, "utility_loss": 1.2345635890960693 }, { "cosine_similarity": 0, "epoch": 1.5787511649580614, "grad_norm": 0.889546164445116, "learning_rate": 2.632033137728685e-05, "loss": 1.4838, "reason_loss": 0.4608538746833801, "step": 1694, "utility_loss": 1.0229432582855225 }, { "cosine_similarity": 0, "epoch": 1.5796831314072692, "grad_norm": 1.1781066469214103, "learning_rate": 2.6303072143596824e-05, "loss": 1.4691, "reason_loss": 0.48403775691986084, "step": 1695, "utility_loss": 0.9850664138793945 }, { "cosine_similarity": 0, "epoch": 1.580615097856477, "grad_norm": 1.1347370003248556, "learning_rate": 2.6285812909906805e-05, "loss": 1.572, "reason_loss": 0.47139090299606323, "step": 1696, "utility_loss": 1.10060453414917 }, { "cosine_similarity": 0, "epoch": 1.581547064305685, "grad_norm": 0.8705698855650378, "learning_rate": 2.626855367621678e-05, "loss": 1.3806, "reason_loss": 0.4832455515861511, "step": 1697, "utility_loss": 0.8973883986473083 }, { "cosine_similarity": 0, "epoch": 1.5824790307548928, "grad_norm": 1.0203550521060947, "learning_rate": 2.6251294442526753e-05, "loss": 1.499, "reason_loss": 0.4932406544685364, "step": 1698, "utility_loss": 1.0057158470153809 }, { "cosine_similarity": 0, "epoch": 1.5834109972041006, "grad_norm": 1.272742466054431, "learning_rate": 2.6234035208836728e-05, "loss": 1.6119, "reason_loss": 0.4518718719482422, "step": 1699, "utility_loss": 1.1600074768066406 }, { "cosine_similarity": 0, "epoch": 1.5843429636533086, "grad_norm": 1.0258718285140538, "learning_rate": 2.6216775975146705e-05, "loss": 1.6722, "reason_loss": 0.49927791953086853, "step": 1700, "utility_loss": 1.1729259490966797 }, { "cosine_similarity": 0, "epoch": 1.5852749301025164, "grad_norm": 0.8810591337452782, "learning_rate": 2.619951674145668e-05, "loss": 1.3009, "reason_loss": 0.47073477506637573, "step": 1701, "utility_loss": 0.830154299736023 }, { "cosine_similarity": 0, "epoch": 1.5862068965517242, "grad_norm": 1.0938676124189795, "learning_rate": 2.6182257507766654e-05, "loss": 1.311, "reason_loss": 0.48260819911956787, "step": 1702, "utility_loss": 0.8283889889717102 }, { "cosine_similarity": 0, "epoch": 1.587138863000932, "grad_norm": 1.105245755300287, "learning_rate": 2.6164998274076628e-05, "loss": 1.4199, "reason_loss": 0.45904412865638733, "step": 1703, "utility_loss": 0.9608981609344482 }, { "cosine_similarity": 0, "epoch": 1.5880708294501398, "grad_norm": 0.9524987838489282, "learning_rate": 2.614773904038661e-05, "loss": 1.477, "reason_loss": 0.5176829695701599, "step": 1704, "utility_loss": 0.9593157172203064 }, { "cosine_similarity": 0, "epoch": 1.5890027958993476, "grad_norm": 1.1578011389408986, "learning_rate": 2.6130479806696584e-05, "loss": 1.5185, "reason_loss": 0.47742873430252075, "step": 1705, "utility_loss": 1.04102623462677 }, { "cosine_similarity": 0, "epoch": 1.5899347623485554, "grad_norm": 1.0251523845912667, "learning_rate": 2.6113220573006558e-05, "loss": 1.2535, "reason_loss": 0.4596555233001709, "step": 1706, "utility_loss": 0.7938018441200256 }, { "cosine_similarity": 0, "epoch": 1.5908667287977631, "grad_norm": 1.0993464509190984, "learning_rate": 2.609596133931654e-05, "loss": 1.5286, "reason_loss": 0.4673903286457062, "step": 1707, "utility_loss": 1.0612220764160156 }, { "cosine_similarity": 0, "epoch": 1.5917986952469712, "grad_norm": 1.424496411125816, "learning_rate": 2.6078702105626513e-05, "loss": 1.71, "reason_loss": 0.4790750741958618, "step": 1708, "utility_loss": 1.2308881282806396 }, { "cosine_similarity": 0, "epoch": 1.592730661696179, "grad_norm": 1.201497629236595, "learning_rate": 2.6061442871936487e-05, "loss": 1.5198, "reason_loss": 0.49553802609443665, "step": 1709, "utility_loss": 1.024261474609375 }, { "cosine_similarity": 0, "epoch": 1.5936626281453867, "grad_norm": 1.1313622359572046, "learning_rate": 2.6044183638246462e-05, "loss": 1.6995, "reason_loss": 0.4927482604980469, "step": 1710, "utility_loss": 1.2067182064056396 }, { "cosine_similarity": 0, "epoch": 1.5945945945945947, "grad_norm": 0.8604762539538331, "learning_rate": 2.602692440455644e-05, "loss": 1.3258, "reason_loss": 0.4713069796562195, "step": 1711, "utility_loss": 0.8545041680335999 }, { "cosine_similarity": 0, "epoch": 1.5955265610438025, "grad_norm": 0.9148321547157531, "learning_rate": 2.6009665170866414e-05, "loss": 1.3749, "reason_loss": 0.4842953085899353, "step": 1712, "utility_loss": 0.8905584812164307 }, { "cosine_similarity": 0, "epoch": 1.5964585274930103, "grad_norm": 1.0773807207174095, "learning_rate": 2.5992405937176388e-05, "loss": 1.3543, "reason_loss": 0.4869547486305237, "step": 1713, "utility_loss": 0.8673455119132996 }, { "cosine_similarity": 0, "epoch": 1.597390493942218, "grad_norm": 1.315906272796044, "learning_rate": 2.5975146703486362e-05, "loss": 1.6881, "reason_loss": 0.4578005075454712, "step": 1714, "utility_loss": 1.230316162109375 }, { "cosine_similarity": 0, "epoch": 1.598322460391426, "grad_norm": 0.9891146176055514, "learning_rate": 2.5957887469796343e-05, "loss": 1.6113, "reason_loss": 0.46814584732055664, "step": 1715, "utility_loss": 1.1431270837783813 }, { "cosine_similarity": 0, "epoch": 1.5992544268406337, "grad_norm": 0.8841966609030039, "learning_rate": 2.5940628236106318e-05, "loss": 1.2933, "reason_loss": 0.48337429761886597, "step": 1716, "utility_loss": 0.8098857402801514 }, { "cosine_similarity": 0, "epoch": 1.6001863932898415, "grad_norm": 0.8866455301586272, "learning_rate": 2.5923369002416292e-05, "loss": 1.4572, "reason_loss": 0.4830350875854492, "step": 1717, "utility_loss": 0.9741758108139038 }, { "cosine_similarity": 0, "epoch": 1.6011183597390493, "grad_norm": 1.127157557705168, "learning_rate": 2.5906109768726273e-05, "loss": 1.6231, "reason_loss": 0.4601050019264221, "step": 1718, "utility_loss": 1.1629753112792969 }, { "cosine_similarity": 0, "epoch": 1.6020503261882573, "grad_norm": 0.980943749743424, "learning_rate": 2.5888850535036247e-05, "loss": 1.55, "reason_loss": 0.48141276836395264, "step": 1719, "utility_loss": 1.068549633026123 }, { "cosine_similarity": 0, "epoch": 1.602982292637465, "grad_norm": 1.1444910518919225, "learning_rate": 2.587159130134622e-05, "loss": 1.5049, "reason_loss": 0.5027419328689575, "step": 1720, "utility_loss": 1.0022015571594238 }, { "cosine_similarity": 0, "epoch": 1.6039142590866728, "grad_norm": 0.9851083570997872, "learning_rate": 2.5854332067656196e-05, "loss": 1.7397, "reason_loss": 0.502548098564148, "step": 1721, "utility_loss": 1.237117886543274 }, { "cosine_similarity": 0, "epoch": 1.6048462255358809, "grad_norm": 0.7860977545441792, "learning_rate": 2.5837072833966174e-05, "loss": 1.1145, "reason_loss": 0.47395873069763184, "step": 1722, "utility_loss": 0.6404927968978882 }, { "cosine_similarity": 0, "epoch": 1.6057781919850886, "grad_norm": 0.8757419487911481, "learning_rate": 2.5819813600276148e-05, "loss": 1.2373, "reason_loss": 0.48022985458374023, "step": 1723, "utility_loss": 0.7570374011993408 }, { "cosine_similarity": 0, "epoch": 1.6067101584342964, "grad_norm": 1.0375980592543466, "learning_rate": 2.5802554366586122e-05, "loss": 1.695, "reason_loss": 0.48977336287498474, "step": 1724, "utility_loss": 1.2051877975463867 }, { "cosine_similarity": 0, "epoch": 1.6076421248835042, "grad_norm": 0.9847460997828409, "learning_rate": 2.5785295132896096e-05, "loss": 1.572, "reason_loss": 0.4502442181110382, "step": 1725, "utility_loss": 1.1217496395111084 }, { "cosine_similarity": 0, "epoch": 1.608574091332712, "grad_norm": 1.0490601225437888, "learning_rate": 2.5768035899206078e-05, "loss": 1.4738, "reason_loss": 0.45591020584106445, "step": 1726, "utility_loss": 1.017938256263733 }, { "cosine_similarity": 0, "epoch": 1.6095060577819198, "grad_norm": 0.9362066834213582, "learning_rate": 2.5750776665516052e-05, "loss": 1.5204, "reason_loss": 0.4760012626647949, "step": 1727, "utility_loss": 1.0443722009658813 }, { "cosine_similarity": 0, "epoch": 1.6104380242311276, "grad_norm": 1.1842196570410035, "learning_rate": 2.5733517431826026e-05, "loss": 1.4668, "reason_loss": 0.4792974591255188, "step": 1728, "utility_loss": 0.987507700920105 }, { "cosine_similarity": 0, "epoch": 1.6113699906803354, "grad_norm": 0.965049033706151, "learning_rate": 2.5716258198136007e-05, "loss": 1.344, "reason_loss": 0.48089611530303955, "step": 1729, "utility_loss": 0.86310875415802 }, { "cosine_similarity": 0, "epoch": 1.6123019571295434, "grad_norm": 0.920528290150129, "learning_rate": 2.569899896444598e-05, "loss": 1.4851, "reason_loss": 0.46599942445755005, "step": 1730, "utility_loss": 1.0190708637237549 }, { "cosine_similarity": 0, "epoch": 1.6132339235787512, "grad_norm": 1.0090776903127456, "learning_rate": 2.5681739730755956e-05, "loss": 1.5959, "reason_loss": 0.48134198784828186, "step": 1731, "utility_loss": 1.1145641803741455 }, { "cosine_similarity": 0, "epoch": 1.614165890027959, "grad_norm": 0.9918280947201455, "learning_rate": 2.566448049706593e-05, "loss": 1.5798, "reason_loss": 0.5039030313491821, "step": 1732, "utility_loss": 1.0759193897247314 }, { "cosine_similarity": 0, "epoch": 1.615097856477167, "grad_norm": 0.9559297089713671, "learning_rate": 2.5647221263375908e-05, "loss": 1.2784, "reason_loss": 0.46871325373649597, "step": 1733, "utility_loss": 0.8096678256988525 }, { "cosine_similarity": 0, "epoch": 1.6160298229263748, "grad_norm": 0.8360954269144723, "learning_rate": 2.5629962029685882e-05, "loss": 1.1941, "reason_loss": 0.45800065994262695, "step": 1734, "utility_loss": 0.7361354827880859 }, { "cosine_similarity": 0, "epoch": 1.6169617893755825, "grad_norm": 1.074602647497031, "learning_rate": 2.5612702795995856e-05, "loss": 1.4523, "reason_loss": 0.48921751976013184, "step": 1735, "utility_loss": 0.9631015062332153 }, { "cosine_similarity": 0, "epoch": 1.6178937558247903, "grad_norm": 1.1702191985236872, "learning_rate": 2.5595443562305837e-05, "loss": 1.5516, "reason_loss": 0.4580124020576477, "step": 1736, "utility_loss": 1.0936102867126465 }, { "cosine_similarity": 0, "epoch": 1.6188257222739981, "grad_norm": 1.2011388957406939, "learning_rate": 2.557818432861581e-05, "loss": 1.4136, "reason_loss": 0.4751480221748352, "step": 1737, "utility_loss": 0.9384185671806335 }, { "cosine_similarity": 0, "epoch": 1.619757688723206, "grad_norm": 0.832032526713795, "learning_rate": 2.5560925094925786e-05, "loss": 1.3408, "reason_loss": 0.4700556993484497, "step": 1738, "utility_loss": 0.8707306385040283 }, { "cosine_similarity": 0, "epoch": 1.6206896551724137, "grad_norm": 0.9591813591214086, "learning_rate": 2.554366586123576e-05, "loss": 1.2558, "reason_loss": 0.4713069796562195, "step": 1739, "utility_loss": 0.7844727039337158 }, { "cosine_similarity": 0, "epoch": 1.6216216216216215, "grad_norm": 1.012933556586347, "learning_rate": 2.552640662754574e-05, "loss": 1.2632, "reason_loss": 0.5141809582710266, "step": 1740, "utility_loss": 0.7490639686584473 }, { "cosine_similarity": 0, "epoch": 1.6225535880708295, "grad_norm": 1.0136822154481107, "learning_rate": 2.5509147393855716e-05, "loss": 1.465, "reason_loss": 0.493753582239151, "step": 1741, "utility_loss": 0.9712886810302734 }, { "cosine_similarity": 0, "epoch": 1.6234855545200373, "grad_norm": 1.1343149986763283, "learning_rate": 2.549188816016569e-05, "loss": 1.6357, "reason_loss": 0.5005499124526978, "step": 1742, "utility_loss": 1.1351464986801147 }, { "cosine_similarity": 0, "epoch": 1.624417520969245, "grad_norm": 0.9997380380267834, "learning_rate": 2.5474628926475664e-05, "loss": 1.6161, "reason_loss": 0.4756527245044708, "step": 1743, "utility_loss": 1.1404815912246704 }, { "cosine_similarity": 0, "epoch": 1.625349487418453, "grad_norm": 1.0945142212863335, "learning_rate": 2.5457369692785642e-05, "loss": 1.5237, "reason_loss": 0.4988226592540741, "step": 1744, "utility_loss": 1.0248314142227173 }, { "cosine_similarity": 0, "epoch": 1.6262814538676609, "grad_norm": 1.0531652717381867, "learning_rate": 2.5440110459095616e-05, "loss": 1.5462, "reason_loss": 0.47010338306427, "step": 1745, "utility_loss": 1.0761339664459229 }, { "cosine_similarity": 0, "epoch": 1.6272134203168687, "grad_norm": 1.1187603358928238, "learning_rate": 2.542285122540559e-05, "loss": 1.5047, "reason_loss": 0.498816579580307, "step": 1746, "utility_loss": 1.0058975219726562 }, { "cosine_similarity": 0, "epoch": 1.6281453867660765, "grad_norm": 1.281847816652572, "learning_rate": 2.540559199171557e-05, "loss": 1.7935, "reason_loss": 0.46121811866760254, "step": 1747, "utility_loss": 1.3322482109069824 }, { "cosine_similarity": 0, "epoch": 1.6290773532152842, "grad_norm": 0.9318695563385838, "learning_rate": 2.5388332758025546e-05, "loss": 1.2531, "reason_loss": 0.49781474471092224, "step": 1748, "utility_loss": 0.7553076148033142 }, { "cosine_similarity": 0, "epoch": 1.630009319664492, "grad_norm": 1.085520179530419, "learning_rate": 2.537107352433552e-05, "loss": 1.9486, "reason_loss": 0.45819583535194397, "step": 1749, "utility_loss": 1.4903616905212402 }, { "cosine_similarity": 0, "epoch": 1.6309412861136998, "grad_norm": 1.043268575574085, "learning_rate": 2.5353814290645494e-05, "loss": 1.5209, "reason_loss": 0.4943538010120392, "step": 1750, "utility_loss": 1.0264997482299805 }, { "cosine_similarity": 0, "epoch": 1.6318732525629076, "grad_norm": 0.9367039159040834, "learning_rate": 2.5336555056955475e-05, "loss": 1.6434, "reason_loss": 0.4691004753112793, "step": 1751, "utility_loss": 1.1742819547653198 }, { "cosine_similarity": 0, "epoch": 1.6328052190121156, "grad_norm": 0.953232868858407, "learning_rate": 2.531929582326545e-05, "loss": 1.4706, "reason_loss": 0.4795098304748535, "step": 1752, "utility_loss": 0.9910963773727417 }, { "cosine_similarity": 0, "epoch": 1.6337371854613234, "grad_norm": 0.9986413050547535, "learning_rate": 2.5302036589575424e-05, "loss": 1.385, "reason_loss": 0.4854457378387451, "step": 1753, "utility_loss": 0.8995192050933838 }, { "cosine_similarity": 0, "epoch": 1.6346691519105312, "grad_norm": 0.9939659142463878, "learning_rate": 2.5284777355885398e-05, "loss": 1.4295, "reason_loss": 0.45804375410079956, "step": 1754, "utility_loss": 0.9714094996452332 }, { "cosine_similarity": 0, "epoch": 1.6356011183597392, "grad_norm": 1.0140543570591027, "learning_rate": 2.5267518122195376e-05, "loss": 1.3331, "reason_loss": 0.4805457890033722, "step": 1755, "utility_loss": 0.8525381088256836 }, { "cosine_similarity": 0, "epoch": 1.636533084808947, "grad_norm": 1.0171401686799948, "learning_rate": 2.525025888850535e-05, "loss": 1.1554, "reason_loss": 0.4883531928062439, "step": 1756, "utility_loss": 0.6670340895652771 }, { "cosine_similarity": 0, "epoch": 1.6374650512581548, "grad_norm": 1.009400963240591, "learning_rate": 2.5232999654815325e-05, "loss": 1.5079, "reason_loss": 0.4820486307144165, "step": 1757, "utility_loss": 1.0258290767669678 }, { "cosine_similarity": 0, "epoch": 1.6383970177073626, "grad_norm": 0.9377257830862824, "learning_rate": 2.5215740421125306e-05, "loss": 1.387, "reason_loss": 0.4749979078769684, "step": 1758, "utility_loss": 0.9120312929153442 }, { "cosine_similarity": 0, "epoch": 1.6393289841565704, "grad_norm": 1.1157469240524722, "learning_rate": 2.519848118743528e-05, "loss": 1.7897, "reason_loss": 0.4801546335220337, "step": 1759, "utility_loss": 1.3095076084136963 }, { "cosine_similarity": 0, "epoch": 1.6402609506057781, "grad_norm": 0.9957956589593341, "learning_rate": 2.5181221953745254e-05, "loss": 1.9764, "reason_loss": 0.4929250478744507, "step": 1760, "utility_loss": 1.4834680557250977 }, { "cosine_similarity": 0, "epoch": 1.641192917054986, "grad_norm": 1.1519792717441508, "learning_rate": 2.516396272005523e-05, "loss": 1.3374, "reason_loss": 0.4900321960449219, "step": 1761, "utility_loss": 0.847368061542511 }, { "cosine_similarity": 0, "epoch": 1.6421248835041937, "grad_norm": 1.0545984243380486, "learning_rate": 2.514670348636521e-05, "loss": 1.4877, "reason_loss": 0.45711860060691833, "step": 1762, "utility_loss": 1.030578374862671 }, { "cosine_similarity": 0, "epoch": 1.6430568499534017, "grad_norm": 0.9873611522349506, "learning_rate": 2.5129444252675184e-05, "loss": 1.7421, "reason_loss": 0.49135592579841614, "step": 1763, "utility_loss": 1.2507542371749878 }, { "cosine_similarity": 0, "epoch": 1.6439888164026095, "grad_norm": 1.0086654323594026, "learning_rate": 2.5112185018985158e-05, "loss": 1.4423, "reason_loss": 0.5011312961578369, "step": 1764, "utility_loss": 0.9412000179290771 }, { "cosine_similarity": 0, "epoch": 1.6449207828518173, "grad_norm": 0.9316522690394382, "learning_rate": 2.5094925785295132e-05, "loss": 1.435, "reason_loss": 0.47584694623947144, "step": 1765, "utility_loss": 0.9591974020004272 }, { "cosine_similarity": 0, "epoch": 1.6458527493010253, "grad_norm": 1.0646416416905073, "learning_rate": 2.507766655160511e-05, "loss": 1.6739, "reason_loss": 0.4711643159389496, "step": 1766, "utility_loss": 1.2027040719985962 }, { "cosine_similarity": 0, "epoch": 1.646784715750233, "grad_norm": 1.0784784009288122, "learning_rate": 2.5060407317915084e-05, "loss": 1.5783, "reason_loss": 0.4707118570804596, "step": 1767, "utility_loss": 1.1076369285583496 }, { "cosine_similarity": 0, "epoch": 1.647716682199441, "grad_norm": 0.8228878755696135, "learning_rate": 2.504314808422506e-05, "loss": 1.6098, "reason_loss": 0.45562943816185, "step": 1768, "utility_loss": 1.1541943550109863 }, { "cosine_similarity": 0, "epoch": 1.6486486486486487, "grad_norm": 1.1072262564297424, "learning_rate": 2.502588885053504e-05, "loss": 1.5504, "reason_loss": 0.4851125478744507, "step": 1769, "utility_loss": 1.0653187036514282 }, { "cosine_similarity": 0, "epoch": 1.6495806150978565, "grad_norm": 0.9494399751502453, "learning_rate": 2.5008629616845014e-05, "loss": 1.551, "reason_loss": 0.4933182895183563, "step": 1770, "utility_loss": 1.05771005153656 }, { "cosine_similarity": 0, "epoch": 1.6505125815470643, "grad_norm": 0.9791469678687567, "learning_rate": 2.499137038315499e-05, "loss": 1.44, "reason_loss": 0.4518941640853882, "step": 1771, "utility_loss": 0.9881384372711182 }, { "cosine_similarity": 0, "epoch": 1.651444547996272, "grad_norm": 1.0077739271393142, "learning_rate": 2.4974111149464966e-05, "loss": 1.8407, "reason_loss": 0.45791858434677124, "step": 1772, "utility_loss": 1.3827580213546753 }, { "cosine_similarity": 0, "epoch": 1.6523765144454798, "grad_norm": 0.8835128322559487, "learning_rate": 2.495685191577494e-05, "loss": 1.2541, "reason_loss": 0.4998776316642761, "step": 1773, "utility_loss": 0.754264235496521 }, { "cosine_similarity": 0, "epoch": 1.6533084808946876, "grad_norm": 0.9504104974261804, "learning_rate": 2.4939592682084918e-05, "loss": 1.3685, "reason_loss": 0.45969611406326294, "step": 1774, "utility_loss": 0.9087721109390259 }, { "cosine_similarity": 0, "epoch": 1.6542404473438956, "grad_norm": 1.1003327635513145, "learning_rate": 2.4922333448394892e-05, "loss": 1.6434, "reason_loss": 0.4514893889427185, "step": 1775, "utility_loss": 1.1919257640838623 }, { "cosine_similarity": 0, "epoch": 1.6551724137931034, "grad_norm": 1.0687765270241818, "learning_rate": 2.4905074214704867e-05, "loss": 1.2947, "reason_loss": 0.4920209050178528, "step": 1776, "utility_loss": 0.8026492595672607 }, { "cosine_similarity": 0, "epoch": 1.6561043802423114, "grad_norm": 2.11587490845353, "learning_rate": 2.4887814981014844e-05, "loss": 1.3903, "reason_loss": 0.4846780300140381, "step": 1777, "utility_loss": 0.905602753162384 }, { "cosine_similarity": 0, "epoch": 1.6570363466915192, "grad_norm": 1.1554752556770105, "learning_rate": 2.487055574732482e-05, "loss": 1.5431, "reason_loss": 0.48038792610168457, "step": 1778, "utility_loss": 1.0626935958862305 }, { "cosine_similarity": 0, "epoch": 1.657968313140727, "grad_norm": 1.0998599916863676, "learning_rate": 2.4853296513634796e-05, "loss": 1.4954, "reason_loss": 0.5125204920768738, "step": 1779, "utility_loss": 0.9828634858131409 }, { "cosine_similarity": 0, "epoch": 1.6589002795899348, "grad_norm": 0.9573826932958314, "learning_rate": 2.483603727994477e-05, "loss": 1.432, "reason_loss": 0.45674416422843933, "step": 1780, "utility_loss": 0.9752968549728394 }, { "cosine_similarity": 0, "epoch": 1.6598322460391426, "grad_norm": 0.8510852783435917, "learning_rate": 2.4818778046254748e-05, "loss": 1.4103, "reason_loss": 0.4874626100063324, "step": 1781, "utility_loss": 0.9228330850601196 }, { "cosine_similarity": 0, "epoch": 1.6607642124883504, "grad_norm": 0.9566088535292069, "learning_rate": 2.4801518812564722e-05, "loss": 1.5364, "reason_loss": 0.4945247769355774, "step": 1782, "utility_loss": 1.0418686866760254 }, { "cosine_similarity": 0, "epoch": 1.6616961789375582, "grad_norm": 1.13642392721801, "learning_rate": 2.47842595788747e-05, "loss": 1.5293, "reason_loss": 0.4679253101348877, "step": 1783, "utility_loss": 1.0613902807235718 }, { "cosine_similarity": 0, "epoch": 1.662628145386766, "grad_norm": 1.0240585976060381, "learning_rate": 2.4767000345184674e-05, "loss": 1.3812, "reason_loss": 0.4644671380519867, "step": 1784, "utility_loss": 0.916743814945221 }, { "cosine_similarity": 0, "epoch": 1.6635601118359737, "grad_norm": 1.017651444884941, "learning_rate": 2.4749741111494652e-05, "loss": 1.861, "reason_loss": 0.48245495557785034, "step": 1785, "utility_loss": 1.3785817623138428 }, { "cosine_similarity": 0, "epoch": 1.6644920782851818, "grad_norm": 1.0768677596056047, "learning_rate": 2.4732481877804626e-05, "loss": 1.9368, "reason_loss": 0.4799916446208954, "step": 1786, "utility_loss": 1.4567630290985107 }, { "cosine_similarity": 0, "epoch": 1.6654240447343895, "grad_norm": 1.1193754260910804, "learning_rate": 2.47152226441146e-05, "loss": 1.4076, "reason_loss": 0.46356943249702454, "step": 1787, "utility_loss": 0.944050669670105 }, { "cosine_similarity": 0, "epoch": 1.6663560111835976, "grad_norm": 0.906235289695378, "learning_rate": 2.469796341042458e-05, "loss": 1.5661, "reason_loss": 0.4853472411632538, "step": 1788, "utility_loss": 1.0807108879089355 }, { "cosine_similarity": 0, "epoch": 1.6672879776328053, "grad_norm": 0.9702466920077248, "learning_rate": 2.4680704176734553e-05, "loss": 1.6883, "reason_loss": 0.49031418561935425, "step": 1789, "utility_loss": 1.1979868412017822 }, { "cosine_similarity": 0, "epoch": 1.6682199440820131, "grad_norm": 1.271142707384949, "learning_rate": 2.466344494304453e-05, "loss": 1.5289, "reason_loss": 0.479643315076828, "step": 1790, "utility_loss": 1.0492403507232666 }, { "cosine_similarity": 0, "epoch": 1.669151910531221, "grad_norm": 0.9430384342179731, "learning_rate": 2.4646185709354505e-05, "loss": 1.5099, "reason_loss": 0.4671979546546936, "step": 1791, "utility_loss": 1.0427029132843018 }, { "cosine_similarity": 0, "epoch": 1.6700838769804287, "grad_norm": 1.067294722774047, "learning_rate": 2.4628926475664482e-05, "loss": 1.4628, "reason_loss": 0.47284290194511414, "step": 1792, "utility_loss": 0.9899128675460815 }, { "cosine_similarity": 0, "epoch": 1.6710158434296365, "grad_norm": 1.1754982442338793, "learning_rate": 2.4611667241974457e-05, "loss": 1.8496, "reason_loss": 0.4898735582828522, "step": 1793, "utility_loss": 1.3596837520599365 }, { "cosine_similarity": 0, "epoch": 1.6719478098788443, "grad_norm": 1.0927423877820874, "learning_rate": 2.4594408008284434e-05, "loss": 1.5562, "reason_loss": 0.4450514316558838, "step": 1794, "utility_loss": 1.1111558675765991 }, { "cosine_similarity": 0, "epoch": 1.672879776328052, "grad_norm": 1.1092776302829217, "learning_rate": 2.457714877459441e-05, "loss": 1.6378, "reason_loss": 0.46748220920562744, "step": 1795, "utility_loss": 1.1702940464019775 }, { "cosine_similarity": 0, "epoch": 1.6738117427772599, "grad_norm": 1.0646585011416498, "learning_rate": 2.4559889540904386e-05, "loss": 1.6285, "reason_loss": 0.4879249334335327, "step": 1796, "utility_loss": 1.1405681371688843 }, { "cosine_similarity": 0, "epoch": 1.6747437092264679, "grad_norm": 1.0235754889924926, "learning_rate": 2.454263030721436e-05, "loss": 1.336, "reason_loss": 0.44443070888519287, "step": 1797, "utility_loss": 0.8915248513221741 }, { "cosine_similarity": 0, "epoch": 1.6756756756756757, "grad_norm": 1.0715028011096712, "learning_rate": 2.4525371073524335e-05, "loss": 1.6436, "reason_loss": 0.5084030628204346, "step": 1798, "utility_loss": 1.1351902484893799 }, { "cosine_similarity": 0, "epoch": 1.6766076421248837, "grad_norm": 0.9820602590593221, "learning_rate": 2.4508111839834313e-05, "loss": 1.3616, "reason_loss": 0.4272891879081726, "step": 1799, "utility_loss": 0.9343353509902954 }, { "cosine_similarity": 0, "epoch": 1.6775396085740915, "grad_norm": 1.2406225620328384, "learning_rate": 2.4490852606144287e-05, "loss": 1.6467, "reason_loss": 0.4824322462081909, "step": 1800, "utility_loss": 1.1642907857894897 }, { "cosine_similarity": 0, "epoch": 1.6784715750232992, "grad_norm": 1.2321239835150424, "learning_rate": 2.4473593372454264e-05, "loss": 1.6956, "reason_loss": 0.4869992733001709, "step": 1801, "utility_loss": 1.2085756063461304 }, { "cosine_similarity": 0, "epoch": 1.679403541472507, "grad_norm": 0.9572465527364669, "learning_rate": 2.445633413876424e-05, "loss": 1.4086, "reason_loss": 0.46623969078063965, "step": 1802, "utility_loss": 0.9423166513442993 }, { "cosine_similarity": 0, "epoch": 1.6803355079217148, "grad_norm": 0.9365166885465036, "learning_rate": 2.4439074905074216e-05, "loss": 1.4803, "reason_loss": 0.5094084739685059, "step": 1803, "utility_loss": 0.9708753824234009 }, { "cosine_similarity": 0, "epoch": 1.6812674743709226, "grad_norm": 0.9455181776468676, "learning_rate": 2.442181567138419e-05, "loss": 1.5545, "reason_loss": 0.44746512174606323, "step": 1804, "utility_loss": 1.1070762872695923 }, { "cosine_similarity": 0, "epoch": 1.6821994408201304, "grad_norm": 1.025811866523903, "learning_rate": 2.440455643769417e-05, "loss": 1.5361, "reason_loss": 0.4492769241333008, "step": 1805, "utility_loss": 1.0867972373962402 }, { "cosine_similarity": 0, "epoch": 1.6831314072693382, "grad_norm": 1.0507445499941463, "learning_rate": 2.4387297204004146e-05, "loss": 1.3901, "reason_loss": 0.45592033863067627, "step": 1806, "utility_loss": 0.9342037439346313 }, { "cosine_similarity": 0, "epoch": 1.684063373718546, "grad_norm": 0.9944456686563173, "learning_rate": 2.437003797031412e-05, "loss": 1.4647, "reason_loss": 0.47492778301239014, "step": 1807, "utility_loss": 0.9897658824920654 }, { "cosine_similarity": 0, "epoch": 1.684995340167754, "grad_norm": 1.0423567085571817, "learning_rate": 2.4352778736624095e-05, "loss": 1.8702, "reason_loss": 0.5052977800369263, "step": 1808, "utility_loss": 1.3649274110794067 }, { "cosine_similarity": 0, "epoch": 1.6859273066169618, "grad_norm": 0.9284659063128895, "learning_rate": 2.433551950293407e-05, "loss": 1.3889, "reason_loss": 0.46652844548225403, "step": 1809, "utility_loss": 0.9224117994308472 }, { "cosine_similarity": 0, "epoch": 1.6868592730661698, "grad_norm": 1.0680873711678807, "learning_rate": 2.4318260269244047e-05, "loss": 1.4756, "reason_loss": 0.47023314237594604, "step": 1810, "utility_loss": 1.0054049491882324 }, { "cosine_similarity": 0, "epoch": 1.6877912395153776, "grad_norm": 1.164409167637505, "learning_rate": 2.430100103555402e-05, "loss": 1.71, "reason_loss": 0.49182963371276855, "step": 1811, "utility_loss": 1.2181904315948486 }, { "cosine_similarity": 0, "epoch": 1.6887232059645854, "grad_norm": 1.0576111640796164, "learning_rate": 2.4283741801864e-05, "loss": 1.2692, "reason_loss": 0.4368143677711487, "step": 1812, "utility_loss": 0.832383930683136 }, { "cosine_similarity": 0, "epoch": 1.6896551724137931, "grad_norm": 1.278953019763507, "learning_rate": 2.4266482568173973e-05, "loss": 1.4207, "reason_loss": 0.4651089012622833, "step": 1813, "utility_loss": 0.9555532932281494 }, { "cosine_similarity": 0, "epoch": 1.690587138863001, "grad_norm": 1.0799647642721957, "learning_rate": 2.424922333448395e-05, "loss": 1.2776, "reason_loss": 0.459553062915802, "step": 1814, "utility_loss": 0.8180086612701416 }, { "cosine_similarity": 0, "epoch": 1.6915191053122087, "grad_norm": 1.3623765323867714, "learning_rate": 2.4231964100793925e-05, "loss": 1.9322, "reason_loss": 0.4603462219238281, "step": 1815, "utility_loss": 1.4718334674835205 }, { "cosine_similarity": 0, "epoch": 1.6924510717614165, "grad_norm": 0.9406409059226427, "learning_rate": 2.4214704867103903e-05, "loss": 1.6092, "reason_loss": 0.47596243023872375, "step": 1816, "utility_loss": 1.1332266330718994 }, { "cosine_similarity": 0, "epoch": 1.6933830382106243, "grad_norm": 1.094541432535728, "learning_rate": 2.419744563341388e-05, "loss": 1.469, "reason_loss": 0.4797627329826355, "step": 1817, "utility_loss": 0.9892688393592834 }, { "cosine_similarity": 0, "epoch": 1.694315004659832, "grad_norm": 0.7591503671345109, "learning_rate": 2.4180186399723855e-05, "loss": 1.1625, "reason_loss": 0.48982173204421997, "step": 1818, "utility_loss": 0.6726768016815186 }, { "cosine_similarity": 0, "epoch": 1.69524697110904, "grad_norm": 1.0914432272021193, "learning_rate": 2.416292716603383e-05, "loss": 1.4923, "reason_loss": 0.45384204387664795, "step": 1819, "utility_loss": 1.0384591817855835 }, { "cosine_similarity": 0, "epoch": 1.696178937558248, "grad_norm": 0.8822949825210544, "learning_rate": 2.4145667932343803e-05, "loss": 1.2206, "reason_loss": 0.4505169689655304, "step": 1820, "utility_loss": 0.7700467109680176 }, { "cosine_similarity": 0, "epoch": 1.6971109040074557, "grad_norm": 0.9153514157054268, "learning_rate": 2.412840869865378e-05, "loss": 1.4873, "reason_loss": 0.4770694375038147, "step": 1821, "utility_loss": 1.0102603435516357 }, { "cosine_similarity": 0, "epoch": 1.6980428704566637, "grad_norm": 1.1863234805660638, "learning_rate": 2.4111149464963755e-05, "loss": 1.3494, "reason_loss": 0.46835023164749146, "step": 1822, "utility_loss": 0.8810955882072449 }, { "cosine_similarity": 0, "epoch": 1.6989748369058715, "grad_norm": 1.0731511634757893, "learning_rate": 2.4093890231273733e-05, "loss": 1.4523, "reason_loss": 0.5099161863327026, "step": 1823, "utility_loss": 0.942397952079773 }, { "cosine_similarity": 0, "epoch": 1.6999068033550793, "grad_norm": 1.1242059535991524, "learning_rate": 2.4076630997583707e-05, "loss": 1.6443, "reason_loss": 0.4819450378417969, "step": 1824, "utility_loss": 1.1623790264129639 }, { "cosine_similarity": 0, "epoch": 1.700838769804287, "grad_norm": 1.0022595755523929, "learning_rate": 2.4059371763893685e-05, "loss": 1.5675, "reason_loss": 0.4815613627433777, "step": 1825, "utility_loss": 1.085906744003296 }, { "cosine_similarity": 0, "epoch": 1.7017707362534948, "grad_norm": 1.1327759462277749, "learning_rate": 2.404211253020366e-05, "loss": 1.8244, "reason_loss": 0.4697967767715454, "step": 1826, "utility_loss": 1.3545602560043335 }, { "cosine_similarity": 0, "epoch": 1.7027027027027026, "grad_norm": 0.9462613864556663, "learning_rate": 2.4024853296513637e-05, "loss": 1.524, "reason_loss": 0.5166134238243103, "step": 1827, "utility_loss": 1.0073747634887695 }, { "cosine_similarity": 0, "epoch": 1.7036346691519104, "grad_norm": 1.1532417787763802, "learning_rate": 2.4007594062823614e-05, "loss": 1.6952, "reason_loss": 0.4575952887535095, "step": 1828, "utility_loss": 1.2375855445861816 }, { "cosine_similarity": 0, "epoch": 1.7045666356011182, "grad_norm": 1.2594581622641674, "learning_rate": 2.399033482913359e-05, "loss": 1.817, "reason_loss": 0.480102002620697, "step": 1829, "utility_loss": 1.3369038105010986 }, { "cosine_similarity": 0, "epoch": 1.7054986020503262, "grad_norm": 1.0942197642491076, "learning_rate": 2.3973075595443563e-05, "loss": 1.6879, "reason_loss": 0.47754642367362976, "step": 1830, "utility_loss": 1.2103426456451416 }, { "cosine_similarity": 0, "epoch": 1.706430568499534, "grad_norm": 0.9440172286895331, "learning_rate": 2.3955816361753537e-05, "loss": 1.6314, "reason_loss": 0.46454471349716187, "step": 1831, "utility_loss": 1.166845440864563 }, { "cosine_similarity": 0, "epoch": 1.7073625349487418, "grad_norm": 0.9441405287289073, "learning_rate": 2.3938557128063515e-05, "loss": 1.4895, "reason_loss": 0.4825257658958435, "step": 1832, "utility_loss": 1.0069591999053955 }, { "cosine_similarity": 0, "epoch": 1.7082945013979498, "grad_norm": 1.0240314733478957, "learning_rate": 2.392129789437349e-05, "loss": 1.3782, "reason_loss": 0.49530282616615295, "step": 1833, "utility_loss": 0.8828742504119873 }, { "cosine_similarity": 0, "epoch": 1.7092264678471576, "grad_norm": 0.9361649437082865, "learning_rate": 2.3904038660683467e-05, "loss": 1.4313, "reason_loss": 0.48233526945114136, "step": 1834, "utility_loss": 0.9489262700080872 }, { "cosine_similarity": 0, "epoch": 1.7101584342963654, "grad_norm": 0.8697578014827213, "learning_rate": 2.388677942699344e-05, "loss": 1.1616, "reason_loss": 0.4886647164821625, "step": 1835, "utility_loss": 0.6729119420051575 }, { "cosine_similarity": 0, "epoch": 1.7110904007455732, "grad_norm": 1.1167567087185588, "learning_rate": 2.386952019330342e-05, "loss": 1.8127, "reason_loss": 0.4776703119277954, "step": 1836, "utility_loss": 1.335026741027832 }, { "cosine_similarity": 0, "epoch": 1.712022367194781, "grad_norm": 1.1697102998145419, "learning_rate": 2.3852260959613397e-05, "loss": 1.4463, "reason_loss": 0.5002991557121277, "step": 1837, "utility_loss": 0.9460271596908569 }, { "cosine_similarity": 0, "epoch": 1.7129543336439887, "grad_norm": 0.9790121688271803, "learning_rate": 2.383500172592337e-05, "loss": 1.4179, "reason_loss": 0.43223443627357483, "step": 1838, "utility_loss": 0.9856729507446289 }, { "cosine_similarity": 0, "epoch": 1.7138863000931965, "grad_norm": 1.131148435574633, "learning_rate": 2.381774249223335e-05, "loss": 1.604, "reason_loss": 0.4701102375984192, "step": 1839, "utility_loss": 1.1339119672775269 }, { "cosine_similarity": 0, "epoch": 1.7148182665424043, "grad_norm": 1.1259667199890304, "learning_rate": 2.3800483258543323e-05, "loss": 1.3032, "reason_loss": 0.4903017580509186, "step": 1840, "utility_loss": 0.8128587007522583 }, { "cosine_similarity": 0, "epoch": 1.7157502329916123, "grad_norm": 1.1726615039290194, "learning_rate": 2.3783224024853297e-05, "loss": 1.6974, "reason_loss": 0.486130952835083, "step": 1841, "utility_loss": 1.2112855911254883 }, { "cosine_similarity": 0, "epoch": 1.7166821994408201, "grad_norm": 1.075775674066208, "learning_rate": 2.376596479116327e-05, "loss": 1.5715, "reason_loss": 0.47593027353286743, "step": 1842, "utility_loss": 1.095572829246521 }, { "cosine_similarity": 0, "epoch": 1.717614165890028, "grad_norm": 1.0705840265629205, "learning_rate": 2.374870555747325e-05, "loss": 1.5708, "reason_loss": 0.4828597903251648, "step": 1843, "utility_loss": 1.0879197120666504 }, { "cosine_similarity": 0, "epoch": 1.718546132339236, "grad_norm": 0.8067474066586897, "learning_rate": 2.3731446323783223e-05, "loss": 1.3745, "reason_loss": 0.47213447093963623, "step": 1844, "utility_loss": 0.9023959040641785 }, { "cosine_similarity": 0, "epoch": 1.7194780987884437, "grad_norm": 1.1825344470970194, "learning_rate": 2.37141870900932e-05, "loss": 1.6494, "reason_loss": 0.4442973732948303, "step": 1845, "utility_loss": 1.2050899267196655 }, { "cosine_similarity": 0, "epoch": 1.7204100652376515, "grad_norm": 1.0354541326273152, "learning_rate": 2.3696927856403175e-05, "loss": 1.5548, "reason_loss": 0.46670278906822205, "step": 1846, "utility_loss": 1.0880975723266602 }, { "cosine_similarity": 0, "epoch": 1.7213420316868593, "grad_norm": 0.8691994303879093, "learning_rate": 2.3679668622713153e-05, "loss": 1.3632, "reason_loss": 0.4677616357803345, "step": 1847, "utility_loss": 0.8954793810844421 }, { "cosine_similarity": 0, "epoch": 1.722273998136067, "grad_norm": 1.0386749882034552, "learning_rate": 2.366240938902313e-05, "loss": 1.4652, "reason_loss": 0.4166870713233948, "step": 1848, "utility_loss": 1.0484949350357056 }, { "cosine_similarity": 0, "epoch": 1.7232059645852749, "grad_norm": 0.923128559723304, "learning_rate": 2.3645150155333105e-05, "loss": 1.2262, "reason_loss": 0.47106313705444336, "step": 1849, "utility_loss": 0.7551586627960205 }, { "cosine_similarity": 0, "epoch": 1.7241379310344827, "grad_norm": 1.0600525579362312, "learning_rate": 2.3627890921643083e-05, "loss": 1.3469, "reason_loss": 0.471488893032074, "step": 1850, "utility_loss": 0.8754209876060486 }, { "cosine_similarity": 0, "epoch": 1.7250698974836904, "grad_norm": 1.071429251268759, "learning_rate": 2.3610631687953057e-05, "loss": 1.6467, "reason_loss": 0.45818427205085754, "step": 1851, "utility_loss": 1.1884994506835938 }, { "cosine_similarity": 0, "epoch": 1.7260018639328985, "grad_norm": 0.9996626315773013, "learning_rate": 2.359337245426303e-05, "loss": 1.4879, "reason_loss": 0.4882517158985138, "step": 1852, "utility_loss": 0.9996253252029419 }, { "cosine_similarity": 0, "epoch": 1.7269338303821062, "grad_norm": 1.0870085877938238, "learning_rate": 2.3576113220573005e-05, "loss": 1.445, "reason_loss": 0.46409881114959717, "step": 1853, "utility_loss": 0.9809181690216064 }, { "cosine_similarity": 0, "epoch": 1.727865796831314, "grad_norm": 0.9086524930593293, "learning_rate": 2.3558853986882983e-05, "loss": 1.3411, "reason_loss": 0.4567336440086365, "step": 1854, "utility_loss": 0.884374737739563 }, { "cosine_similarity": 0, "epoch": 1.728797763280522, "grad_norm": 1.1646945713116994, "learning_rate": 2.3541594753192957e-05, "loss": 1.3121, "reason_loss": 0.47154179215431213, "step": 1855, "utility_loss": 0.8405923247337341 }, { "cosine_similarity": 0, "epoch": 1.7297297297297298, "grad_norm": 0.9555470795747367, "learning_rate": 2.3524335519502935e-05, "loss": 1.281, "reason_loss": 0.4800471067428589, "step": 1856, "utility_loss": 0.8009591102600098 }, { "cosine_similarity": 0, "epoch": 1.7306616961789376, "grad_norm": 0.9961863794867445, "learning_rate": 2.3507076285812913e-05, "loss": 1.4335, "reason_loss": 0.5018437504768372, "step": 1857, "utility_loss": 0.9316084384918213 }, { "cosine_similarity": 0, "epoch": 1.7315936626281454, "grad_norm": 1.0025270234804, "learning_rate": 2.3489817052122887e-05, "loss": 1.6156, "reason_loss": 0.45995235443115234, "step": 1858, "utility_loss": 1.1556382179260254 }, { "cosine_similarity": 0, "epoch": 1.7325256290773532, "grad_norm": 0.9909793898777077, "learning_rate": 2.3472557818432865e-05, "loss": 1.5407, "reason_loss": 0.47003912925720215, "step": 1859, "utility_loss": 1.0706756114959717 }, { "cosine_similarity": 0, "epoch": 1.733457595526561, "grad_norm": 1.067607084903009, "learning_rate": 2.345529858474284e-05, "loss": 1.7175, "reason_loss": 0.4533865451812744, "step": 1860, "utility_loss": 1.2641311883926392 }, { "cosine_similarity": 0, "epoch": 1.7343895619757688, "grad_norm": 1.0216776374689718, "learning_rate": 2.3438039351052817e-05, "loss": 1.5236, "reason_loss": 0.46248412132263184, "step": 1861, "utility_loss": 1.0611069202423096 }, { "cosine_similarity": 0, "epoch": 1.7353215284249766, "grad_norm": 0.9394426322647192, "learning_rate": 2.342078011736279e-05, "loss": 1.3631, "reason_loss": 0.4568895101547241, "step": 1862, "utility_loss": 0.906225860118866 }, { "cosine_similarity": 0, "epoch": 1.7362534948741846, "grad_norm": 0.9639445185229555, "learning_rate": 2.3403520883672765e-05, "loss": 1.2668, "reason_loss": 0.4868074059486389, "step": 1863, "utility_loss": 0.7799468636512756 }, { "cosine_similarity": 0, "epoch": 1.7371854613233924, "grad_norm": 1.1194620518856055, "learning_rate": 2.338626164998274e-05, "loss": 1.5124, "reason_loss": 0.4906284213066101, "step": 1864, "utility_loss": 1.0217424631118774 }, { "cosine_similarity": 0, "epoch": 1.7381174277726001, "grad_norm": 0.9839884300706933, "learning_rate": 2.3369002416292717e-05, "loss": 1.2906, "reason_loss": 0.4800538122653961, "step": 1865, "utility_loss": 0.8105908036231995 }, { "cosine_similarity": 0, "epoch": 1.7390493942218082, "grad_norm": 1.1962565625613935, "learning_rate": 2.335174318260269e-05, "loss": 1.698, "reason_loss": 0.46595966815948486, "step": 1866, "utility_loss": 1.2320077419281006 }, { "cosine_similarity": 0, "epoch": 1.739981360671016, "grad_norm": 0.9640115851178732, "learning_rate": 2.333448394891267e-05, "loss": 1.6934, "reason_loss": 0.4885638952255249, "step": 1867, "utility_loss": 1.2048020362854004 }, { "cosine_similarity": 0, "epoch": 1.7409133271202237, "grad_norm": 1.1353906313198778, "learning_rate": 2.3317224715222647e-05, "loss": 1.4484, "reason_loss": 0.46338894963264465, "step": 1868, "utility_loss": 0.9850326180458069 }, { "cosine_similarity": 0, "epoch": 1.7418452935694315, "grad_norm": 0.9000536673452466, "learning_rate": 2.329996548153262e-05, "loss": 1.418, "reason_loss": 0.4686642289161682, "step": 1869, "utility_loss": 0.9493311643600464 }, { "cosine_similarity": 0, "epoch": 1.7427772600186393, "grad_norm": 1.3049891913878242, "learning_rate": 2.32827062478426e-05, "loss": 1.7275, "reason_loss": 0.4691733121871948, "step": 1870, "utility_loss": 1.2582979202270508 }, { "cosine_similarity": 0, "epoch": 1.743709226467847, "grad_norm": 1.0678843720050548, "learning_rate": 2.3265447014152573e-05, "loss": 1.7076, "reason_loss": 0.4875583052635193, "step": 1871, "utility_loss": 1.220058560371399 }, { "cosine_similarity": 0, "epoch": 1.7446411929170549, "grad_norm": 0.9993002293371852, "learning_rate": 2.324818778046255e-05, "loss": 1.4521, "reason_loss": 0.4912923276424408, "step": 1872, "utility_loss": 0.9607943296432495 }, { "cosine_similarity": 0, "epoch": 1.7455731593662627, "grad_norm": 1.1266853670330637, "learning_rate": 2.3230928546772525e-05, "loss": 1.8686, "reason_loss": 0.4894280433654785, "step": 1873, "utility_loss": 1.3792164325714111 }, { "cosine_similarity": 0, "epoch": 1.7465051258154707, "grad_norm": 1.303658079778485, "learning_rate": 2.32136693130825e-05, "loss": 1.4524, "reason_loss": 0.4605591595172882, "step": 1874, "utility_loss": 0.9918769598007202 }, { "cosine_similarity": 0, "epoch": 1.7474370922646785, "grad_norm": 1.164184178280448, "learning_rate": 2.3196410079392474e-05, "loss": 2.2868, "reason_loss": 0.4725685715675354, "step": 1875, "utility_loss": 1.814259648323059 }, { "cosine_similarity": 0, "epoch": 1.7483690587138863, "grad_norm": 1.0244990827945908, "learning_rate": 2.317915084570245e-05, "loss": 1.5116, "reason_loss": 0.4936360716819763, "step": 1876, "utility_loss": 1.0179870128631592 }, { "cosine_similarity": 0, "epoch": 1.7493010251630943, "grad_norm": 1.067250880747298, "learning_rate": 2.316189161201243e-05, "loss": 1.3652, "reason_loss": 0.4568139314651489, "step": 1877, "utility_loss": 0.9083414077758789 }, { "cosine_similarity": 0, "epoch": 1.750232991612302, "grad_norm": 1.147589284690212, "learning_rate": 2.3144632378322403e-05, "loss": 1.7046, "reason_loss": 0.47341853380203247, "step": 1878, "utility_loss": 1.2312228679656982 }, { "cosine_similarity": 0, "epoch": 1.7511649580615098, "grad_norm": 1.060361252747089, "learning_rate": 2.312737314463238e-05, "loss": 1.3839, "reason_loss": 0.489082008600235, "step": 1879, "utility_loss": 0.8948060274124146 }, { "cosine_similarity": 0, "epoch": 1.7520969245107176, "grad_norm": 1.0033245324498012, "learning_rate": 2.3110113910942355e-05, "loss": 1.1802, "reason_loss": 0.4810073971748352, "step": 1880, "utility_loss": 0.6991961002349854 }, { "cosine_similarity": 0, "epoch": 1.7530288909599254, "grad_norm": 0.9275288032941141, "learning_rate": 2.3092854677252333e-05, "loss": 1.5348, "reason_loss": 0.4881260395050049, "step": 1881, "utility_loss": 1.0467194318771362 }, { "cosine_similarity": 0, "epoch": 1.7539608574091332, "grad_norm": 0.9759883106938928, "learning_rate": 2.3075595443562307e-05, "loss": 1.6145, "reason_loss": 0.4736333191394806, "step": 1882, "utility_loss": 1.1409066915512085 }, { "cosine_similarity": 0, "epoch": 1.754892823858341, "grad_norm": 1.0171120892994823, "learning_rate": 2.305833620987228e-05, "loss": 1.2792, "reason_loss": 0.4817647635936737, "step": 1883, "utility_loss": 0.7974346876144409 }, { "cosine_similarity": 0, "epoch": 1.7558247903075488, "grad_norm": 0.952715280261958, "learning_rate": 2.3041076976182256e-05, "loss": 1.6189, "reason_loss": 0.46957579255104065, "step": 1884, "utility_loss": 1.149305820465088 }, { "cosine_similarity": 0, "epoch": 1.7567567567567568, "grad_norm": 2.1300445984350866, "learning_rate": 2.3023817742492234e-05, "loss": 1.3663, "reason_loss": 0.4762900471687317, "step": 1885, "utility_loss": 0.8900524973869324 }, { "cosine_similarity": 0, "epoch": 1.7576887232059646, "grad_norm": 1.01734266592006, "learning_rate": 2.3006558508802208e-05, "loss": 1.7061, "reason_loss": 0.48703235387802124, "step": 1886, "utility_loss": 1.21910560131073 }, { "cosine_similarity": 0, "epoch": 1.7586206896551724, "grad_norm": 0.9079219438623629, "learning_rate": 2.2989299275112186e-05, "loss": 1.687, "reason_loss": 0.4820944368839264, "step": 1887, "utility_loss": 1.2049182653427124 }, { "cosine_similarity": 0, "epoch": 1.7595526561043804, "grad_norm": 1.2161578103661042, "learning_rate": 2.2972040041422163e-05, "loss": 1.8161, "reason_loss": 0.4518962502479553, "step": 1888, "utility_loss": 1.3641657829284668 }, { "cosine_similarity": 0, "epoch": 1.7604846225535882, "grad_norm": 1.1240121363671933, "learning_rate": 2.2954780807732138e-05, "loss": 1.579, "reason_loss": 0.4601326882839203, "step": 1889, "utility_loss": 1.1188420057296753 }, { "cosine_similarity": 0, "epoch": 1.761416589002796, "grad_norm": 1.1340494518485449, "learning_rate": 2.2937521574042115e-05, "loss": 1.4209, "reason_loss": 0.48229140043258667, "step": 1890, "utility_loss": 0.938647449016571 }, { "cosine_similarity": 0, "epoch": 1.7623485554520038, "grad_norm": 1.4574971340123106, "learning_rate": 2.292026234035209e-05, "loss": 1.9154, "reason_loss": 0.4563570022583008, "step": 1891, "utility_loss": 1.4590426683425903 }, { "cosine_similarity": 0, "epoch": 1.7632805219012115, "grad_norm": 1.0339116008784184, "learning_rate": 2.2903003106662067e-05, "loss": 1.6605, "reason_loss": 0.4793790578842163, "step": 1892, "utility_loss": 1.1811695098876953 }, { "cosine_similarity": 0, "epoch": 1.7642124883504193, "grad_norm": 1.1495707194245706, "learning_rate": 2.288574387297204e-05, "loss": 1.3503, "reason_loss": 0.47824907302856445, "step": 1893, "utility_loss": 0.8720158934593201 }, { "cosine_similarity": 0, "epoch": 1.7651444547996271, "grad_norm": 1.0641057311740758, "learning_rate": 2.2868484639282016e-05, "loss": 1.7479, "reason_loss": 0.47470003366470337, "step": 1894, "utility_loss": 1.2732338905334473 }, { "cosine_similarity": 0, "epoch": 1.766076421248835, "grad_norm": 1.0721814112133907, "learning_rate": 2.285122540559199e-05, "loss": 1.7352, "reason_loss": 0.5010912418365479, "step": 1895, "utility_loss": 1.2341580390930176 }, { "cosine_similarity": 0, "epoch": 1.767008387698043, "grad_norm": 1.2759350924761372, "learning_rate": 2.2833966171901968e-05, "loss": 1.4947, "reason_loss": 0.47470998764038086, "step": 1896, "utility_loss": 1.0199536085128784 }, { "cosine_similarity": 0, "epoch": 1.7679403541472507, "grad_norm": 1.2280393425306253, "learning_rate": 2.2816706938211945e-05, "loss": 1.6458, "reason_loss": 0.4659118056297302, "step": 1897, "utility_loss": 1.17991042137146 }, { "cosine_similarity": 0, "epoch": 1.7688723205964585, "grad_norm": 1.0649061314307566, "learning_rate": 2.279944770452192e-05, "loss": 1.8484, "reason_loss": 0.4616577923297882, "step": 1898, "utility_loss": 1.3866958618164062 }, { "cosine_similarity": 0, "epoch": 1.7698042870456665, "grad_norm": 1.0493952385559042, "learning_rate": 2.2782188470831897e-05, "loss": 1.4812, "reason_loss": 0.4683126211166382, "step": 1899, "utility_loss": 1.0128984451293945 }, { "cosine_similarity": 0, "epoch": 1.7707362534948743, "grad_norm": 1.1713801443073044, "learning_rate": 2.276492923714187e-05, "loss": 1.4744, "reason_loss": 0.45349931716918945, "step": 1900, "utility_loss": 1.0208795070648193 }, { "cosine_similarity": 0, "epoch": 1.771668219944082, "grad_norm": 0.9397687073678034, "learning_rate": 2.274767000345185e-05, "loss": 1.3917, "reason_loss": 0.45862337946891785, "step": 1901, "utility_loss": 0.9330304265022278 }, { "cosine_similarity": 0, "epoch": 1.7726001863932899, "grad_norm": 0.8668371631407725, "learning_rate": 2.2730410769761824e-05, "loss": 1.0504, "reason_loss": 0.45324087142944336, "step": 1902, "utility_loss": 0.5971599817276001 }, { "cosine_similarity": 0, "epoch": 1.7735321528424977, "grad_norm": 0.9833318895570974, "learning_rate": 2.27131515360718e-05, "loss": 1.5607, "reason_loss": 0.43872037529945374, "step": 1903, "utility_loss": 1.1220250129699707 }, { "cosine_similarity": 0, "epoch": 1.7744641192917054, "grad_norm": 1.0510592283189955, "learning_rate": 2.2695892302381776e-05, "loss": 1.4714, "reason_loss": 0.48859235644340515, "step": 1904, "utility_loss": 0.9828471541404724 }, { "cosine_similarity": 0, "epoch": 1.7753960857409132, "grad_norm": 0.9944583870071061, "learning_rate": 2.267863306869175e-05, "loss": 1.7469, "reason_loss": 0.4745728373527527, "step": 1905, "utility_loss": 1.2723381519317627 }, { "cosine_similarity": 0, "epoch": 1.776328052190121, "grad_norm": 1.0442922291746146, "learning_rate": 2.2661373835001724e-05, "loss": 1.3598, "reason_loss": 0.48137953877449036, "step": 1906, "utility_loss": 0.878387451171875 }, { "cosine_similarity": 0, "epoch": 1.777260018639329, "grad_norm": 0.9101932086344976, "learning_rate": 2.2644114601311702e-05, "loss": 1.5571, "reason_loss": 0.48375454545021057, "step": 1907, "utility_loss": 1.0733928680419922 }, { "cosine_similarity": 0, "epoch": 1.7781919850885368, "grad_norm": 0.8885478470464562, "learning_rate": 2.262685536762168e-05, "loss": 1.588, "reason_loss": 0.4732796549797058, "step": 1908, "utility_loss": 1.1147346496582031 }, { "cosine_similarity": 0, "epoch": 1.7791239515377446, "grad_norm": 0.9966494223030354, "learning_rate": 2.2609596133931654e-05, "loss": 1.6908, "reason_loss": 0.4817650318145752, "step": 1909, "utility_loss": 1.2090449333190918 }, { "cosine_similarity": 0, "epoch": 1.7800559179869526, "grad_norm": 1.0473837542578688, "learning_rate": 2.259233690024163e-05, "loss": 1.6624, "reason_loss": 0.48251181840896606, "step": 1910, "utility_loss": 1.1798768043518066 }, { "cosine_similarity": 0, "epoch": 1.7809878844361604, "grad_norm": 1.0076910902081584, "learning_rate": 2.2575077666551606e-05, "loss": 1.7503, "reason_loss": 0.4759969115257263, "step": 1911, "utility_loss": 1.2742849588394165 }, { "cosine_similarity": 0, "epoch": 1.7819198508853682, "grad_norm": 0.8278521568005115, "learning_rate": 2.2557818432861583e-05, "loss": 1.0781, "reason_loss": 0.4400809109210968, "step": 1912, "utility_loss": 0.6380605101585388 }, { "cosine_similarity": 0, "epoch": 1.782851817334576, "grad_norm": 0.8748325306685264, "learning_rate": 2.2540559199171558e-05, "loss": 1.2591, "reason_loss": 0.46595025062561035, "step": 1913, "utility_loss": 0.7931891679763794 }, { "cosine_similarity": 0, "epoch": 1.7837837837837838, "grad_norm": 1.0153169618839908, "learning_rate": 2.2523299965481535e-05, "loss": 1.2066, "reason_loss": 0.48497098684310913, "step": 1914, "utility_loss": 0.7215911149978638 }, { "cosine_similarity": 0, "epoch": 1.7847157502329916, "grad_norm": 1.1207051352830182, "learning_rate": 2.250604073179151e-05, "loss": 1.3643, "reason_loss": 0.44498276710510254, "step": 1915, "utility_loss": 0.9193427562713623 }, { "cosine_similarity": 0, "epoch": 1.7856477166821993, "grad_norm": 1.2876929657578877, "learning_rate": 2.2488781498101484e-05, "loss": 1.4893, "reason_loss": 0.47935929894447327, "step": 1916, "utility_loss": 1.0099431276321411 }, { "cosine_similarity": 0, "epoch": 1.7865796831314071, "grad_norm": 1.144301782907324, "learning_rate": 2.2471522264411462e-05, "loss": 1.3402, "reason_loss": 0.4901065528392792, "step": 1917, "utility_loss": 0.8501361012458801 }, { "cosine_similarity": 0, "epoch": 1.7875116495806151, "grad_norm": 1.3776497314966876, "learning_rate": 2.2454263030721436e-05, "loss": 1.6884, "reason_loss": 0.4876031279563904, "step": 1918, "utility_loss": 1.2008012533187866 }, { "cosine_similarity": 0, "epoch": 1.788443616029823, "grad_norm": 1.0839681044250318, "learning_rate": 2.2437003797031414e-05, "loss": 1.4449, "reason_loss": 0.46379151940345764, "step": 1919, "utility_loss": 0.9810704588890076 }, { "cosine_similarity": 0, "epoch": 1.7893755824790307, "grad_norm": 0.9158441355181967, "learning_rate": 2.2419744563341388e-05, "loss": 1.3297, "reason_loss": 0.49606338143348694, "step": 1920, "utility_loss": 0.8336392641067505 }, { "cosine_similarity": 0, "epoch": 1.7903075489282387, "grad_norm": 0.9709387745458558, "learning_rate": 2.2402485329651366e-05, "loss": 1.3072, "reason_loss": 0.4948534071445465, "step": 1921, "utility_loss": 0.8123124837875366 }, { "cosine_similarity": 0, "epoch": 1.7912395153774465, "grad_norm": 1.070108610684228, "learning_rate": 2.238522609596134e-05, "loss": 1.4028, "reason_loss": 0.4745022654533386, "step": 1922, "utility_loss": 0.9283188581466675 }, { "cosine_similarity": 0, "epoch": 1.7921714818266543, "grad_norm": 1.0364993931295998, "learning_rate": 2.2367966862271318e-05, "loss": 1.5583, "reason_loss": 0.4794984757900238, "step": 1923, "utility_loss": 1.0788458585739136 }, { "cosine_similarity": 0, "epoch": 1.793103448275862, "grad_norm": 0.9625440434821699, "learning_rate": 2.2350707628581292e-05, "loss": 1.3399, "reason_loss": 0.46300333738327026, "step": 1924, "utility_loss": 0.8768788576126099 }, { "cosine_similarity": 0, "epoch": 1.7940354147250699, "grad_norm": 1.174720270558922, "learning_rate": 2.233344839489127e-05, "loss": 1.9065, "reason_loss": 0.4572683870792389, "step": 1925, "utility_loss": 1.4492697715759277 }, { "cosine_similarity": 0, "epoch": 1.7949673811742777, "grad_norm": 0.8847197975910311, "learning_rate": 2.2316189161201244e-05, "loss": 1.2191, "reason_loss": 0.4561801552772522, "step": 1926, "utility_loss": 0.7629164457321167 }, { "cosine_similarity": 0, "epoch": 1.7958993476234855, "grad_norm": 0.9888680839195735, "learning_rate": 2.2298929927511218e-05, "loss": 1.4648, "reason_loss": 0.4674949645996094, "step": 1927, "utility_loss": 0.9972648024559021 }, { "cosine_similarity": 0, "epoch": 1.7968313140726933, "grad_norm": 0.8518230748728506, "learning_rate": 2.2281670693821196e-05, "loss": 1.3058, "reason_loss": 0.4778228998184204, "step": 1928, "utility_loss": 0.8280017375946045 }, { "cosine_similarity": 0, "epoch": 1.7977632805219013, "grad_norm": 0.9984612795054901, "learning_rate": 2.226441146013117e-05, "loss": 1.633, "reason_loss": 0.4612978398799896, "step": 1929, "utility_loss": 1.1716787815093994 }, { "cosine_similarity": 0, "epoch": 1.798695246971109, "grad_norm": 0.913211268744489, "learning_rate": 2.2247152226441148e-05, "loss": 1.6678, "reason_loss": 0.48794040083885193, "step": 1930, "utility_loss": 1.1798741817474365 }, { "cosine_similarity": 0, "epoch": 1.7996272134203168, "grad_norm": 1.0058576856674095, "learning_rate": 2.2229892992751122e-05, "loss": 1.4769, "reason_loss": 0.47978639602661133, "step": 1931, "utility_loss": 0.9971002340316772 }, { "cosine_similarity": 0, "epoch": 1.8005591798695249, "grad_norm": 0.9275773900365925, "learning_rate": 2.22126337590611e-05, "loss": 1.3792, "reason_loss": 0.47449469566345215, "step": 1932, "utility_loss": 0.9046857357025146 }, { "cosine_similarity": 0, "epoch": 1.8014911463187326, "grad_norm": 1.1119815188192834, "learning_rate": 2.2195374525371074e-05, "loss": 1.5753, "reason_loss": 0.484258234500885, "step": 1933, "utility_loss": 1.0909967422485352 }, { "cosine_similarity": 0, "epoch": 1.8024231127679404, "grad_norm": 1.0148203037963803, "learning_rate": 2.2178115291681052e-05, "loss": 1.5386, "reason_loss": 0.4661821126937866, "step": 1934, "utility_loss": 1.072449803352356 }, { "cosine_similarity": 0, "epoch": 1.8033550792171482, "grad_norm": 1.2387383980470756, "learning_rate": 2.2160856057991026e-05, "loss": 1.3099, "reason_loss": 0.4761087894439697, "step": 1935, "utility_loss": 0.8338088989257812 }, { "cosine_similarity": 0, "epoch": 1.804287045666356, "grad_norm": 0.9959612475433526, "learning_rate": 2.2143596824301004e-05, "loss": 1.587, "reason_loss": 0.4754962921142578, "step": 1936, "utility_loss": 1.111483097076416 }, { "cosine_similarity": 0, "epoch": 1.8052190121155638, "grad_norm": 1.0883347232919145, "learning_rate": 2.2126337590610978e-05, "loss": 1.4724, "reason_loss": 0.46653443574905396, "step": 1937, "utility_loss": 1.005912184715271 }, { "cosine_similarity": 0, "epoch": 1.8061509785647716, "grad_norm": 0.8514694349642364, "learning_rate": 2.2109078356920952e-05, "loss": 1.2527, "reason_loss": 0.44211345911026, "step": 1938, "utility_loss": 0.8106152415275574 }, { "cosine_similarity": 0, "epoch": 1.8070829450139794, "grad_norm": 1.141787491935151, "learning_rate": 2.209181912323093e-05, "loss": 1.2341, "reason_loss": 0.4576558470726013, "step": 1939, "utility_loss": 0.7763965129852295 }, { "cosine_similarity": 0, "epoch": 1.8080149114631874, "grad_norm": 1.166463970313091, "learning_rate": 2.2074559889540904e-05, "loss": 1.7091, "reason_loss": 0.4632270634174347, "step": 1940, "utility_loss": 1.2459076642990112 }, { "cosine_similarity": 0, "epoch": 1.8089468779123952, "grad_norm": 1.1086391036853418, "learning_rate": 2.2057300655850882e-05, "loss": 1.6302, "reason_loss": 0.4774051606655121, "step": 1941, "utility_loss": 1.1527681350708008 }, { "cosine_similarity": 0, "epoch": 1.809878844361603, "grad_norm": 0.9042354474807813, "learning_rate": 2.2040041422160856e-05, "loss": 1.4432, "reason_loss": 0.4916456341743469, "step": 1942, "utility_loss": 0.9515547156333923 }, { "cosine_similarity": 0, "epoch": 1.810810810810811, "grad_norm": 1.027311789901501, "learning_rate": 2.2022782188470834e-05, "loss": 1.2815, "reason_loss": 0.4898955523967743, "step": 1943, "utility_loss": 0.7916322350502014 }, { "cosine_similarity": 0, "epoch": 1.8117427772600188, "grad_norm": 0.8906911246051087, "learning_rate": 2.2005522954780808e-05, "loss": 1.4699, "reason_loss": 0.48510169982910156, "step": 1944, "utility_loss": 0.9848018884658813 }, { "cosine_similarity": 0, "epoch": 1.8126747437092265, "grad_norm": 1.1451103229076591, "learning_rate": 2.1988263721090786e-05, "loss": 1.7609, "reason_loss": 0.49756866693496704, "step": 1945, "utility_loss": 1.2632956504821777 }, { "cosine_similarity": 0, "epoch": 1.8136067101584343, "grad_norm": 1.0208845393545631, "learning_rate": 2.197100448740076e-05, "loss": 1.3543, "reason_loss": 0.4620796740055084, "step": 1946, "utility_loss": 0.8921996355056763 }, { "cosine_similarity": 0, "epoch": 1.8145386766076421, "grad_norm": 1.0669334104990211, "learning_rate": 2.1953745253710738e-05, "loss": 1.4959, "reason_loss": 0.447049617767334, "step": 1947, "utility_loss": 1.048897385597229 }, { "cosine_similarity": 0, "epoch": 1.81547064305685, "grad_norm": 1.4175192899409392, "learning_rate": 2.1936486020020712e-05, "loss": 1.615, "reason_loss": 0.46852925419807434, "step": 1948, "utility_loss": 1.1464228630065918 }, { "cosine_similarity": 0, "epoch": 1.8164026095060577, "grad_norm": 0.8777663373283854, "learning_rate": 2.1919226786330686e-05, "loss": 1.3051, "reason_loss": 0.45690011978149414, "step": 1949, "utility_loss": 0.8482346534729004 }, { "cosine_similarity": 0, "epoch": 1.8173345759552655, "grad_norm": 0.9056360485814084, "learning_rate": 2.1901967552640664e-05, "loss": 1.2895, "reason_loss": 0.46012628078460693, "step": 1950, "utility_loss": 0.8294026851654053 }, { "cosine_similarity": 0, "epoch": 1.8182665424044733, "grad_norm": 1.0468988551428042, "learning_rate": 2.188470831895064e-05, "loss": 1.2842, "reason_loss": 0.47489601373672485, "step": 1951, "utility_loss": 0.8093370199203491 }, { "cosine_similarity": 0, "epoch": 1.8191985088536813, "grad_norm": 1.0197732597562918, "learning_rate": 2.1867449085260616e-05, "loss": 1.2451, "reason_loss": 0.45469769835472107, "step": 1952, "utility_loss": 0.7904502749443054 }, { "cosine_similarity": 0, "epoch": 1.820130475302889, "grad_norm": 1.174591356551977, "learning_rate": 2.185018985157059e-05, "loss": 1.3606, "reason_loss": 0.4717119336128235, "step": 1953, "utility_loss": 0.8888557553291321 }, { "cosine_similarity": 0, "epoch": 1.821062441752097, "grad_norm": 1.118590624259294, "learning_rate": 2.1832930617880568e-05, "loss": 1.5707, "reason_loss": 0.4596797227859497, "step": 1954, "utility_loss": 1.1109747886657715 }, { "cosine_similarity": 0, "epoch": 1.8219944082013049, "grad_norm": 1.0595179870036207, "learning_rate": 2.1815671384190542e-05, "loss": 1.2114, "reason_loss": 0.4738379418849945, "step": 1955, "utility_loss": 0.737551748752594 }, { "cosine_similarity": 0, "epoch": 1.8229263746505127, "grad_norm": 1.1312713331148794, "learning_rate": 2.179841215050052e-05, "loss": 1.5457, "reason_loss": 0.45001742243766785, "step": 1956, "utility_loss": 1.0956748723983765 }, { "cosine_similarity": 0, "epoch": 1.8238583410997204, "grad_norm": 1.075763444247804, "learning_rate": 2.1781152916810494e-05, "loss": 1.8494, "reason_loss": 0.4813975691795349, "step": 1957, "utility_loss": 1.367956519126892 }, { "cosine_similarity": 0, "epoch": 1.8247903075489282, "grad_norm": 1.2616711746230316, "learning_rate": 2.1763893683120472e-05, "loss": 1.4965, "reason_loss": 0.4539439082145691, "step": 1958, "utility_loss": 1.04252028465271 }, { "cosine_similarity": 0, "epoch": 1.825722273998136, "grad_norm": 1.1090056025600878, "learning_rate": 2.1746634449430446e-05, "loss": 1.6956, "reason_loss": 0.4822714924812317, "step": 1959, "utility_loss": 1.2133445739746094 }, { "cosine_similarity": 0, "epoch": 1.8266542404473438, "grad_norm": 1.0739459845322306, "learning_rate": 2.172937521574042e-05, "loss": 1.7782, "reason_loss": 0.48374003171920776, "step": 1960, "utility_loss": 1.2944450378417969 }, { "cosine_similarity": 0, "epoch": 1.8275862068965516, "grad_norm": 0.8388486722582128, "learning_rate": 2.1712115982050398e-05, "loss": 1.346, "reason_loss": 0.4729163944721222, "step": 1961, "utility_loss": 0.8730406761169434 }, { "cosine_similarity": 0, "epoch": 1.8285181733457594, "grad_norm": 1.014004396043938, "learning_rate": 2.1694856748360373e-05, "loss": 1.7617, "reason_loss": 0.48084354400634766, "step": 1962, "utility_loss": 1.2808865308761597 }, { "cosine_similarity": 0, "epoch": 1.8294501397949674, "grad_norm": 1.1433236535861682, "learning_rate": 2.167759751467035e-05, "loss": 1.7292, "reason_loss": 0.4694838225841522, "step": 1963, "utility_loss": 1.2596962451934814 }, { "cosine_similarity": 0, "epoch": 1.8303821062441752, "grad_norm": 0.9643723547442951, "learning_rate": 2.1660338280980324e-05, "loss": 1.2944, "reason_loss": 0.49237632751464844, "step": 1964, "utility_loss": 0.8020188808441162 }, { "cosine_similarity": 0, "epoch": 1.8313140726933832, "grad_norm": 0.9824120982153741, "learning_rate": 2.1643079047290302e-05, "loss": 1.62, "reason_loss": 0.4796368181705475, "step": 1965, "utility_loss": 1.140354871749878 }, { "cosine_similarity": 0, "epoch": 1.832246039142591, "grad_norm": 1.06103510676107, "learning_rate": 2.1625819813600276e-05, "loss": 1.2267, "reason_loss": 0.4698528051376343, "step": 1966, "utility_loss": 0.7568601369857788 }, { "cosine_similarity": 0, "epoch": 1.8331780055917988, "grad_norm": 0.9649643807271135, "learning_rate": 2.1608560579910254e-05, "loss": 1.5923, "reason_loss": 0.47406506538391113, "step": 1967, "utility_loss": 1.1182198524475098 }, { "cosine_similarity": 0, "epoch": 1.8341099720410066, "grad_norm": 1.1446659215895307, "learning_rate": 2.1591301346220232e-05, "loss": 1.5783, "reason_loss": 0.4553256332874298, "step": 1968, "utility_loss": 1.122978925704956 }, { "cosine_similarity": 0, "epoch": 1.8350419384902144, "grad_norm": 0.9914454377747316, "learning_rate": 2.1574042112530206e-05, "loss": 1.7861, "reason_loss": 0.5090088844299316, "step": 1969, "utility_loss": 1.2770912647247314 }, { "cosine_similarity": 0, "epoch": 1.8359739049394221, "grad_norm": 1.0291899517206775, "learning_rate": 2.155678287884018e-05, "loss": 1.369, "reason_loss": 0.4766928255558014, "step": 1970, "utility_loss": 0.8923012018203735 }, { "cosine_similarity": 0, "epoch": 1.83690587138863, "grad_norm": 0.9256375497410823, "learning_rate": 2.1539523645150155e-05, "loss": 1.3824, "reason_loss": 0.4992096424102783, "step": 1971, "utility_loss": 0.88323974609375 }, { "cosine_similarity": 0, "epoch": 1.8378378378378377, "grad_norm": 1.0606445401784295, "learning_rate": 2.1522264411460132e-05, "loss": 1.4519, "reason_loss": 0.48962274193763733, "step": 1972, "utility_loss": 0.9622988700866699 }, { "cosine_similarity": 0, "epoch": 1.8387698042870455, "grad_norm": 0.9959465161351423, "learning_rate": 2.1505005177770107e-05, "loss": 1.464, "reason_loss": 0.4728718400001526, "step": 1973, "utility_loss": 0.9911625981330872 }, { "cosine_similarity": 0, "epoch": 1.8397017707362535, "grad_norm": 1.043852870372662, "learning_rate": 2.1487745944080084e-05, "loss": 1.5946, "reason_loss": 0.4928452968597412, "step": 1974, "utility_loss": 1.101712942123413 }, { "cosine_similarity": 0, "epoch": 1.8406337371854613, "grad_norm": 1.0867514140494368, "learning_rate": 2.147048671039006e-05, "loss": 1.4679, "reason_loss": 0.48777449131011963, "step": 1975, "utility_loss": 0.9800937175750732 }, { "cosine_similarity": 0, "epoch": 1.8415657036346693, "grad_norm": 0.9364451641858986, "learning_rate": 2.1453227476700036e-05, "loss": 1.225, "reason_loss": 0.486310213804245, "step": 1976, "utility_loss": 0.7386986017227173 }, { "cosine_similarity": 0, "epoch": 1.842497670083877, "grad_norm": 0.9683064027097033, "learning_rate": 2.143596824301001e-05, "loss": 1.3553, "reason_loss": 0.48520028591156006, "step": 1977, "utility_loss": 0.8700746893882751 }, { "cosine_similarity": 0, "epoch": 1.843429636533085, "grad_norm": 1.0760450227312346, "learning_rate": 2.1418709009319988e-05, "loss": 1.4721, "reason_loss": 0.4713220000267029, "step": 1978, "utility_loss": 1.000730037689209 }, { "cosine_similarity": 0, "epoch": 1.8443616029822927, "grad_norm": 1.251363880912316, "learning_rate": 2.1401449775629963e-05, "loss": 1.5542, "reason_loss": 0.4478093385696411, "step": 1979, "utility_loss": 1.1063547134399414 }, { "cosine_similarity": 0, "epoch": 1.8452935694315005, "grad_norm": 0.9113058692021081, "learning_rate": 2.1384190541939937e-05, "loss": 1.4081, "reason_loss": 0.4564603269100189, "step": 1980, "utility_loss": 0.951664388179779 }, { "cosine_similarity": 0, "epoch": 1.8462255358807083, "grad_norm": 1.0478416118232023, "learning_rate": 2.1366931308249915e-05, "loss": 1.5235, "reason_loss": 0.46798384189605713, "step": 1981, "utility_loss": 1.055506706237793 }, { "cosine_similarity": 0, "epoch": 1.847157502329916, "grad_norm": 0.929667061921079, "learning_rate": 2.134967207455989e-05, "loss": 1.4702, "reason_loss": 0.4624266028404236, "step": 1982, "utility_loss": 1.0078181028366089 }, { "cosine_similarity": 0, "epoch": 1.8480894687791238, "grad_norm": 1.1039595730283436, "learning_rate": 2.1332412840869866e-05, "loss": 1.4729, "reason_loss": 0.4565066993236542, "step": 1983, "utility_loss": 1.0164313316345215 }, { "cosine_similarity": 0, "epoch": 1.8490214352283316, "grad_norm": 1.0879118057239092, "learning_rate": 2.131515360717984e-05, "loss": 1.3745, "reason_loss": 0.475318044424057, "step": 1984, "utility_loss": 0.8991521596908569 }, { "cosine_similarity": 0, "epoch": 1.8499534016775396, "grad_norm": 0.992695592933813, "learning_rate": 2.129789437348982e-05, "loss": 1.239, "reason_loss": 0.4646734595298767, "step": 1985, "utility_loss": 0.7742798924446106 }, { "cosine_similarity": 0, "epoch": 1.8508853681267474, "grad_norm": 1.0907209929261605, "learning_rate": 2.1280635139799793e-05, "loss": 1.4818, "reason_loss": 0.5089080333709717, "step": 1986, "utility_loss": 0.9729287028312683 }, { "cosine_similarity": 0, "epoch": 1.8518173345759554, "grad_norm": 0.9649493208664852, "learning_rate": 2.126337590610977e-05, "loss": 1.2954, "reason_loss": 0.49031755328178406, "step": 1987, "utility_loss": 0.8050563931465149 }, { "cosine_similarity": 0, "epoch": 1.8527493010251632, "grad_norm": 1.0494646990399406, "learning_rate": 2.1246116672419748e-05, "loss": 1.2924, "reason_loss": 0.49226099252700806, "step": 1988, "utility_loss": 0.800149142742157 }, { "cosine_similarity": 0, "epoch": 1.853681267474371, "grad_norm": 0.9232197508615451, "learning_rate": 2.1228857438729722e-05, "loss": 1.5359, "reason_loss": 0.4984445571899414, "step": 1989, "utility_loss": 1.037447214126587 }, { "cosine_similarity": 0, "epoch": 1.8546132339235788, "grad_norm": 1.2255829339360806, "learning_rate": 2.1211598205039697e-05, "loss": 1.5857, "reason_loss": 0.47596296668052673, "step": 1990, "utility_loss": 1.1097381114959717 }, { "cosine_similarity": 0, "epoch": 1.8555452003727866, "grad_norm": 0.9639521874844064, "learning_rate": 2.119433897134967e-05, "loss": 1.5179, "reason_loss": 0.44208890199661255, "step": 1991, "utility_loss": 1.0758020877838135 }, { "cosine_similarity": 0, "epoch": 1.8564771668219944, "grad_norm": 1.1064002143695333, "learning_rate": 2.117707973765965e-05, "loss": 1.8588, "reason_loss": 0.48405197262763977, "step": 1992, "utility_loss": 1.3747503757476807 }, { "cosine_similarity": 0, "epoch": 1.8574091332712022, "grad_norm": 1.0333977554757383, "learning_rate": 2.1159820503969623e-05, "loss": 1.7288, "reason_loss": 0.47093063592910767, "step": 1993, "utility_loss": 1.2578399181365967 }, { "cosine_similarity": 0, "epoch": 1.85834109972041, "grad_norm": 0.9574910278661352, "learning_rate": 2.11425612702796e-05, "loss": 1.5553, "reason_loss": 0.4696708917617798, "step": 1994, "utility_loss": 1.0856647491455078 }, { "cosine_similarity": 0, "epoch": 1.8592730661696177, "grad_norm": 1.0423820986371566, "learning_rate": 2.1125302036589575e-05, "loss": 1.5486, "reason_loss": 0.48444610834121704, "step": 1995, "utility_loss": 1.0641899108886719 }, { "cosine_similarity": 0, "epoch": 1.8602050326188257, "grad_norm": 0.9893717557471102, "learning_rate": 2.1108042802899553e-05, "loss": 1.4562, "reason_loss": 0.48026949167251587, "step": 1996, "utility_loss": 0.9759751558303833 }, { "cosine_similarity": 0, "epoch": 1.8611369990680335, "grad_norm": 1.0012022871671076, "learning_rate": 2.1090783569209527e-05, "loss": 1.275, "reason_loss": 0.4701675772666931, "step": 1997, "utility_loss": 0.804840087890625 }, { "cosine_similarity": 0, "epoch": 1.8620689655172413, "grad_norm": 1.0273984744229954, "learning_rate": 2.1073524335519505e-05, "loss": 1.402, "reason_loss": 0.4333246946334839, "step": 1998, "utility_loss": 0.968640148639679 }, { "cosine_similarity": 0, "epoch": 1.8630009319664493, "grad_norm": 1.0244519240575698, "learning_rate": 2.1056265101829482e-05, "loss": 1.6864, "reason_loss": 0.5098059177398682, "step": 1999, "utility_loss": 1.1765944957733154 }, { "cosine_similarity": 0, "epoch": 1.8639328984156571, "grad_norm": 1.193660977013939, "learning_rate": 2.1039005868139457e-05, "loss": 2.1069, "reason_loss": 0.4597663879394531, "step": 2000, "utility_loss": 1.647125244140625 }, { "cosine_similarity": 0, "epoch": 1.864864864864865, "grad_norm": 1.086273834134162, "learning_rate": 2.102174663444943e-05, "loss": 1.4167, "reason_loss": 0.4559289813041687, "step": 2001, "utility_loss": 0.9607393145561218 }, { "cosine_similarity": 0, "epoch": 1.8657968313140727, "grad_norm": 1.073830799556064, "learning_rate": 2.1004487400759405e-05, "loss": 1.6165, "reason_loss": 0.491710364818573, "step": 2002, "utility_loss": 1.1247880458831787 }, { "cosine_similarity": 0, "epoch": 1.8667287977632805, "grad_norm": 1.1635154730144, "learning_rate": 2.0987228167069383e-05, "loss": 1.4501, "reason_loss": 0.4422302842140198, "step": 2003, "utility_loss": 1.0078532695770264 }, { "cosine_similarity": 0, "epoch": 1.8676607642124883, "grad_norm": 1.5281288830943207, "learning_rate": 2.0969968933379357e-05, "loss": 1.7843, "reason_loss": 0.47270718216896057, "step": 2004, "utility_loss": 1.3116414546966553 }, { "cosine_similarity": 0, "epoch": 1.868592730661696, "grad_norm": 0.8073635177631111, "learning_rate": 2.0952709699689335e-05, "loss": 1.2506, "reason_loss": 0.47578608989715576, "step": 2005, "utility_loss": 0.7747923135757446 }, { "cosine_similarity": 0, "epoch": 1.8695246971109039, "grad_norm": 1.1727086228999513, "learning_rate": 2.093545046599931e-05, "loss": 1.7247, "reason_loss": 0.48737671971321106, "step": 2006, "utility_loss": 1.2373254299163818 }, { "cosine_similarity": 0, "epoch": 1.8704566635601119, "grad_norm": 1.0403390295996635, "learning_rate": 2.0918191232309287e-05, "loss": 1.4992, "reason_loss": 0.493990421295166, "step": 2007, "utility_loss": 1.0052192211151123 }, { "cosine_similarity": 0, "epoch": 1.8713886300093197, "grad_norm": 1.0707929339983564, "learning_rate": 2.0900931998619264e-05, "loss": 1.6556, "reason_loss": 0.48033276200294495, "step": 2008, "utility_loss": 1.1752359867095947 }, { "cosine_similarity": 0, "epoch": 1.8723205964585274, "grad_norm": 1.1341444337710058, "learning_rate": 2.088367276492924e-05, "loss": 1.6781, "reason_loss": 0.47826337814331055, "step": 2009, "utility_loss": 1.1998600959777832 }, { "cosine_similarity": 0, "epoch": 1.8732525629077355, "grad_norm": 1.0382484491405015, "learning_rate": 2.0866413531239216e-05, "loss": 1.3946, "reason_loss": 0.47100356221199036, "step": 2010, "utility_loss": 0.9235779047012329 }, { "cosine_similarity": 0, "epoch": 1.8741845293569432, "grad_norm": 1.1048178429189357, "learning_rate": 2.084915429754919e-05, "loss": 1.4267, "reason_loss": 0.45557719469070435, "step": 2011, "utility_loss": 0.9711290001869202 }, { "cosine_similarity": 0, "epoch": 1.875116495806151, "grad_norm": 1.220006859969143, "learning_rate": 2.0831895063859165e-05, "loss": 1.5018, "reason_loss": 0.4336918592453003, "step": 2012, "utility_loss": 1.068117618560791 }, { "cosine_similarity": 0, "epoch": 1.8760484622553588, "grad_norm": 0.8972961941106458, "learning_rate": 2.081463583016914e-05, "loss": 1.3063, "reason_loss": 0.4978647828102112, "step": 2013, "utility_loss": 0.8084009289741516 }, { "cosine_similarity": 0, "epoch": 1.8769804287045666, "grad_norm": 1.1443971905758967, "learning_rate": 2.0797376596479117e-05, "loss": 1.3143, "reason_loss": 0.48627370595932007, "step": 2014, "utility_loss": 0.8279938697814941 }, { "cosine_similarity": 0, "epoch": 1.8779123951537744, "grad_norm": 1.0084321097285398, "learning_rate": 2.078011736278909e-05, "loss": 1.2119, "reason_loss": 0.47054773569107056, "step": 2015, "utility_loss": 0.7413084506988525 }, { "cosine_similarity": 0, "epoch": 1.8788443616029822, "grad_norm": 0.8645905115093806, "learning_rate": 2.076285812909907e-05, "loss": 1.1544, "reason_loss": 0.48871052265167236, "step": 2016, "utility_loss": 0.6657301187515259 }, { "cosine_similarity": 0, "epoch": 1.87977632805219, "grad_norm": 1.1624414013255964, "learning_rate": 2.0745598895409043e-05, "loss": 1.5208, "reason_loss": 0.4500652551651001, "step": 2017, "utility_loss": 1.0706901550292969 }, { "cosine_similarity": 0, "epoch": 1.880708294501398, "grad_norm": 0.8831720552150566, "learning_rate": 2.072833966171902e-05, "loss": 1.2411, "reason_loss": 0.4473751485347748, "step": 2018, "utility_loss": 0.7937018275260925 }, { "cosine_similarity": 0, "epoch": 1.8816402609506058, "grad_norm": 1.1917498657821863, "learning_rate": 2.0711080428029e-05, "loss": 1.7138, "reason_loss": 0.4624001979827881, "step": 2019, "utility_loss": 1.251434326171875 }, { "cosine_similarity": 0, "epoch": 1.8825722273998136, "grad_norm": 0.9436501666295145, "learning_rate": 2.0693821194338973e-05, "loss": 1.3561, "reason_loss": 0.47340941429138184, "step": 2020, "utility_loss": 0.8827393054962158 }, { "cosine_similarity": 0, "epoch": 1.8835041938490216, "grad_norm": 0.9496657436118202, "learning_rate": 2.067656196064895e-05, "loss": 1.5481, "reason_loss": 0.47574901580810547, "step": 2021, "utility_loss": 1.0723516941070557 }, { "cosine_similarity": 0, "epoch": 1.8844361602982294, "grad_norm": 1.158789419450548, "learning_rate": 2.0659302726958925e-05, "loss": 1.4707, "reason_loss": 0.46023795008659363, "step": 2022, "utility_loss": 1.0104858875274658 }, { "cosine_similarity": 0, "epoch": 1.8853681267474371, "grad_norm": 1.139338186030389, "learning_rate": 2.06420434932689e-05, "loss": 1.5783, "reason_loss": 0.470974326133728, "step": 2023, "utility_loss": 1.1073424816131592 }, { "cosine_similarity": 0, "epoch": 1.886300093196645, "grad_norm": 1.3618451617208809, "learning_rate": 2.0624784259578873e-05, "loss": 1.6181, "reason_loss": 0.4558047950267792, "step": 2024, "utility_loss": 1.1622700691223145 }, { "cosine_similarity": 0, "epoch": 1.8872320596458527, "grad_norm": 0.9825562425655788, "learning_rate": 2.060752502588885e-05, "loss": 1.4364, "reason_loss": 0.4692104160785675, "step": 2025, "utility_loss": 0.9671814441680908 }, { "cosine_similarity": 0, "epoch": 1.8881640260950605, "grad_norm": 1.0011237014978378, "learning_rate": 2.0590265792198825e-05, "loss": 1.3805, "reason_loss": 0.4888487458229065, "step": 2026, "utility_loss": 0.8916250467300415 }, { "cosine_similarity": 0, "epoch": 1.8890959925442683, "grad_norm": 1.0307769197716794, "learning_rate": 2.0573006558508803e-05, "loss": 1.2818, "reason_loss": 0.4688990116119385, "step": 2027, "utility_loss": 0.8128842711448669 }, { "cosine_similarity": 0, "epoch": 1.890027958993476, "grad_norm": 1.2041726872187903, "learning_rate": 2.055574732481878e-05, "loss": 1.6293, "reason_loss": 0.45801228284835815, "step": 2028, "utility_loss": 1.1712839603424072 }, { "cosine_similarity": 0, "epoch": 1.890959925442684, "grad_norm": 0.961710763842467, "learning_rate": 2.0538488091128755e-05, "loss": 1.4456, "reason_loss": 0.4816046357154846, "step": 2029, "utility_loss": 0.9640240669250488 }, { "cosine_similarity": 0, "epoch": 1.8918918918918919, "grad_norm": 1.0828911344964378, "learning_rate": 2.0521228857438733e-05, "loss": 1.4371, "reason_loss": 0.4790763854980469, "step": 2030, "utility_loss": 0.9580202102661133 }, { "cosine_similarity": 0, "epoch": 1.8928238583410997, "grad_norm": 0.996724043574354, "learning_rate": 2.0503969623748707e-05, "loss": 1.4279, "reason_loss": 0.49035003781318665, "step": 2031, "utility_loss": 0.9375326037406921 }, { "cosine_similarity": 0, "epoch": 1.8937558247903077, "grad_norm": 1.039446439992424, "learning_rate": 2.0486710390058685e-05, "loss": 1.437, "reason_loss": 0.47577255964279175, "step": 2032, "utility_loss": 0.961185097694397 }, { "cosine_similarity": 0, "epoch": 1.8946877912395155, "grad_norm": 0.801039323641009, "learning_rate": 2.046945115636866e-05, "loss": 1.0586, "reason_loss": 0.47571542859077454, "step": 2033, "utility_loss": 0.5828524231910706 }, { "cosine_similarity": 0, "epoch": 1.8956197576887233, "grad_norm": 1.0141916560663677, "learning_rate": 2.0452191922678633e-05, "loss": 1.3014, "reason_loss": 0.44051867723464966, "step": 2034, "utility_loss": 0.860910177230835 }, { "cosine_similarity": 0, "epoch": 1.896551724137931, "grad_norm": 1.030696985512914, "learning_rate": 2.0434932688988608e-05, "loss": 1.7062, "reason_loss": 0.46091628074645996, "step": 2035, "utility_loss": 1.2452419996261597 }, { "cosine_similarity": 0, "epoch": 1.8974836905871388, "grad_norm": 1.158784836077004, "learning_rate": 2.0417673455298585e-05, "loss": 1.4383, "reason_loss": 0.49513864517211914, "step": 2036, "utility_loss": 0.9431267976760864 }, { "cosine_similarity": 0, "epoch": 1.8984156570363466, "grad_norm": 0.8324639105159026, "learning_rate": 2.040041422160856e-05, "loss": 1.1523, "reason_loss": 0.4311293959617615, "step": 2037, "utility_loss": 0.7211723327636719 }, { "cosine_similarity": 0, "epoch": 1.8993476234855544, "grad_norm": 1.0425318272024002, "learning_rate": 2.0383154987918537e-05, "loss": 1.8871, "reason_loss": 0.47346818447113037, "step": 2038, "utility_loss": 1.4136326313018799 }, { "cosine_similarity": 0, "epoch": 1.9002795899347622, "grad_norm": 1.1375830031488037, "learning_rate": 2.0365895754228515e-05, "loss": 1.7009, "reason_loss": 0.4934185743331909, "step": 2039, "utility_loss": 1.2074615955352783 }, { "cosine_similarity": 0, "epoch": 1.9012115563839702, "grad_norm": 1.0135846889814712, "learning_rate": 2.034863652053849e-05, "loss": 1.2771, "reason_loss": 0.46823829412460327, "step": 2040, "utility_loss": 0.8088685274124146 }, { "cosine_similarity": 0, "epoch": 1.902143522833178, "grad_norm": 1.168317324552421, "learning_rate": 2.0331377286848467e-05, "loss": 1.6037, "reason_loss": 0.47273027896881104, "step": 2041, "utility_loss": 1.1309906244277954 }, { "cosine_similarity": 0, "epoch": 1.9030754892823858, "grad_norm": 1.2261148898209613, "learning_rate": 2.031411805315844e-05, "loss": 1.6467, "reason_loss": 0.5078182220458984, "step": 2042, "utility_loss": 1.1388332843780518 }, { "cosine_similarity": 0, "epoch": 1.9040074557315938, "grad_norm": 0.8665446352487753, "learning_rate": 2.029685881946842e-05, "loss": 1.244, "reason_loss": 0.5235973000526428, "step": 2043, "utility_loss": 0.7204015851020813 }, { "cosine_similarity": 0, "epoch": 1.9049394221808016, "grad_norm": 1.1056198907579051, "learning_rate": 2.0279599585778393e-05, "loss": 1.7479, "reason_loss": 0.4887560307979584, "step": 2044, "utility_loss": 1.2591816186904907 }, { "cosine_similarity": 0, "epoch": 1.9058713886300094, "grad_norm": 1.114111457240786, "learning_rate": 2.0262340352088367e-05, "loss": 1.5674, "reason_loss": 0.4625868797302246, "step": 2045, "utility_loss": 1.104816198348999 }, { "cosine_similarity": 0, "epoch": 1.9068033550792172, "grad_norm": 1.0126714016105476, "learning_rate": 2.024508111839834e-05, "loss": 1.5083, "reason_loss": 0.4721558690071106, "step": 2046, "utility_loss": 1.036141276359558 }, { "cosine_similarity": 0, "epoch": 1.907735321528425, "grad_norm": 0.9515652321201997, "learning_rate": 2.022782188470832e-05, "loss": 1.3544, "reason_loss": 0.4612930119037628, "step": 2047, "utility_loss": 0.8931382894515991 }, { "cosine_similarity": 0, "epoch": 1.9086672879776327, "grad_norm": 1.2010753819292792, "learning_rate": 2.0210562651018294e-05, "loss": 1.6075, "reason_loss": 0.5190279483795166, "step": 2048, "utility_loss": 1.088423252105713 }, { "cosine_similarity": 0, "epoch": 1.9095992544268405, "grad_norm": 0.9859838473835199, "learning_rate": 2.019330341732827e-05, "loss": 1.6289, "reason_loss": 0.4578516483306885, "step": 2049, "utility_loss": 1.1710476875305176 }, { "cosine_similarity": 0, "epoch": 1.9105312208760483, "grad_norm": 1.1327808691096697, "learning_rate": 2.017604418363825e-05, "loss": 1.3176, "reason_loss": 0.48899832367897034, "step": 2050, "utility_loss": 0.828584611415863 }, { "cosine_similarity": 0, "epoch": 1.9114631873252563, "grad_norm": 1.07634268728299, "learning_rate": 2.0158784949948223e-05, "loss": 1.6373, "reason_loss": 0.49414578080177307, "step": 2051, "utility_loss": 1.1431093215942383 }, { "cosine_similarity": 0, "epoch": 1.9123951537744641, "grad_norm": 1.1572553375819234, "learning_rate": 2.01415257162582e-05, "loss": 1.4484, "reason_loss": 0.45033130049705505, "step": 2052, "utility_loss": 0.9980408549308777 }, { "cosine_similarity": 0, "epoch": 1.913327120223672, "grad_norm": 1.1939558304970357, "learning_rate": 2.0124266482568175e-05, "loss": 1.4667, "reason_loss": 0.4748551845550537, "step": 2053, "utility_loss": 0.9918198585510254 }, { "cosine_similarity": 0, "epoch": 1.91425908667288, "grad_norm": 1.0117951512164027, "learning_rate": 2.0107007248878153e-05, "loss": 1.4487, "reason_loss": 0.46736273169517517, "step": 2054, "utility_loss": 0.9813556671142578 }, { "cosine_similarity": 0, "epoch": 1.9151910531220877, "grad_norm": 0.9746752065200205, "learning_rate": 2.0089748015188127e-05, "loss": 1.6131, "reason_loss": 0.47015494108200073, "step": 2055, "utility_loss": 1.1429874897003174 }, { "cosine_similarity": 0, "epoch": 1.9161230195712955, "grad_norm": 0.8242184714014295, "learning_rate": 2.00724887814981e-05, "loss": 1.3388, "reason_loss": 0.46273550391197205, "step": 2056, "utility_loss": 0.8760173320770264 }, { "cosine_similarity": 0, "epoch": 1.9170549860205033, "grad_norm": 1.063485711088957, "learning_rate": 2.0055229547808076e-05, "loss": 1.7299, "reason_loss": 0.47817808389663696, "step": 2057, "utility_loss": 1.2516796588897705 }, { "cosine_similarity": 0, "epoch": 1.917986952469711, "grad_norm": 1.035995433563287, "learning_rate": 2.0037970314118053e-05, "loss": 1.7035, "reason_loss": 0.5110105276107788, "step": 2058, "utility_loss": 1.1925077438354492 }, { "cosine_similarity": 0, "epoch": 1.9189189189189189, "grad_norm": 1.0129335714630525, "learning_rate": 2.002071108042803e-05, "loss": 1.5906, "reason_loss": 0.4927135705947876, "step": 2059, "utility_loss": 1.0978808403015137 }, { "cosine_similarity": 0, "epoch": 1.9198508853681266, "grad_norm": 1.0553988503167844, "learning_rate": 2.0003451846738005e-05, "loss": 1.4273, "reason_loss": 0.4729371666908264, "step": 2060, "utility_loss": 0.954345166683197 }, { "cosine_similarity": 0, "epoch": 1.9207828518173344, "grad_norm": 1.125689692496713, "learning_rate": 1.9986192613047983e-05, "loss": 1.6243, "reason_loss": 0.4716029465198517, "step": 2061, "utility_loss": 1.152698278427124 }, { "cosine_similarity": 0, "epoch": 1.9217148182665424, "grad_norm": 0.88228546199653, "learning_rate": 1.9968933379357957e-05, "loss": 1.5305, "reason_loss": 0.4714236259460449, "step": 2062, "utility_loss": 1.059057593345642 }, { "cosine_similarity": 0, "epoch": 1.9226467847157502, "grad_norm": 1.1174305650718883, "learning_rate": 1.9951674145667935e-05, "loss": 1.5976, "reason_loss": 0.46723318099975586, "step": 2063, "utility_loss": 1.13034188747406 }, { "cosine_similarity": 0, "epoch": 1.923578751164958, "grad_norm": 0.9021691426375691, "learning_rate": 1.993441491197791e-05, "loss": 1.4359, "reason_loss": 0.4709671139717102, "step": 2064, "utility_loss": 0.9649667739868164 }, { "cosine_similarity": 0, "epoch": 1.924510717614166, "grad_norm": 1.0033236534066619, "learning_rate": 1.9917155678287887e-05, "loss": 1.449, "reason_loss": 0.46094608306884766, "step": 2065, "utility_loss": 0.9880056381225586 }, { "cosine_similarity": 0, "epoch": 1.9254426840633738, "grad_norm": 0.9025444311201702, "learning_rate": 1.989989644459786e-05, "loss": 1.0975, "reason_loss": 0.4656359851360321, "step": 2066, "utility_loss": 0.6318836212158203 }, { "cosine_similarity": 0, "epoch": 1.9263746505125816, "grad_norm": 1.1301026105348257, "learning_rate": 1.9882637210907836e-05, "loss": 1.4333, "reason_loss": 0.4778485894203186, "step": 2067, "utility_loss": 0.9554182291030884 }, { "cosine_similarity": 0, "epoch": 1.9273066169617894, "grad_norm": 0.9608752517709743, "learning_rate": 1.986537797721781e-05, "loss": 1.5228, "reason_loss": 0.5155261754989624, "step": 2068, "utility_loss": 1.0073105096817017 }, { "cosine_similarity": 0, "epoch": 1.9282385834109972, "grad_norm": 0.8790922989116634, "learning_rate": 1.9848118743527788e-05, "loss": 1.2834, "reason_loss": 0.43986740708351135, "step": 2069, "utility_loss": 0.8435818552970886 }, { "cosine_similarity": 0, "epoch": 1.929170549860205, "grad_norm": 0.9148746881169397, "learning_rate": 1.9830859509837765e-05, "loss": 1.476, "reason_loss": 0.4783138334751129, "step": 2070, "utility_loss": 0.9977333545684814 }, { "cosine_similarity": 0, "epoch": 1.9301025163094128, "grad_norm": 1.0484323237096405, "learning_rate": 1.981360027614774e-05, "loss": 1.51, "reason_loss": 0.5012065172195435, "step": 2071, "utility_loss": 1.0088386535644531 }, { "cosine_similarity": 0, "epoch": 1.9310344827586206, "grad_norm": 1.0804639624101173, "learning_rate": 1.9796341042457717e-05, "loss": 1.4856, "reason_loss": 0.4726111590862274, "step": 2072, "utility_loss": 1.0130113363265991 }, { "cosine_similarity": 0, "epoch": 1.9319664492078286, "grad_norm": 1.1842899184939122, "learning_rate": 1.977908180876769e-05, "loss": 1.3723, "reason_loss": 0.46595633029937744, "step": 2073, "utility_loss": 0.906365156173706 }, { "cosine_similarity": 0, "epoch": 1.9328984156570364, "grad_norm": 1.1368007573392058, "learning_rate": 1.976182257507767e-05, "loss": 1.3704, "reason_loss": 0.4889243245124817, "step": 2074, "utility_loss": 0.8814420104026794 }, { "cosine_similarity": 0, "epoch": 1.9338303821062441, "grad_norm": 0.9623331821223925, "learning_rate": 1.9744563341387643e-05, "loss": 1.5483, "reason_loss": 0.4772017002105713, "step": 2075, "utility_loss": 1.0711073875427246 }, { "cosine_similarity": 0, "epoch": 1.9347623485554521, "grad_norm": 0.9825486196748838, "learning_rate": 1.972730410769762e-05, "loss": 1.2728, "reason_loss": 0.47722023725509644, "step": 2076, "utility_loss": 0.7956246733665466 }, { "cosine_similarity": 0, "epoch": 1.93569431500466, "grad_norm": 0.814397786429703, "learning_rate": 1.9710044874007595e-05, "loss": 1.1678, "reason_loss": 0.4707573652267456, "step": 2077, "utility_loss": 0.6970387697219849 }, { "cosine_similarity": 0, "epoch": 1.9366262814538677, "grad_norm": 0.9897107475495116, "learning_rate": 1.969278564031757e-05, "loss": 1.5706, "reason_loss": 0.4718537926673889, "step": 2078, "utility_loss": 1.0987862348556519 }, { "cosine_similarity": 0, "epoch": 1.9375582479030755, "grad_norm": 1.0603077282803626, "learning_rate": 1.9675526406627547e-05, "loss": 1.5788, "reason_loss": 0.47967368364334106, "step": 2079, "utility_loss": 1.099164605140686 }, { "cosine_similarity": 0, "epoch": 1.9384902143522833, "grad_norm": 1.0181656681580937, "learning_rate": 1.9658267172937522e-05, "loss": 1.4254, "reason_loss": 0.4711364507675171, "step": 2080, "utility_loss": 0.9543061256408691 }, { "cosine_similarity": 0, "epoch": 1.939422180801491, "grad_norm": 1.562624250497246, "learning_rate": 1.96410079392475e-05, "loss": 1.4511, "reason_loss": 0.43823307752609253, "step": 2081, "utility_loss": 1.0128777027130127 }, { "cosine_similarity": 0, "epoch": 1.9403541472506989, "grad_norm": 0.9051604400736118, "learning_rate": 1.9623748705557474e-05, "loss": 1.4781, "reason_loss": 0.47086068987846375, "step": 2082, "utility_loss": 1.0072412490844727 }, { "cosine_similarity": 0, "epoch": 1.9412861136999067, "grad_norm": 0.9768330638314927, "learning_rate": 1.960648947186745e-05, "loss": 1.575, "reason_loss": 0.4700753092765808, "step": 2083, "utility_loss": 1.1049554347991943 }, { "cosine_similarity": 0, "epoch": 1.9422180801491147, "grad_norm": 0.9181929339725636, "learning_rate": 1.9589230238177426e-05, "loss": 1.1868, "reason_loss": 0.48011350631713867, "step": 2084, "utility_loss": 0.706688642501831 }, { "cosine_similarity": 0, "epoch": 1.9431500465983225, "grad_norm": 0.9550518421506715, "learning_rate": 1.9571971004487403e-05, "loss": 1.5556, "reason_loss": 0.5177814960479736, "step": 2085, "utility_loss": 1.0377711057662964 }, { "cosine_similarity": 0, "epoch": 1.9440820130475303, "grad_norm": 1.0521573036825762, "learning_rate": 1.9554711770797378e-05, "loss": 1.3543, "reason_loss": 0.493203341960907, "step": 2086, "utility_loss": 0.8610557317733765 }, { "cosine_similarity": 0, "epoch": 1.9450139794967383, "grad_norm": 1.2127221991431811, "learning_rate": 1.9537452537107352e-05, "loss": 1.9227, "reason_loss": 0.455098420381546, "step": 2087, "utility_loss": 1.4676353931427002 }, { "cosine_similarity": 0, "epoch": 1.945945945945946, "grad_norm": 0.9788702612517755, "learning_rate": 1.9520193303417326e-05, "loss": 1.5321, "reason_loss": 0.47413599491119385, "step": 2088, "utility_loss": 1.0579323768615723 }, { "cosine_similarity": 0, "epoch": 1.9468779123951538, "grad_norm": 1.051512164496014, "learning_rate": 1.9502934069727304e-05, "loss": 1.5799, "reason_loss": 0.46996307373046875, "step": 2089, "utility_loss": 1.109897255897522 }, { "cosine_similarity": 0, "epoch": 1.9478098788443616, "grad_norm": 2.153866650932839, "learning_rate": 1.948567483603728e-05, "loss": 1.4849, "reason_loss": 0.4891902208328247, "step": 2090, "utility_loss": 0.9956691861152649 }, { "cosine_similarity": 0, "epoch": 1.9487418452935694, "grad_norm": 0.8861660762011351, "learning_rate": 1.9468415602347256e-05, "loss": 1.4152, "reason_loss": 0.4550704061985016, "step": 2091, "utility_loss": 0.9600827097892761 }, { "cosine_similarity": 0, "epoch": 1.9496738117427772, "grad_norm": 0.9437795744620378, "learning_rate": 1.9451156368657234e-05, "loss": 1.3007, "reason_loss": 0.44901570677757263, "step": 2092, "utility_loss": 0.851706326007843 }, { "cosine_similarity": 0, "epoch": 1.950605778191985, "grad_norm": 1.1308372728722726, "learning_rate": 1.9433897134967208e-05, "loss": 1.7082, "reason_loss": 0.4697953462600708, "step": 2093, "utility_loss": 1.2383787631988525 }, { "cosine_similarity": 0, "epoch": 1.9515377446411928, "grad_norm": 0.9249170095470234, "learning_rate": 1.9416637901277185e-05, "loss": 1.4311, "reason_loss": 0.43306320905685425, "step": 2094, "utility_loss": 0.9980713129043579 }, { "cosine_similarity": 0, "epoch": 1.9524697110904008, "grad_norm": 1.026278875020407, "learning_rate": 1.939937866758716e-05, "loss": 1.3424, "reason_loss": 0.4790419042110443, "step": 2095, "utility_loss": 0.8633272647857666 }, { "cosine_similarity": 0, "epoch": 1.9534016775396086, "grad_norm": 0.9711355059434893, "learning_rate": 1.9382119433897137e-05, "loss": 1.5065, "reason_loss": 0.4697112441062927, "step": 2096, "utility_loss": 1.0367424488067627 }, { "cosine_similarity": 0, "epoch": 1.9543336439888164, "grad_norm": 0.8911854320003065, "learning_rate": 1.9364860200207112e-05, "loss": 1.3999, "reason_loss": 0.5114222764968872, "step": 2097, "utility_loss": 0.8884943127632141 }, { "cosine_similarity": 0, "epoch": 1.9552656104380244, "grad_norm": 1.1257056967597954, "learning_rate": 1.9347600966517086e-05, "loss": 1.4641, "reason_loss": 0.47749030590057373, "step": 2098, "utility_loss": 0.9866072535514832 }, { "cosine_similarity": 0, "epoch": 1.9561975768872322, "grad_norm": 0.8912694121390826, "learning_rate": 1.9330341732827064e-05, "loss": 1.3487, "reason_loss": 0.47725874185562134, "step": 2099, "utility_loss": 0.8714070320129395 }, { "cosine_similarity": 0, "epoch": 1.95712954333644, "grad_norm": 0.980627609064125, "learning_rate": 1.9313082499137038e-05, "loss": 1.5008, "reason_loss": 0.4765743017196655, "step": 2100, "utility_loss": 1.024223804473877 }, { "cosine_similarity": 0, "epoch": 1.9580615097856477, "grad_norm": 0.9866968942368798, "learning_rate": 1.9295823265447016e-05, "loss": 1.5787, "reason_loss": 0.4600323438644409, "step": 2101, "utility_loss": 1.1186634302139282 }, { "cosine_similarity": 0, "epoch": 1.9589934762348555, "grad_norm": 1.177418104765706, "learning_rate": 1.927856403175699e-05, "loss": 1.3331, "reason_loss": 0.452772855758667, "step": 2102, "utility_loss": 0.8802986145019531 }, { "cosine_similarity": 0, "epoch": 1.9599254426840633, "grad_norm": 0.9438866575417251, "learning_rate": 1.9261304798066968e-05, "loss": 1.5751, "reason_loss": 0.46139398217201233, "step": 2103, "utility_loss": 1.113656997680664 }, { "cosine_similarity": 0, "epoch": 1.9608574091332711, "grad_norm": 0.9944042909404548, "learning_rate": 1.9244045564376942e-05, "loss": 1.6398, "reason_loss": 0.5188644528388977, "step": 2104, "utility_loss": 1.1208980083465576 }, { "cosine_similarity": 0, "epoch": 1.961789375582479, "grad_norm": 1.037127171959645, "learning_rate": 1.922678633068692e-05, "loss": 1.681, "reason_loss": 0.47595104575157166, "step": 2105, "utility_loss": 1.2050552368164062 }, { "cosine_similarity": 0, "epoch": 1.962721342031687, "grad_norm": 1.021855882353481, "learning_rate": 1.9209527096996894e-05, "loss": 1.391, "reason_loss": 0.4679807424545288, "step": 2106, "utility_loss": 0.9230066537857056 }, { "cosine_similarity": 0, "epoch": 1.9636533084808947, "grad_norm": 0.9279766734843179, "learning_rate": 1.919226786330687e-05, "loss": 1.4145, "reason_loss": 0.45408737659454346, "step": 2107, "utility_loss": 0.9603894948959351 }, { "cosine_similarity": 0, "epoch": 1.9645852749301025, "grad_norm": 0.9930843686994123, "learning_rate": 1.9175008629616846e-05, "loss": 1.8549, "reason_loss": 0.5244609117507935, "step": 2108, "utility_loss": 1.330396294593811 }, { "cosine_similarity": 0, "epoch": 1.9655172413793105, "grad_norm": 0.9583902186538056, "learning_rate": 1.915774939592682e-05, "loss": 1.3487, "reason_loss": 0.4710903465747833, "step": 2109, "utility_loss": 0.8776587247848511 }, { "cosine_similarity": 0, "epoch": 1.9664492078285183, "grad_norm": 1.1865369314212426, "learning_rate": 1.9140490162236798e-05, "loss": 1.4536, "reason_loss": 0.48693156242370605, "step": 2110, "utility_loss": 0.9666311740875244 }, { "cosine_similarity": 0, "epoch": 1.967381174277726, "grad_norm": 1.0576673018133596, "learning_rate": 1.9123230928546772e-05, "loss": 1.463, "reason_loss": 0.504486083984375, "step": 2111, "utility_loss": 0.9584907293319702 }, { "cosine_similarity": 0, "epoch": 1.9683131407269339, "grad_norm": 1.1266289381729118, "learning_rate": 1.910597169485675e-05, "loss": 1.7301, "reason_loss": 0.4849018454551697, "step": 2112, "utility_loss": 1.2451660633087158 }, { "cosine_similarity": 0, "epoch": 1.9692451071761417, "grad_norm": 1.1518914963727596, "learning_rate": 1.9088712461166724e-05, "loss": 1.5615, "reason_loss": 0.4677965044975281, "step": 2113, "utility_loss": 1.0937237739562988 }, { "cosine_similarity": 0, "epoch": 1.9701770736253494, "grad_norm": 1.2740117476937036, "learning_rate": 1.9071453227476702e-05, "loss": 1.3366, "reason_loss": 0.4803486168384552, "step": 2114, "utility_loss": 0.8562489151954651 }, { "cosine_similarity": 0, "epoch": 1.9711090400745572, "grad_norm": 0.9159299325917836, "learning_rate": 1.9054193993786676e-05, "loss": 1.334, "reason_loss": 0.4847245514392853, "step": 2115, "utility_loss": 0.8493164777755737 }, { "cosine_similarity": 0, "epoch": 1.972041006523765, "grad_norm": 0.9635850706905977, "learning_rate": 1.9036934760096654e-05, "loss": 1.5176, "reason_loss": 0.4795299172401428, "step": 2116, "utility_loss": 1.0380498170852661 }, { "cosine_similarity": 0, "epoch": 1.972972972972973, "grad_norm": 1.0260594586320049, "learning_rate": 1.9019675526406628e-05, "loss": 1.2653, "reason_loss": 0.47439974546432495, "step": 2117, "utility_loss": 0.7909030914306641 }, { "cosine_similarity": 0, "epoch": 1.9739049394221808, "grad_norm": 1.0622124436399887, "learning_rate": 1.9002416292716606e-05, "loss": 1.2725, "reason_loss": 0.49252602458000183, "step": 2118, "utility_loss": 0.7799557447433472 }, { "cosine_similarity": 0, "epoch": 1.9748369058713886, "grad_norm": 1.1211637509033903, "learning_rate": 1.898515705902658e-05, "loss": 1.6267, "reason_loss": 0.4651235342025757, "step": 2119, "utility_loss": 1.161562442779541 }, { "cosine_similarity": 0, "epoch": 1.9757688723205966, "grad_norm": 1.1180815601815774, "learning_rate": 1.8967897825336554e-05, "loss": 1.5453, "reason_loss": 0.4366310238838196, "step": 2120, "utility_loss": 1.1086738109588623 }, { "cosine_similarity": 0, "epoch": 1.9767008387698044, "grad_norm": 1.226318023696733, "learning_rate": 1.8950638591646532e-05, "loss": 1.502, "reason_loss": 0.47968798875808716, "step": 2121, "utility_loss": 1.0223534107208252 }, { "cosine_similarity": 0, "epoch": 1.9776328052190122, "grad_norm": 1.1854710760963707, "learning_rate": 1.8933379357956506e-05, "loss": 1.5655, "reason_loss": 0.4768142104148865, "step": 2122, "utility_loss": 1.088639736175537 }, { "cosine_similarity": 0, "epoch": 1.97856477166822, "grad_norm": 1.0509985883380415, "learning_rate": 1.8916120124266484e-05, "loss": 1.7033, "reason_loss": 0.47868093848228455, "step": 2123, "utility_loss": 1.224611520767212 }, { "cosine_similarity": 0, "epoch": 1.9794967381174278, "grad_norm": 1.104829809436013, "learning_rate": 1.8898860890576458e-05, "loss": 1.5023, "reason_loss": 0.471682608127594, "step": 2124, "utility_loss": 1.0305901765823364 }, { "cosine_similarity": 0, "epoch": 1.9804287045666356, "grad_norm": 1.0667964170886646, "learning_rate": 1.8881601656886436e-05, "loss": 1.6779, "reason_loss": 0.4391416311264038, "step": 2125, "utility_loss": 1.2387721538543701 }, { "cosine_similarity": 0, "epoch": 1.9813606710158433, "grad_norm": 1.0055662238898684, "learning_rate": 1.886434242319641e-05, "loss": 1.5193, "reason_loss": 0.44925767183303833, "step": 2126, "utility_loss": 1.0700204372406006 }, { "cosine_similarity": 0, "epoch": 1.9822926374650511, "grad_norm": 1.2129641136679143, "learning_rate": 1.8847083189506388e-05, "loss": 1.6121, "reason_loss": 0.4742031991481781, "step": 2127, "utility_loss": 1.1379152536392212 }, { "cosine_similarity": 0, "epoch": 1.983224603914259, "grad_norm": 0.9371094845326189, "learning_rate": 1.8829823955816362e-05, "loss": 1.7589, "reason_loss": 0.493163526058197, "step": 2128, "utility_loss": 1.265768051147461 }, { "cosine_similarity": 0, "epoch": 1.984156570363467, "grad_norm": 0.8919386070040024, "learning_rate": 1.881256472212634e-05, "loss": 1.5145, "reason_loss": 0.48921215534210205, "step": 2129, "utility_loss": 1.0252639055252075 }, { "cosine_similarity": 0, "epoch": 1.9850885368126747, "grad_norm": 0.9087139654454407, "learning_rate": 1.8795305488436314e-05, "loss": 1.5343, "reason_loss": 0.4755018353462219, "step": 2130, "utility_loss": 1.0587834119796753 }, { "cosine_similarity": 0, "epoch": 1.9860205032618827, "grad_norm": 0.8868213344170874, "learning_rate": 1.877804625474629e-05, "loss": 1.2768, "reason_loss": 0.4589500427246094, "step": 2131, "utility_loss": 0.8178997039794922 }, { "cosine_similarity": 0, "epoch": 1.9869524697110905, "grad_norm": 0.9291408553369064, "learning_rate": 1.8760787021056266e-05, "loss": 1.4472, "reason_loss": 0.46034497022628784, "step": 2132, "utility_loss": 0.9869015216827393 }, { "cosine_similarity": 0, "epoch": 1.9878844361602983, "grad_norm": 0.9628832460703358, "learning_rate": 1.874352778736624e-05, "loss": 1.338, "reason_loss": 0.4376210868358612, "step": 2133, "utility_loss": 0.9003338813781738 }, { "cosine_similarity": 0, "epoch": 1.988816402609506, "grad_norm": 0.9291038137990515, "learning_rate": 1.8726268553676218e-05, "loss": 1.364, "reason_loss": 0.45525145530700684, "step": 2134, "utility_loss": 0.9087613224983215 }, { "cosine_similarity": 0, "epoch": 1.9897483690587139, "grad_norm": 0.9312527610438633, "learning_rate": 1.8709009319986192e-05, "loss": 1.3586, "reason_loss": 0.47323527932167053, "step": 2135, "utility_loss": 0.8853641748428345 }, { "cosine_similarity": 0, "epoch": 1.9906803355079217, "grad_norm": 0.9659072534598553, "learning_rate": 1.869175008629617e-05, "loss": 1.4878, "reason_loss": 0.4469730854034424, "step": 2136, "utility_loss": 1.0408204793930054 }, { "cosine_similarity": 0, "epoch": 1.9916123019571295, "grad_norm": 1.2152801794953154, "learning_rate": 1.8674490852606144e-05, "loss": 1.3717, "reason_loss": 0.4847300350666046, "step": 2137, "utility_loss": 0.8869904279708862 }, { "cosine_similarity": 0, "epoch": 1.9925442684063372, "grad_norm": 0.869540733425527, "learning_rate": 1.8657231618916122e-05, "loss": 1.3683, "reason_loss": 0.4622262120246887, "step": 2138, "utility_loss": 0.9060938954353333 }, { "cosine_similarity": 0, "epoch": 1.993476234855545, "grad_norm": 0.9237280460176982, "learning_rate": 1.86399723852261e-05, "loss": 1.3528, "reason_loss": 0.47054147720336914, "step": 2139, "utility_loss": 0.8822401762008667 }, { "cosine_similarity": 0, "epoch": 1.994408201304753, "grad_norm": 0.9186727974053503, "learning_rate": 1.8622713151536074e-05, "loss": 1.4015, "reason_loss": 0.490167498588562, "step": 2140, "utility_loss": 0.9113705158233643 }, { "cosine_similarity": 0, "epoch": 1.9953401677539608, "grad_norm": 1.084738599407471, "learning_rate": 1.8605453917846048e-05, "loss": 1.4508, "reason_loss": 0.456941694021225, "step": 2141, "utility_loss": 0.9938838481903076 }, { "cosine_similarity": 0, "epoch": 1.9962721342031688, "grad_norm": 0.9416691749700704, "learning_rate": 1.8588194684156023e-05, "loss": 1.3396, "reason_loss": 0.46718281507492065, "step": 2142, "utility_loss": 0.8723909854888916 }, { "cosine_similarity": 0, "epoch": 1.9972041006523766, "grad_norm": 1.160383987219321, "learning_rate": 1.8570935450466e-05, "loss": 1.4055, "reason_loss": 0.44939059019088745, "step": 2143, "utility_loss": 0.9561429023742676 }, { "cosine_similarity": 0, "epoch": 1.9981360671015844, "grad_norm": 0.9328958481159393, "learning_rate": 1.8553676216775975e-05, "loss": 1.1774, "reason_loss": 0.5085748434066772, "step": 2144, "utility_loss": 0.6688075661659241 }, { "cosine_similarity": 0, "epoch": 1.9990680335507922, "grad_norm": 0.9879130865807055, "learning_rate": 1.8536416983085952e-05, "loss": 1.2515, "reason_loss": 0.49458131194114685, "step": 2145, "utility_loss": 0.7568911910057068 }, { "cosine_similarity": 0, "epoch": 2.0, "grad_norm": 0.977438459785478, "learning_rate": 1.8519157749395927e-05, "loss": 1.2696, "reason_loss": 0.44132518768310547, "step": 2146, "utility_loss": 0.828322172164917 }, { "cosine_similarity": 0, "epoch": 2.000931966449208, "grad_norm": 0.987505645836776, "learning_rate": 1.8501898515705904e-05, "loss": 1.4626, "reason_loss": 0.45763272047042847, "step": 2147, "utility_loss": 1.0049431324005127 }, { "cosine_similarity": 0, "epoch": 2.0018639328984156, "grad_norm": 1.138600627230965, "learning_rate": 1.848463928201588e-05, "loss": 1.1739, "reason_loss": 0.44093671441078186, "step": 2148, "utility_loss": 0.7329667210578918 }, { "cosine_similarity": 0, "epoch": 2.0027958993476234, "grad_norm": 0.9500926466981623, "learning_rate": 1.8467380048325856e-05, "loss": 1.1884, "reason_loss": 0.4454275369644165, "step": 2149, "utility_loss": 0.7429418563842773 }, { "cosine_similarity": 0, "epoch": 2.003727865796831, "grad_norm": 0.7965434280152116, "learning_rate": 1.8450120814635834e-05, "loss": 1.1052, "reason_loss": 0.4426797032356262, "step": 2150, "utility_loss": 0.662494421005249 }, { "cosine_similarity": 0, "epoch": 2.004659832246039, "grad_norm": 0.8576947174126217, "learning_rate": 1.8432861580945808e-05, "loss": 1.111, "reason_loss": 0.4858151078224182, "step": 2151, "utility_loss": 0.6252318620681763 }, { "cosine_similarity": 0, "epoch": 2.005591798695247, "grad_norm": 0.9377560171158192, "learning_rate": 1.8415602347255782e-05, "loss": 0.9092, "reason_loss": 0.45278066396713257, "step": 2152, "utility_loss": 0.45639657974243164 }, { "cosine_similarity": 0, "epoch": 2.006523765144455, "grad_norm": 0.9991875165319856, "learning_rate": 1.8398343113565757e-05, "loss": 0.9847, "reason_loss": 0.4958372414112091, "step": 2153, "utility_loss": 0.488839715719223 }, { "cosine_similarity": 0, "epoch": 2.0074557315936628, "grad_norm": 0.9492638314903171, "learning_rate": 1.8381083879875734e-05, "loss": 0.9363, "reason_loss": 0.5060995817184448, "step": 2154, "utility_loss": 0.43017685413360596 }, { "cosine_similarity": 0, "epoch": 2.0083876980428705, "grad_norm": 0.8709189612953279, "learning_rate": 1.836382464618571e-05, "loss": 1.0856, "reason_loss": 0.4531194567680359, "step": 2155, "utility_loss": 0.63245689868927 }, { "cosine_similarity": 0, "epoch": 2.0093196644920783, "grad_norm": 1.074681828996023, "learning_rate": 1.8346565412495686e-05, "loss": 1.1411, "reason_loss": 0.4849933385848999, "step": 2156, "utility_loss": 0.6561447381973267 }, { "cosine_similarity": 0, "epoch": 2.010251630941286, "grad_norm": 1.0295513447726643, "learning_rate": 1.832930617880566e-05, "loss": 1.0304, "reason_loss": 0.47897446155548096, "step": 2157, "utility_loss": 0.5514212846755981 }, { "cosine_similarity": 0, "epoch": 2.011183597390494, "grad_norm": 0.9042417893757312, "learning_rate": 1.831204694511564e-05, "loss": 1.0729, "reason_loss": 0.4480445384979248, "step": 2158, "utility_loss": 0.6248080730438232 }, { "cosine_similarity": 0, "epoch": 2.0121155638397017, "grad_norm": 0.8938394546480399, "learning_rate": 1.8294787711425613e-05, "loss": 1.0875, "reason_loss": 0.4572374224662781, "step": 2159, "utility_loss": 0.6302289366722107 }, { "cosine_similarity": 0, "epoch": 2.0130475302889095, "grad_norm": 0.901074239996485, "learning_rate": 1.827752847773559e-05, "loss": 0.9112, "reason_loss": 0.4683634042739868, "step": 2160, "utility_loss": 0.4428471326828003 }, { "cosine_similarity": 0, "epoch": 2.0139794967381173, "grad_norm": 0.8869828962985745, "learning_rate": 1.8260269244045568e-05, "loss": 1.2025, "reason_loss": 0.49118828773498535, "step": 2161, "utility_loss": 0.7113310098648071 }, { "cosine_similarity": 0, "epoch": 2.014911463187325, "grad_norm": 0.9528386099895361, "learning_rate": 1.8243010010355542e-05, "loss": 0.7864, "reason_loss": 0.4397110342979431, "step": 2162, "utility_loss": 0.3467114567756653 }, { "cosine_similarity": 0, "epoch": 2.0158434296365333, "grad_norm": 0.9110452238214566, "learning_rate": 1.8225750776665517e-05, "loss": 1.1965, "reason_loss": 0.46204784512519836, "step": 2163, "utility_loss": 0.7344354391098022 }, { "cosine_similarity": 0, "epoch": 2.016775396085741, "grad_norm": 0.972568152358346, "learning_rate": 1.820849154297549e-05, "loss": 1.287, "reason_loss": 0.4522591829299927, "step": 2164, "utility_loss": 0.834707498550415 }, { "cosine_similarity": 0, "epoch": 2.017707362534949, "grad_norm": 0.87515592018238, "learning_rate": 1.819123230928547e-05, "loss": 1.0267, "reason_loss": 0.4825829863548279, "step": 2165, "utility_loss": 0.544140636920929 }, { "cosine_similarity": 0, "epoch": 2.0186393289841567, "grad_norm": 0.9129981304502023, "learning_rate": 1.8173973075595443e-05, "loss": 1.0044, "reason_loss": 0.44952404499053955, "step": 2166, "utility_loss": 0.5548553466796875 }, { "cosine_similarity": 0, "epoch": 2.0195712954333644, "grad_norm": 1.0571500935471325, "learning_rate": 1.815671384190542e-05, "loss": 1.5818, "reason_loss": 0.480996698141098, "step": 2167, "utility_loss": 1.1007705926895142 }, { "cosine_similarity": 0, "epoch": 2.0205032618825722, "grad_norm": 0.9768452062940713, "learning_rate": 1.8139454608215395e-05, "loss": 1.1128, "reason_loss": 0.4916137158870697, "step": 2168, "utility_loss": 0.6211755275726318 }, { "cosine_similarity": 0, "epoch": 2.02143522833178, "grad_norm": 0.8719078220597041, "learning_rate": 1.8122195374525372e-05, "loss": 1.0318, "reason_loss": 0.4632888436317444, "step": 2169, "utility_loss": 0.5684913396835327 }, { "cosine_similarity": 0, "epoch": 2.022367194780988, "grad_norm": 1.2068427434858255, "learning_rate": 1.810493614083535e-05, "loss": 1.3084, "reason_loss": 0.45464301109313965, "step": 2170, "utility_loss": 0.8537102937698364 }, { "cosine_similarity": 0, "epoch": 2.0232991612301956, "grad_norm": 0.8925162322596745, "learning_rate": 1.8087676907145324e-05, "loss": 1.0272, "reason_loss": 0.4595727324485779, "step": 2171, "utility_loss": 0.5676758885383606 }, { "cosine_similarity": 0, "epoch": 2.0242311276794034, "grad_norm": 0.867426465622719, "learning_rate": 1.8070417673455302e-05, "loss": 1.07, "reason_loss": 0.4687776565551758, "step": 2172, "utility_loss": 0.6012559533119202 }, { "cosine_similarity": 0, "epoch": 2.025163094128611, "grad_norm": 0.7541901648133846, "learning_rate": 1.8053158439765276e-05, "loss": 1.0841, "reason_loss": 0.4740552306175232, "step": 2173, "utility_loss": 0.6100537776947021 }, { "cosine_similarity": 0, "epoch": 2.0260950605778194, "grad_norm": 0.8959372415733764, "learning_rate": 1.803589920607525e-05, "loss": 1.0394, "reason_loss": 0.45186811685562134, "step": 2174, "utility_loss": 0.5875178575515747 }, { "cosine_similarity": 0, "epoch": 2.027027027027027, "grad_norm": 1.0080961176353718, "learning_rate": 1.8018639972385225e-05, "loss": 1.3749, "reason_loss": 0.43757739663124084, "step": 2175, "utility_loss": 0.9373393654823303 }, { "cosine_similarity": 0, "epoch": 2.027958993476235, "grad_norm": 0.9694061209572333, "learning_rate": 1.8001380738695203e-05, "loss": 1.0707, "reason_loss": 0.4481326639652252, "step": 2176, "utility_loss": 0.6225467324256897 }, { "cosine_similarity": 0, "epoch": 2.0288909599254428, "grad_norm": 0.9867755257384042, "learning_rate": 1.7984121505005177e-05, "loss": 1.2535, "reason_loss": 0.47676825523376465, "step": 2177, "utility_loss": 0.7767211198806763 }, { "cosine_similarity": 0, "epoch": 2.0298229263746506, "grad_norm": 1.0354462552013433, "learning_rate": 1.7966862271315155e-05, "loss": 1.0249, "reason_loss": 0.4767334759235382, "step": 2178, "utility_loss": 0.5482074618339539 }, { "cosine_similarity": 0, "epoch": 2.0307548928238583, "grad_norm": 0.996437937935419, "learning_rate": 1.794960303762513e-05, "loss": 1.0364, "reason_loss": 0.4805822968482971, "step": 2179, "utility_loss": 0.5558151006698608 }, { "cosine_similarity": 0, "epoch": 2.031686859273066, "grad_norm": 0.728203884868348, "learning_rate": 1.7932343803935107e-05, "loss": 0.8868, "reason_loss": 0.47491514682769775, "step": 2180, "utility_loss": 0.4118815064430237 }, { "cosine_similarity": 0, "epoch": 2.032618825722274, "grad_norm": 0.8752509463977516, "learning_rate": 1.7915084570245084e-05, "loss": 1.0083, "reason_loss": 0.4829597473144531, "step": 2181, "utility_loss": 0.525373637676239 }, { "cosine_similarity": 0, "epoch": 2.0335507921714817, "grad_norm": 0.7293475284050798, "learning_rate": 1.789782533655506e-05, "loss": 0.7727, "reason_loss": 0.4757387340068817, "step": 2182, "utility_loss": 0.2969152331352234 }, { "cosine_similarity": 0, "epoch": 2.0344827586206895, "grad_norm": 1.2545271787730232, "learning_rate": 1.7880566102865033e-05, "loss": 1.2936, "reason_loss": 0.4718535542488098, "step": 2183, "utility_loss": 0.8217315673828125 }, { "cosine_similarity": 0, "epoch": 2.0354147250698973, "grad_norm": 1.850207252249645, "learning_rate": 1.786330686917501e-05, "loss": 1.2866, "reason_loss": 0.4501856863498688, "step": 2184, "utility_loss": 0.8364455103874207 }, { "cosine_similarity": 0, "epoch": 2.0363466915191055, "grad_norm": 0.9895084820504093, "learning_rate": 1.7846047635484985e-05, "loss": 0.9309, "reason_loss": 0.42500633001327515, "step": 2185, "utility_loss": 0.5058966279029846 }, { "cosine_similarity": 0, "epoch": 2.0372786579683133, "grad_norm": 1.0367112986233884, "learning_rate": 1.782878840179496e-05, "loss": 1.0645, "reason_loss": 0.4526268243789673, "step": 2186, "utility_loss": 0.611908495426178 }, { "cosine_similarity": 0, "epoch": 2.038210624417521, "grad_norm": 0.9086775819104408, "learning_rate": 1.7811529168104937e-05, "loss": 1.2068, "reason_loss": 0.48520833253860474, "step": 2187, "utility_loss": 0.7216154336929321 }, { "cosine_similarity": 0, "epoch": 2.039142590866729, "grad_norm": 1.0245618179629812, "learning_rate": 1.779426993441491e-05, "loss": 1.2327, "reason_loss": 0.4672093391418457, "step": 2188, "utility_loss": 0.765533447265625 }, { "cosine_similarity": 0, "epoch": 2.0400745573159367, "grad_norm": 0.7556410179243476, "learning_rate": 1.777701070072489e-05, "loss": 0.9284, "reason_loss": 0.44914937019348145, "step": 2189, "utility_loss": 0.47926634550094604 }, { "cosine_similarity": 0, "epoch": 2.0410065237651445, "grad_norm": 1.0838329416937096, "learning_rate": 1.7759751467034866e-05, "loss": 1.1354, "reason_loss": 0.4670601487159729, "step": 2190, "utility_loss": 0.6683265566825867 }, { "cosine_similarity": 0, "epoch": 2.0419384902143523, "grad_norm": 0.9876309142212812, "learning_rate": 1.774249223334484e-05, "loss": 1.1379, "reason_loss": 0.5013779997825623, "step": 2191, "utility_loss": 0.636570930480957 }, { "cosine_similarity": 0, "epoch": 2.04287045666356, "grad_norm": 0.8964799692271499, "learning_rate": 1.772523299965482e-05, "loss": 1.4066, "reason_loss": 0.47529226541519165, "step": 2192, "utility_loss": 0.9313281774520874 }, { "cosine_similarity": 0, "epoch": 2.043802423112768, "grad_norm": 0.8411177654819316, "learning_rate": 1.7707973765964793e-05, "loss": 0.9326, "reason_loss": 0.4540637135505676, "step": 2193, "utility_loss": 0.47857001423835754 }, { "cosine_similarity": 0, "epoch": 2.0447343895619756, "grad_norm": 0.8338037697934613, "learning_rate": 1.7690714532274767e-05, "loss": 1.2505, "reason_loss": 0.4692373275756836, "step": 2194, "utility_loss": 0.7812267541885376 }, { "cosine_similarity": 0, "epoch": 2.0456663560111834, "grad_norm": 1.1154925339210844, "learning_rate": 1.767345529858474e-05, "loss": 1.3583, "reason_loss": 0.5153582096099854, "step": 2195, "utility_loss": 0.8429876565933228 }, { "cosine_similarity": 0, "epoch": 2.0465983224603916, "grad_norm": 0.8651056778975275, "learning_rate": 1.765619606489472e-05, "loss": 1.0091, "reason_loss": 0.49056971073150635, "step": 2196, "utility_loss": 0.5185017585754395 }, { "cosine_similarity": 0, "epoch": 2.0475302889095994, "grad_norm": 0.9066462198448084, "learning_rate": 1.7638936831204693e-05, "loss": 1.0559, "reason_loss": 0.44704669713974, "step": 2197, "utility_loss": 0.6088922619819641 }, { "cosine_similarity": 0, "epoch": 2.048462255358807, "grad_norm": 0.8380824475179011, "learning_rate": 1.762167759751467e-05, "loss": 0.9206, "reason_loss": 0.49280616641044617, "step": 2198, "utility_loss": 0.42780011892318726 }, { "cosine_similarity": 0, "epoch": 2.049394221808015, "grad_norm": 0.9123855626696189, "learning_rate": 1.7604418363824645e-05, "loss": 1.0837, "reason_loss": 0.5018265247344971, "step": 2199, "utility_loss": 0.5818794965744019 }, { "cosine_similarity": 0, "epoch": 2.050326188257223, "grad_norm": 1.089560590460366, "learning_rate": 1.7587159130134623e-05, "loss": 1.3696, "reason_loss": 0.4594878554344177, "step": 2200, "utility_loss": 0.9100881218910217 }, { "cosine_similarity": 0, "epoch": 2.0512581547064306, "grad_norm": 0.8646750153575533, "learning_rate": 1.75698998964446e-05, "loss": 0.8105, "reason_loss": 0.4586130976676941, "step": 2201, "utility_loss": 0.3518942892551422 }, { "cosine_similarity": 0, "epoch": 2.0521901211556384, "grad_norm": 1.0355054392010288, "learning_rate": 1.7552640662754575e-05, "loss": 1.1587, "reason_loss": 0.48258161544799805, "step": 2202, "utility_loss": 0.6761431694030762 }, { "cosine_similarity": 0, "epoch": 2.053122087604846, "grad_norm": 0.7728728713497027, "learning_rate": 1.7535381429064553e-05, "loss": 1.0324, "reason_loss": 0.4624454975128174, "step": 2203, "utility_loss": 0.569956362247467 }, { "cosine_similarity": 0, "epoch": 2.054054054054054, "grad_norm": 0.8835126417529487, "learning_rate": 1.7518122195374527e-05, "loss": 1.0471, "reason_loss": 0.44256168603897095, "step": 2204, "utility_loss": 0.6045264005661011 }, { "cosine_similarity": 0, "epoch": 2.0549860205032617, "grad_norm": 0.9390154242700786, "learning_rate": 1.75008629616845e-05, "loss": 0.9619, "reason_loss": 0.4642190933227539, "step": 2205, "utility_loss": 0.4976484477519989 }, { "cosine_similarity": 0, "epoch": 2.0559179869524695, "grad_norm": 0.8478473435132666, "learning_rate": 1.7483603727994475e-05, "loss": 1.1146, "reason_loss": 0.4842016398906708, "step": 2206, "utility_loss": 0.6304364204406738 }, { "cosine_similarity": 0, "epoch": 2.0568499534016778, "grad_norm": 1.036161196507278, "learning_rate": 1.7466344494304453e-05, "loss": 1.1405, "reason_loss": 0.4673292338848114, "step": 2207, "utility_loss": 0.6731720566749573 }, { "cosine_similarity": 0, "epoch": 2.0577819198508855, "grad_norm": 1.211107994797292, "learning_rate": 1.7449085260614427e-05, "loss": 1.1256, "reason_loss": 0.45307213068008423, "step": 2208, "utility_loss": 0.6725557446479797 }, { "cosine_similarity": 0, "epoch": 2.0587138863000933, "grad_norm": 0.7594371419936243, "learning_rate": 1.7431826026924405e-05, "loss": 1.0664, "reason_loss": 0.4706105589866638, "step": 2209, "utility_loss": 0.5957568883895874 }, { "cosine_similarity": 0, "epoch": 2.059645852749301, "grad_norm": 0.8120695272488956, "learning_rate": 1.7414566793234383e-05, "loss": 1.1411, "reason_loss": 0.4481639862060547, "step": 2210, "utility_loss": 0.6929574012756348 }, { "cosine_similarity": 0, "epoch": 2.060577819198509, "grad_norm": 1.1098862869935775, "learning_rate": 1.7397307559544357e-05, "loss": 1.185, "reason_loss": 0.45639538764953613, "step": 2211, "utility_loss": 0.7286094427108765 }, { "cosine_similarity": 0, "epoch": 2.0615097856477167, "grad_norm": 0.75359618862621, "learning_rate": 1.7380048325854335e-05, "loss": 0.907, "reason_loss": 0.44745421409606934, "step": 2212, "utility_loss": 0.4595015347003937 }, { "cosine_similarity": 0, "epoch": 2.0624417520969245, "grad_norm": 1.098859443509886, "learning_rate": 1.736278909216431e-05, "loss": 0.9859, "reason_loss": 0.48260390758514404, "step": 2213, "utility_loss": 0.5033270120620728 }, { "cosine_similarity": 0, "epoch": 2.0633737185461323, "grad_norm": 0.9516324105469428, "learning_rate": 1.7345529858474287e-05, "loss": 1.0083, "reason_loss": 0.4505973160266876, "step": 2214, "utility_loss": 0.5576982498168945 }, { "cosine_similarity": 0, "epoch": 2.06430568499534, "grad_norm": 1.0368206201907464, "learning_rate": 1.732827062478426e-05, "loss": 1.168, "reason_loss": 0.48961928486824036, "step": 2215, "utility_loss": 0.6783583760261536 }, { "cosine_similarity": 0, "epoch": 2.065237651444548, "grad_norm": 0.8862055345192583, "learning_rate": 1.7311011391094235e-05, "loss": 0.903, "reason_loss": 0.4271620810031891, "step": 2216, "utility_loss": 0.47579750418663025 }, { "cosine_similarity": 0, "epoch": 2.0661696178937556, "grad_norm": 0.9551224644969467, "learning_rate": 1.729375215740421e-05, "loss": 0.9657, "reason_loss": 0.48455673456192017, "step": 2217, "utility_loss": 0.48114100098609924 }, { "cosine_similarity": 0, "epoch": 2.0671015843429634, "grad_norm": 0.9399855020958898, "learning_rate": 1.7276492923714187e-05, "loss": 1.2053, "reason_loss": 0.43152618408203125, "step": 2218, "utility_loss": 0.7737704515457153 }, { "cosine_similarity": 0, "epoch": 2.0680335507921717, "grad_norm": 1.0415382940085316, "learning_rate": 1.725923369002416e-05, "loss": 1.1199, "reason_loss": 0.4672680199146271, "step": 2219, "utility_loss": 0.6526516675949097 }, { "cosine_similarity": 0, "epoch": 2.0689655172413794, "grad_norm": 0.9008297393626533, "learning_rate": 1.724197445633414e-05, "loss": 0.8657, "reason_loss": 0.45896679162979126, "step": 2220, "utility_loss": 0.40676504373550415 }, { "cosine_similarity": 0, "epoch": 2.0698974836905872, "grad_norm": 0.9884089106533894, "learning_rate": 1.7224715222644117e-05, "loss": 1.2775, "reason_loss": 0.45366981625556946, "step": 2221, "utility_loss": 0.8238683938980103 }, { "cosine_similarity": 0, "epoch": 2.070829450139795, "grad_norm": 1.0136580187904298, "learning_rate": 1.720745598895409e-05, "loss": 1.1262, "reason_loss": 0.4613618552684784, "step": 2222, "utility_loss": 0.6648620367050171 }, { "cosine_similarity": 0, "epoch": 2.071761416589003, "grad_norm": 0.9400992235830807, "learning_rate": 1.719019675526407e-05, "loss": 1.0323, "reason_loss": 0.4580649435520172, "step": 2223, "utility_loss": 0.574246346950531 }, { "cosine_similarity": 0, "epoch": 2.0726933830382106, "grad_norm": 1.0723785496946945, "learning_rate": 1.7172937521574043e-05, "loss": 1.2946, "reason_loss": 0.4411523640155792, "step": 2224, "utility_loss": 0.8534587621688843 }, { "cosine_similarity": 0, "epoch": 2.0736253494874184, "grad_norm": 0.9299993939321459, "learning_rate": 1.715567828788402e-05, "loss": 1.0718, "reason_loss": 0.4427436888217926, "step": 2225, "utility_loss": 0.6290700435638428 }, { "cosine_similarity": 0, "epoch": 2.074557315936626, "grad_norm": 0.9521346400931865, "learning_rate": 1.7138419054193995e-05, "loss": 1.2312, "reason_loss": 0.4227423667907715, "step": 2226, "utility_loss": 0.8085000514984131 }, { "cosine_similarity": 0, "epoch": 2.075489282385834, "grad_norm": 1.1834359383394273, "learning_rate": 1.712115982050397e-05, "loss": 1.1002, "reason_loss": 0.4843979775905609, "step": 2227, "utility_loss": 0.6157922744750977 }, { "cosine_similarity": 0, "epoch": 2.0764212488350418, "grad_norm": 0.8268625734897292, "learning_rate": 1.7103900586813944e-05, "loss": 1.1462, "reason_loss": 0.45914313197135925, "step": 2228, "utility_loss": 0.6870840787887573 }, { "cosine_similarity": 0, "epoch": 2.0773532152842495, "grad_norm": 1.2065495304893241, "learning_rate": 1.708664135312392e-05, "loss": 1.0507, "reason_loss": 0.4587249159812927, "step": 2229, "utility_loss": 0.5919592976570129 }, { "cosine_similarity": 0, "epoch": 2.0782851817334578, "grad_norm": 0.8571404799707325, "learning_rate": 1.70693821194339e-05, "loss": 0.9359, "reason_loss": 0.44971317052841187, "step": 2230, "utility_loss": 0.4862174391746521 }, { "cosine_similarity": 0, "epoch": 2.0792171481826656, "grad_norm": 0.8669396912700144, "learning_rate": 1.7052122885743873e-05, "loss": 0.9531, "reason_loss": 0.5060844421386719, "step": 2231, "utility_loss": 0.4470415711402893 }, { "cosine_similarity": 0, "epoch": 2.0801491146318734, "grad_norm": 1.0821113395269575, "learning_rate": 1.703486365205385e-05, "loss": 1.1204, "reason_loss": 0.4413972496986389, "step": 2232, "utility_loss": 0.6790134310722351 }, { "cosine_similarity": 0, "epoch": 2.081081081081081, "grad_norm": 0.8993900291316349, "learning_rate": 1.7017604418363825e-05, "loss": 0.7988, "reason_loss": 0.4464494585990906, "step": 2233, "utility_loss": 0.35231417417526245 }, { "cosine_similarity": 0, "epoch": 2.082013047530289, "grad_norm": 0.9195873577381843, "learning_rate": 1.7000345184673803e-05, "loss": 1.0947, "reason_loss": 0.4525870084762573, "step": 2234, "utility_loss": 0.6420902013778687 }, { "cosine_similarity": 0, "epoch": 2.0829450139794967, "grad_norm": 0.9210442934580467, "learning_rate": 1.6983085950983777e-05, "loss": 1.0526, "reason_loss": 0.4399559497833252, "step": 2235, "utility_loss": 0.6126547455787659 }, { "cosine_similarity": 0, "epoch": 2.0838769804287045, "grad_norm": 1.1395801739905185, "learning_rate": 1.6965826717293755e-05, "loss": 1.0709, "reason_loss": 0.4673312306404114, "step": 2236, "utility_loss": 0.6035674810409546 }, { "cosine_similarity": 0, "epoch": 2.0848089468779123, "grad_norm": 0.9322966988332322, "learning_rate": 1.694856748360373e-05, "loss": 1.1787, "reason_loss": 0.49739453196525574, "step": 2237, "utility_loss": 0.6813471913337708 }, { "cosine_similarity": 0, "epoch": 2.08574091332712, "grad_norm": 0.9752721184491147, "learning_rate": 1.6931308249913704e-05, "loss": 1.2312, "reason_loss": 0.4940539300441742, "step": 2238, "utility_loss": 0.737136721611023 }, { "cosine_similarity": 0, "epoch": 2.086672879776328, "grad_norm": 0.8606931209016274, "learning_rate": 1.6914049016223678e-05, "loss": 1.0284, "reason_loss": 0.4625789523124695, "step": 2239, "utility_loss": 0.5658097267150879 }, { "cosine_similarity": 0, "epoch": 2.0876048462255357, "grad_norm": 1.1986012072693644, "learning_rate": 1.6896789782533655e-05, "loss": 1.1683, "reason_loss": 0.4733535051345825, "step": 2240, "utility_loss": 0.6949819922447205 }, { "cosine_similarity": 0, "epoch": 2.088536812674744, "grad_norm": 1.022874331787606, "learning_rate": 1.6879530548843633e-05, "loss": 1.1978, "reason_loss": 0.4607037901878357, "step": 2241, "utility_loss": 0.7370724678039551 }, { "cosine_similarity": 0, "epoch": 2.0894687791239517, "grad_norm": 0.9460139036424586, "learning_rate": 1.6862271315153607e-05, "loss": 0.9874, "reason_loss": 0.4723610281944275, "step": 2242, "utility_loss": 0.515074610710144 }, { "cosine_similarity": 0, "epoch": 2.0904007455731595, "grad_norm": 0.8300481606848983, "learning_rate": 1.6845012081463585e-05, "loss": 0.8893, "reason_loss": 0.46062523126602173, "step": 2243, "utility_loss": 0.4286837577819824 }, { "cosine_similarity": 0, "epoch": 2.0913327120223673, "grad_norm": 1.0208121313661596, "learning_rate": 1.682775284777356e-05, "loss": 1.0036, "reason_loss": 0.45952004194259644, "step": 2244, "utility_loss": 0.544107973575592 }, { "cosine_similarity": 0, "epoch": 2.092264678471575, "grad_norm": 0.7892441463411365, "learning_rate": 1.6810493614083537e-05, "loss": 0.9487, "reason_loss": 0.47043442726135254, "step": 2245, "utility_loss": 0.4782857894897461 }, { "cosine_similarity": 0, "epoch": 2.093196644920783, "grad_norm": 1.0660959582562208, "learning_rate": 1.679323438039351e-05, "loss": 1.0248, "reason_loss": 0.46726346015930176, "step": 2246, "utility_loss": 0.5575525760650635 }, { "cosine_similarity": 0, "epoch": 2.0941286113699906, "grad_norm": 1.214842238548579, "learning_rate": 1.677597514670349e-05, "loss": 1.1604, "reason_loss": 0.49849778413772583, "step": 2247, "utility_loss": 0.6619361639022827 }, { "cosine_similarity": 0, "epoch": 2.0950605778191984, "grad_norm": 0.7623616738258299, "learning_rate": 1.6758715913013463e-05, "loss": 0.8465, "reason_loss": 0.449400395154953, "step": 2248, "utility_loss": 0.3970780074596405 }, { "cosine_similarity": 0, "epoch": 2.095992544268406, "grad_norm": 0.8732423026446259, "learning_rate": 1.6741456679323438e-05, "loss": 0.942, "reason_loss": 0.46871304512023926, "step": 2249, "utility_loss": 0.47324419021606445 }, { "cosine_similarity": 0, "epoch": 2.096924510717614, "grad_norm": 1.1506782091562688, "learning_rate": 1.6724197445633415e-05, "loss": 1.168, "reason_loss": 0.4741349220275879, "step": 2250, "utility_loss": 0.6938841342926025 }, { "cosine_similarity": 0, "epoch": 2.0978564771668218, "grad_norm": 1.0513944694976907, "learning_rate": 1.670693821194339e-05, "loss": 1.0244, "reason_loss": 0.47099030017852783, "step": 2251, "utility_loss": 0.5534188747406006 }, { "cosine_similarity": 0, "epoch": 2.09878844361603, "grad_norm": 0.9764429312181296, "learning_rate": 1.6689678978253367e-05, "loss": 1.3928, "reason_loss": 0.46631425619125366, "step": 2252, "utility_loss": 0.9265258312225342 }, { "cosine_similarity": 0, "epoch": 2.099720410065238, "grad_norm": 0.9667898038872306, "learning_rate": 1.667241974456334e-05, "loss": 1.129, "reason_loss": 0.4695817828178406, "step": 2253, "utility_loss": 0.6594477891921997 }, { "cosine_similarity": 0, "epoch": 2.1006523765144456, "grad_norm": 1.2246044436036485, "learning_rate": 1.665516051087332e-05, "loss": 1.3062, "reason_loss": 0.4731110632419586, "step": 2254, "utility_loss": 0.8330669403076172 }, { "cosine_similarity": 0, "epoch": 2.1015843429636534, "grad_norm": 0.7852726215472899, "learning_rate": 1.6637901277183294e-05, "loss": 1.0247, "reason_loss": 0.4962279498577118, "step": 2255, "utility_loss": 0.5284320116043091 }, { "cosine_similarity": 0, "epoch": 2.102516309412861, "grad_norm": 0.9983298945251203, "learning_rate": 1.662064204349327e-05, "loss": 1.5169, "reason_loss": 0.4938315749168396, "step": 2256, "utility_loss": 1.0230764150619507 }, { "cosine_similarity": 0, "epoch": 2.103448275862069, "grad_norm": 1.0183107933615732, "learning_rate": 1.6603382809803246e-05, "loss": 1.3532, "reason_loss": 0.4517574906349182, "step": 2257, "utility_loss": 0.9013963341712952 }, { "cosine_similarity": 0, "epoch": 2.1043802423112767, "grad_norm": 0.8623885197353663, "learning_rate": 1.6586123576113223e-05, "loss": 0.9818, "reason_loss": 0.4843108057975769, "step": 2258, "utility_loss": 0.49750471115112305 }, { "cosine_similarity": 0, "epoch": 2.1053122087604845, "grad_norm": 0.8732034395200572, "learning_rate": 1.6568864342423197e-05, "loss": 1.2591, "reason_loss": 0.47395026683807373, "step": 2259, "utility_loss": 0.7851170897483826 }, { "cosine_similarity": 0, "epoch": 2.1062441752096923, "grad_norm": 0.9339944234831659, "learning_rate": 1.6551605108733172e-05, "loss": 1.0127, "reason_loss": 0.4460267722606659, "step": 2260, "utility_loss": 0.5666671395301819 }, { "cosine_similarity": 0, "epoch": 2.1071761416589, "grad_norm": 0.8623851775047595, "learning_rate": 1.653434587504315e-05, "loss": 1.1519, "reason_loss": 0.47681307792663574, "step": 2261, "utility_loss": 0.6751108169555664 }, { "cosine_similarity": 0, "epoch": 2.108108108108108, "grad_norm": 1.0507679868848863, "learning_rate": 1.6517086641353124e-05, "loss": 1.2637, "reason_loss": 0.45471251010894775, "step": 2262, "utility_loss": 0.8089950084686279 }, { "cosine_similarity": 0, "epoch": 2.109040074557316, "grad_norm": 0.8204129276652715, "learning_rate": 1.64998274076631e-05, "loss": 1.0768, "reason_loss": 0.47675055265426636, "step": 2263, "utility_loss": 0.6000198125839233 }, { "cosine_similarity": 0, "epoch": 2.109972041006524, "grad_norm": 0.8522334667481455, "learning_rate": 1.6482568173973076e-05, "loss": 0.8449, "reason_loss": 0.44894570112228394, "step": 2264, "utility_loss": 0.39597201347351074 }, { "cosine_similarity": 0, "epoch": 2.1109040074557317, "grad_norm": 0.8702261222454244, "learning_rate": 1.6465308940283053e-05, "loss": 1.0337, "reason_loss": 0.46194857358932495, "step": 2265, "utility_loss": 0.5717767477035522 }, { "cosine_similarity": 0, "epoch": 2.1118359739049395, "grad_norm": 0.8242294407994296, "learning_rate": 1.6448049706593028e-05, "loss": 1.0694, "reason_loss": 0.45673805475234985, "step": 2266, "utility_loss": 0.6126587390899658 }, { "cosine_similarity": 0, "epoch": 2.1127679403541473, "grad_norm": 1.14895188567671, "learning_rate": 1.6430790472903005e-05, "loss": 1.3222, "reason_loss": 0.4688713550567627, "step": 2267, "utility_loss": 0.8533104658126831 }, { "cosine_similarity": 0, "epoch": 2.113699906803355, "grad_norm": 0.929324112761561, "learning_rate": 1.641353123921298e-05, "loss": 1.1896, "reason_loss": 0.4650374948978424, "step": 2268, "utility_loss": 0.7245444059371948 }, { "cosine_similarity": 0, "epoch": 2.114631873252563, "grad_norm": 0.8028929995110298, "learning_rate": 1.6396272005522957e-05, "loss": 1.076, "reason_loss": 0.4691064953804016, "step": 2269, "utility_loss": 0.6068761944770813 }, { "cosine_similarity": 0, "epoch": 2.1155638397017706, "grad_norm": 1.0410787117705091, "learning_rate": 1.637901277183293e-05, "loss": 1.3874, "reason_loss": 0.5156258344650269, "step": 2270, "utility_loss": 0.8717483282089233 }, { "cosine_similarity": 0, "epoch": 2.1164958061509784, "grad_norm": 0.9640915277292876, "learning_rate": 1.6361753538142906e-05, "loss": 1.0662, "reason_loss": 0.5061289072036743, "step": 2271, "utility_loss": 0.56010502576828 }, { "cosine_similarity": 0, "epoch": 2.117427772600186, "grad_norm": 0.9099139261188608, "learning_rate": 1.6344494304452884e-05, "loss": 1.2112, "reason_loss": 0.4550457000732422, "step": 2272, "utility_loss": 0.7561267614364624 }, { "cosine_similarity": 0, "epoch": 2.118359739049394, "grad_norm": 0.9604294266457232, "learning_rate": 1.6327235070762858e-05, "loss": 1.0063, "reason_loss": 0.4596553146839142, "step": 2273, "utility_loss": 0.5466856956481934 }, { "cosine_similarity": 0, "epoch": 2.1192917054986022, "grad_norm": 1.0243134580194362, "learning_rate": 1.6309975837072836e-05, "loss": 1.0143, "reason_loss": 0.47718125581741333, "step": 2274, "utility_loss": 0.5371166467666626 }, { "cosine_similarity": 0, "epoch": 2.12022367194781, "grad_norm": 0.7750688887883826, "learning_rate": 1.629271660338281e-05, "loss": 0.9044, "reason_loss": 0.43465423583984375, "step": 2275, "utility_loss": 0.4697306752204895 }, { "cosine_similarity": 0, "epoch": 2.121155638397018, "grad_norm": 0.9315943253290719, "learning_rate": 1.6275457369692788e-05, "loss": 1.096, "reason_loss": 0.46153491735458374, "step": 2276, "utility_loss": 0.6344782114028931 }, { "cosine_similarity": 0, "epoch": 2.1220876048462256, "grad_norm": 0.9581844893732686, "learning_rate": 1.6258198136002762e-05, "loss": 1.1192, "reason_loss": 0.45713338255882263, "step": 2277, "utility_loss": 0.6620357036590576 }, { "cosine_similarity": 0, "epoch": 2.1230195712954334, "grad_norm": 1.1307119438578952, "learning_rate": 1.624093890231274e-05, "loss": 1.209, "reason_loss": 0.48894578218460083, "step": 2278, "utility_loss": 0.7200119495391846 }, { "cosine_similarity": 0, "epoch": 2.123951537744641, "grad_norm": 1.11926212690843, "learning_rate": 1.6223679668622714e-05, "loss": 0.8807, "reason_loss": 0.4545248746871948, "step": 2279, "utility_loss": 0.42614448070526123 }, { "cosine_similarity": 0, "epoch": 2.124883504193849, "grad_norm": 0.7625247928120166, "learning_rate": 1.620642043493269e-05, "loss": 0.9286, "reason_loss": 0.49349039793014526, "step": 2280, "utility_loss": 0.43512627482414246 }, { "cosine_similarity": 0, "epoch": 2.1258154706430568, "grad_norm": 1.0928454088028463, "learning_rate": 1.6189161201242666e-05, "loss": 0.9424, "reason_loss": 0.42947131395339966, "step": 2281, "utility_loss": 0.5129053592681885 }, { "cosine_similarity": 0, "epoch": 2.1267474370922645, "grad_norm": 1.0479897052743286, "learning_rate": 1.617190196755264e-05, "loss": 0.9996, "reason_loss": 0.4582259953022003, "step": 2282, "utility_loss": 0.5413721799850464 }, { "cosine_similarity": 0, "epoch": 2.1276794035414723, "grad_norm": 0.9913386027847163, "learning_rate": 1.6154642733862618e-05, "loss": 1.4811, "reason_loss": 0.4654611349105835, "step": 2283, "utility_loss": 1.0156818628311157 }, { "cosine_similarity": 0, "epoch": 2.12861136999068, "grad_norm": 1.3362889960456996, "learning_rate": 1.6137383500172592e-05, "loss": 1.1863, "reason_loss": 0.45943278074264526, "step": 2284, "utility_loss": 0.7268716096878052 }, { "cosine_similarity": 0, "epoch": 2.1295433364398884, "grad_norm": 0.8154211482237592, "learning_rate": 1.612012426648257e-05, "loss": 0.909, "reason_loss": 0.44475501775741577, "step": 2285, "utility_loss": 0.4642355442047119 }, { "cosine_similarity": 0, "epoch": 2.130475302889096, "grad_norm": 1.1112035402812457, "learning_rate": 1.6102865032792544e-05, "loss": 1.4368, "reason_loss": 0.4755522608757019, "step": 2286, "utility_loss": 0.9612865447998047 }, { "cosine_similarity": 0, "epoch": 2.131407269338304, "grad_norm": 0.9777680503850874, "learning_rate": 1.608560579910252e-05, "loss": 1.221, "reason_loss": 0.48135435581207275, "step": 2287, "utility_loss": 0.7396682500839233 }, { "cosine_similarity": 0, "epoch": 2.1323392357875117, "grad_norm": 0.9464968999009835, "learning_rate": 1.6068346565412496e-05, "loss": 0.9634, "reason_loss": 0.45279526710510254, "step": 2288, "utility_loss": 0.5105856657028198 }, { "cosine_similarity": 0, "epoch": 2.1332712022367195, "grad_norm": 0.9979370712750871, "learning_rate": 1.6051087331722474e-05, "loss": 1.2312, "reason_loss": 0.4693579375743866, "step": 2289, "utility_loss": 0.7618002891540527 }, { "cosine_similarity": 0, "epoch": 2.1342031686859273, "grad_norm": 1.1296087731543532, "learning_rate": 1.6033828098032448e-05, "loss": 1.2351, "reason_loss": 0.4512970745563507, "step": 2290, "utility_loss": 0.7838215231895447 }, { "cosine_similarity": 0, "epoch": 2.135135135135135, "grad_norm": 0.9122684938584505, "learning_rate": 1.6016568864342422e-05, "loss": 1.0905, "reason_loss": 0.45566797256469727, "step": 2291, "utility_loss": 0.6348074078559875 }, { "cosine_similarity": 0, "epoch": 2.136067101584343, "grad_norm": 0.92648950222603, "learning_rate": 1.59993096306524e-05, "loss": 0.9018, "reason_loss": 0.511474609375, "step": 2292, "utility_loss": 0.39029425382614136 }, { "cosine_similarity": 0, "epoch": 2.1369990680335507, "grad_norm": 0.9109177830197582, "learning_rate": 1.5982050396962374e-05, "loss": 1.2041, "reason_loss": 0.41692405939102173, "step": 2293, "utility_loss": 0.7871335744857788 }, { "cosine_similarity": 0, "epoch": 2.1379310344827585, "grad_norm": 0.8838921835834515, "learning_rate": 1.5964791163272352e-05, "loss": 0.9844, "reason_loss": 0.47770991921424866, "step": 2294, "utility_loss": 0.5066794157028198 }, { "cosine_similarity": 0, "epoch": 2.1388630009319662, "grad_norm": 1.041416469494091, "learning_rate": 1.5947531929582326e-05, "loss": 1.304, "reason_loss": 0.471801221370697, "step": 2295, "utility_loss": 0.83217853307724 }, { "cosine_similarity": 0, "epoch": 2.1397949673811745, "grad_norm": 0.8767238109662583, "learning_rate": 1.5930272695892304e-05, "loss": 1.0106, "reason_loss": 0.48063981533050537, "step": 2296, "utility_loss": 0.5299142599105835 }, { "cosine_similarity": 0, "epoch": 2.1407269338303823, "grad_norm": 1.1554522611074771, "learning_rate": 1.5913013462202278e-05, "loss": 1.3372, "reason_loss": 0.4677917957305908, "step": 2297, "utility_loss": 0.869437575340271 }, { "cosine_similarity": 0, "epoch": 2.14165890027959, "grad_norm": 0.9336977147682392, "learning_rate": 1.5895754228512256e-05, "loss": 1.3202, "reason_loss": 0.47700655460357666, "step": 2298, "utility_loss": 0.8432305455207825 }, { "cosine_similarity": 0, "epoch": 2.142590866728798, "grad_norm": 1.0209523634309852, "learning_rate": 1.587849499482223e-05, "loss": 1.0869, "reason_loss": 0.43303969502449036, "step": 2299, "utility_loss": 0.6539014577865601 }, { "cosine_similarity": 0, "epoch": 2.1435228331780056, "grad_norm": 1.0969479695138782, "learning_rate": 1.5861235761132208e-05, "loss": 1.0683, "reason_loss": 0.48272520303726196, "step": 2300, "utility_loss": 0.5855536460876465 }, { "cosine_similarity": 0, "epoch": 2.1444547996272134, "grad_norm": 1.0547699263155872, "learning_rate": 1.5843976527442182e-05, "loss": 1.013, "reason_loss": 0.4565049111843109, "step": 2301, "utility_loss": 0.556542158126831 }, { "cosine_similarity": 0, "epoch": 2.145386766076421, "grad_norm": 0.868994010299396, "learning_rate": 1.5826717293752156e-05, "loss": 0.9559, "reason_loss": 0.49319082498550415, "step": 2302, "utility_loss": 0.46267709136009216 }, { "cosine_similarity": 0, "epoch": 2.146318732525629, "grad_norm": 0.8313710606367076, "learning_rate": 1.5809458060062134e-05, "loss": 1.0593, "reason_loss": 0.45773082971572876, "step": 2303, "utility_loss": 0.6015660762786865 }, { "cosine_similarity": 0, "epoch": 2.147250698974837, "grad_norm": 0.8510717846219875, "learning_rate": 1.5792198826372108e-05, "loss": 1.0689, "reason_loss": 0.44227999448776245, "step": 2304, "utility_loss": 0.6266093254089355 }, { "cosine_similarity": 0, "epoch": 2.1481826654240446, "grad_norm": 0.9974506770243348, "learning_rate": 1.5774939592682086e-05, "loss": 1.1957, "reason_loss": 0.45911890268325806, "step": 2305, "utility_loss": 0.7365643978118896 }, { "cosine_similarity": 0, "epoch": 2.1491146318732524, "grad_norm": 0.9632973864406846, "learning_rate": 1.575768035899206e-05, "loss": 1.0007, "reason_loss": 0.43147751688957214, "step": 2306, "utility_loss": 0.569199800491333 }, { "cosine_similarity": 0, "epoch": 2.1500465983224606, "grad_norm": 0.8136029198042424, "learning_rate": 1.5740421125302038e-05, "loss": 0.9985, "reason_loss": 0.4976617693901062, "step": 2307, "utility_loss": 0.5008401870727539 }, { "cosine_similarity": 0, "epoch": 2.1509785647716684, "grad_norm": 1.085831806741625, "learning_rate": 1.5723161891612012e-05, "loss": 1.1268, "reason_loss": 0.4742599129676819, "step": 2308, "utility_loss": 0.6525800824165344 }, { "cosine_similarity": 0, "epoch": 2.151910531220876, "grad_norm": 1.1414656727659032, "learning_rate": 1.570590265792199e-05, "loss": 1.0772, "reason_loss": 0.472133994102478, "step": 2309, "utility_loss": 0.6050491333007812 }, { "cosine_similarity": 0, "epoch": 2.152842497670084, "grad_norm": 0.8996689958825734, "learning_rate": 1.5688643424231964e-05, "loss": 1.0098, "reason_loss": 0.4382970929145813, "step": 2310, "utility_loss": 0.5714670419692993 }, { "cosine_similarity": 0, "epoch": 2.1537744641192917, "grad_norm": 0.8864352959756002, "learning_rate": 1.5671384190541942e-05, "loss": 1.0259, "reason_loss": 0.49103811383247375, "step": 2311, "utility_loss": 0.5348626375198364 }, { "cosine_similarity": 0, "epoch": 2.1547064305684995, "grad_norm": 1.0273530050542161, "learning_rate": 1.5654124956851916e-05, "loss": 1.2414, "reason_loss": 0.4704355299472809, "step": 2312, "utility_loss": 0.7709996700286865 }, { "cosine_similarity": 0, "epoch": 2.1556383970177073, "grad_norm": 0.9786650370882691, "learning_rate": 1.563686572316189e-05, "loss": 1.3386, "reason_loss": 0.4932633936405182, "step": 2313, "utility_loss": 0.845349907875061 }, { "cosine_similarity": 0, "epoch": 2.156570363466915, "grad_norm": 0.9050166411388886, "learning_rate": 1.5619606489471868e-05, "loss": 0.8652, "reason_loss": 0.4493406414985657, "step": 2314, "utility_loss": 0.41585543751716614 }, { "cosine_similarity": 0, "epoch": 2.157502329916123, "grad_norm": 0.9433441761849868, "learning_rate": 1.5602347255781842e-05, "loss": 1.0432, "reason_loss": 0.4745999574661255, "step": 2315, "utility_loss": 0.5685855150222778 }, { "cosine_similarity": 0, "epoch": 2.1584342963653307, "grad_norm": 1.0609957017388778, "learning_rate": 1.558508802209182e-05, "loss": 1.3196, "reason_loss": 0.4766676723957062, "step": 2316, "utility_loss": 0.8428856134414673 }, { "cosine_similarity": 0, "epoch": 2.1593662628145385, "grad_norm": 1.0057450433914352, "learning_rate": 1.5567828788401794e-05, "loss": 1.0704, "reason_loss": 0.43185728788375854, "step": 2317, "utility_loss": 0.6385642290115356 }, { "cosine_similarity": 0, "epoch": 2.1602982292637467, "grad_norm": 1.034119222174226, "learning_rate": 1.5550569554711772e-05, "loss": 1.0695, "reason_loss": 0.45662418007850647, "step": 2318, "utility_loss": 0.6129019260406494 }, { "cosine_similarity": 0, "epoch": 2.1612301957129545, "grad_norm": 0.927719113477379, "learning_rate": 1.5533310321021746e-05, "loss": 0.9472, "reason_loss": 0.4393320679664612, "step": 2319, "utility_loss": 0.50791335105896 }, { "cosine_similarity": 0, "epoch": 2.1621621621621623, "grad_norm": 0.9046465328587444, "learning_rate": 1.5516051087331724e-05, "loss": 1.0732, "reason_loss": 0.44801396131515503, "step": 2320, "utility_loss": 0.6252255439758301 }, { "cosine_similarity": 0, "epoch": 2.16309412861137, "grad_norm": 1.230739958229056, "learning_rate": 1.5498791853641702e-05, "loss": 1.4268, "reason_loss": 0.47079968452453613, "step": 2321, "utility_loss": 0.9560492038726807 }, { "cosine_similarity": 0, "epoch": 2.164026095060578, "grad_norm": 0.877122293164338, "learning_rate": 1.5481532619951676e-05, "loss": 1.2, "reason_loss": 0.5064201951026917, "step": 2322, "utility_loss": 0.693597137928009 }, { "cosine_similarity": 0, "epoch": 2.1649580615097856, "grad_norm": 1.2208019573358388, "learning_rate": 1.546427338626165e-05, "loss": 1.7376, "reason_loss": 0.45152246952056885, "step": 2323, "utility_loss": 1.2860805988311768 }, { "cosine_similarity": 0, "epoch": 2.1658900279589934, "grad_norm": 0.8465158749712219, "learning_rate": 1.5447014152571625e-05, "loss": 0.9104, "reason_loss": 0.45183679461479187, "step": 2324, "utility_loss": 0.45855408906936646 }, { "cosine_similarity": 0, "epoch": 2.1668219944082012, "grad_norm": 0.8613490514945448, "learning_rate": 1.5429754918881602e-05, "loss": 0.9549, "reason_loss": 0.5017470717430115, "step": 2325, "utility_loss": 0.45310407876968384 }, { "cosine_similarity": 0, "epoch": 2.167753960857409, "grad_norm": 1.0202747959018945, "learning_rate": 1.5412495685191577e-05, "loss": 1.1255, "reason_loss": 0.4824385941028595, "step": 2326, "utility_loss": 0.6430687308311462 }, { "cosine_similarity": 0, "epoch": 2.168685927306617, "grad_norm": 0.9445575217536863, "learning_rate": 1.5395236451501554e-05, "loss": 0.9908, "reason_loss": 0.4634547829627991, "step": 2327, "utility_loss": 0.5273936986923218 }, { "cosine_similarity": 0, "epoch": 2.1696178937558246, "grad_norm": 1.0523388949982677, "learning_rate": 1.537797721781153e-05, "loss": 0.989, "reason_loss": 0.46158283948898315, "step": 2328, "utility_loss": 0.5274305939674377 }, { "cosine_similarity": 0, "epoch": 2.170549860205033, "grad_norm": 0.9958191468006966, "learning_rate": 1.5360717984121506e-05, "loss": 1.2015, "reason_loss": 0.47506943345069885, "step": 2329, "utility_loss": 0.7264260649681091 }, { "cosine_similarity": 0, "epoch": 2.1714818266542406, "grad_norm": 0.8279429285052764, "learning_rate": 1.534345875043148e-05, "loss": 1.2007, "reason_loss": 0.464199960231781, "step": 2330, "utility_loss": 0.7365100383758545 }, { "cosine_similarity": 0, "epoch": 2.1724137931034484, "grad_norm": 0.9435060131352351, "learning_rate": 1.5326199516741458e-05, "loss": 1.3809, "reason_loss": 0.4572290778160095, "step": 2331, "utility_loss": 0.9236868619918823 }, { "cosine_similarity": 0, "epoch": 2.173345759552656, "grad_norm": 1.0249604057747248, "learning_rate": 1.5308940283051436e-05, "loss": 1.1731, "reason_loss": 0.46583759784698486, "step": 2332, "utility_loss": 0.7072350978851318 }, { "cosine_similarity": 0, "epoch": 2.174277726001864, "grad_norm": 0.9366514469334176, "learning_rate": 1.529168104936141e-05, "loss": 1.0416, "reason_loss": 0.45620808005332947, "step": 2333, "utility_loss": 0.58538419008255 }, { "cosine_similarity": 0, "epoch": 2.1752096924510718, "grad_norm": 1.0115963888206227, "learning_rate": 1.5274421815671384e-05, "loss": 1.2364, "reason_loss": 0.4622576832771301, "step": 2334, "utility_loss": 0.7741302251815796 }, { "cosine_similarity": 0, "epoch": 2.1761416589002796, "grad_norm": 1.0544806812529657, "learning_rate": 1.525716258198136e-05, "loss": 1.0014, "reason_loss": 0.44207102060317993, "step": 2335, "utility_loss": 0.5592854022979736 }, { "cosine_similarity": 0, "epoch": 2.1770736253494873, "grad_norm": 1.0201312655884558, "learning_rate": 1.5239903348291338e-05, "loss": 1.1329, "reason_loss": 0.43582791090011597, "step": 2336, "utility_loss": 0.6970911026000977 }, { "cosine_similarity": 0, "epoch": 2.178005591798695, "grad_norm": 0.9439610639281532, "learning_rate": 1.5222644114601312e-05, "loss": 0.9892, "reason_loss": 0.47553741931915283, "step": 2337, "utility_loss": 0.5137026309967041 }, { "cosine_similarity": 0, "epoch": 2.178937558247903, "grad_norm": 0.8001488276214817, "learning_rate": 1.5205384880911288e-05, "loss": 0.9154, "reason_loss": 0.45593154430389404, "step": 2338, "utility_loss": 0.4594646692276001 }, { "cosine_similarity": 0, "epoch": 2.1798695246971107, "grad_norm": 1.2666171231452723, "learning_rate": 1.5188125647221263e-05, "loss": 1.1229, "reason_loss": 0.4877052307128906, "step": 2339, "utility_loss": 0.6351610422134399 }, { "cosine_similarity": 0, "epoch": 2.180801491146319, "grad_norm": 0.858892471827763, "learning_rate": 1.517086641353124e-05, "loss": 1.0991, "reason_loss": 0.43048739433288574, "step": 2340, "utility_loss": 0.6685982942581177 }, { "cosine_similarity": 0, "epoch": 2.1817334575955267, "grad_norm": 1.0316358251281028, "learning_rate": 1.5153607179841216e-05, "loss": 1.0832, "reason_loss": 0.47035181522369385, "step": 2341, "utility_loss": 0.6128263473510742 }, { "cosine_similarity": 0, "epoch": 2.1826654240447345, "grad_norm": 1.1159941199724985, "learning_rate": 1.513634794615119e-05, "loss": 1.1666, "reason_loss": 0.49300670623779297, "step": 2342, "utility_loss": 0.673575758934021 }, { "cosine_similarity": 0, "epoch": 2.1835973904939423, "grad_norm": 0.895344998904873, "learning_rate": 1.5119088712461168e-05, "loss": 1.3116, "reason_loss": 0.47077488899230957, "step": 2343, "utility_loss": 0.8408081531524658 }, { "cosine_similarity": 0, "epoch": 2.18452935694315, "grad_norm": 0.9337312684715489, "learning_rate": 1.5101829478771143e-05, "loss": 1.2489, "reason_loss": 0.4812667667865753, "step": 2344, "utility_loss": 0.7676297426223755 }, { "cosine_similarity": 0, "epoch": 2.185461323392358, "grad_norm": 0.9503705680263418, "learning_rate": 1.508457024508112e-05, "loss": 1.1945, "reason_loss": 0.48197442293167114, "step": 2345, "utility_loss": 0.7125270366668701 }, { "cosine_similarity": 0, "epoch": 2.1863932898415657, "grad_norm": 0.903755064422072, "learning_rate": 1.5067311011391095e-05, "loss": 1.0895, "reason_loss": 0.4533846378326416, "step": 2346, "utility_loss": 0.6361169815063477 }, { "cosine_similarity": 0, "epoch": 2.1873252562907735, "grad_norm": 0.9895414141669509, "learning_rate": 1.505005177770107e-05, "loss": 1.1596, "reason_loss": 0.45024722814559937, "step": 2347, "utility_loss": 0.7093833684921265 }, { "cosine_similarity": 0, "epoch": 2.1882572227399812, "grad_norm": 0.8109227669518512, "learning_rate": 1.5032792544011045e-05, "loss": 1.0604, "reason_loss": 0.4234677255153656, "step": 2348, "utility_loss": 0.6369790434837341 }, { "cosine_similarity": 0, "epoch": 2.189189189189189, "grad_norm": 0.9659290242470451, "learning_rate": 1.5015533310321023e-05, "loss": 1.1352, "reason_loss": 0.42391324043273926, "step": 2349, "utility_loss": 0.7112881541252136 }, { "cosine_similarity": 0, "epoch": 2.190121155638397, "grad_norm": 0.9234483860778738, "learning_rate": 1.4998274076630997e-05, "loss": 0.98, "reason_loss": 0.4704365134239197, "step": 2350, "utility_loss": 0.5096110105514526 }, { "cosine_similarity": 0, "epoch": 2.191053122087605, "grad_norm": 0.7742846209890293, "learning_rate": 1.4981014842940974e-05, "loss": 1.2293, "reason_loss": 0.45462527871131897, "step": 2351, "utility_loss": 0.7746440172195435 }, { "cosine_similarity": 0, "epoch": 2.191985088536813, "grad_norm": 1.099753880512353, "learning_rate": 1.496375560925095e-05, "loss": 1.5739, "reason_loss": 0.4899486303329468, "step": 2352, "utility_loss": 1.0839852094650269 }, { "cosine_similarity": 0, "epoch": 2.1929170549860206, "grad_norm": 1.10041537375069, "learning_rate": 1.4946496375560925e-05, "loss": 0.9264, "reason_loss": 0.46859312057495117, "step": 2353, "utility_loss": 0.4577876329421997 }, { "cosine_similarity": 0, "epoch": 2.1938490214352284, "grad_norm": 0.9267946596899217, "learning_rate": 1.4929237141870902e-05, "loss": 1.151, "reason_loss": 0.494484543800354, "step": 2354, "utility_loss": 0.6565595865249634 }, { "cosine_similarity": 0, "epoch": 2.194780987884436, "grad_norm": 0.8398505835031784, "learning_rate": 1.4911977908180877e-05, "loss": 0.9485, "reason_loss": 0.4741446375846863, "step": 2355, "utility_loss": 0.47437217831611633 }, { "cosine_similarity": 0, "epoch": 2.195712954333644, "grad_norm": 0.8654758596459767, "learning_rate": 1.4894718674490854e-05, "loss": 1.0357, "reason_loss": 0.5013068914413452, "step": 2356, "utility_loss": 0.534396767616272 }, { "cosine_similarity": 0, "epoch": 2.196644920782852, "grad_norm": 0.9761222937746399, "learning_rate": 1.4877459440800829e-05, "loss": 1.3989, "reason_loss": 0.4623645544052124, "step": 2357, "utility_loss": 0.9365144968032837 }, { "cosine_similarity": 0, "epoch": 2.1975768872320596, "grad_norm": 0.9041939709254527, "learning_rate": 1.4860200207110805e-05, "loss": 1.2801, "reason_loss": 0.4569844901561737, "step": 2358, "utility_loss": 0.8230692744255066 }, { "cosine_similarity": 0, "epoch": 2.1985088536812674, "grad_norm": 0.9047237302700201, "learning_rate": 1.4842940973420779e-05, "loss": 1.25, "reason_loss": 0.45612841844558716, "step": 2359, "utility_loss": 0.7938791513442993 }, { "cosine_similarity": 0, "epoch": 2.199440820130475, "grad_norm": 1.0811152032567297, "learning_rate": 1.4825681739730757e-05, "loss": 1.2081, "reason_loss": 0.4745899438858032, "step": 2360, "utility_loss": 0.7335025072097778 }, { "cosine_similarity": 0, "epoch": 2.200372786579683, "grad_norm": 0.9065181464159946, "learning_rate": 1.4808422506040731e-05, "loss": 1.2481, "reason_loss": 0.4340040981769562, "step": 2361, "utility_loss": 0.8140678405761719 }, { "cosine_similarity": 0, "epoch": 2.201304753028891, "grad_norm": 1.0865715732405075, "learning_rate": 1.4791163272350709e-05, "loss": 0.9726, "reason_loss": 0.4401191473007202, "step": 2362, "utility_loss": 0.5324496030807495 }, { "cosine_similarity": 0, "epoch": 2.202236719478099, "grad_norm": 0.94733098801345, "learning_rate": 1.4773904038660685e-05, "loss": 1.1523, "reason_loss": 0.4444318413734436, "step": 2363, "utility_loss": 0.7078804969787598 }, { "cosine_similarity": 0, "epoch": 2.2031686859273067, "grad_norm": 0.9711128818456589, "learning_rate": 1.4756644804970659e-05, "loss": 1.146, "reason_loss": 0.4490377902984619, "step": 2364, "utility_loss": 0.696929931640625 }, { "cosine_similarity": 0, "epoch": 2.2041006523765145, "grad_norm": 0.9409157130566573, "learning_rate": 1.4739385571280637e-05, "loss": 0.9721, "reason_loss": 0.4770304560661316, "step": 2365, "utility_loss": 0.4950298070907593 }, { "cosine_similarity": 0, "epoch": 2.2050326188257223, "grad_norm": 0.992255942362789, "learning_rate": 1.472212633759061e-05, "loss": 1.0137, "reason_loss": 0.4535486102104187, "step": 2366, "utility_loss": 0.5601415038108826 }, { "cosine_similarity": 0, "epoch": 2.20596458527493, "grad_norm": 1.1239561440442434, "learning_rate": 1.4704867103900589e-05, "loss": 1.0332, "reason_loss": 0.4499618411064148, "step": 2367, "utility_loss": 0.5832154154777527 }, { "cosine_similarity": 0, "epoch": 2.206896551724138, "grad_norm": 1.157424913529448, "learning_rate": 1.4687607870210563e-05, "loss": 1.3756, "reason_loss": 0.43589890003204346, "step": 2368, "utility_loss": 0.9397332668304443 }, { "cosine_similarity": 0, "epoch": 2.2078285181733457, "grad_norm": 0.8479186086876492, "learning_rate": 1.4670348636520539e-05, "loss": 0.8588, "reason_loss": 0.4503256678581238, "step": 2369, "utility_loss": 0.4085128903388977 }, { "cosine_similarity": 0, "epoch": 2.2087604846225535, "grad_norm": 1.0351752359985604, "learning_rate": 1.4653089402830513e-05, "loss": 1.1959, "reason_loss": 0.4641419053077698, "step": 2370, "utility_loss": 0.7317464351654053 }, { "cosine_similarity": 0, "epoch": 2.2096924510717613, "grad_norm": 0.9103294880332933, "learning_rate": 1.463583016914049e-05, "loss": 1.1211, "reason_loss": 0.45220690965652466, "step": 2371, "utility_loss": 0.6688653230667114 }, { "cosine_similarity": 0, "epoch": 2.210624417520969, "grad_norm": 1.0613288032848218, "learning_rate": 1.4618570935450468e-05, "loss": 0.9578, "reason_loss": 0.4579068720340729, "step": 2372, "utility_loss": 0.4999213218688965 }, { "cosine_similarity": 0, "epoch": 2.2115563839701773, "grad_norm": 0.9449455053729952, "learning_rate": 1.4601311701760443e-05, "loss": 1.2316, "reason_loss": 0.4381757974624634, "step": 2373, "utility_loss": 0.7933816313743591 }, { "cosine_similarity": 0, "epoch": 2.212488350419385, "grad_norm": 0.9551446020516731, "learning_rate": 1.4584052468070419e-05, "loss": 1.0149, "reason_loss": 0.45995694398880005, "step": 2374, "utility_loss": 0.5549685955047607 }, { "cosine_similarity": 0, "epoch": 2.213420316868593, "grad_norm": 0.9415846912790576, "learning_rate": 1.4566793234380393e-05, "loss": 1.0654, "reason_loss": 0.4711284637451172, "step": 2375, "utility_loss": 0.5942431688308716 }, { "cosine_similarity": 0, "epoch": 2.2143522833178007, "grad_norm": 1.1335659305626649, "learning_rate": 1.454953400069037e-05, "loss": 0.9882, "reason_loss": 0.48280593752861023, "step": 2376, "utility_loss": 0.505383312702179 }, { "cosine_similarity": 0, "epoch": 2.2152842497670084, "grad_norm": 0.8706027684568697, "learning_rate": 1.4532274767000345e-05, "loss": 1.086, "reason_loss": 0.46063321828842163, "step": 2377, "utility_loss": 0.625347912311554 }, { "cosine_similarity": 0, "epoch": 2.2162162162162162, "grad_norm": 0.903249880454418, "learning_rate": 1.4515015533310323e-05, "loss": 0.9572, "reason_loss": 0.4484725594520569, "step": 2378, "utility_loss": 0.5087035298347473 }, { "cosine_similarity": 0, "epoch": 2.217148182665424, "grad_norm": 0.9118993363738365, "learning_rate": 1.4497756299620297e-05, "loss": 1.0642, "reason_loss": 0.49091410636901855, "step": 2379, "utility_loss": 0.5732473134994507 }, { "cosine_similarity": 0, "epoch": 2.218080149114632, "grad_norm": 0.8852805669051478, "learning_rate": 1.4480497065930273e-05, "loss": 1.0277, "reason_loss": 0.45296555757522583, "step": 2380, "utility_loss": 0.5746933221817017 }, { "cosine_similarity": 0, "epoch": 2.2190121155638396, "grad_norm": 0.9684354044535367, "learning_rate": 1.4463237832240247e-05, "loss": 1.2048, "reason_loss": 0.48669755458831787, "step": 2381, "utility_loss": 0.7180551886558533 }, { "cosine_similarity": 0, "epoch": 2.2199440820130474, "grad_norm": 2.3991955532181684, "learning_rate": 1.4445978598550225e-05, "loss": 1.5309, "reason_loss": 0.43833938241004944, "step": 2382, "utility_loss": 1.0925692319869995 }, { "cosine_similarity": 0, "epoch": 2.220876048462255, "grad_norm": 0.8014349332292549, "learning_rate": 1.4428719364860203e-05, "loss": 0.881, "reason_loss": 0.4353744089603424, "step": 2383, "utility_loss": 0.44563764333724976 }, { "cosine_similarity": 0, "epoch": 2.2218080149114634, "grad_norm": 1.0813711645635111, "learning_rate": 1.4411460131170177e-05, "loss": 0.9195, "reason_loss": 0.45497819781303406, "step": 2384, "utility_loss": 0.46448636054992676 }, { "cosine_similarity": 0, "epoch": 2.222739981360671, "grad_norm": 0.8246883289499825, "learning_rate": 1.4394200897480153e-05, "loss": 0.9218, "reason_loss": 0.43651700019836426, "step": 2385, "utility_loss": 0.48530930280685425 }, { "cosine_similarity": 0, "epoch": 2.223671947809879, "grad_norm": 0.8916435718613749, "learning_rate": 1.4376941663790127e-05, "loss": 1.2692, "reason_loss": 0.46188557147979736, "step": 2386, "utility_loss": 0.8073437213897705 }, { "cosine_similarity": 0, "epoch": 2.2246039142590868, "grad_norm": 1.0306081884200948, "learning_rate": 1.4359682430100105e-05, "loss": 1.1629, "reason_loss": 0.4803467392921448, "step": 2387, "utility_loss": 0.6825191974639893 }, { "cosine_similarity": 0, "epoch": 2.2255358807082946, "grad_norm": 0.8739661655229854, "learning_rate": 1.4342423196410079e-05, "loss": 1.0073, "reason_loss": 0.4566611051559448, "step": 2388, "utility_loss": 0.5505932569503784 }, { "cosine_similarity": 0, "epoch": 2.2264678471575023, "grad_norm": 0.9511478129664618, "learning_rate": 1.4325163962720057e-05, "loss": 1.132, "reason_loss": 0.45833921432495117, "step": 2389, "utility_loss": 0.6736578941345215 }, { "cosine_similarity": 0, "epoch": 2.22739981360671, "grad_norm": 0.8391340078238253, "learning_rate": 1.4307904729030031e-05, "loss": 0.9073, "reason_loss": 0.45714351534843445, "step": 2390, "utility_loss": 0.4501529932022095 }, { "cosine_similarity": 0, "epoch": 2.228331780055918, "grad_norm": 0.8653411957179358, "learning_rate": 1.4290645495340007e-05, "loss": 0.9669, "reason_loss": 0.48434391617774963, "step": 2391, "utility_loss": 0.48258063197135925 }, { "cosine_similarity": 0, "epoch": 2.2292637465051257, "grad_norm": 0.9442603968175897, "learning_rate": 1.4273386261649985e-05, "loss": 1.0927, "reason_loss": 0.4838731586933136, "step": 2392, "utility_loss": 0.6087890863418579 }, { "cosine_similarity": 0, "epoch": 2.2301957129543335, "grad_norm": 1.0720246801178166, "learning_rate": 1.4256127027959959e-05, "loss": 1.1054, "reason_loss": 0.4947790801525116, "step": 2393, "utility_loss": 0.610615611076355 }, { "cosine_similarity": 0, "epoch": 2.2311276794035413, "grad_norm": 1.0022143800176593, "learning_rate": 1.4238867794269937e-05, "loss": 1.042, "reason_loss": 0.4648314118385315, "step": 2394, "utility_loss": 0.5771797895431519 }, { "cosine_similarity": 0, "epoch": 2.2320596458527495, "grad_norm": 0.8476987935576166, "learning_rate": 1.4221608560579911e-05, "loss": 0.9129, "reason_loss": 0.4504815936088562, "step": 2395, "utility_loss": 0.46237653493881226 }, { "cosine_similarity": 0, "epoch": 2.2329916123019573, "grad_norm": 1.1197845616157978, "learning_rate": 1.4204349326889887e-05, "loss": 0.9525, "reason_loss": 0.4518068730831146, "step": 2396, "utility_loss": 0.500717043876648 }, { "cosine_similarity": 0, "epoch": 2.233923578751165, "grad_norm": 1.0234648099880552, "learning_rate": 1.4187090093199861e-05, "loss": 0.9855, "reason_loss": 0.4488474130630493, "step": 2397, "utility_loss": 0.5366320610046387 }, { "cosine_similarity": 0, "epoch": 2.234855545200373, "grad_norm": 0.8757168972179187, "learning_rate": 1.4169830859509839e-05, "loss": 0.9304, "reason_loss": 0.471303790807724, "step": 2398, "utility_loss": 0.45906439423561096 }, { "cosine_similarity": 0, "epoch": 2.2357875116495807, "grad_norm": 1.0122011437476206, "learning_rate": 1.4152571625819813e-05, "loss": 1.2245, "reason_loss": 0.44621312618255615, "step": 2399, "utility_loss": 0.778295636177063 }, { "cosine_similarity": 0, "epoch": 2.2367194780987885, "grad_norm": 1.0640853849898047, "learning_rate": 1.4135312392129791e-05, "loss": 1.2975, "reason_loss": 0.46539080142974854, "step": 2400, "utility_loss": 0.8321554660797119 }, { "cosine_similarity": 0, "epoch": 2.2376514445479962, "grad_norm": 0.9828207045724869, "learning_rate": 1.4118053158439765e-05, "loss": 1.2479, "reason_loss": 0.4571377635002136, "step": 2401, "utility_loss": 0.7908046245574951 }, { "cosine_similarity": 0, "epoch": 2.238583410997204, "grad_norm": 0.8704380847127217, "learning_rate": 1.4100793924749741e-05, "loss": 1.0897, "reason_loss": 0.4261014461517334, "step": 2402, "utility_loss": 0.6635866165161133 }, { "cosine_similarity": 0, "epoch": 2.239515377446412, "grad_norm": 0.9662227710685752, "learning_rate": 1.4083534691059719e-05, "loss": 1.11, "reason_loss": 0.4311330318450928, "step": 2403, "utility_loss": 0.6788572072982788 }, { "cosine_similarity": 0, "epoch": 2.2404473438956196, "grad_norm": 1.003456619782522, "learning_rate": 1.4066275457369693e-05, "loss": 0.8142, "reason_loss": 0.4487990140914917, "step": 2404, "utility_loss": 0.36541712284088135 }, { "cosine_similarity": 0, "epoch": 2.2413793103448274, "grad_norm": 0.9816342036151413, "learning_rate": 1.404901622367967e-05, "loss": 1.0318, "reason_loss": 0.4479844570159912, "step": 2405, "utility_loss": 0.5837948322296143 }, { "cosine_similarity": 0, "epoch": 2.2423112767940356, "grad_norm": 0.9174900842557586, "learning_rate": 1.4031756989989645e-05, "loss": 0.9695, "reason_loss": 0.479076623916626, "step": 2406, "utility_loss": 0.4904226064682007 }, { "cosine_similarity": 0, "epoch": 2.2432432432432434, "grad_norm": 0.9623131102456617, "learning_rate": 1.4014497756299621e-05, "loss": 1.4382, "reason_loss": 0.45740509033203125, "step": 2407, "utility_loss": 0.9808031320571899 }, { "cosine_similarity": 0, "epoch": 2.244175209692451, "grad_norm": 1.103963208928676, "learning_rate": 1.3997238522609595e-05, "loss": 1.0371, "reason_loss": 0.4910736680030823, "step": 2408, "utility_loss": 0.5460619926452637 }, { "cosine_similarity": 0, "epoch": 2.245107176141659, "grad_norm": 1.3692077036536885, "learning_rate": 1.3979979288919573e-05, "loss": 1.1028, "reason_loss": 0.4556052088737488, "step": 2409, "utility_loss": 0.6471908092498779 }, { "cosine_similarity": 0, "epoch": 2.246039142590867, "grad_norm": 0.9236254911044142, "learning_rate": 1.3962720055229547e-05, "loss": 1.1335, "reason_loss": 0.47574809193611145, "step": 2410, "utility_loss": 0.657789409160614 }, { "cosine_similarity": 0, "epoch": 2.2469711090400746, "grad_norm": 1.2002740522360436, "learning_rate": 1.3945460821539525e-05, "loss": 1.1042, "reason_loss": 0.4745946526527405, "step": 2411, "utility_loss": 0.6296180486679077 }, { "cosine_similarity": 0, "epoch": 2.2479030754892824, "grad_norm": 0.8919583792331108, "learning_rate": 1.3928201587849501e-05, "loss": 1.0455, "reason_loss": 0.438309907913208, "step": 2412, "utility_loss": 0.6072177886962891 }, { "cosine_similarity": 0, "epoch": 2.24883504193849, "grad_norm": 0.8523298529692792, "learning_rate": 1.3910942354159475e-05, "loss": 1.0725, "reason_loss": 0.47577738761901855, "step": 2413, "utility_loss": 0.5967687368392944 }, { "cosine_similarity": 0, "epoch": 2.249767008387698, "grad_norm": 0.9560155170032199, "learning_rate": 1.3893683120469453e-05, "loss": 1.0446, "reason_loss": 0.46635428071022034, "step": 2414, "utility_loss": 0.5782784223556519 }, { "cosine_similarity": 0, "epoch": 2.2506989748369057, "grad_norm": 0.780621343352872, "learning_rate": 1.3876423886779427e-05, "loss": 1.0871, "reason_loss": 0.4710119068622589, "step": 2415, "utility_loss": 0.616059422492981 }, { "cosine_similarity": 0, "epoch": 2.2516309412861135, "grad_norm": 1.2655970280871576, "learning_rate": 1.3859164653089405e-05, "loss": 1.2425, "reason_loss": 0.46382588148117065, "step": 2416, "utility_loss": 0.778668224811554 }, { "cosine_similarity": 0, "epoch": 2.2525629077353218, "grad_norm": 0.9719953238232889, "learning_rate": 1.384190541939938e-05, "loss": 1.0394, "reason_loss": 0.4750024080276489, "step": 2417, "utility_loss": 0.564385175704956 }, { "cosine_similarity": 0, "epoch": 2.2534948741845295, "grad_norm": 1.115653456756317, "learning_rate": 1.3824646185709355e-05, "loss": 1.2026, "reason_loss": 0.48577234148979187, "step": 2418, "utility_loss": 0.7168658971786499 }, { "cosine_similarity": 0, "epoch": 2.2544268406337373, "grad_norm": 0.9275910617497733, "learning_rate": 1.380738695201933e-05, "loss": 1.1078, "reason_loss": 0.444989949464798, "step": 2419, "utility_loss": 0.6628316640853882 }, { "cosine_similarity": 0, "epoch": 2.255358807082945, "grad_norm": 1.0195849807488424, "learning_rate": 1.3790127718329307e-05, "loss": 0.9909, "reason_loss": 0.45357945561408997, "step": 2420, "utility_loss": 0.5373630523681641 }, { "cosine_similarity": 0, "epoch": 2.256290773532153, "grad_norm": 1.0666762532847296, "learning_rate": 1.3772868484639282e-05, "loss": 0.8998, "reason_loss": 0.4663558006286621, "step": 2421, "utility_loss": 0.4334624707698822 }, { "cosine_similarity": 0, "epoch": 2.2572227399813607, "grad_norm": 0.8821859996321952, "learning_rate": 1.375560925094926e-05, "loss": 0.9709, "reason_loss": 0.4630804657936096, "step": 2422, "utility_loss": 0.5077770352363586 }, { "cosine_similarity": 0, "epoch": 2.2581547064305685, "grad_norm": 1.044323898430314, "learning_rate": 1.3738350017259235e-05, "loss": 1.0087, "reason_loss": 0.4575062692165375, "step": 2423, "utility_loss": 0.5511605143547058 }, { "cosine_similarity": 0, "epoch": 2.2590866728797763, "grad_norm": 1.0767486931706436, "learning_rate": 1.372109078356921e-05, "loss": 1.0869, "reason_loss": 0.4616767168045044, "step": 2424, "utility_loss": 0.6252293586730957 }, { "cosine_similarity": 0, "epoch": 2.260018639328984, "grad_norm": 0.8669231075958587, "learning_rate": 1.3703831549879187e-05, "loss": 0.9752, "reason_loss": 0.47373485565185547, "step": 2425, "utility_loss": 0.5014644861221313 }, { "cosine_similarity": 0, "epoch": 2.260950605778192, "grad_norm": 1.269839782388847, "learning_rate": 1.3686572316189161e-05, "loss": 1.2718, "reason_loss": 0.4600455164909363, "step": 2426, "utility_loss": 0.8117400407791138 }, { "cosine_similarity": 0, "epoch": 2.2618825722273996, "grad_norm": 0.9429735399913958, "learning_rate": 1.3669313082499139e-05, "loss": 1.2172, "reason_loss": 0.4559161067008972, "step": 2427, "utility_loss": 0.7612437009811401 }, { "cosine_similarity": 0, "epoch": 2.262814538676608, "grad_norm": 0.8950081939271752, "learning_rate": 1.3652053848809113e-05, "loss": 1.1089, "reason_loss": 0.46766066551208496, "step": 2428, "utility_loss": 0.6412349939346313 }, { "cosine_similarity": 0, "epoch": 2.2637465051258157, "grad_norm": 0.9940139421727773, "learning_rate": 1.363479461511909e-05, "loss": 1.0453, "reason_loss": 0.45301300287246704, "step": 2429, "utility_loss": 0.5922718048095703 }, { "cosine_similarity": 0, "epoch": 2.2646784715750234, "grad_norm": 0.868492092597192, "learning_rate": 1.3617535381429064e-05, "loss": 0.8265, "reason_loss": 0.4592863917350769, "step": 2430, "utility_loss": 0.36720091104507446 }, { "cosine_similarity": 0, "epoch": 2.2656104380242312, "grad_norm": 0.8427002083832082, "learning_rate": 1.3600276147739041e-05, "loss": 0.9703, "reason_loss": 0.4183471202850342, "step": 2431, "utility_loss": 0.551904022693634 }, { "cosine_similarity": 0, "epoch": 2.266542404473439, "grad_norm": 0.9107372996208498, "learning_rate": 1.3583016914049019e-05, "loss": 1.1242, "reason_loss": 0.49147552251815796, "step": 2432, "utility_loss": 0.6327373385429382 }, { "cosine_similarity": 0, "epoch": 2.267474370922647, "grad_norm": 0.9662363407312455, "learning_rate": 1.3565757680358993e-05, "loss": 1.0417, "reason_loss": 0.4619857370853424, "step": 2433, "utility_loss": 0.5797379612922668 }, { "cosine_similarity": 0, "epoch": 2.2684063373718546, "grad_norm": 1.0154105097932395, "learning_rate": 1.354849844666897e-05, "loss": 1.0149, "reason_loss": 0.46835654973983765, "step": 2434, "utility_loss": 0.546510636806488 }, { "cosine_similarity": 0, "epoch": 2.2693383038210624, "grad_norm": 0.9398791365612674, "learning_rate": 1.3531239212978944e-05, "loss": 1.1752, "reason_loss": 0.489282488822937, "step": 2435, "utility_loss": 0.6859101057052612 }, { "cosine_similarity": 0, "epoch": 2.27027027027027, "grad_norm": 0.9191021877945869, "learning_rate": 1.3513979979288921e-05, "loss": 0.9283, "reason_loss": 0.44763681292533875, "step": 2436, "utility_loss": 0.48067110776901245 }, { "cosine_similarity": 0, "epoch": 2.271202236719478, "grad_norm": 0.9827577068882835, "learning_rate": 1.3496720745598896e-05, "loss": 1.1704, "reason_loss": 0.45959043502807617, "step": 2437, "utility_loss": 0.7107700705528259 }, { "cosine_similarity": 0, "epoch": 2.2721342031686858, "grad_norm": 0.8692573551490774, "learning_rate": 1.3479461511908873e-05, "loss": 0.8834, "reason_loss": 0.47437652945518494, "step": 2438, "utility_loss": 0.4090573191642761 }, { "cosine_similarity": 0, "epoch": 2.273066169617894, "grad_norm": 0.9024359930886737, "learning_rate": 1.3462202278218848e-05, "loss": 0.9418, "reason_loss": 0.4283486604690552, "step": 2439, "utility_loss": 0.513495147228241 }, { "cosine_similarity": 0, "epoch": 2.2739981360671018, "grad_norm": 0.9512118107812462, "learning_rate": 1.3444943044528824e-05, "loss": 1.2085, "reason_loss": 0.48259395360946655, "step": 2440, "utility_loss": 0.7259074449539185 }, { "cosine_similarity": 0, "epoch": 2.2749301025163096, "grad_norm": 1.0124812938092067, "learning_rate": 1.3427683810838798e-05, "loss": 0.9787, "reason_loss": 0.4581431448459625, "step": 2441, "utility_loss": 0.5205174684524536 }, { "cosine_similarity": 0, "epoch": 2.2758620689655173, "grad_norm": 0.8427743855952615, "learning_rate": 1.3410424577148775e-05, "loss": 1.1947, "reason_loss": 0.4728795289993286, "step": 2442, "utility_loss": 0.721826434135437 }, { "cosine_similarity": 0, "epoch": 2.276794035414725, "grad_norm": 1.0027424394373015, "learning_rate": 1.3393165343458751e-05, "loss": 0.9588, "reason_loss": 0.4587705135345459, "step": 2443, "utility_loss": 0.4999970495700836 }, { "cosine_similarity": 0, "epoch": 2.277726001863933, "grad_norm": 0.9056761500020496, "learning_rate": 1.3375906109768726e-05, "loss": 1.172, "reason_loss": 0.48602473735809326, "step": 2444, "utility_loss": 0.6859419345855713 }, { "cosine_similarity": 0, "epoch": 2.2786579683131407, "grad_norm": 0.908790439400286, "learning_rate": 1.3358646876078703e-05, "loss": 1.2307, "reason_loss": 0.45291668176651, "step": 2445, "utility_loss": 0.7778059244155884 }, { "cosine_similarity": 0, "epoch": 2.2795899347623485, "grad_norm": 0.9849988739414758, "learning_rate": 1.3341387642388678e-05, "loss": 1.2115, "reason_loss": 0.4616556763648987, "step": 2446, "utility_loss": 0.7498194575309753 }, { "cosine_similarity": 0, "epoch": 2.2805219012115563, "grad_norm": 1.025790046201486, "learning_rate": 1.3324128408698655e-05, "loss": 1.3434, "reason_loss": 0.46669578552246094, "step": 2447, "utility_loss": 0.8766546845436096 }, { "cosine_similarity": 0, "epoch": 2.281453867660764, "grad_norm": 1.0340239318770181, "learning_rate": 1.330686917500863e-05, "loss": 1.2857, "reason_loss": 0.45965638756752014, "step": 2448, "utility_loss": 0.826048731803894 }, { "cosine_similarity": 0, "epoch": 2.282385834109972, "grad_norm": 1.11521255074472, "learning_rate": 1.3289609941318606e-05, "loss": 1.0596, "reason_loss": 0.4671304523944855, "step": 2449, "utility_loss": 0.592462956905365 }, { "cosine_similarity": 0, "epoch": 2.28331780055918, "grad_norm": 1.0070293923721216, "learning_rate": 1.327235070762858e-05, "loss": 1.3331, "reason_loss": 0.47323575615882874, "step": 2450, "utility_loss": 0.8598899841308594 }, { "cosine_similarity": 0, "epoch": 2.284249767008388, "grad_norm": 0.9186002884240354, "learning_rate": 1.3255091473938558e-05, "loss": 1.1184, "reason_loss": 0.4805139899253845, "step": 2451, "utility_loss": 0.6378862857818604 }, { "cosine_similarity": 0, "epoch": 2.2851817334575957, "grad_norm": 0.9535637032336348, "learning_rate": 1.3237832240248535e-05, "loss": 1.061, "reason_loss": 0.4794692397117615, "step": 2452, "utility_loss": 0.5814927220344543 }, { "cosine_similarity": 0, "epoch": 2.2861136999068035, "grad_norm": 0.8304561195436682, "learning_rate": 1.322057300655851e-05, "loss": 1.0136, "reason_loss": 0.4498671293258667, "step": 2453, "utility_loss": 0.5637764930725098 }, { "cosine_similarity": 0, "epoch": 2.2870456663560113, "grad_norm": 0.9900084140317045, "learning_rate": 1.3203313772868486e-05, "loss": 1.2534, "reason_loss": 0.45566123723983765, "step": 2454, "utility_loss": 0.7977755069732666 }, { "cosine_similarity": 0, "epoch": 2.287977632805219, "grad_norm": 1.0561220902635113, "learning_rate": 1.318605453917846e-05, "loss": 1.1924, "reason_loss": 0.4713134765625, "step": 2455, "utility_loss": 0.7211235761642456 }, { "cosine_similarity": 0, "epoch": 2.288909599254427, "grad_norm": 0.9436728676784989, "learning_rate": 1.3168795305488438e-05, "loss": 1.1155, "reason_loss": 0.47877591848373413, "step": 2456, "utility_loss": 0.6367413997650146 }, { "cosine_similarity": 0, "epoch": 2.2898415657036346, "grad_norm": 1.0276443694464632, "learning_rate": 1.3151536071798412e-05, "loss": 1.2538, "reason_loss": 0.4555782675743103, "step": 2457, "utility_loss": 0.7981996536254883 }, { "cosine_similarity": 0, "epoch": 2.2907735321528424, "grad_norm": 0.9067401919287774, "learning_rate": 1.313427683810839e-05, "loss": 0.9951, "reason_loss": 0.4575928747653961, "step": 2458, "utility_loss": 0.537514328956604 }, { "cosine_similarity": 0, "epoch": 2.29170549860205, "grad_norm": 1.068834942388112, "learning_rate": 1.3117017604418364e-05, "loss": 0.9935, "reason_loss": 0.45808517932891846, "step": 2459, "utility_loss": 0.5353772044181824 }, { "cosine_similarity": 0, "epoch": 2.292637465051258, "grad_norm": 1.0549540586580324, "learning_rate": 1.309975837072834e-05, "loss": 1.0039, "reason_loss": 0.4742884635925293, "step": 2460, "utility_loss": 0.52963787317276 }, { "cosine_similarity": 0, "epoch": 2.293569431500466, "grad_norm": 0.9216069413919051, "learning_rate": 1.3082499137038314e-05, "loss": 1.0564, "reason_loss": 0.46684783697128296, "step": 2461, "utility_loss": 0.5895408987998962 }, { "cosine_similarity": 0, "epoch": 2.294501397949674, "grad_norm": 1.0626811113233945, "learning_rate": 1.3065239903348292e-05, "loss": 0.9617, "reason_loss": 0.4568029046058655, "step": 2462, "utility_loss": 0.5048650503158569 }, { "cosine_similarity": 0, "epoch": 2.295433364398882, "grad_norm": 1.0037764289442856, "learning_rate": 1.304798066965827e-05, "loss": 0.9726, "reason_loss": 0.4548282027244568, "step": 2463, "utility_loss": 0.5177831649780273 }, { "cosine_similarity": 0, "epoch": 2.2963653308480896, "grad_norm": 0.8209070573280497, "learning_rate": 1.3030721435968244e-05, "loss": 1.0039, "reason_loss": 0.45628976821899414, "step": 2464, "utility_loss": 0.547584056854248 }, { "cosine_similarity": 0, "epoch": 2.2972972972972974, "grad_norm": 1.1839978056441913, "learning_rate": 1.301346220227822e-05, "loss": 1.4218, "reason_loss": 0.45616716146469116, "step": 2465, "utility_loss": 0.9656714797019958 }, { "cosine_similarity": 0, "epoch": 2.298229263746505, "grad_norm": 0.8917358005565458, "learning_rate": 1.2996202968588194e-05, "loss": 1.0553, "reason_loss": 0.4646015167236328, "step": 2466, "utility_loss": 0.590704083442688 }, { "cosine_similarity": 0, "epoch": 2.299161230195713, "grad_norm": 1.0483889731658214, "learning_rate": 1.2978943734898172e-05, "loss": 0.9887, "reason_loss": 0.45630955696105957, "step": 2467, "utility_loss": 0.532352089881897 }, { "cosine_similarity": 0, "epoch": 2.3000931966449207, "grad_norm": 1.0743829866859047, "learning_rate": 1.2961684501208146e-05, "loss": 1.2134, "reason_loss": 0.45423007011413574, "step": 2468, "utility_loss": 0.7591532468795776 }, { "cosine_similarity": 0, "epoch": 2.3010251630941285, "grad_norm": 0.8773308079431696, "learning_rate": 1.2944425267518124e-05, "loss": 1.0414, "reason_loss": 0.42715996503829956, "step": 2469, "utility_loss": 0.614237904548645 }, { "cosine_similarity": 0, "epoch": 2.3019571295433363, "grad_norm": 1.0807306674754176, "learning_rate": 1.2927166033828098e-05, "loss": 1.2244, "reason_loss": 0.495059609413147, "step": 2470, "utility_loss": 0.7292917966842651 }, { "cosine_similarity": 0, "epoch": 2.302889095992544, "grad_norm": 0.9761841084421993, "learning_rate": 1.2909906800138074e-05, "loss": 1.2184, "reason_loss": 0.4862648844718933, "step": 2471, "utility_loss": 0.7321226596832275 }, { "cosine_similarity": 0, "epoch": 2.3038210624417523, "grad_norm": 0.876655648566123, "learning_rate": 1.2892647566448048e-05, "loss": 1.2515, "reason_loss": 0.4638954997062683, "step": 2472, "utility_loss": 0.7876391410827637 }, { "cosine_similarity": 0, "epoch": 2.3047530288909597, "grad_norm": 0.8828516119049826, "learning_rate": 1.2875388332758026e-05, "loss": 1.0258, "reason_loss": 0.48251646757125854, "step": 2473, "utility_loss": 0.5432703495025635 }, { "cosine_similarity": 0, "epoch": 2.305684995340168, "grad_norm": 1.0402001710645281, "learning_rate": 1.2858129099068004e-05, "loss": 0.9942, "reason_loss": 0.4400484561920166, "step": 2474, "utility_loss": 0.5541925430297852 }, { "cosine_similarity": 0, "epoch": 2.3066169617893757, "grad_norm": 0.8550554856808723, "learning_rate": 1.2840869865377978e-05, "loss": 1.1567, "reason_loss": 0.46127110719680786, "step": 2475, "utility_loss": 0.6954206228256226 }, { "cosine_similarity": 0, "epoch": 2.3075489282385835, "grad_norm": 0.8088462707113377, "learning_rate": 1.2823610631687954e-05, "loss": 1.1273, "reason_loss": 0.4375823140144348, "step": 2476, "utility_loss": 0.6897385120391846 }, { "cosine_similarity": 0, "epoch": 2.3084808946877913, "grad_norm": 1.0225118025209976, "learning_rate": 1.2806351397997928e-05, "loss": 1.063, "reason_loss": 0.4634614586830139, "step": 2477, "utility_loss": 0.5995506048202515 }, { "cosine_similarity": 0, "epoch": 2.309412861136999, "grad_norm": 1.1333370969201197, "learning_rate": 1.2789092164307906e-05, "loss": 1.1244, "reason_loss": 0.4668562710285187, "step": 2478, "utility_loss": 0.6575164198875427 }, { "cosine_similarity": 0, "epoch": 2.310344827586207, "grad_norm": 1.0520061783559038, "learning_rate": 1.277183293061788e-05, "loss": 1.0867, "reason_loss": 0.43491947650909424, "step": 2479, "utility_loss": 0.6518025398254395 }, { "cosine_similarity": 0, "epoch": 2.3112767940354146, "grad_norm": 0.8732981739179108, "learning_rate": 1.2754573696927858e-05, "loss": 0.8089, "reason_loss": 0.46492329239845276, "step": 2480, "utility_loss": 0.34393560886383057 }, { "cosine_similarity": 0, "epoch": 2.3122087604846224, "grad_norm": 1.072416592205571, "learning_rate": 1.2737314463237832e-05, "loss": 1.3582, "reason_loss": 0.4561386704444885, "step": 2481, "utility_loss": 0.9020271897315979 }, { "cosine_similarity": 0, "epoch": 2.31314072693383, "grad_norm": 0.8804528646599551, "learning_rate": 1.2720055229547808e-05, "loss": 1.0002, "reason_loss": 0.49975645542144775, "step": 2482, "utility_loss": 0.5004592537879944 }, { "cosine_similarity": 0, "epoch": 2.3140726933830384, "grad_norm": 0.9194663292903774, "learning_rate": 1.2702795995857786e-05, "loss": 1.0354, "reason_loss": 0.4703272879123688, "step": 2483, "utility_loss": 0.5650545954704285 }, { "cosine_similarity": 0, "epoch": 2.315004659832246, "grad_norm": 1.0143561987599392, "learning_rate": 1.268553676216776e-05, "loss": 1.0614, "reason_loss": 0.452627569437027, "step": 2484, "utility_loss": 0.6087312698364258 }, { "cosine_similarity": 0, "epoch": 2.315936626281454, "grad_norm": 0.9228818313029813, "learning_rate": 1.2668277528477738e-05, "loss": 1.2185, "reason_loss": 0.44471976161003113, "step": 2485, "utility_loss": 0.7738121747970581 }, { "cosine_similarity": 0, "epoch": 2.316868592730662, "grad_norm": 0.9885472596254509, "learning_rate": 1.2651018294787712e-05, "loss": 1.0635, "reason_loss": 0.44325363636016846, "step": 2486, "utility_loss": 0.6202436685562134 }, { "cosine_similarity": 0, "epoch": 2.3178005591798696, "grad_norm": 0.9313034881038579, "learning_rate": 1.2633759061097688e-05, "loss": 1.2352, "reason_loss": 0.4754961431026459, "step": 2487, "utility_loss": 0.7597252726554871 }, { "cosine_similarity": 0, "epoch": 2.3187325256290774, "grad_norm": 1.6601989795635097, "learning_rate": 1.2616499827407662e-05, "loss": 1.3708, "reason_loss": 0.45358526706695557, "step": 2488, "utility_loss": 0.9171749949455261 }, { "cosine_similarity": 0, "epoch": 2.319664492078285, "grad_norm": 1.093653719458185, "learning_rate": 1.259924059371764e-05, "loss": 1.0725, "reason_loss": 0.4705384373664856, "step": 2489, "utility_loss": 0.6019387245178223 }, { "cosine_similarity": 0, "epoch": 2.320596458527493, "grad_norm": 1.0507144070328287, "learning_rate": 1.2581981360027614e-05, "loss": 1.1629, "reason_loss": 0.43665003776550293, "step": 2490, "utility_loss": 0.7262266278266907 }, { "cosine_similarity": 0, "epoch": 2.3215284249767008, "grad_norm": 1.0217802956957922, "learning_rate": 1.2564722126337592e-05, "loss": 1.5271, "reason_loss": 0.4827077388763428, "step": 2491, "utility_loss": 1.0443880558013916 }, { "cosine_similarity": 0, "epoch": 2.3224603914259085, "grad_norm": 0.9441019978308852, "learning_rate": 1.2547462892647566e-05, "loss": 1.2532, "reason_loss": 0.4918133616447449, "step": 2492, "utility_loss": 0.7613541483879089 }, { "cosine_similarity": 0, "epoch": 2.3233923578751163, "grad_norm": 1.0332518974475047, "learning_rate": 1.2530203658957542e-05, "loss": 1.292, "reason_loss": 0.48070740699768066, "step": 2493, "utility_loss": 0.8113332390785217 }, { "cosine_similarity": 0, "epoch": 2.3243243243243246, "grad_norm": 1.0431362239790678, "learning_rate": 1.251294442526752e-05, "loss": 1.2028, "reason_loss": 0.4627034068107605, "step": 2494, "utility_loss": 0.7400826215744019 }, { "cosine_similarity": 0, "epoch": 2.325256290773532, "grad_norm": 0.8534610637002554, "learning_rate": 1.2495685191577494e-05, "loss": 1.1408, "reason_loss": 0.48462796211242676, "step": 2495, "utility_loss": 0.6561616063117981 }, { "cosine_similarity": 0, "epoch": 2.32618825722274, "grad_norm": 0.9415593404627947, "learning_rate": 1.247842595788747e-05, "loss": 1.0376, "reason_loss": 0.4649806022644043, "step": 2496, "utility_loss": 0.5726400017738342 }, { "cosine_similarity": 0, "epoch": 2.327120223671948, "grad_norm": 0.9242802095270118, "learning_rate": 1.2461166724197446e-05, "loss": 0.9048, "reason_loss": 0.44798415899276733, "step": 2497, "utility_loss": 0.45676738023757935 }, { "cosine_similarity": 0, "epoch": 2.3280521901211557, "grad_norm": 1.0209103168210028, "learning_rate": 1.2443907490507422e-05, "loss": 1.1422, "reason_loss": 0.4868515133857727, "step": 2498, "utility_loss": 0.6553130745887756 }, { "cosine_similarity": 0, "epoch": 2.3289841565703635, "grad_norm": 1.0528405180567502, "learning_rate": 1.2426648256817398e-05, "loss": 0.9752, "reason_loss": 0.45891904830932617, "step": 2499, "utility_loss": 0.5162901878356934 }, { "cosine_similarity": 0, "epoch": 2.3299161230195713, "grad_norm": 0.9547052107426958, "learning_rate": 1.2409389023127374e-05, "loss": 1.0067, "reason_loss": 0.4483526945114136, "step": 2500, "utility_loss": 0.558351993560791 }, { "cosine_similarity": 0, "epoch": 2.330848089468779, "grad_norm": 1.2275550596784373, "learning_rate": 1.239212978943735e-05, "loss": 1.0952, "reason_loss": 0.487814724445343, "step": 2501, "utility_loss": 0.6074143648147583 }, { "cosine_similarity": 0, "epoch": 2.331780055917987, "grad_norm": 1.18561550606021, "learning_rate": 1.2374870555747326e-05, "loss": 1.1207, "reason_loss": 0.43852293491363525, "step": 2502, "utility_loss": 0.6821863651275635 }, { "cosine_similarity": 0, "epoch": 2.3327120223671947, "grad_norm": 1.0166929126910536, "learning_rate": 1.23576113220573e-05, "loss": 1.0816, "reason_loss": 0.46150997281074524, "step": 2503, "utility_loss": 0.6200979351997375 }, { "cosine_similarity": 0, "epoch": 2.3336439888164024, "grad_norm": 1.095185593731232, "learning_rate": 1.2340352088367276e-05, "loss": 0.9978, "reason_loss": 0.44101572036743164, "step": 2504, "utility_loss": 0.5567959547042847 }, { "cosine_similarity": 0, "epoch": 2.3345759552656107, "grad_norm": 0.8870049829123251, "learning_rate": 1.2323092854677252e-05, "loss": 1.0593, "reason_loss": 0.49763503670692444, "step": 2505, "utility_loss": 0.5616756677627563 }, { "cosine_similarity": 0, "epoch": 2.335507921714818, "grad_norm": 0.9563070954729137, "learning_rate": 1.2305833620987228e-05, "loss": 0.9835, "reason_loss": 0.4607369303703308, "step": 2506, "utility_loss": 0.522802472114563 }, { "cosine_similarity": 0, "epoch": 2.3364398881640263, "grad_norm": 1.043243966601596, "learning_rate": 1.2288574387297204e-05, "loss": 1.1222, "reason_loss": 0.46967369318008423, "step": 2507, "utility_loss": 0.6525200605392456 }, { "cosine_similarity": 0, "epoch": 2.337371854613234, "grad_norm": 1.1338111852604957, "learning_rate": 1.227131515360718e-05, "loss": 1.1949, "reason_loss": 0.4822010099887848, "step": 2508, "utility_loss": 0.712693452835083 }, { "cosine_similarity": 0, "epoch": 2.338303821062442, "grad_norm": 0.9366957213353739, "learning_rate": 1.2254055919917156e-05, "loss": 1.3546, "reason_loss": 0.5021651983261108, "step": 2509, "utility_loss": 0.8524811863899231 }, { "cosine_similarity": 0, "epoch": 2.3392357875116496, "grad_norm": 0.8902754824111377, "learning_rate": 1.2236796686227132e-05, "loss": 0.9564, "reason_loss": 0.48543357849121094, "step": 2510, "utility_loss": 0.4709380567073822 }, { "cosine_similarity": 0, "epoch": 2.3401677539608574, "grad_norm": 1.0499871495968567, "learning_rate": 1.2219537452537108e-05, "loss": 1.1958, "reason_loss": 0.4667074978351593, "step": 2511, "utility_loss": 0.7291337847709656 }, { "cosine_similarity": 0, "epoch": 2.341099720410065, "grad_norm": 0.8976874530052142, "learning_rate": 1.2202278218847084e-05, "loss": 0.9894, "reason_loss": 0.45626169443130493, "step": 2512, "utility_loss": 0.5331405401229858 }, { "cosine_similarity": 0, "epoch": 2.342031686859273, "grad_norm": 0.8574373458241341, "learning_rate": 1.218501898515706e-05, "loss": 0.9167, "reason_loss": 0.4680733382701874, "step": 2513, "utility_loss": 0.448653906583786 }, { "cosine_similarity": 0, "epoch": 2.3429636533084808, "grad_norm": 0.8696567669998815, "learning_rate": 1.2167759751467034e-05, "loss": 1.0956, "reason_loss": 0.4627234935760498, "step": 2514, "utility_loss": 0.632910966873169 }, { "cosine_similarity": 0, "epoch": 2.3438956197576886, "grad_norm": 0.9951083257782732, "learning_rate": 1.215050051777701e-05, "loss": 1.0754, "reason_loss": 0.49707770347595215, "step": 2515, "utility_loss": 0.5783277153968811 }, { "cosine_similarity": 0, "epoch": 2.344827586206897, "grad_norm": 1.246796996271531, "learning_rate": 1.2133241284086986e-05, "loss": 1.1373, "reason_loss": 0.4579116106033325, "step": 2516, "utility_loss": 0.6793783903121948 }, { "cosine_similarity": 0, "epoch": 2.345759552656104, "grad_norm": 0.9096301851042563, "learning_rate": 1.2115982050396962e-05, "loss": 1.269, "reason_loss": 0.46831759810447693, "step": 2517, "utility_loss": 0.8007094264030457 }, { "cosine_similarity": 0, "epoch": 2.3466915191053124, "grad_norm": 0.8791119639082295, "learning_rate": 1.209872281670694e-05, "loss": 0.9792, "reason_loss": 0.458016961812973, "step": 2518, "utility_loss": 0.5212329626083374 }, { "cosine_similarity": 0, "epoch": 2.34762348555452, "grad_norm": 0.7906070004721021, "learning_rate": 1.2081463583016914e-05, "loss": 1.0217, "reason_loss": 0.47749701142311096, "step": 2519, "utility_loss": 0.544226884841919 }, { "cosine_similarity": 0, "epoch": 2.348555452003728, "grad_norm": 0.998091670011473, "learning_rate": 1.206420434932689e-05, "loss": 1.0028, "reason_loss": 0.4678911864757538, "step": 2520, "utility_loss": 0.5349147319793701 }, { "cosine_similarity": 0, "epoch": 2.3494874184529357, "grad_norm": 1.0447526081313927, "learning_rate": 1.2046945115636866e-05, "loss": 1.0599, "reason_loss": 0.452389657497406, "step": 2521, "utility_loss": 0.6075412034988403 }, { "cosine_similarity": 0, "epoch": 2.3504193849021435, "grad_norm": 0.8227935659977968, "learning_rate": 1.2029685881946842e-05, "loss": 0.7434, "reason_loss": 0.46019554138183594, "step": 2522, "utility_loss": 0.28323572874069214 }, { "cosine_similarity": 0, "epoch": 2.3513513513513513, "grad_norm": 0.8875018570246687, "learning_rate": 1.2012426648256818e-05, "loss": 1.4172, "reason_loss": 0.45243534445762634, "step": 2523, "utility_loss": 0.9647155404090881 }, { "cosine_similarity": 0, "epoch": 2.352283317800559, "grad_norm": 1.0615517741295721, "learning_rate": 1.1995167414566794e-05, "loss": 1.3371, "reason_loss": 0.4724532961845398, "step": 2524, "utility_loss": 0.8646032810211182 }, { "cosine_similarity": 0, "epoch": 2.353215284249767, "grad_norm": 0.8677276942682645, "learning_rate": 1.1977908180876769e-05, "loss": 1.243, "reason_loss": 0.4735717177391052, "step": 2525, "utility_loss": 0.769404411315918 }, { "cosine_similarity": 0, "epoch": 2.3541472506989747, "grad_norm": 0.9299396543500036, "learning_rate": 1.1960648947186745e-05, "loss": 1.0535, "reason_loss": 0.48483413457870483, "step": 2526, "utility_loss": 0.5686528086662292 }, { "cosine_similarity": 0, "epoch": 2.355079217148183, "grad_norm": 0.9338411551286346, "learning_rate": 1.194338971349672e-05, "loss": 1.1922, "reason_loss": 0.47776156663894653, "step": 2527, "utility_loss": 0.7144663333892822 }, { "cosine_similarity": 0, "epoch": 2.3560111835973903, "grad_norm": 1.0337404101058056, "learning_rate": 1.1926130479806698e-05, "loss": 1.1451, "reason_loss": 0.45219358801841736, "step": 2528, "utility_loss": 0.6929362416267395 }, { "cosine_similarity": 0, "epoch": 2.3569431500465985, "grad_norm": 1.522991210021942, "learning_rate": 1.1908871246116674e-05, "loss": 1.2719, "reason_loss": 0.4648938775062561, "step": 2529, "utility_loss": 0.807050883769989 }, { "cosine_similarity": 0, "epoch": 2.3578751164958063, "grad_norm": 1.1801532758183717, "learning_rate": 1.1891612012426649e-05, "loss": 1.2929, "reason_loss": 0.47231483459472656, "step": 2530, "utility_loss": 0.8205497860908508 }, { "cosine_similarity": 0, "epoch": 2.358807082945014, "grad_norm": 0.8784386528359406, "learning_rate": 1.1874352778736625e-05, "loss": 0.9614, "reason_loss": 0.4796367883682251, "step": 2531, "utility_loss": 0.481735497713089 }, { "cosine_similarity": 0, "epoch": 2.359739049394222, "grad_norm": 0.8941575038660285, "learning_rate": 1.18570935450466e-05, "loss": 1.0251, "reason_loss": 0.4620153605937958, "step": 2532, "utility_loss": 0.5630825757980347 }, { "cosine_similarity": 0, "epoch": 2.3606710158434296, "grad_norm": 0.9505826396391859, "learning_rate": 1.1839834311356576e-05, "loss": 1.0473, "reason_loss": 0.4454432725906372, "step": 2533, "utility_loss": 0.6018246412277222 }, { "cosine_similarity": 0, "epoch": 2.3616029822926374, "grad_norm": 1.0171773576158964, "learning_rate": 1.1822575077666552e-05, "loss": 1.2016, "reason_loss": 0.46800678968429565, "step": 2534, "utility_loss": 0.7336089015007019 }, { "cosine_similarity": 0, "epoch": 2.362534948741845, "grad_norm": 1.0237123245333135, "learning_rate": 1.1805315843976528e-05, "loss": 0.9092, "reason_loss": 0.458057701587677, "step": 2535, "utility_loss": 0.4511686861515045 }, { "cosine_similarity": 0, "epoch": 2.363466915191053, "grad_norm": 0.9961638139393717, "learning_rate": 1.1788056610286503e-05, "loss": 1.1312, "reason_loss": 0.4407033920288086, "step": 2536, "utility_loss": 0.6905418634414673 }, { "cosine_similarity": 0, "epoch": 2.364398881640261, "grad_norm": 0.9518407770055789, "learning_rate": 1.1770797376596479e-05, "loss": 1.0179, "reason_loss": 0.4721542298793793, "step": 2537, "utility_loss": 0.5457087755203247 }, { "cosine_similarity": 0, "epoch": 2.3653308480894686, "grad_norm": 1.1466512990594857, "learning_rate": 1.1753538142906456e-05, "loss": 1.0808, "reason_loss": 0.47256916761398315, "step": 2538, "utility_loss": 0.6082609295845032 }, { "cosine_similarity": 0, "epoch": 2.3662628145386764, "grad_norm": 0.9402946053707811, "learning_rate": 1.1736278909216432e-05, "loss": 1.2309, "reason_loss": 0.4287509024143219, "step": 2539, "utility_loss": 0.8021407127380371 }, { "cosine_similarity": 0, "epoch": 2.3671947809878846, "grad_norm": 0.9285393357584135, "learning_rate": 1.1719019675526408e-05, "loss": 1.1671, "reason_loss": 0.4413895905017853, "step": 2540, "utility_loss": 0.7256838083267212 }, { "cosine_similarity": 0, "epoch": 2.3681267474370924, "grad_norm": 1.0140374180622815, "learning_rate": 1.1701760441836383e-05, "loss": 1.6648, "reason_loss": 0.44453874230384827, "step": 2541, "utility_loss": 1.22025728225708 }, { "cosine_similarity": 0, "epoch": 2.3690587138863, "grad_norm": 1.067744038059804, "learning_rate": 1.1684501208146359e-05, "loss": 0.8976, "reason_loss": 0.4690539836883545, "step": 2542, "utility_loss": 0.4285780191421509 }, { "cosine_similarity": 0, "epoch": 2.369990680335508, "grad_norm": 1.1520803526732435, "learning_rate": 1.1667241974456335e-05, "loss": 1.2199, "reason_loss": 0.5167440176010132, "step": 2543, "utility_loss": 0.7031185626983643 }, { "cosine_similarity": 0, "epoch": 2.3709226467847158, "grad_norm": 0.9298525012892683, "learning_rate": 1.164998274076631e-05, "loss": 1.1229, "reason_loss": 0.47770923376083374, "step": 2544, "utility_loss": 0.6452122926712036 }, { "cosine_similarity": 0, "epoch": 2.3718546132339235, "grad_norm": 0.9600710002248987, "learning_rate": 1.1632723507076287e-05, "loss": 0.9404, "reason_loss": 0.4554191827774048, "step": 2545, "utility_loss": 0.48502403497695923 }, { "cosine_similarity": 0, "epoch": 2.3727865796831313, "grad_norm": 1.051147322053895, "learning_rate": 1.1615464273386263e-05, "loss": 1.0998, "reason_loss": 0.47587788105010986, "step": 2546, "utility_loss": 0.623924970626831 }, { "cosine_similarity": 0, "epoch": 2.373718546132339, "grad_norm": 0.9282789878237344, "learning_rate": 1.1598205039696237e-05, "loss": 0.9479, "reason_loss": 0.45550596714019775, "step": 2547, "utility_loss": 0.49244022369384766 }, { "cosine_similarity": 0, "epoch": 2.374650512581547, "grad_norm": 0.8651376487895241, "learning_rate": 1.1580945806006215e-05, "loss": 0.9224, "reason_loss": 0.4341626465320587, "step": 2548, "utility_loss": 0.4882085621356964 }, { "cosine_similarity": 0, "epoch": 2.3755824790307547, "grad_norm": 1.0400599649253, "learning_rate": 1.156368657231619e-05, "loss": 1.1054, "reason_loss": 0.4456624984741211, "step": 2549, "utility_loss": 0.6597785949707031 }, { "cosine_similarity": 0, "epoch": 2.3765144454799625, "grad_norm": 0.8523251497298779, "learning_rate": 1.1546427338626167e-05, "loss": 0.8439, "reason_loss": 0.44884607195854187, "step": 2550, "utility_loss": 0.3950868248939514 }, { "cosine_similarity": 0, "epoch": 2.3774464119291707, "grad_norm": 1.0202934241939763, "learning_rate": 1.152916810493614e-05, "loss": 1.0963, "reason_loss": 0.4647456407546997, "step": 2551, "utility_loss": 0.6315118670463562 }, { "cosine_similarity": 0, "epoch": 2.3783783783783785, "grad_norm": 1.0785956952247153, "learning_rate": 1.1511908871246117e-05, "loss": 0.9283, "reason_loss": 0.43705588579177856, "step": 2552, "utility_loss": 0.49125680327415466 }, { "cosine_similarity": 0, "epoch": 2.3793103448275863, "grad_norm": 0.9024388746446477, "learning_rate": 1.1494649637556093e-05, "loss": 1.1268, "reason_loss": 0.4615459442138672, "step": 2553, "utility_loss": 0.6652379631996155 }, { "cosine_similarity": 0, "epoch": 2.380242311276794, "grad_norm": 1.0114767792555663, "learning_rate": 1.1477390403866069e-05, "loss": 1.2026, "reason_loss": 0.45039987564086914, "step": 2554, "utility_loss": 0.7522189021110535 }, { "cosine_similarity": 0, "epoch": 2.381174277726002, "grad_norm": 1.110832989855726, "learning_rate": 1.1460131170176045e-05, "loss": 1.0791, "reason_loss": 0.44596895575523376, "step": 2555, "utility_loss": 0.6331439018249512 }, { "cosine_similarity": 0, "epoch": 2.3821062441752097, "grad_norm": 0.9279572984585389, "learning_rate": 1.144287193648602e-05, "loss": 1.0692, "reason_loss": 0.4609887897968292, "step": 2556, "utility_loss": 0.6082391738891602 }, { "cosine_similarity": 0, "epoch": 2.3830382106244175, "grad_norm": 1.0650700875458046, "learning_rate": 1.1425612702795995e-05, "loss": 1.2931, "reason_loss": 0.4551585912704468, "step": 2557, "utility_loss": 0.8379390239715576 }, { "cosine_similarity": 0, "epoch": 2.3839701770736252, "grad_norm": 1.043825350918128, "learning_rate": 1.1408353469105973e-05, "loss": 1.0464, "reason_loss": 0.45428353548049927, "step": 2558, "utility_loss": 0.5920931100845337 }, { "cosine_similarity": 0, "epoch": 2.384902143522833, "grad_norm": 0.9335599173071311, "learning_rate": 1.1391094235415949e-05, "loss": 0.9523, "reason_loss": 0.4829750657081604, "step": 2559, "utility_loss": 0.46935153007507324 }, { "cosine_similarity": 0, "epoch": 2.385834109972041, "grad_norm": 1.0317163902740512, "learning_rate": 1.1373835001725925e-05, "loss": 1.0542, "reason_loss": 0.4442000091075897, "step": 2560, "utility_loss": 0.6100123524665833 }, { "cosine_similarity": 0, "epoch": 2.3867660764212486, "grad_norm": 1.0651034482472124, "learning_rate": 1.13565757680359e-05, "loss": 1.1944, "reason_loss": 0.4859841763973236, "step": 2561, "utility_loss": 0.7084186673164368 }, { "cosine_similarity": 0, "epoch": 2.387698042870457, "grad_norm": 0.8795021829139013, "learning_rate": 1.1339316534345875e-05, "loss": 0.9041, "reason_loss": 0.46686387062072754, "step": 2562, "utility_loss": 0.437231183052063 }, { "cosine_similarity": 0, "epoch": 2.3886300093196646, "grad_norm": 0.9200599585122794, "learning_rate": 1.1322057300655851e-05, "loss": 1.1761, "reason_loss": 0.4816214442253113, "step": 2563, "utility_loss": 0.6944749355316162 }, { "cosine_similarity": 0, "epoch": 2.3895619757688724, "grad_norm": 1.0289038278041378, "learning_rate": 1.1304798066965827e-05, "loss": 1.1719, "reason_loss": 0.4799654483795166, "step": 2564, "utility_loss": 0.691962718963623 }, { "cosine_similarity": 0, "epoch": 2.39049394221808, "grad_norm": 0.9217440024851482, "learning_rate": 1.1287538833275803e-05, "loss": 1.06, "reason_loss": 0.4757026433944702, "step": 2565, "utility_loss": 0.5842851996421814 }, { "cosine_similarity": 0, "epoch": 2.391425908667288, "grad_norm": 0.9463359128888484, "learning_rate": 1.1270279599585779e-05, "loss": 1.2679, "reason_loss": 0.4477021098136902, "step": 2566, "utility_loss": 0.8201868534088135 }, { "cosine_similarity": 0, "epoch": 2.392357875116496, "grad_norm": 0.7874498334189065, "learning_rate": 1.1253020365895755e-05, "loss": 0.8842, "reason_loss": 0.4713292717933655, "step": 2567, "utility_loss": 0.4128792881965637 }, { "cosine_similarity": 0, "epoch": 2.3932898415657036, "grad_norm": 1.0065244380537228, "learning_rate": 1.1235761132205731e-05, "loss": 1.0334, "reason_loss": 0.4534114599227905, "step": 2568, "utility_loss": 0.5799846649169922 }, { "cosine_similarity": 0, "epoch": 2.3942218080149114, "grad_norm": 1.0196853469485836, "learning_rate": 1.1218501898515707e-05, "loss": 1.2099, "reason_loss": 0.48911866545677185, "step": 2569, "utility_loss": 0.720806360244751 }, { "cosine_similarity": 0, "epoch": 2.395153774464119, "grad_norm": 0.8769122711479836, "learning_rate": 1.1201242664825683e-05, "loss": 0.9926, "reason_loss": 0.47413432598114014, "step": 2570, "utility_loss": 0.5184451937675476 }, { "cosine_similarity": 0, "epoch": 2.396085740913327, "grad_norm": 0.8784442830460443, "learning_rate": 1.1183983431135659e-05, "loss": 1.0864, "reason_loss": 0.44645586609840393, "step": 2571, "utility_loss": 0.6399619579315186 }, { "cosine_similarity": 0, "epoch": 2.3970177073625347, "grad_norm": 0.9667906221292001, "learning_rate": 1.1166724197445635e-05, "loss": 1.3302, "reason_loss": 0.48225122690200806, "step": 2572, "utility_loss": 0.8479350805282593 }, { "cosine_similarity": 0, "epoch": 2.397949673811743, "grad_norm": 1.1123245758840938, "learning_rate": 1.1149464963755609e-05, "loss": 0.9299, "reason_loss": 0.4640544652938843, "step": 2573, "utility_loss": 0.46588030457496643 }, { "cosine_similarity": 0, "epoch": 2.3988816402609507, "grad_norm": 1.046940756300467, "learning_rate": 1.1132205730065585e-05, "loss": 0.9589, "reason_loss": 0.4633999466896057, "step": 2574, "utility_loss": 0.4954950213432312 }, { "cosine_similarity": 0, "epoch": 2.3998136067101585, "grad_norm": 1.0547057879659685, "learning_rate": 1.1114946496375561e-05, "loss": 1.1394, "reason_loss": 0.4413358271121979, "step": 2575, "utility_loss": 0.6980899572372437 }, { "cosine_similarity": 0, "epoch": 2.4007455731593663, "grad_norm": 0.8805686811591833, "learning_rate": 1.1097687262685537e-05, "loss": 1.0349, "reason_loss": 0.4698876738548279, "step": 2576, "utility_loss": 0.5650310516357422 }, { "cosine_similarity": 0, "epoch": 2.401677539608574, "grad_norm": 1.0699950980991042, "learning_rate": 1.1080428028995513e-05, "loss": 1.3784, "reason_loss": 0.46430766582489014, "step": 2577, "utility_loss": 0.9140944480895996 }, { "cosine_similarity": 0, "epoch": 2.402609506057782, "grad_norm": 1.0448543029183623, "learning_rate": 1.1063168795305489e-05, "loss": 1.0607, "reason_loss": 0.4652869701385498, "step": 2578, "utility_loss": 0.5954563021659851 }, { "cosine_similarity": 0, "epoch": 2.4035414725069897, "grad_norm": 1.0850332475120648, "learning_rate": 1.1045909561615465e-05, "loss": 1.068, "reason_loss": 0.4610379636287689, "step": 2579, "utility_loss": 0.6069748997688293 }, { "cosine_similarity": 0, "epoch": 2.4044734389561975, "grad_norm": 1.0849753253474532, "learning_rate": 1.1028650327925441e-05, "loss": 1.2335, "reason_loss": 0.4318886399269104, "step": 2580, "utility_loss": 0.8016369342803955 }, { "cosine_similarity": 0, "epoch": 2.4054054054054053, "grad_norm": 0.8110932254151205, "learning_rate": 1.1011391094235417e-05, "loss": 0.8139, "reason_loss": 0.48206937313079834, "step": 2581, "utility_loss": 0.33181625604629517 }, { "cosine_similarity": 0, "epoch": 2.406337371854613, "grad_norm": 0.961606374201338, "learning_rate": 1.0994131860545393e-05, "loss": 0.9495, "reason_loss": 0.48606783151626587, "step": 2582, "utility_loss": 0.46338963508605957 }, { "cosine_similarity": 0, "epoch": 2.407269338303821, "grad_norm": 1.0031670511423147, "learning_rate": 1.0976872626855369e-05, "loss": 1.0928, "reason_loss": 0.4840819239616394, "step": 2583, "utility_loss": 0.6087037324905396 }, { "cosine_similarity": 0, "epoch": 2.408201304753029, "grad_norm": 1.0544495949782187, "learning_rate": 1.0959613393165343e-05, "loss": 1.0596, "reason_loss": 0.505669116973877, "step": 2584, "utility_loss": 0.5539674758911133 }, { "cosine_similarity": 0, "epoch": 2.409133271202237, "grad_norm": 1.0411897079725299, "learning_rate": 1.094235415947532e-05, "loss": 1.0898, "reason_loss": 0.46441349387168884, "step": 2585, "utility_loss": 0.6253982186317444 }, { "cosine_similarity": 0, "epoch": 2.4100652376514446, "grad_norm": 0.9499452876294011, "learning_rate": 1.0925094925785295e-05, "loss": 0.9764, "reason_loss": 0.44574040174484253, "step": 2586, "utility_loss": 0.530704915523529 }, { "cosine_similarity": 0, "epoch": 2.4109972041006524, "grad_norm": 0.8463828848027324, "learning_rate": 1.0907835692095271e-05, "loss": 1.2172, "reason_loss": 0.47420716285705566, "step": 2587, "utility_loss": 0.7429447174072266 }, { "cosine_similarity": 0, "epoch": 2.4119291705498602, "grad_norm": 0.9025518277246304, "learning_rate": 1.0890576458405247e-05, "loss": 1.0167, "reason_loss": 0.4428473114967346, "step": 2588, "utility_loss": 0.5739004015922546 }, { "cosine_similarity": 0, "epoch": 2.412861136999068, "grad_norm": 0.9140691504100801, "learning_rate": 1.0873317224715223e-05, "loss": 1.113, "reason_loss": 0.4639156460762024, "step": 2589, "utility_loss": 0.649083137512207 }, { "cosine_similarity": 0, "epoch": 2.413793103448276, "grad_norm": 1.0075263247777362, "learning_rate": 1.0856057991025199e-05, "loss": 0.8928, "reason_loss": 0.4486851692199707, "step": 2590, "utility_loss": 0.4441472291946411 }, { "cosine_similarity": 0, "epoch": 2.4147250698974836, "grad_norm": 1.1577238860430703, "learning_rate": 1.0838798757335175e-05, "loss": 1.2257, "reason_loss": 0.4430619180202484, "step": 2591, "utility_loss": 0.7826417684555054 }, { "cosine_similarity": 0, "epoch": 2.4156570363466914, "grad_norm": 1.0084303604952336, "learning_rate": 1.0821539523645151e-05, "loss": 0.9727, "reason_loss": 0.4569898843765259, "step": 2592, "utility_loss": 0.5156998634338379 }, { "cosine_similarity": 0, "epoch": 2.416589002795899, "grad_norm": 0.8848762559038758, "learning_rate": 1.0804280289955127e-05, "loss": 1.1715, "reason_loss": 0.4837344288825989, "step": 2593, "utility_loss": 0.6877996921539307 }, { "cosine_similarity": 0, "epoch": 2.417520969245107, "grad_norm": 0.8201695287478047, "learning_rate": 1.0787021056265103e-05, "loss": 0.9372, "reason_loss": 0.4705541133880615, "step": 2594, "utility_loss": 0.46669480204582214 }, { "cosine_similarity": 0, "epoch": 2.418452935694315, "grad_norm": 1.0529372726504822, "learning_rate": 1.0769761822575077e-05, "loss": 1.4085, "reason_loss": 0.4709429442882538, "step": 2595, "utility_loss": 0.9375786185264587 }, { "cosine_similarity": 0, "epoch": 2.419384902143523, "grad_norm": 0.9413739114055251, "learning_rate": 1.0752502588885053e-05, "loss": 0.953, "reason_loss": 0.4924098253250122, "step": 2596, "utility_loss": 0.4605691432952881 }, { "cosine_similarity": 0, "epoch": 2.4203168685927308, "grad_norm": 0.7957793586653901, "learning_rate": 1.073524335519503e-05, "loss": 1.1742, "reason_loss": 0.48492151498794556, "step": 2597, "utility_loss": 0.68928462266922 }, { "cosine_similarity": 0, "epoch": 2.4212488350419386, "grad_norm": 1.082078979133201, "learning_rate": 1.0717984121505005e-05, "loss": 1.252, "reason_loss": 0.47132444381713867, "step": 2598, "utility_loss": 0.7807069420814514 }, { "cosine_similarity": 0, "epoch": 2.4221808014911463, "grad_norm": 1.101459730595141, "learning_rate": 1.0700724887814981e-05, "loss": 1.0539, "reason_loss": 0.4358369708061218, "step": 2599, "utility_loss": 0.6180140972137451 }, { "cosine_similarity": 0, "epoch": 2.423112767940354, "grad_norm": 1.0173373087057764, "learning_rate": 1.0683465654124957e-05, "loss": 1.0432, "reason_loss": 0.4491695761680603, "step": 2600, "utility_loss": 0.5940550565719604 }, { "cosine_similarity": 0, "epoch": 2.424044734389562, "grad_norm": 1.5091337703344596, "learning_rate": 1.0666206420434933e-05, "loss": 1.3387, "reason_loss": 0.463670015335083, "step": 2601, "utility_loss": 0.8750671148300171 }, { "cosine_similarity": 0, "epoch": 2.4249767008387697, "grad_norm": 0.9228817610231681, "learning_rate": 1.064894718674491e-05, "loss": 1.0398, "reason_loss": 0.4710448980331421, "step": 2602, "utility_loss": 0.5687506198883057 }, { "cosine_similarity": 0, "epoch": 2.4259086672879775, "grad_norm": 0.8876594180656641, "learning_rate": 1.0631687953054885e-05, "loss": 0.955, "reason_loss": 0.486020028591156, "step": 2603, "utility_loss": 0.4689342677593231 }, { "cosine_similarity": 0, "epoch": 2.4268406337371853, "grad_norm": 1.0025833375627287, "learning_rate": 1.0614428719364861e-05, "loss": 1.0317, "reason_loss": 0.45508304238319397, "step": 2604, "utility_loss": 0.576570451259613 }, { "cosine_similarity": 0, "epoch": 2.427772600186393, "grad_norm": 0.9721876357270595, "learning_rate": 1.0597169485674835e-05, "loss": 1.1573, "reason_loss": 0.4845487177371979, "step": 2605, "utility_loss": 0.6727998852729797 }, { "cosine_similarity": 0, "epoch": 2.4287045666356013, "grad_norm": 0.9620303601446158, "learning_rate": 1.0579910251984811e-05, "loss": 1.2443, "reason_loss": 0.4722100496292114, "step": 2606, "utility_loss": 0.7720906734466553 }, { "cosine_similarity": 0, "epoch": 2.429636533084809, "grad_norm": 1.0053622536011462, "learning_rate": 1.0562651018294787e-05, "loss": 1.1244, "reason_loss": 0.453689843416214, "step": 2607, "utility_loss": 0.6706706881523132 }, { "cosine_similarity": 0, "epoch": 2.430568499534017, "grad_norm": 1.0879832397152838, "learning_rate": 1.0545391784604763e-05, "loss": 1.2568, "reason_loss": 0.4515886902809143, "step": 2608, "utility_loss": 0.8052120208740234 }, { "cosine_similarity": 0, "epoch": 2.4315004659832247, "grad_norm": 0.7567347804314705, "learning_rate": 1.0528132550914741e-05, "loss": 0.9214, "reason_loss": 0.4584653377532959, "step": 2609, "utility_loss": 0.46292033791542053 }, { "cosine_similarity": 0, "epoch": 2.4324324324324325, "grad_norm": 1.085321154911148, "learning_rate": 1.0510873317224715e-05, "loss": 1.2578, "reason_loss": 0.45417138934135437, "step": 2610, "utility_loss": 0.8036310076713562 }, { "cosine_similarity": 0, "epoch": 2.4333643988816402, "grad_norm": 0.9399559202060994, "learning_rate": 1.0493614083534691e-05, "loss": 1.2786, "reason_loss": 0.4704635739326477, "step": 2611, "utility_loss": 0.8081612586975098 }, { "cosine_similarity": 0, "epoch": 2.434296365330848, "grad_norm": 0.9547923656288753, "learning_rate": 1.0476354849844667e-05, "loss": 1.0841, "reason_loss": 0.4779089093208313, "step": 2612, "utility_loss": 0.6061900854110718 }, { "cosine_similarity": 0, "epoch": 2.435228331780056, "grad_norm": 0.9132318577805162, "learning_rate": 1.0459095616154643e-05, "loss": 0.8889, "reason_loss": 0.4307705760002136, "step": 2613, "utility_loss": 0.4580954313278198 }, { "cosine_similarity": 0, "epoch": 2.4361602982292636, "grad_norm": 0.8893316262940955, "learning_rate": 1.044183638246462e-05, "loss": 0.9679, "reason_loss": 0.4498401880264282, "step": 2614, "utility_loss": 0.5180098414421082 }, { "cosine_similarity": 0, "epoch": 2.4370922646784714, "grad_norm": 0.9757408844939195, "learning_rate": 1.0424577148774595e-05, "loss": 1.1158, "reason_loss": 0.4643765985965729, "step": 2615, "utility_loss": 0.6514214277267456 }, { "cosine_similarity": 0, "epoch": 2.438024231127679, "grad_norm": 0.839136387173065, "learning_rate": 1.040731791508457e-05, "loss": 0.7941, "reason_loss": 0.4480479955673218, "step": 2616, "utility_loss": 0.3460752069950104 }, { "cosine_similarity": 0, "epoch": 2.4389561975768874, "grad_norm": 1.0919886865662527, "learning_rate": 1.0390058681394546e-05, "loss": 1.1474, "reason_loss": 0.4628598690032959, "step": 2617, "utility_loss": 0.6845783591270447 }, { "cosine_similarity": 0, "epoch": 2.439888164026095, "grad_norm": 1.0255232293503818, "learning_rate": 1.0372799447704522e-05, "loss": 1.2081, "reason_loss": 0.4438103139400482, "step": 2618, "utility_loss": 0.7642599940299988 }, { "cosine_similarity": 0, "epoch": 2.440820130475303, "grad_norm": 1.0011719645441206, "learning_rate": 1.03555402140145e-05, "loss": 1.2074, "reason_loss": 0.4712231159210205, "step": 2619, "utility_loss": 0.7362264394760132 }, { "cosine_similarity": 0, "epoch": 2.441752096924511, "grad_norm": 1.097728798565217, "learning_rate": 1.0338280980324475e-05, "loss": 1.1001, "reason_loss": 0.4692966938018799, "step": 2620, "utility_loss": 0.6307657957077026 }, { "cosine_similarity": 0, "epoch": 2.4426840633737186, "grad_norm": 0.8970982464283861, "learning_rate": 1.032102174663445e-05, "loss": 1.0617, "reason_loss": 0.4822460412979126, "step": 2621, "utility_loss": 0.5794849395751953 }, { "cosine_similarity": 0, "epoch": 2.4436160298229264, "grad_norm": 0.9452473323559797, "learning_rate": 1.0303762512944426e-05, "loss": 1.0475, "reason_loss": 0.45067769289016724, "step": 2622, "utility_loss": 0.5968654155731201 }, { "cosine_similarity": 0, "epoch": 2.444547996272134, "grad_norm": 0.9346620133810346, "learning_rate": 1.0286503279254402e-05, "loss": 1.1018, "reason_loss": 0.44461530447006226, "step": 2623, "utility_loss": 0.6571605205535889 }, { "cosine_similarity": 0, "epoch": 2.445479962721342, "grad_norm": 0.8165149109005936, "learning_rate": 1.0269244045564377e-05, "loss": 0.8299, "reason_loss": 0.45705246925354004, "step": 2624, "utility_loss": 0.3728659152984619 }, { "cosine_similarity": 0, "epoch": 2.4464119291705497, "grad_norm": 1.0145136075372625, "learning_rate": 1.0251984811874353e-05, "loss": 1.2227, "reason_loss": 0.45952826738357544, "step": 2625, "utility_loss": 0.7632083892822266 }, { "cosine_similarity": 0, "epoch": 2.4473438956197575, "grad_norm": 0.8890474994396595, "learning_rate": 1.023472557818433e-05, "loss": 1.039, "reason_loss": 0.4557707905769348, "step": 2626, "utility_loss": 0.5832473039627075 }, { "cosine_similarity": 0, "epoch": 2.4482758620689653, "grad_norm": 1.113282540644056, "learning_rate": 1.0217466344494304e-05, "loss": 1.218, "reason_loss": 0.45201951265335083, "step": 2627, "utility_loss": 0.7660030722618103 }, { "cosine_similarity": 0, "epoch": 2.4492078285181735, "grad_norm": 0.977315704769587, "learning_rate": 1.020020711080428e-05, "loss": 0.9598, "reason_loss": 0.45842039585113525, "step": 2628, "utility_loss": 0.5013626217842102 }, { "cosine_similarity": 0, "epoch": 2.4501397949673813, "grad_norm": 0.920803934164764, "learning_rate": 1.0182947877114257e-05, "loss": 0.9289, "reason_loss": 0.4751758873462677, "step": 2629, "utility_loss": 0.45376208424568176 }, { "cosine_similarity": 0, "epoch": 2.451071761416589, "grad_norm": 0.9684853075295582, "learning_rate": 1.0165688643424233e-05, "loss": 1.2366, "reason_loss": 0.45003053545951843, "step": 2630, "utility_loss": 0.7865849733352661 }, { "cosine_similarity": 0, "epoch": 2.452003727865797, "grad_norm": 0.9513934642091783, "learning_rate": 1.014842940973421e-05, "loss": 1.2812, "reason_loss": 0.46225619316101074, "step": 2631, "utility_loss": 0.8189392685890198 }, { "cosine_similarity": 0, "epoch": 2.4529356943150047, "grad_norm": 0.9231885438859214, "learning_rate": 1.0131170176044184e-05, "loss": 0.9873, "reason_loss": 0.4549928903579712, "step": 2632, "utility_loss": 0.5322768092155457 }, { "cosine_similarity": 0, "epoch": 2.4538676607642125, "grad_norm": 1.0137378099199126, "learning_rate": 1.011391094235416e-05, "loss": 1.1329, "reason_loss": 0.45501863956451416, "step": 2633, "utility_loss": 0.6778556704521179 }, { "cosine_similarity": 0, "epoch": 2.4547996272134203, "grad_norm": 0.7947908743874291, "learning_rate": 1.0096651708664136e-05, "loss": 1.0127, "reason_loss": 0.4352768659591675, "step": 2634, "utility_loss": 0.577456533908844 }, { "cosine_similarity": 0, "epoch": 2.455731593662628, "grad_norm": 1.0142445323207634, "learning_rate": 1.0079392474974112e-05, "loss": 1.0705, "reason_loss": 0.45091426372528076, "step": 2635, "utility_loss": 0.6195483803749084 }, { "cosine_similarity": 0, "epoch": 2.456663560111836, "grad_norm": 1.0026243776556303, "learning_rate": 1.0062133241284088e-05, "loss": 1.2699, "reason_loss": 0.4342154860496521, "step": 2636, "utility_loss": 0.8356947898864746 }, { "cosine_similarity": 0, "epoch": 2.4575955265610436, "grad_norm": 1.1187515069731582, "learning_rate": 1.0044874007594064e-05, "loss": 1.1757, "reason_loss": 0.4752153754234314, "step": 2637, "utility_loss": 0.7005319595336914 }, { "cosine_similarity": 0, "epoch": 2.4585274930102514, "grad_norm": 1.252474901731506, "learning_rate": 1.0027614773904038e-05, "loss": 0.9606, "reason_loss": 0.4514944553375244, "step": 2638, "utility_loss": 0.5091032981872559 }, { "cosine_similarity": 0, "epoch": 2.4594594594594597, "grad_norm": 0.8618039815600543, "learning_rate": 1.0010355540214016e-05, "loss": 1.034, "reason_loss": 0.4696454405784607, "step": 2639, "utility_loss": 0.5643531680107117 }, { "cosine_similarity": 0, "epoch": 2.4603914259086674, "grad_norm": 1.1905263189590167, "learning_rate": 9.993096306523992e-06, "loss": 1.0796, "reason_loss": 0.4469269812107086, "step": 2640, "utility_loss": 0.6326488852500916 }, { "cosine_similarity": 0, "epoch": 2.4613233923578752, "grad_norm": 0.9031657180151877, "learning_rate": 9.975837072833968e-06, "loss": 1.1606, "reason_loss": 0.4692443311214447, "step": 2641, "utility_loss": 0.6913162469863892 }, { "cosine_similarity": 0, "epoch": 2.462255358807083, "grad_norm": 0.9588234719229412, "learning_rate": 9.958577839143944e-06, "loss": 0.9494, "reason_loss": 0.4532829523086548, "step": 2642, "utility_loss": 0.4960682988166809 }, { "cosine_similarity": 0, "epoch": 2.463187325256291, "grad_norm": 0.9343562622319278, "learning_rate": 9.941318605453918e-06, "loss": 1.1656, "reason_loss": 0.456230103969574, "step": 2643, "utility_loss": 0.7093350291252136 }, { "cosine_similarity": 0, "epoch": 2.4641192917054986, "grad_norm": 0.9441822992371401, "learning_rate": 9.924059371763894e-06, "loss": 1.0577, "reason_loss": 0.45841243863105774, "step": 2644, "utility_loss": 0.5992590188980103 }, { "cosine_similarity": 0, "epoch": 2.4650512581547064, "grad_norm": 1.017172191554228, "learning_rate": 9.90680013807387e-06, "loss": 1.0328, "reason_loss": 0.4184176027774811, "step": 2645, "utility_loss": 0.6143898963928223 }, { "cosine_similarity": 0, "epoch": 2.465983224603914, "grad_norm": 1.078023318660253, "learning_rate": 9.889540904383846e-06, "loss": 1.0847, "reason_loss": 0.48077526688575745, "step": 2646, "utility_loss": 0.6039009690284729 }, { "cosine_similarity": 0, "epoch": 2.466915191053122, "grad_norm": 0.8087763111106095, "learning_rate": 9.872281670693822e-06, "loss": 0.9832, "reason_loss": 0.47287529706954956, "step": 2647, "utility_loss": 0.5103280544281006 }, { "cosine_similarity": 0, "epoch": 2.4678471575023297, "grad_norm": 1.1082729670080076, "learning_rate": 9.855022437003798e-06, "loss": 0.973, "reason_loss": 0.44848158955574036, "step": 2648, "utility_loss": 0.5245177149772644 }, { "cosine_similarity": 0, "epoch": 2.4687791239515375, "grad_norm": 0.9427884230245303, "learning_rate": 9.837763203313774e-06, "loss": 1.0229, "reason_loss": 0.45041555166244507, "step": 2649, "utility_loss": 0.5724923014640808 }, { "cosine_similarity": 0, "epoch": 2.4697110904007458, "grad_norm": 0.8240473658964454, "learning_rate": 9.82050396962375e-06, "loss": 0.9127, "reason_loss": 0.4850500822067261, "step": 2650, "utility_loss": 0.4276846647262573 }, { "cosine_similarity": 0, "epoch": 2.4706430568499536, "grad_norm": 1.1039515722784647, "learning_rate": 9.803244735933726e-06, "loss": 1.0544, "reason_loss": 0.43538349866867065, "step": 2651, "utility_loss": 0.6189894676208496 }, { "cosine_similarity": 0, "epoch": 2.4715750232991613, "grad_norm": 0.9540822163358048, "learning_rate": 9.785985502243702e-06, "loss": 1.1405, "reason_loss": 0.4755932092666626, "step": 2652, "utility_loss": 0.6648886799812317 }, { "cosine_similarity": 0, "epoch": 2.472506989748369, "grad_norm": 0.9479695819260243, "learning_rate": 9.768726268553676e-06, "loss": 1.2063, "reason_loss": 0.46114492416381836, "step": 2653, "utility_loss": 0.7451589107513428 }, { "cosine_similarity": 0, "epoch": 2.473438956197577, "grad_norm": 1.0051538517246246, "learning_rate": 9.751467034863652e-06, "loss": 1.0583, "reason_loss": 0.46624892950057983, "step": 2654, "utility_loss": 0.5920994281768799 }, { "cosine_similarity": 0, "epoch": 2.4743709226467847, "grad_norm": 0.9251568235837253, "learning_rate": 9.734207801173628e-06, "loss": 1.3208, "reason_loss": 0.479443222284317, "step": 2655, "utility_loss": 0.8414046168327332 }, { "cosine_similarity": 0, "epoch": 2.4753028890959925, "grad_norm": 1.0602354181014366, "learning_rate": 9.716948567483604e-06, "loss": 1.596, "reason_loss": 0.47163066267967224, "step": 2656, "utility_loss": 1.1243822574615479 }, { "cosine_similarity": 0, "epoch": 2.4762348555452003, "grad_norm": 0.895552653157434, "learning_rate": 9.69968933379358e-06, "loss": 0.7919, "reason_loss": 0.42603600025177, "step": 2657, "utility_loss": 0.3658595085144043 }, { "cosine_similarity": 0, "epoch": 2.477166821994408, "grad_norm": 1.1172683452253966, "learning_rate": 9.682430100103556e-06, "loss": 1.1876, "reason_loss": 0.44406285881996155, "step": 2658, "utility_loss": 0.7435202598571777 }, { "cosine_similarity": 0, "epoch": 2.478098788443616, "grad_norm": 0.9641120377161777, "learning_rate": 9.665170866413532e-06, "loss": 1.0499, "reason_loss": 0.4700848460197449, "step": 2659, "utility_loss": 0.5798065662384033 }, { "cosine_similarity": 0, "epoch": 2.4790307548928237, "grad_norm": 0.9812673176141972, "learning_rate": 9.647911632723508e-06, "loss": 1.2539, "reason_loss": 0.4646672308444977, "step": 2660, "utility_loss": 0.7891916632652283 }, { "cosine_similarity": 0, "epoch": 2.479962721342032, "grad_norm": 1.0309788833389173, "learning_rate": 9.630652399033484e-06, "loss": 1.0999, "reason_loss": 0.4534754455089569, "step": 2661, "utility_loss": 0.646385669708252 }, { "cosine_similarity": 0, "epoch": 2.4808946877912397, "grad_norm": 1.4224165069281955, "learning_rate": 9.61339316534346e-06, "loss": 1.143, "reason_loss": 0.48675668239593506, "step": 2662, "utility_loss": 0.6562931537628174 }, { "cosine_similarity": 0, "epoch": 2.4818266542404475, "grad_norm": 1.121042123338744, "learning_rate": 9.596133931653436e-06, "loss": 0.9602, "reason_loss": 0.48443472385406494, "step": 2663, "utility_loss": 0.4757874608039856 }, { "cosine_similarity": 0, "epoch": 2.4827586206896552, "grad_norm": 0.9221525048452519, "learning_rate": 9.57887469796341e-06, "loss": 1.0939, "reason_loss": 0.43937602639198303, "step": 2664, "utility_loss": 0.6545542478561401 }, { "cosine_similarity": 0, "epoch": 2.483690587138863, "grad_norm": 0.8617474528820196, "learning_rate": 9.561615464273386e-06, "loss": 0.8554, "reason_loss": 0.45356813073158264, "step": 2665, "utility_loss": 0.4017922878265381 }, { "cosine_similarity": 0, "epoch": 2.484622553588071, "grad_norm": 1.154994079204626, "learning_rate": 9.544356230583362e-06, "loss": 1.085, "reason_loss": 0.4665767550468445, "step": 2666, "utility_loss": 0.6183938384056091 }, { "cosine_similarity": 0, "epoch": 2.4855545200372786, "grad_norm": 1.0385730811840064, "learning_rate": 9.527096996893338e-06, "loss": 1.0557, "reason_loss": 0.48541104793548584, "step": 2667, "utility_loss": 0.5702613592147827 }, { "cosine_similarity": 0, "epoch": 2.4864864864864864, "grad_norm": 0.8789344096439653, "learning_rate": 9.509837763203314e-06, "loss": 1.0063, "reason_loss": 0.43832212686538696, "step": 2668, "utility_loss": 0.5680150985717773 }, { "cosine_similarity": 0, "epoch": 2.487418452935694, "grad_norm": 0.9832973557776328, "learning_rate": 9.49257852951329e-06, "loss": 1.168, "reason_loss": 0.43196314573287964, "step": 2669, "utility_loss": 0.7360637187957764 }, { "cosine_similarity": 0, "epoch": 2.488350419384902, "grad_norm": 0.848144871907553, "learning_rate": 9.475319295823266e-06, "loss": 1.0336, "reason_loss": 0.4736807346343994, "step": 2670, "utility_loss": 0.5598718523979187 }, { "cosine_similarity": 0, "epoch": 2.4892823858341098, "grad_norm": 0.9473417407491245, "learning_rate": 9.458060062133242e-06, "loss": 1.1928, "reason_loss": 0.47689127922058105, "step": 2671, "utility_loss": 0.7159360647201538 }, { "cosine_similarity": 0, "epoch": 2.490214352283318, "grad_norm": 0.8797435906811882, "learning_rate": 9.440800828443218e-06, "loss": 0.8951, "reason_loss": 0.4531816244125366, "step": 2672, "utility_loss": 0.44190406799316406 }, { "cosine_similarity": 0, "epoch": 2.491146318732526, "grad_norm": 1.1095243196339422, "learning_rate": 9.423541594753194e-06, "loss": 1.2402, "reason_loss": 0.508790135383606, "step": 2673, "utility_loss": 0.7313944101333618 }, { "cosine_similarity": 0, "epoch": 2.4920782851817336, "grad_norm": 0.8675339561902868, "learning_rate": 9.40628236106317e-06, "loss": 1.0796, "reason_loss": 0.4514641761779785, "step": 2674, "utility_loss": 0.628132700920105 }, { "cosine_similarity": 0, "epoch": 2.4930102516309414, "grad_norm": 1.0876956466720897, "learning_rate": 9.389023127373144e-06, "loss": 1.2021, "reason_loss": 0.4760898947715759, "step": 2675, "utility_loss": 0.7259891033172607 }, { "cosine_similarity": 0, "epoch": 2.493942218080149, "grad_norm": 1.0256710395270638, "learning_rate": 9.37176389368312e-06, "loss": 1.1064, "reason_loss": 0.45910245180130005, "step": 2676, "utility_loss": 0.6472572088241577 }, { "cosine_similarity": 0, "epoch": 2.494874184529357, "grad_norm": 0.9430334298035977, "learning_rate": 9.354504659993096e-06, "loss": 1.1316, "reason_loss": 0.4632326364517212, "step": 2677, "utility_loss": 0.6683191061019897 }, { "cosine_similarity": 0, "epoch": 2.4958061509785647, "grad_norm": 0.8023167973604337, "learning_rate": 9.337245426303072e-06, "loss": 0.8152, "reason_loss": 0.4619441330432892, "step": 2678, "utility_loss": 0.35321110486984253 }, { "cosine_similarity": 0, "epoch": 2.4967381174277725, "grad_norm": 0.8886746064702925, "learning_rate": 9.31998619261305e-06, "loss": 1.2184, "reason_loss": 0.4812829792499542, "step": 2679, "utility_loss": 0.737095832824707 }, { "cosine_similarity": 0, "epoch": 2.4976700838769803, "grad_norm": 1.0021997269021634, "learning_rate": 9.302726958923024e-06, "loss": 1.0449, "reason_loss": 0.48556578159332275, "step": 2680, "utility_loss": 0.5593494772911072 }, { "cosine_similarity": 0, "epoch": 2.498602050326188, "grad_norm": 0.9979840474462317, "learning_rate": 9.285467725233e-06, "loss": 1.0736, "reason_loss": 0.47451746463775635, "step": 2681, "utility_loss": 0.5990355610847473 }, { "cosine_similarity": 0, "epoch": 2.499534016775396, "grad_norm": 0.8919837709538113, "learning_rate": 9.268208491542976e-06, "loss": 1.1416, "reason_loss": 0.4514344334602356, "step": 2682, "utility_loss": 0.6901595592498779 }, { "cosine_similarity": 0, "epoch": 2.500465983224604, "grad_norm": 1.0031111381448214, "learning_rate": 9.250949257852952e-06, "loss": 1.267, "reason_loss": 0.44644489884376526, "step": 2683, "utility_loss": 0.8205655813217163 }, { "cosine_similarity": 0, "epoch": 2.501397949673812, "grad_norm": 1.0527838438672097, "learning_rate": 9.233690024162928e-06, "loss": 1.1591, "reason_loss": 0.4503680169582367, "step": 2684, "utility_loss": 0.7086968421936035 }, { "cosine_similarity": 0, "epoch": 2.5023299161230197, "grad_norm": 0.8810968814921172, "learning_rate": 9.216430790472904e-06, "loss": 1.2697, "reason_loss": 0.4810248017311096, "step": 2685, "utility_loss": 0.7886566519737244 }, { "cosine_similarity": 0, "epoch": 2.5032618825722275, "grad_norm": 0.8167188741260913, "learning_rate": 9.199171556782878e-06, "loss": 1.0634, "reason_loss": 0.48340797424316406, "step": 2686, "utility_loss": 0.5800193548202515 }, { "cosine_similarity": 0, "epoch": 2.5041938490214353, "grad_norm": 1.055954154434501, "learning_rate": 9.181912323092854e-06, "loss": 1.2273, "reason_loss": 0.46123549342155457, "step": 2687, "utility_loss": 0.7660559415817261 }, { "cosine_similarity": 0, "epoch": 2.505125815470643, "grad_norm": 0.9016786514383761, "learning_rate": 9.16465308940283e-06, "loss": 1.0537, "reason_loss": 0.4591326415538788, "step": 2688, "utility_loss": 0.5945680737495422 }, { "cosine_similarity": 0, "epoch": 2.506057781919851, "grad_norm": 0.8649598062516013, "learning_rate": 9.147393855712806e-06, "loss": 0.9055, "reason_loss": 0.4803489148616791, "step": 2689, "utility_loss": 0.4251081049442291 }, { "cosine_similarity": 0, "epoch": 2.5069897483690586, "grad_norm": 1.0207300320342996, "learning_rate": 9.130134622022784e-06, "loss": 1.1764, "reason_loss": 0.47605201601982117, "step": 2690, "utility_loss": 0.7003492116928101 }, { "cosine_similarity": 0, "epoch": 2.5079217148182664, "grad_norm": 1.0945098047293944, "learning_rate": 9.112875388332758e-06, "loss": 1.2163, "reason_loss": 0.4707348942756653, "step": 2691, "utility_loss": 0.7455917596817017 }, { "cosine_similarity": 0, "epoch": 2.508853681267474, "grad_norm": 1.1589184505738424, "learning_rate": 9.095616154642734e-06, "loss": 1.1006, "reason_loss": 0.4493659734725952, "step": 2692, "utility_loss": 0.6512523889541626 }, { "cosine_similarity": 0, "epoch": 2.509785647716682, "grad_norm": 1.0112322182584048, "learning_rate": 9.07835692095271e-06, "loss": 1.1483, "reason_loss": 0.44825315475463867, "step": 2693, "utility_loss": 0.7000229358673096 }, { "cosine_similarity": 0, "epoch": 2.5107176141658902, "grad_norm": 0.9393421220779491, "learning_rate": 9.061097687262686e-06, "loss": 1.0372, "reason_loss": 0.4709029197692871, "step": 2694, "utility_loss": 0.5662623047828674 }, { "cosine_similarity": 0, "epoch": 2.511649580615098, "grad_norm": 1.0365938870941505, "learning_rate": 9.043838453572662e-06, "loss": 1.2915, "reason_loss": 0.45778584480285645, "step": 2695, "utility_loss": 0.8337205648422241 }, { "cosine_similarity": 0, "epoch": 2.512581547064306, "grad_norm": 0.9805159898358677, "learning_rate": 9.026579219882638e-06, "loss": 1.1921, "reason_loss": 0.46877339482307434, "step": 2696, "utility_loss": 0.7233238220214844 }, { "cosine_similarity": 0, "epoch": 2.5135135135135136, "grad_norm": 1.0248400924266576, "learning_rate": 9.009319986192612e-06, "loss": 0.8394, "reason_loss": 0.4571169912815094, "step": 2697, "utility_loss": 0.38227033615112305 }, { "cosine_similarity": 0, "epoch": 2.5144454799627214, "grad_norm": 1.8803751028654452, "learning_rate": 8.992060752502588e-06, "loss": 1.4019, "reason_loss": 0.4746004045009613, "step": 2698, "utility_loss": 0.9272792935371399 }, { "cosine_similarity": 0, "epoch": 2.515377446411929, "grad_norm": 0.9155878269254173, "learning_rate": 8.974801518812564e-06, "loss": 0.9009, "reason_loss": 0.44658219814300537, "step": 2699, "utility_loss": 0.4542870819568634 }, { "cosine_similarity": 0, "epoch": 2.516309412861137, "grad_norm": 1.2601741352414206, "learning_rate": 8.957542285122542e-06, "loss": 1.0676, "reason_loss": 0.45129501819610596, "step": 2700, "utility_loss": 0.6163492202758789 }, { "cosine_similarity": 0, "epoch": 2.5172413793103448, "grad_norm": 1.0348360067402729, "learning_rate": 8.940283051432516e-06, "loss": 1.1346, "reason_loss": 0.4342440962791443, "step": 2701, "utility_loss": 0.7003127336502075 }, { "cosine_similarity": 0, "epoch": 2.5181733457595525, "grad_norm": 0.8536959125873523, "learning_rate": 8.923023817742492e-06, "loss": 1.0546, "reason_loss": 0.49278295040130615, "step": 2702, "utility_loss": 0.5618550181388855 }, { "cosine_similarity": 0, "epoch": 2.5191053122087603, "grad_norm": 0.9585454951617621, "learning_rate": 8.905764584052468e-06, "loss": 1.1515, "reason_loss": 0.43213963508605957, "step": 2703, "utility_loss": 0.719369649887085 }, { "cosine_similarity": 0, "epoch": 2.520037278657968, "grad_norm": 0.9372992000833499, "learning_rate": 8.888505350362444e-06, "loss": 0.9525, "reason_loss": 0.44402244687080383, "step": 2704, "utility_loss": 0.508520245552063 }, { "cosine_similarity": 0, "epoch": 2.5209692451071763, "grad_norm": 1.0528722966158557, "learning_rate": 8.87124611667242e-06, "loss": 1.0048, "reason_loss": 0.5116764307022095, "step": 2705, "utility_loss": 0.4931221604347229 }, { "cosine_similarity": 0, "epoch": 2.5219012115563837, "grad_norm": 1.0377246924595003, "learning_rate": 8.853986882982396e-06, "loss": 1.3209, "reason_loss": 0.4549846649169922, "step": 2706, "utility_loss": 0.8659135103225708 }, { "cosine_similarity": 0, "epoch": 2.522833178005592, "grad_norm": 1.1821053298271111, "learning_rate": 8.83672764929237e-06, "loss": 1.0536, "reason_loss": 0.46913984417915344, "step": 2707, "utility_loss": 0.5844534039497375 }, { "cosine_similarity": 0, "epoch": 2.5237651444547997, "grad_norm": 0.9941645192402312, "learning_rate": 8.819468415602347e-06, "loss": 1.108, "reason_loss": 0.4691222310066223, "step": 2708, "utility_loss": 0.6388643980026245 }, { "cosine_similarity": 0, "epoch": 2.5246971109040075, "grad_norm": 1.0413801851472748, "learning_rate": 8.802209181912323e-06, "loss": 1.1498, "reason_loss": 0.4545728862285614, "step": 2709, "utility_loss": 0.6951857805252075 }, { "cosine_similarity": 0, "epoch": 2.5256290773532153, "grad_norm": 1.2060954570016453, "learning_rate": 8.7849499482223e-06, "loss": 1.1905, "reason_loss": 0.4576328992843628, "step": 2710, "utility_loss": 0.7328402996063232 }, { "cosine_similarity": 0, "epoch": 2.526561043802423, "grad_norm": 1.393119524643138, "learning_rate": 8.767690714532276e-06, "loss": 1.3074, "reason_loss": 0.46988070011138916, "step": 2711, "utility_loss": 0.8375309705734253 }, { "cosine_similarity": 0, "epoch": 2.527493010251631, "grad_norm": 0.9145733715423972, "learning_rate": 8.75043148084225e-06, "loss": 0.9684, "reason_loss": 0.45058634877204895, "step": 2712, "utility_loss": 0.5178506374359131 }, { "cosine_similarity": 0, "epoch": 2.5284249767008387, "grad_norm": 0.9773842605739272, "learning_rate": 8.733172247152227e-06, "loss": 1.2737, "reason_loss": 0.4593919515609741, "step": 2713, "utility_loss": 0.8142821192741394 }, { "cosine_similarity": 0, "epoch": 2.5293569431500464, "grad_norm": 1.0532329533760167, "learning_rate": 8.715913013462203e-06, "loss": 1.2609, "reason_loss": 0.4502720236778259, "step": 2714, "utility_loss": 0.8106272220611572 }, { "cosine_similarity": 0, "epoch": 2.5302889095992542, "grad_norm": 1.0437711072086597, "learning_rate": 8.698653779772179e-06, "loss": 1.1392, "reason_loss": 0.48767948150634766, "step": 2715, "utility_loss": 0.6515322923660278 }, { "cosine_similarity": 0, "epoch": 2.5312208760484625, "grad_norm": 0.9303947005922718, "learning_rate": 8.681394546082154e-06, "loss": 1.1991, "reason_loss": 0.4367171823978424, "step": 2716, "utility_loss": 0.7623647451400757 }, { "cosine_similarity": 0, "epoch": 2.53215284249767, "grad_norm": 1.031196147058124, "learning_rate": 8.66413531239213e-06, "loss": 1.0693, "reason_loss": 0.49334561824798584, "step": 2717, "utility_loss": 0.5759368538856506 }, { "cosine_similarity": 0, "epoch": 2.533084808946878, "grad_norm": 1.0359519785230062, "learning_rate": 8.646876078702105e-06, "loss": 1.0765, "reason_loss": 0.4858575463294983, "step": 2718, "utility_loss": 0.5906772613525391 }, { "cosine_similarity": 0, "epoch": 2.534016775396086, "grad_norm": 0.9287659742736708, "learning_rate": 8.62961684501208e-06, "loss": 0.9954, "reason_loss": 0.46808314323425293, "step": 2719, "utility_loss": 0.5273573398590088 }, { "cosine_similarity": 0, "epoch": 2.5349487418452936, "grad_norm": 0.878391122190231, "learning_rate": 8.612357611322058e-06, "loss": 1.097, "reason_loss": 0.45460376143455505, "step": 2720, "utility_loss": 0.6424010992050171 }, { "cosine_similarity": 0, "epoch": 2.5358807082945014, "grad_norm": 1.271972862628274, "learning_rate": 8.595098377632034e-06, "loss": 1.1242, "reason_loss": 0.4390968382358551, "step": 2721, "utility_loss": 0.6851121187210083 }, { "cosine_similarity": 0, "epoch": 2.536812674743709, "grad_norm": 0.9519486661736581, "learning_rate": 8.57783914394201e-06, "loss": 0.9162, "reason_loss": 0.4728826582431793, "step": 2722, "utility_loss": 0.443268746137619 }, { "cosine_similarity": 0, "epoch": 2.537744641192917, "grad_norm": 0.9673125628167762, "learning_rate": 8.560579910251985e-06, "loss": 1.1775, "reason_loss": 0.43866071105003357, "step": 2723, "utility_loss": 0.7388750910758972 }, { "cosine_similarity": 0, "epoch": 2.5386766076421248, "grad_norm": 1.0133871261121845, "learning_rate": 8.54332067656196e-06, "loss": 1.0402, "reason_loss": 0.43763527274131775, "step": 2724, "utility_loss": 0.602575421333313 }, { "cosine_similarity": 0, "epoch": 2.5396085740913326, "grad_norm": 0.897600822921227, "learning_rate": 8.526061442871937e-06, "loss": 0.9531, "reason_loss": 0.45210000872612, "step": 2725, "utility_loss": 0.5010055303573608 }, { "cosine_similarity": 0, "epoch": 2.5405405405405403, "grad_norm": 1.1131496323222414, "learning_rate": 8.508802209181913e-06, "loss": 1.2206, "reason_loss": 0.45486149191856384, "step": 2726, "utility_loss": 0.7657746076583862 }, { "cosine_similarity": 0, "epoch": 2.5414725069897486, "grad_norm": 0.8582082377874518, "learning_rate": 8.491542975491889e-06, "loss": 1.1359, "reason_loss": 0.455068975687027, "step": 2727, "utility_loss": 0.6808228492736816 }, { "cosine_similarity": 0, "epoch": 2.542404473438956, "grad_norm": 1.0353941739981793, "learning_rate": 8.474283741801865e-06, "loss": 1.4194, "reason_loss": 0.44399240612983704, "step": 2728, "utility_loss": 0.975383996963501 }, { "cosine_similarity": 0, "epoch": 2.543336439888164, "grad_norm": 1.1097801547501507, "learning_rate": 8.457024508111839e-06, "loss": 1.0902, "reason_loss": 0.45909664034843445, "step": 2729, "utility_loss": 0.6311519742012024 }, { "cosine_similarity": 0, "epoch": 2.544268406337372, "grad_norm": 0.9447368905604145, "learning_rate": 8.439765274421817e-06, "loss": 1.1345, "reason_loss": 0.43917083740234375, "step": 2730, "utility_loss": 0.6953755617141724 }, { "cosine_similarity": 0, "epoch": 2.5452003727865797, "grad_norm": 1.136148988932836, "learning_rate": 8.422506040731793e-06, "loss": 1.3165, "reason_loss": 0.4759480953216553, "step": 2731, "utility_loss": 0.8405410647392273 }, { "cosine_similarity": 0, "epoch": 2.5461323392357875, "grad_norm": 0.9595688179172938, "learning_rate": 8.405246807041769e-06, "loss": 1.097, "reason_loss": 0.45056086778640747, "step": 2732, "utility_loss": 0.6464585065841675 }, { "cosine_similarity": 0, "epoch": 2.5470643056849953, "grad_norm": 0.8618046394362258, "learning_rate": 8.387987573351745e-06, "loss": 0.987, "reason_loss": 0.4736277759075165, "step": 2733, "utility_loss": 0.5133903622627258 }, { "cosine_similarity": 0, "epoch": 2.547996272134203, "grad_norm": 1.066910563730921, "learning_rate": 8.370728339661719e-06, "loss": 1.0792, "reason_loss": 0.46964943408966064, "step": 2734, "utility_loss": 0.6095835566520691 }, { "cosine_similarity": 0, "epoch": 2.548928238583411, "grad_norm": 0.8955583730910457, "learning_rate": 8.353469105971695e-06, "loss": 1.0854, "reason_loss": 0.4524003863334656, "step": 2735, "utility_loss": 0.6329569220542908 }, { "cosine_similarity": 0, "epoch": 2.5498602050326187, "grad_norm": 0.939786764629727, "learning_rate": 8.33620987228167e-06, "loss": 0.9197, "reason_loss": 0.43533754348754883, "step": 2736, "utility_loss": 0.4843752086162567 }, { "cosine_similarity": 0, "epoch": 2.5507921714818265, "grad_norm": 1.1145108029538857, "learning_rate": 8.318950638591647e-06, "loss": 1.3509, "reason_loss": 0.46047258377075195, "step": 2737, "utility_loss": 0.8903917074203491 }, { "cosine_similarity": 0, "epoch": 2.5517241379310347, "grad_norm": 0.810315567344364, "learning_rate": 8.301691404901623e-06, "loss": 0.9581, "reason_loss": 0.4440782368183136, "step": 2738, "utility_loss": 0.5140153169631958 }, { "cosine_similarity": 0, "epoch": 2.552656104380242, "grad_norm": 0.815686149013817, "learning_rate": 8.284432171211599e-06, "loss": 1.0192, "reason_loss": 0.4585343897342682, "step": 2739, "utility_loss": 0.5606824159622192 }, { "cosine_similarity": 0, "epoch": 2.5535880708294503, "grad_norm": 1.10919864138184, "learning_rate": 8.267172937521575e-06, "loss": 1.0953, "reason_loss": 0.4644946753978729, "step": 2740, "utility_loss": 0.630797266960144 }, { "cosine_similarity": 0, "epoch": 2.554520037278658, "grad_norm": 0.9096939802103313, "learning_rate": 8.24991370383155e-06, "loss": 1.1216, "reason_loss": 0.45133858919143677, "step": 2741, "utility_loss": 0.6702992916107178 }, { "cosine_similarity": 0, "epoch": 2.555452003727866, "grad_norm": 0.9080304556328184, "learning_rate": 8.232654470141527e-06, "loss": 0.9488, "reason_loss": 0.4245792031288147, "step": 2742, "utility_loss": 0.5241729021072388 }, { "cosine_similarity": 0, "epoch": 2.5563839701770736, "grad_norm": 0.9233811492701868, "learning_rate": 8.215395236451503e-06, "loss": 1.0388, "reason_loss": 0.48713716864585876, "step": 2743, "utility_loss": 0.5516195893287659 }, { "cosine_similarity": 0, "epoch": 2.5573159366262814, "grad_norm": 1.0406815191149552, "learning_rate": 8.198136002761479e-06, "loss": 1.2281, "reason_loss": 0.4629892110824585, "step": 2744, "utility_loss": 0.7651463150978088 }, { "cosine_similarity": 0, "epoch": 2.558247903075489, "grad_norm": 0.8906617524248459, "learning_rate": 8.180876769071453e-06, "loss": 0.8757, "reason_loss": 0.46132034063339233, "step": 2745, "utility_loss": 0.41441431641578674 }, { "cosine_similarity": 0, "epoch": 2.559179869524697, "grad_norm": 1.0953336007500514, "learning_rate": 8.163617535381429e-06, "loss": 1.3072, "reason_loss": 0.4535127878189087, "step": 2746, "utility_loss": 0.8536373376846313 }, { "cosine_similarity": 0, "epoch": 2.560111835973905, "grad_norm": 1.0858692701906634, "learning_rate": 8.146358301691405e-06, "loss": 1.2966, "reason_loss": 0.4485694169998169, "step": 2747, "utility_loss": 0.847997784614563 }, { "cosine_similarity": 0, "epoch": 2.5610438024231126, "grad_norm": 0.8227193095953378, "learning_rate": 8.129099068001381e-06, "loss": 1.0452, "reason_loss": 0.4785904884338379, "step": 2748, "utility_loss": 0.5666294097900391 }, { "cosine_similarity": 0, "epoch": 2.561975768872321, "grad_norm": 0.9897907605786124, "learning_rate": 8.111839834311357e-06, "loss": 1.1015, "reason_loss": 0.4648590683937073, "step": 2749, "utility_loss": 0.6366580724716187 }, { "cosine_similarity": 0, "epoch": 2.562907735321528, "grad_norm": 0.9346956883315092, "learning_rate": 8.094580600621333e-06, "loss": 1.0955, "reason_loss": 0.4618613123893738, "step": 2750, "utility_loss": 0.633687436580658 }, { "cosine_similarity": 0, "epoch": 2.5638397017707364, "grad_norm": 0.801718121718121, "learning_rate": 8.077321366931309e-06, "loss": 0.9667, "reason_loss": 0.44349682331085205, "step": 2751, "utility_loss": 0.5231821537017822 }, { "cosine_similarity": 0, "epoch": 2.564771668219944, "grad_norm": 0.9943779921939533, "learning_rate": 8.060062133241285e-06, "loss": 0.9328, "reason_loss": 0.4625767469406128, "step": 2752, "utility_loss": 0.4702260494232178 }, { "cosine_similarity": 0, "epoch": 2.565703634669152, "grad_norm": 0.8906575568852543, "learning_rate": 8.04280289955126e-06, "loss": 1.1142, "reason_loss": 0.4764159619808197, "step": 2753, "utility_loss": 0.6377586126327515 }, { "cosine_similarity": 0, "epoch": 2.5666356011183598, "grad_norm": 0.8792097963465747, "learning_rate": 8.025543665861237e-06, "loss": 1.0183, "reason_loss": 0.4730386734008789, "step": 2754, "utility_loss": 0.5452988743782043 }, { "cosine_similarity": 0, "epoch": 2.5675675675675675, "grad_norm": 0.9052859800954737, "learning_rate": 8.008284432171211e-06, "loss": 1.0202, "reason_loss": 0.45045343041419983, "step": 2755, "utility_loss": 0.5697829723358154 }, { "cosine_similarity": 0, "epoch": 2.5684995340167753, "grad_norm": 0.9625841623368512, "learning_rate": 7.991025198481187e-06, "loss": 1.1872, "reason_loss": 0.4619622528553009, "step": 2756, "utility_loss": 0.7251904010772705 }, { "cosine_similarity": 0, "epoch": 2.569431500465983, "grad_norm": 0.9026423595704326, "learning_rate": 7.973765964791163e-06, "loss": 0.9245, "reason_loss": 0.4524678885936737, "step": 2757, "utility_loss": 0.47208136320114136 }, { "cosine_similarity": 0, "epoch": 2.570363466915191, "grad_norm": 1.0580293846280842, "learning_rate": 7.956506731101139e-06, "loss": 1.0146, "reason_loss": 0.44693371653556824, "step": 2758, "utility_loss": 0.5676297545433044 }, { "cosine_similarity": 0, "epoch": 2.5712954333643987, "grad_norm": 0.8321076760896076, "learning_rate": 7.939247497411115e-06, "loss": 0.9314, "reason_loss": 0.46219876408576965, "step": 2759, "utility_loss": 0.4692004919052124 }, { "cosine_similarity": 0, "epoch": 2.572227399813607, "grad_norm": 0.9469494278605607, "learning_rate": 7.921988263721091e-06, "loss": 0.9204, "reason_loss": 0.5009799003601074, "step": 2760, "utility_loss": 0.41941192746162415 }, { "cosine_similarity": 0, "epoch": 2.5731593662628143, "grad_norm": 0.9864051351473502, "learning_rate": 7.904729030031067e-06, "loss": 1.0127, "reason_loss": 0.4737169146537781, "step": 2761, "utility_loss": 0.5389364361763 }, { "cosine_similarity": 0, "epoch": 2.5740913327120225, "grad_norm": 0.9971545441698046, "learning_rate": 7.887469796341043e-06, "loss": 1.209, "reason_loss": 0.4546853005886078, "step": 2762, "utility_loss": 0.7542743682861328 }, { "cosine_similarity": 0, "epoch": 2.5750232991612303, "grad_norm": 0.8772328506982839, "learning_rate": 7.870210562651019e-06, "loss": 0.973, "reason_loss": 0.47865429520606995, "step": 2763, "utility_loss": 0.4943569302558899 }, { "cosine_similarity": 0, "epoch": 2.575955265610438, "grad_norm": 0.8918043125457221, "learning_rate": 7.852951328960995e-06, "loss": 1.1302, "reason_loss": 0.4596347212791443, "step": 2764, "utility_loss": 0.6705783009529114 }, { "cosine_similarity": 0, "epoch": 2.576887232059646, "grad_norm": 1.3522999117474066, "learning_rate": 7.835692095270971e-06, "loss": 1.3883, "reason_loss": 0.46338608860969543, "step": 2765, "utility_loss": 0.9249426126480103 }, { "cosine_similarity": 0, "epoch": 2.5778191985088537, "grad_norm": 0.886541298764037, "learning_rate": 7.818432861580945e-06, "loss": 0.8285, "reason_loss": 0.4712423086166382, "step": 2766, "utility_loss": 0.35723963379859924 }, { "cosine_similarity": 0, "epoch": 2.5787511649580614, "grad_norm": 0.8015609336607623, "learning_rate": 7.801173627890921e-06, "loss": 0.7998, "reason_loss": 0.4295787513256073, "step": 2767, "utility_loss": 0.37019234895706177 }, { "cosine_similarity": 0, "epoch": 2.5796831314072692, "grad_norm": 1.062531473503878, "learning_rate": 7.783914394200897e-06, "loss": 1.1256, "reason_loss": 0.45349282026290894, "step": 2768, "utility_loss": 0.6721398830413818 }, { "cosine_similarity": 0, "epoch": 2.580615097856477, "grad_norm": 0.9262016425758296, "learning_rate": 7.766655160510873e-06, "loss": 1.0798, "reason_loss": 0.4851830005645752, "step": 2769, "utility_loss": 0.5946221947669983 }, { "cosine_similarity": 0, "epoch": 2.581547064305685, "grad_norm": 0.970807192474719, "learning_rate": 7.749395926820851e-06, "loss": 1.2186, "reason_loss": 0.4076347351074219, "step": 2770, "utility_loss": 0.811008870601654 }, { "cosine_similarity": 0, "epoch": 2.582479030754893, "grad_norm": 0.9489728495204768, "learning_rate": 7.732136693130825e-06, "loss": 1.0323, "reason_loss": 0.47371989488601685, "step": 2771, "utility_loss": 0.5586151480674744 }, { "cosine_similarity": 0, "epoch": 2.5834109972041004, "grad_norm": 0.8987308585999481, "learning_rate": 7.714877459440801e-06, "loss": 1.1245, "reason_loss": 0.436911940574646, "step": 2772, "utility_loss": 0.687602162361145 }, { "cosine_similarity": 0, "epoch": 2.5843429636533086, "grad_norm": 0.7573172560967906, "learning_rate": 7.697618225750777e-06, "loss": 1.0319, "reason_loss": 0.46594494581222534, "step": 2773, "utility_loss": 0.5659964084625244 }, { "cosine_similarity": 0, "epoch": 2.5852749301025164, "grad_norm": 0.8444223791443433, "learning_rate": 7.680358992060753e-06, "loss": 0.8775, "reason_loss": 0.444294273853302, "step": 2774, "utility_loss": 0.43320661783218384 }, { "cosine_similarity": 0, "epoch": 2.586206896551724, "grad_norm": 0.8729202260542, "learning_rate": 7.663099758370729e-06, "loss": 0.9288, "reason_loss": 0.4589616656303406, "step": 2775, "utility_loss": 0.4698830842971802 }, { "cosine_similarity": 0, "epoch": 2.587138863000932, "grad_norm": 0.8178926923917, "learning_rate": 7.645840524680705e-06, "loss": 1.036, "reason_loss": 0.44460630416870117, "step": 2776, "utility_loss": 0.5913769602775574 }, { "cosine_similarity": 0, "epoch": 2.5880708294501398, "grad_norm": 1.0245336975024864, "learning_rate": 7.62858129099068e-06, "loss": 1.1543, "reason_loss": 0.45582154393196106, "step": 2777, "utility_loss": 0.6984351873397827 }, { "cosine_similarity": 0, "epoch": 2.5890027958993476, "grad_norm": 1.0055219434771665, "learning_rate": 7.611322057300656e-06, "loss": 1.1004, "reason_loss": 0.463581383228302, "step": 2778, "utility_loss": 0.6368587017059326 }, { "cosine_similarity": 0, "epoch": 2.5899347623485554, "grad_norm": 0.9024529132671343, "learning_rate": 7.594062823610631e-06, "loss": 1.0692, "reason_loss": 0.4790799915790558, "step": 2779, "utility_loss": 0.5901512503623962 }, { "cosine_similarity": 0, "epoch": 2.590866728797763, "grad_norm": 0.8138829324497683, "learning_rate": 7.576803589920608e-06, "loss": 1.0773, "reason_loss": 0.46579229831695557, "step": 2780, "utility_loss": 0.6115040183067322 }, { "cosine_similarity": 0, "epoch": 2.591798695246971, "grad_norm": 1.0947480471325322, "learning_rate": 7.559544356230584e-06, "loss": 1.1778, "reason_loss": 0.46242135763168335, "step": 2781, "utility_loss": 0.7153534889221191 }, { "cosine_similarity": 0, "epoch": 2.592730661696179, "grad_norm": 1.0178311016312505, "learning_rate": 7.54228512254056e-06, "loss": 1.0097, "reason_loss": 0.4746018946170807, "step": 2782, "utility_loss": 0.5350667238235474 }, { "cosine_similarity": 0, "epoch": 2.5936626281453865, "grad_norm": 0.9444457309081434, "learning_rate": 7.525025888850535e-06, "loss": 1.236, "reason_loss": 0.4717745780944824, "step": 2783, "utility_loss": 0.7642703056335449 }, { "cosine_similarity": 0, "epoch": 2.5945945945945947, "grad_norm": 1.0250182616888348, "learning_rate": 7.507766655160511e-06, "loss": 1.1998, "reason_loss": 0.43537765741348267, "step": 2784, "utility_loss": 0.7643847465515137 }, { "cosine_similarity": 0, "epoch": 2.5955265610438025, "grad_norm": 0.9319785884892405, "learning_rate": 7.490507421470487e-06, "loss": 1.0135, "reason_loss": 0.450251042842865, "step": 2785, "utility_loss": 0.5632970929145813 }, { "cosine_similarity": 0, "epoch": 2.5964585274930103, "grad_norm": 1.0809773719548643, "learning_rate": 7.473248187780462e-06, "loss": 1.2273, "reason_loss": 0.49568480253219604, "step": 2786, "utility_loss": 0.7315924167633057 }, { "cosine_similarity": 0, "epoch": 2.597390493942218, "grad_norm": 0.8660086956714582, "learning_rate": 7.455988954090438e-06, "loss": 1.1989, "reason_loss": 0.46650809049606323, "step": 2787, "utility_loss": 0.7323635220527649 }, { "cosine_similarity": 0, "epoch": 2.598322460391426, "grad_norm": 0.8744951318647266, "learning_rate": 7.438729720400414e-06, "loss": 1.1385, "reason_loss": 0.49058181047439575, "step": 2788, "utility_loss": 0.6478804349899292 }, { "cosine_similarity": 0, "epoch": 2.5992544268406337, "grad_norm": 1.1290918335703246, "learning_rate": 7.4214704867103895e-06, "loss": 1.0999, "reason_loss": 0.4615051746368408, "step": 2789, "utility_loss": 0.6383692026138306 }, { "cosine_similarity": 0, "epoch": 2.6001863932898415, "grad_norm": 1.1939372028264703, "learning_rate": 7.4042112530203655e-06, "loss": 1.1227, "reason_loss": 0.43575793504714966, "step": 2790, "utility_loss": 0.6869122982025146 }, { "cosine_similarity": 0, "epoch": 2.6011183597390493, "grad_norm": 0.9520621685864985, "learning_rate": 7.386952019330342e-06, "loss": 1.0948, "reason_loss": 0.4924508035182953, "step": 2791, "utility_loss": 0.6023155450820923 }, { "cosine_similarity": 0, "epoch": 2.602050326188257, "grad_norm": 0.870474432864398, "learning_rate": 7.369692785640318e-06, "loss": 1.135, "reason_loss": 0.4701489508152008, "step": 2792, "utility_loss": 0.6648530960083008 }, { "cosine_similarity": 0, "epoch": 2.6029822926374653, "grad_norm": 1.0689954135302597, "learning_rate": 7.352433551950294e-06, "loss": 1.182, "reason_loss": 0.4601511061191559, "step": 2793, "utility_loss": 0.7218988537788391 }, { "cosine_similarity": 0, "epoch": 2.6039142590866726, "grad_norm": 1.0564990046609763, "learning_rate": 7.335174318260269e-06, "loss": 1.0826, "reason_loss": 0.46574464440345764, "step": 2794, "utility_loss": 0.6168755292892456 }, { "cosine_similarity": 0, "epoch": 2.604846225535881, "grad_norm": 0.8087288918917181, "learning_rate": 7.317915084570245e-06, "loss": 1.0275, "reason_loss": 0.4714348316192627, "step": 2795, "utility_loss": 0.5560312271118164 }, { "cosine_similarity": 0, "epoch": 2.6057781919850886, "grad_norm": 1.1967780069690466, "learning_rate": 7.300655850880221e-06, "loss": 1.2213, "reason_loss": 0.4356042146682739, "step": 2796, "utility_loss": 0.7857066988945007 }, { "cosine_similarity": 0, "epoch": 2.6067101584342964, "grad_norm": 0.7628030473729218, "learning_rate": 7.2833966171901965e-06, "loss": 1.0386, "reason_loss": 0.44926291704177856, "step": 2797, "utility_loss": 0.5893487930297852 }, { "cosine_similarity": 0, "epoch": 2.607642124883504, "grad_norm": 1.1317010594833796, "learning_rate": 7.2661373835001725e-06, "loss": 1.3172, "reason_loss": 0.46043330430984497, "step": 2798, "utility_loss": 0.8567174077033997 }, { "cosine_similarity": 0, "epoch": 2.608574091332712, "grad_norm": 1.1093463276633497, "learning_rate": 7.2488781498101485e-06, "loss": 0.9472, "reason_loss": 0.47238704562187195, "step": 2799, "utility_loss": 0.4748089015483856 }, { "cosine_similarity": 0, "epoch": 2.60950605778192, "grad_norm": 0.942906016713585, "learning_rate": 7.231618916120124e-06, "loss": 1.1069, "reason_loss": 0.4668903946876526, "step": 2800, "utility_loss": 0.6400260329246521 }, { "cosine_similarity": 0, "epoch": 2.6104380242311276, "grad_norm": 0.933903858913845, "learning_rate": 7.214359682430101e-06, "loss": 1.1176, "reason_loss": 0.46406373381614685, "step": 2801, "utility_loss": 0.6535742282867432 }, { "cosine_similarity": 0, "epoch": 2.6113699906803354, "grad_norm": 1.0547167862025633, "learning_rate": 7.1971004487400764e-06, "loss": 1.0044, "reason_loss": 0.4716877341270447, "step": 2802, "utility_loss": 0.5326966643333435 }, { "cosine_similarity": 0, "epoch": 2.612301957129543, "grad_norm": 1.0553116094473447, "learning_rate": 7.179841215050052e-06, "loss": 0.9937, "reason_loss": 0.4716331362724304, "step": 2803, "utility_loss": 0.5220721960067749 }, { "cosine_similarity": 0, "epoch": 2.6132339235787514, "grad_norm": 1.022832934533631, "learning_rate": 7.162581981360028e-06, "loss": 1.0547, "reason_loss": 0.4654030203819275, "step": 2804, "utility_loss": 0.5893000364303589 }, { "cosine_similarity": 0, "epoch": 2.6141658900279587, "grad_norm": 0.9441390735157286, "learning_rate": 7.1453227476700035e-06, "loss": 1.0231, "reason_loss": 0.45238229632377625, "step": 2805, "utility_loss": 0.5707100629806519 }, { "cosine_similarity": 0, "epoch": 2.615097856477167, "grad_norm": 1.0267283568648755, "learning_rate": 7.1280635139799795e-06, "loss": 1.516, "reason_loss": 0.4677571654319763, "step": 2806, "utility_loss": 1.048213243484497 }, { "cosine_similarity": 0, "epoch": 2.6160298229263748, "grad_norm": 0.8440475476041002, "learning_rate": 7.1108042802899555e-06, "loss": 0.954, "reason_loss": 0.43990570306777954, "step": 2807, "utility_loss": 0.5141351819038391 }, { "cosine_similarity": 0, "epoch": 2.6169617893755825, "grad_norm": 0.9436455294160462, "learning_rate": 7.093545046599931e-06, "loss": 1.0776, "reason_loss": 0.45191097259521484, "step": 2808, "utility_loss": 0.625685453414917 }, { "cosine_similarity": 0, "epoch": 2.6178937558247903, "grad_norm": 0.8821153834216586, "learning_rate": 7.076285812909907e-06, "loss": 1.3181, "reason_loss": 0.47935304045677185, "step": 2809, "utility_loss": 0.8387199640274048 }, { "cosine_similarity": 0, "epoch": 2.618825722273998, "grad_norm": 0.8185886129683991, "learning_rate": 7.059026579219883e-06, "loss": 0.8476, "reason_loss": 0.4373813271522522, "step": 2810, "utility_loss": 0.4102363586425781 }, { "cosine_similarity": 0, "epoch": 2.619757688723206, "grad_norm": 0.974546455090056, "learning_rate": 7.0417673455298594e-06, "loss": 0.9894, "reason_loss": 0.4659390449523926, "step": 2811, "utility_loss": 0.5234239101409912 }, { "cosine_similarity": 0, "epoch": 2.6206896551724137, "grad_norm": 0.8553116169585014, "learning_rate": 7.024508111839835e-06, "loss": 0.9049, "reason_loss": 0.4678374230861664, "step": 2812, "utility_loss": 0.4370861053466797 }, { "cosine_similarity": 0, "epoch": 2.6216216216216215, "grad_norm": 0.9029189217387801, "learning_rate": 7.0072488781498106e-06, "loss": 0.8147, "reason_loss": 0.47585242986679077, "step": 2813, "utility_loss": 0.33884164690971375 }, { "cosine_similarity": 0, "epoch": 2.6225535880708293, "grad_norm": 0.8588005226907653, "learning_rate": 6.9899896444597865e-06, "loss": 1.133, "reason_loss": 0.474509060382843, "step": 2814, "utility_loss": 0.6585112810134888 }, { "cosine_similarity": 0, "epoch": 2.6234855545200375, "grad_norm": 0.8557001349291106, "learning_rate": 6.9727304107697625e-06, "loss": 1.0088, "reason_loss": 0.4321138858795166, "step": 2815, "utility_loss": 0.5766452550888062 }, { "cosine_similarity": 0, "epoch": 2.624417520969245, "grad_norm": 0.7762222232612161, "learning_rate": 6.955471177079738e-06, "loss": 0.9294, "reason_loss": 0.47176074981689453, "step": 2816, "utility_loss": 0.457601398229599 }, { "cosine_similarity": 0, "epoch": 2.625349487418453, "grad_norm": 0.9732257186675158, "learning_rate": 6.938211943389714e-06, "loss": 1.1501, "reason_loss": 0.46037429571151733, "step": 2817, "utility_loss": 0.6897416114807129 }, { "cosine_similarity": 0, "epoch": 2.626281453867661, "grad_norm": 0.9983514174562798, "learning_rate": 6.92095270969969e-06, "loss": 1.0705, "reason_loss": 0.4944213628768921, "step": 2818, "utility_loss": 0.5761227607727051 }, { "cosine_similarity": 0, "epoch": 2.6272134203168687, "grad_norm": 0.8272770269373523, "learning_rate": 6.903693476009665e-06, "loss": 0.9285, "reason_loss": 0.4783535897731781, "step": 2819, "utility_loss": 0.45011869072914124 }, { "cosine_similarity": 0, "epoch": 2.6281453867660765, "grad_norm": 0.9778017185649805, "learning_rate": 6.886434242319641e-06, "loss": 0.9439, "reason_loss": 0.4745682179927826, "step": 2820, "utility_loss": 0.4693055748939514 }, { "cosine_similarity": 0, "epoch": 2.6290773532152842, "grad_norm": 1.1936914898643838, "learning_rate": 6.869175008629618e-06, "loss": 1.3121, "reason_loss": 0.4845069944858551, "step": 2821, "utility_loss": 0.8276199102401733 }, { "cosine_similarity": 0, "epoch": 2.630009319664492, "grad_norm": 0.9708741942194883, "learning_rate": 6.8519157749395936e-06, "loss": 1.2076, "reason_loss": 0.47100526094436646, "step": 2822, "utility_loss": 0.7365882396697998 }, { "cosine_similarity": 0, "epoch": 2.6309412861137, "grad_norm": 1.0528674425856805, "learning_rate": 6.8346565412495696e-06, "loss": 1.4115, "reason_loss": 0.4551706612110138, "step": 2823, "utility_loss": 0.9563781023025513 }, { "cosine_similarity": 0, "epoch": 2.6318732525629076, "grad_norm": 0.9942840782215704, "learning_rate": 6.817397307559545e-06, "loss": 1.2385, "reason_loss": 0.45829492807388306, "step": 2824, "utility_loss": 0.7802505493164062 }, { "cosine_similarity": 0, "epoch": 2.6328052190121154, "grad_norm": 0.8794826905391097, "learning_rate": 6.800138073869521e-06, "loss": 1.0032, "reason_loss": 0.45925527811050415, "step": 2825, "utility_loss": 0.5439929962158203 }, { "cosine_similarity": 0, "epoch": 2.6337371854613236, "grad_norm": 0.9559915171540951, "learning_rate": 6.782878840179497e-06, "loss": 1.0517, "reason_loss": 0.4598887860774994, "step": 2826, "utility_loss": 0.5918090343475342 }, { "cosine_similarity": 0, "epoch": 2.634669151910531, "grad_norm": 0.963569504825477, "learning_rate": 6.765619606489472e-06, "loss": 1.2825, "reason_loss": 0.48647379875183105, "step": 2827, "utility_loss": 0.7960007190704346 }, { "cosine_similarity": 0, "epoch": 2.635601118359739, "grad_norm": 1.0742349357878171, "learning_rate": 6.748360372799448e-06, "loss": 1.2008, "reason_loss": 0.4389894902706146, "step": 2828, "utility_loss": 0.7618398666381836 }, { "cosine_similarity": 0, "epoch": 2.636533084808947, "grad_norm": 1.2404460701134399, "learning_rate": 6.731101139109424e-06, "loss": 0.9529, "reason_loss": 0.47267046570777893, "step": 2829, "utility_loss": 0.4801807999610901 }, { "cosine_similarity": 0, "epoch": 2.637465051258155, "grad_norm": 1.0597220866504158, "learning_rate": 6.713841905419399e-06, "loss": 1.3851, "reason_loss": 0.44785797595977783, "step": 2830, "utility_loss": 0.9372803568840027 }, { "cosine_similarity": 0, "epoch": 2.6383970177073626, "grad_norm": 1.1707521563018777, "learning_rate": 6.696582671729376e-06, "loss": 1.0277, "reason_loss": 0.4618369936943054, "step": 2831, "utility_loss": 0.5658971667289734 }, { "cosine_similarity": 0, "epoch": 2.6393289841565704, "grad_norm": 1.2390911343135358, "learning_rate": 6.679323438039352e-06, "loss": 1.1125, "reason_loss": 0.4502216875553131, "step": 2832, "utility_loss": 0.6622788906097412 }, { "cosine_similarity": 0, "epoch": 2.640260950605778, "grad_norm": 0.8211962135591215, "learning_rate": 6.662064204349328e-06, "loss": 0.907, "reason_loss": 0.4621829390525818, "step": 2833, "utility_loss": 0.44486555457115173 }, { "cosine_similarity": 0, "epoch": 2.641192917054986, "grad_norm": 0.9207197085374231, "learning_rate": 6.644804970659303e-06, "loss": 1.2553, "reason_loss": 0.45767396688461304, "step": 2834, "utility_loss": 0.7976295948028564 }, { "cosine_similarity": 0, "epoch": 2.6421248835041937, "grad_norm": 0.8526487677312377, "learning_rate": 6.627545736969279e-06, "loss": 0.965, "reason_loss": 0.5075228214263916, "step": 2835, "utility_loss": 0.4575234651565552 }, { "cosine_similarity": 0, "epoch": 2.6430568499534015, "grad_norm": 0.9662418943278042, "learning_rate": 6.610286503279255e-06, "loss": 0.8803, "reason_loss": 0.4396234154701233, "step": 2836, "utility_loss": 0.44069766998291016 }, { "cosine_similarity": 0, "epoch": 2.6439888164026097, "grad_norm": 1.0190673910901855, "learning_rate": 6.59302726958923e-06, "loss": 1.3244, "reason_loss": 0.4761747121810913, "step": 2837, "utility_loss": 0.8482578992843628 }, { "cosine_similarity": 0, "epoch": 2.644920782851817, "grad_norm": 0.8397391967851717, "learning_rate": 6.575768035899206e-06, "loss": 1.1804, "reason_loss": 0.4925670921802521, "step": 2838, "utility_loss": 0.6878757476806641 }, { "cosine_similarity": 0, "epoch": 2.6458527493010253, "grad_norm": 0.9421864604186913, "learning_rate": 6.558508802209182e-06, "loss": 0.9043, "reason_loss": 0.45898276567459106, "step": 2839, "utility_loss": 0.4452984035015106 }, { "cosine_similarity": 0, "epoch": 2.646784715750233, "grad_norm": 0.9427525175305939, "learning_rate": 6.541249568519157e-06, "loss": 1.107, "reason_loss": 0.45224523544311523, "step": 2840, "utility_loss": 0.6547635793685913 }, { "cosine_similarity": 0, "epoch": 2.647716682199441, "grad_norm": 0.9058772829930478, "learning_rate": 6.523990334829135e-06, "loss": 1.0177, "reason_loss": 0.4370395839214325, "step": 2841, "utility_loss": 0.5807022452354431 }, { "cosine_similarity": 0, "epoch": 2.6486486486486487, "grad_norm": 1.1065673656531168, "learning_rate": 6.50673110113911e-06, "loss": 1.5057, "reason_loss": 0.46416181325912476, "step": 2842, "utility_loss": 1.0415210723876953 }, { "cosine_similarity": 0, "epoch": 2.6495806150978565, "grad_norm": 0.9330606600666165, "learning_rate": 6.489471867449086e-06, "loss": 1.0752, "reason_loss": 0.4677804708480835, "step": 2843, "utility_loss": 0.6074395775794983 }, { "cosine_similarity": 0, "epoch": 2.6505125815470643, "grad_norm": 0.997315803782804, "learning_rate": 6.472212633759062e-06, "loss": 0.9503, "reason_loss": 0.46487629413604736, "step": 2844, "utility_loss": 0.4853961765766144 }, { "cosine_similarity": 0, "epoch": 2.651444547996272, "grad_norm": 0.8401085032848826, "learning_rate": 6.454953400069037e-06, "loss": 1.0242, "reason_loss": 0.4637804627418518, "step": 2845, "utility_loss": 0.5604144930839539 }, { "cosine_similarity": 0, "epoch": 2.65237651444548, "grad_norm": 0.9457410131597382, "learning_rate": 6.437694166379013e-06, "loss": 1.1581, "reason_loss": 0.4470856189727783, "step": 2846, "utility_loss": 0.7110514044761658 }, { "cosine_similarity": 0, "epoch": 2.6533084808946876, "grad_norm": 0.9733966178267641, "learning_rate": 6.420434932688989e-06, "loss": 1.1823, "reason_loss": 0.4828490614891052, "step": 2847, "utility_loss": 0.6994460225105286 }, { "cosine_similarity": 0, "epoch": 2.654240447343896, "grad_norm": 0.9018251025843187, "learning_rate": 6.403175698998964e-06, "loss": 1.1055, "reason_loss": 0.45721083879470825, "step": 2848, "utility_loss": 0.6483235359191895 }, { "cosine_similarity": 0, "epoch": 2.655172413793103, "grad_norm": 0.9935720619966877, "learning_rate": 6.38591646530894e-06, "loss": 1.2563, "reason_loss": 0.4576026201248169, "step": 2849, "utility_loss": 0.7987321615219116 }, { "cosine_similarity": 0, "epoch": 2.6561043802423114, "grad_norm": 0.94492903787392, "learning_rate": 6.368657231618916e-06, "loss": 1.2132, "reason_loss": 0.4985506534576416, "step": 2850, "utility_loss": 0.7146461009979248 }, { "cosine_similarity": 0, "epoch": 2.6570363466915192, "grad_norm": 0.9193892944540432, "learning_rate": 6.351397997928893e-06, "loss": 0.9406, "reason_loss": 0.4736554026603699, "step": 2851, "utility_loss": 0.46697574853897095 }, { "cosine_similarity": 0, "epoch": 2.657968313140727, "grad_norm": 0.840761503018807, "learning_rate": 6.334138764238869e-06, "loss": 0.9135, "reason_loss": 0.4538062810897827, "step": 2852, "utility_loss": 0.45970726013183594 }, { "cosine_similarity": 0, "epoch": 2.658900279589935, "grad_norm": 1.0700258805899854, "learning_rate": 6.316879530548844e-06, "loss": 1.0686, "reason_loss": 0.4681827425956726, "step": 2853, "utility_loss": 0.6004138588905334 }, { "cosine_similarity": 0, "epoch": 2.6598322460391426, "grad_norm": 0.9769729251400798, "learning_rate": 6.29962029685882e-06, "loss": 1.1761, "reason_loss": 0.4603687524795532, "step": 2854, "utility_loss": 0.7157772779464722 }, { "cosine_similarity": 0, "epoch": 2.6607642124883504, "grad_norm": 0.8786424709968054, "learning_rate": 6.282361063168796e-06, "loss": 1.0847, "reason_loss": 0.47496822476387024, "step": 2855, "utility_loss": 0.609694242477417 }, { "cosine_similarity": 0, "epoch": 2.661696178937558, "grad_norm": 0.7880232636899854, "learning_rate": 6.265101829478771e-06, "loss": 1.1941, "reason_loss": 0.4680525064468384, "step": 2856, "utility_loss": 0.7260886430740356 }, { "cosine_similarity": 0, "epoch": 2.662628145386766, "grad_norm": 0.940956448175967, "learning_rate": 6.247842595788747e-06, "loss": 1.0997, "reason_loss": 0.4898187518119812, "step": 2857, "utility_loss": 0.6099019646644592 }, { "cosine_similarity": 0, "epoch": 2.6635601118359737, "grad_norm": 0.9122001168886773, "learning_rate": 6.230583362098723e-06, "loss": 1.083, "reason_loss": 0.4566715657711029, "step": 2858, "utility_loss": 0.6263774633407593 }, { "cosine_similarity": 0, "epoch": 2.664492078285182, "grad_norm": 1.134880425707007, "learning_rate": 6.213324128408699e-06, "loss": 1.195, "reason_loss": 0.45364296436309814, "step": 2859, "utility_loss": 0.741355299949646 }, { "cosine_similarity": 0, "epoch": 2.6654240447343893, "grad_norm": 0.9327897278800408, "learning_rate": 6.196064894718675e-06, "loss": 1.2031, "reason_loss": 0.5024683475494385, "step": 2860, "utility_loss": 0.7006078958511353 }, { "cosine_similarity": 0, "epoch": 2.6663560111835976, "grad_norm": 1.0551754647203873, "learning_rate": 6.17880566102865e-06, "loss": 0.9058, "reason_loss": 0.46384096145629883, "step": 2861, "utility_loss": 0.4419998526573181 }, { "cosine_similarity": 0, "epoch": 2.6672879776328053, "grad_norm": 0.9357376417279631, "learning_rate": 6.161546427338626e-06, "loss": 1.1402, "reason_loss": 0.5145814418792725, "step": 2862, "utility_loss": 0.6255868673324585 }, { "cosine_similarity": 0, "epoch": 2.668219944082013, "grad_norm": 0.9133815254737322, "learning_rate": 6.144287193648602e-06, "loss": 1.1005, "reason_loss": 0.4508739709854126, "step": 2863, "utility_loss": 0.649650514125824 }, { "cosine_similarity": 0, "epoch": 2.669151910531221, "grad_norm": 0.8736091675337685, "learning_rate": 6.127027959958578e-06, "loss": 1.0076, "reason_loss": 0.4519398510456085, "step": 2864, "utility_loss": 0.5556230545043945 }, { "cosine_similarity": 0, "epoch": 2.6700838769804287, "grad_norm": 1.1598214377705003, "learning_rate": 6.109768726268554e-06, "loss": 1.5574, "reason_loss": 0.4637829065322876, "step": 2865, "utility_loss": 1.093639850616455 }, { "cosine_similarity": 0, "epoch": 2.6710158434296365, "grad_norm": 0.9111268409493155, "learning_rate": 6.09250949257853e-06, "loss": 1.2671, "reason_loss": 0.4627251625061035, "step": 2866, "utility_loss": 0.8043862581253052 }, { "cosine_similarity": 0, "epoch": 2.6719478098788443, "grad_norm": 1.0460338295221967, "learning_rate": 6.075250258888505e-06, "loss": 1.1872, "reason_loss": 0.4717981815338135, "step": 2867, "utility_loss": 0.715368926525116 }, { "cosine_similarity": 0, "epoch": 2.672879776328052, "grad_norm": 0.9127476026489124, "learning_rate": 6.057991025198481e-06, "loss": 0.9807, "reason_loss": 0.45364242792129517, "step": 2868, "utility_loss": 0.5270171165466309 }, { "cosine_similarity": 0, "epoch": 2.67381174277726, "grad_norm": 0.7845732655162293, "learning_rate": 6.040731791508457e-06, "loss": 0.8759, "reason_loss": 0.4392348527908325, "step": 2869, "utility_loss": 0.4366253614425659 }, { "cosine_similarity": 0, "epoch": 2.674743709226468, "grad_norm": 0.9171895165535553, "learning_rate": 6.023472557818433e-06, "loss": 1.0078, "reason_loss": 0.44820407032966614, "step": 2870, "utility_loss": 0.5595839619636536 }, { "cosine_similarity": 0, "epoch": 2.6756756756756754, "grad_norm": 1.0246944431978267, "learning_rate": 6.006213324128409e-06, "loss": 1.1324, "reason_loss": 0.4942169487476349, "step": 2871, "utility_loss": 0.6382303237915039 }, { "cosine_similarity": 0, "epoch": 2.6766076421248837, "grad_norm": 0.8665282598452342, "learning_rate": 5.988954090438384e-06, "loss": 0.8619, "reason_loss": 0.4783172011375427, "step": 2872, "utility_loss": 0.38355880975723267 }, { "cosine_similarity": 0, "epoch": 2.6775396085740915, "grad_norm": 1.0582494668769518, "learning_rate": 5.97169485674836e-06, "loss": 1.2891, "reason_loss": 0.4609187841415405, "step": 2873, "utility_loss": 0.8281998038291931 }, { "cosine_similarity": 0, "epoch": 2.6784715750232992, "grad_norm": 1.1256204110251042, "learning_rate": 5.954435623058337e-06, "loss": 1.2872, "reason_loss": 0.48383772373199463, "step": 2874, "utility_loss": 0.8033815622329712 }, { "cosine_similarity": 0, "epoch": 2.679403541472507, "grad_norm": 1.0280914617105494, "learning_rate": 5.937176389368312e-06, "loss": 1.1693, "reason_loss": 0.5084508657455444, "step": 2875, "utility_loss": 0.660857081413269 }, { "cosine_similarity": 0, "epoch": 2.680335507921715, "grad_norm": 1.4808355863519835, "learning_rate": 5.919917155678288e-06, "loss": 1.1522, "reason_loss": 0.48536211252212524, "step": 2876, "utility_loss": 0.666851818561554 }, { "cosine_similarity": 0, "epoch": 2.6812674743709226, "grad_norm": 0.8861612337151331, "learning_rate": 5.902657921988264e-06, "loss": 0.8477, "reason_loss": 0.4698463976383209, "step": 2877, "utility_loss": 0.37790003418922424 }, { "cosine_similarity": 0, "epoch": 2.6821994408201304, "grad_norm": 0.9236340189597475, "learning_rate": 5.885398688298239e-06, "loss": 1.0525, "reason_loss": 0.48955637216567993, "step": 2878, "utility_loss": 0.5629673004150391 }, { "cosine_similarity": 0, "epoch": 2.683131407269338, "grad_norm": 0.94892396529825, "learning_rate": 5.868139454608216e-06, "loss": 0.9152, "reason_loss": 0.4494999051094055, "step": 2879, "utility_loss": 0.4656580090522766 }, { "cosine_similarity": 0, "epoch": 2.684063373718546, "grad_norm": 0.8677163953920212, "learning_rate": 5.850880220918191e-06, "loss": 1.0254, "reason_loss": 0.4639601707458496, "step": 2880, "utility_loss": 0.5614444017410278 }, { "cosine_similarity": 0, "epoch": 2.684995340167754, "grad_norm": 0.9897522013463286, "learning_rate": 5.833620987228167e-06, "loss": 1.2947, "reason_loss": 0.4389759302139282, "step": 2881, "utility_loss": 0.8557514548301697 }, { "cosine_similarity": 0, "epoch": 2.6859273066169616, "grad_norm": 1.0480109353027902, "learning_rate": 5.816361753538143e-06, "loss": 1.1184, "reason_loss": 0.44764789938926697, "step": 2882, "utility_loss": 0.670761227607727 }, { "cosine_similarity": 0, "epoch": 2.68685927306617, "grad_norm": 0.8696213148569727, "learning_rate": 5.7991025198481184e-06, "loss": 1.3028, "reason_loss": 0.45406267046928406, "step": 2883, "utility_loss": 0.8487268686294556 }, { "cosine_similarity": 0, "epoch": 2.6877912395153776, "grad_norm": 1.0369765277888168, "learning_rate": 5.781843286158095e-06, "loss": 1.0804, "reason_loss": 0.4760833978652954, "step": 2884, "utility_loss": 0.6042795181274414 }, { "cosine_similarity": 0, "epoch": 2.6887232059645854, "grad_norm": 0.7662632888410277, "learning_rate": 5.76458405246807e-06, "loss": 0.9528, "reason_loss": 0.48636001348495483, "step": 2885, "utility_loss": 0.46648257970809937 }, { "cosine_similarity": 0, "epoch": 2.689655172413793, "grad_norm": 0.7608215490627507, "learning_rate": 5.747324818778046e-06, "loss": 0.9529, "reason_loss": 0.47997739911079407, "step": 2886, "utility_loss": 0.4729544222354889 }, { "cosine_similarity": 0, "epoch": 2.690587138863001, "grad_norm": 0.8424489341739048, "learning_rate": 5.730065585088022e-06, "loss": 0.9192, "reason_loss": 0.46835577487945557, "step": 2887, "utility_loss": 0.4508676528930664 }, { "cosine_similarity": 0, "epoch": 2.6915191053122087, "grad_norm": 1.0168017387046782, "learning_rate": 5.7128063513979975e-06, "loss": 1.0025, "reason_loss": 0.46454450488090515, "step": 2888, "utility_loss": 0.5379950404167175 }, { "cosine_similarity": 0, "epoch": 2.6924510717614165, "grad_norm": 0.9225849546812839, "learning_rate": 5.695547117707974e-06, "loss": 0.9903, "reason_loss": 0.4590076804161072, "step": 2889, "utility_loss": 0.5313160419464111 }, { "cosine_similarity": 0, "epoch": 2.6933830382106243, "grad_norm": 0.893467661405209, "learning_rate": 5.67828788401795e-06, "loss": 1.117, "reason_loss": 0.4573169946670532, "step": 2890, "utility_loss": 0.6596361398696899 }, { "cosine_similarity": 0, "epoch": 2.694315004659832, "grad_norm": 0.9711640331321124, "learning_rate": 5.6610286503279255e-06, "loss": 1.0089, "reason_loss": 0.47706839442253113, "step": 2891, "utility_loss": 0.5318078994750977 }, { "cosine_similarity": 0, "epoch": 2.6952469711090403, "grad_norm": 0.9498557160676196, "learning_rate": 5.6437694166379015e-06, "loss": 1.0263, "reason_loss": 0.4502719044685364, "step": 2892, "utility_loss": 0.5760347247123718 }, { "cosine_similarity": 0, "epoch": 2.6961789375582477, "grad_norm": 0.8140895375030971, "learning_rate": 5.6265101829478774e-06, "loss": 1.0351, "reason_loss": 0.47655346989631653, "step": 2893, "utility_loss": 0.5585449934005737 }, { "cosine_similarity": 0, "epoch": 2.697110904007456, "grad_norm": 1.00321655681296, "learning_rate": 5.609250949257853e-06, "loss": 1.0837, "reason_loss": 0.4690660834312439, "step": 2894, "utility_loss": 0.6146471500396729 }, { "cosine_similarity": 0, "epoch": 2.6980428704566637, "grad_norm": 0.8419698199116573, "learning_rate": 5.591991715567829e-06, "loss": 1.0182, "reason_loss": 0.4676115810871124, "step": 2895, "utility_loss": 0.5505567193031311 }, { "cosine_similarity": 0, "epoch": 2.6989748369058715, "grad_norm": 1.0075322591386613, "learning_rate": 5.5747324818778045e-06, "loss": 1.4704, "reason_loss": 0.4385017156600952, "step": 2896, "utility_loss": 1.0319453477859497 }, { "cosine_similarity": 0, "epoch": 2.6999068033550793, "grad_norm": 0.8105510477042436, "learning_rate": 5.5574732481877805e-06, "loss": 0.9741, "reason_loss": 0.4434671998023987, "step": 2897, "utility_loss": 0.5305929183959961 }, { "cosine_similarity": 0, "epoch": 2.700838769804287, "grad_norm": 1.0411351737233654, "learning_rate": 5.5402140144977565e-06, "loss": 1.3446, "reason_loss": 0.4622310698032379, "step": 2898, "utility_loss": 0.8823944926261902 }, { "cosine_similarity": 0, "epoch": 2.701770736253495, "grad_norm": 0.9221400184867972, "learning_rate": 5.5229547808077325e-06, "loss": 1.0434, "reason_loss": 0.4513729512691498, "step": 2899, "utility_loss": 0.5920629501342773 }, { "cosine_similarity": 0, "epoch": 2.7027027027027026, "grad_norm": 0.9330459902405285, "learning_rate": 5.5056955471177085e-06, "loss": 1.1445, "reason_loss": 0.4745780825614929, "step": 2900, "utility_loss": 0.6699556112289429 }, { "cosine_similarity": 0, "epoch": 2.7036346691519104, "grad_norm": 1.0189273105603547, "learning_rate": 5.4884363134276845e-06, "loss": 0.9301, "reason_loss": 0.44672954082489014, "step": 2901, "utility_loss": 0.48332709074020386 }, { "cosine_similarity": 0, "epoch": 2.704566635601118, "grad_norm": 0.7983476239010563, "learning_rate": 5.47117707973766e-06, "loss": 1.0105, "reason_loss": 0.44115492701530457, "step": 2902, "utility_loss": 0.5693908333778381 }, { "cosine_similarity": 0, "epoch": 2.7054986020503264, "grad_norm": 0.8731479205949109, "learning_rate": 5.453917846047636e-06, "loss": 0.8621, "reason_loss": 0.46134012937545776, "step": 2903, "utility_loss": 0.4007260203361511 }, { "cosine_similarity": 0, "epoch": 2.706430568499534, "grad_norm": 1.1067483574641248, "learning_rate": 5.4366586123576116e-06, "loss": 1.4352, "reason_loss": 0.47095000743865967, "step": 2904, "utility_loss": 0.9642082452774048 }, { "cosine_similarity": 0, "epoch": 2.707362534948742, "grad_norm": 0.9030609468976253, "learning_rate": 5.4193993786675876e-06, "loss": 1.0232, "reason_loss": 0.46851882338523865, "step": 2905, "utility_loss": 0.5546441078186035 }, { "cosine_similarity": 0, "epoch": 2.70829450139795, "grad_norm": 1.0846112829267702, "learning_rate": 5.4021401449775635e-06, "loss": 1.1403, "reason_loss": 0.5125150084495544, "step": 2906, "utility_loss": 0.6278025507926941 }, { "cosine_similarity": 0, "epoch": 2.7092264678471576, "grad_norm": 1.2854679969512468, "learning_rate": 5.384880911287539e-06, "loss": 1.107, "reason_loss": 0.4955475926399231, "step": 2907, "utility_loss": 0.6114965081214905 }, { "cosine_similarity": 0, "epoch": 2.7101584342963654, "grad_norm": 0.8828882991159304, "learning_rate": 5.367621677597515e-06, "loss": 0.8553, "reason_loss": 0.4468293786048889, "step": 2908, "utility_loss": 0.40850722789764404 }, { "cosine_similarity": 0, "epoch": 2.711090400745573, "grad_norm": 0.8037580573356883, "learning_rate": 5.350362443907491e-06, "loss": 0.926, "reason_loss": 0.4528241455554962, "step": 2909, "utility_loss": 0.47321945428848267 }, { "cosine_similarity": 0, "epoch": 2.712022367194781, "grad_norm": 1.1631322438652913, "learning_rate": 5.333103210217467e-06, "loss": 1.173, "reason_loss": 0.4876445233821869, "step": 2910, "utility_loss": 0.6853352785110474 }, { "cosine_similarity": 0, "epoch": 2.7129543336439887, "grad_norm": 0.9384043043028885, "learning_rate": 5.315843976527443e-06, "loss": 1.0469, "reason_loss": 0.45926201343536377, "step": 2911, "utility_loss": 0.5876302719116211 }, { "cosine_similarity": 0, "epoch": 2.7138863000931965, "grad_norm": 0.9213818083976142, "learning_rate": 5.298584742837418e-06, "loss": 1.0057, "reason_loss": 0.46617117524147034, "step": 2912, "utility_loss": 0.5395472049713135 }, { "cosine_similarity": 0, "epoch": 2.7148182665424043, "grad_norm": 0.7951837608373005, "learning_rate": 5.281325509147394e-06, "loss": 0.9995, "reason_loss": 0.5081158876419067, "step": 2913, "utility_loss": 0.49142706394195557 }, { "cosine_similarity": 0, "epoch": 2.7157502329916126, "grad_norm": 0.7034207161590053, "learning_rate": 5.2640662754573706e-06, "loss": 0.8361, "reason_loss": 0.43648436665534973, "step": 2914, "utility_loss": 0.3996378779411316 }, { "cosine_similarity": 0, "epoch": 2.71668219944082, "grad_norm": 1.1409295401757826, "learning_rate": 5.246807041767346e-06, "loss": 1.0663, "reason_loss": 0.4542621374130249, "step": 2915, "utility_loss": 0.6120010018348694 }, { "cosine_similarity": 0, "epoch": 2.717614165890028, "grad_norm": 0.8849760341121542, "learning_rate": 5.229547808077322e-06, "loss": 0.9707, "reason_loss": 0.4602084755897522, "step": 2916, "utility_loss": 0.5104470252990723 }, { "cosine_similarity": 0, "epoch": 2.718546132339236, "grad_norm": 0.9682885659419126, "learning_rate": 5.212288574387298e-06, "loss": 0.9775, "reason_loss": 0.43787625432014465, "step": 2917, "utility_loss": 0.5396579504013062 }, { "cosine_similarity": 0, "epoch": 2.7194780987884437, "grad_norm": 1.0096251581629538, "learning_rate": 5.195029340697273e-06, "loss": 1.2872, "reason_loss": 0.45968756079673767, "step": 2918, "utility_loss": 0.8275160789489746 }, { "cosine_similarity": 0, "epoch": 2.7204100652376515, "grad_norm": 0.973329248064761, "learning_rate": 5.17777010700725e-06, "loss": 1.134, "reason_loss": 0.48350441455841064, "step": 2919, "utility_loss": 0.6504964232444763 }, { "cosine_similarity": 0, "epoch": 2.7213420316868593, "grad_norm": 1.1298612657233809, "learning_rate": 5.160510873317225e-06, "loss": 1.1115, "reason_loss": 0.4553924798965454, "step": 2920, "utility_loss": 0.6561435461044312 }, { "cosine_similarity": 0, "epoch": 2.722273998136067, "grad_norm": 1.005298565985147, "learning_rate": 5.143251639627201e-06, "loss": 0.9854, "reason_loss": 0.46571117639541626, "step": 2921, "utility_loss": 0.5197265148162842 }, { "cosine_similarity": 0, "epoch": 2.723205964585275, "grad_norm": 0.9611014625068781, "learning_rate": 5.125992405937177e-06, "loss": 1.0258, "reason_loss": 0.4652855396270752, "step": 2922, "utility_loss": 0.5604845285415649 }, { "cosine_similarity": 0, "epoch": 2.7241379310344827, "grad_norm": 1.0624054784615686, "learning_rate": 5.108733172247152e-06, "loss": 1.0685, "reason_loss": 0.4316662549972534, "step": 2923, "utility_loss": 0.6368041634559631 }, { "cosine_similarity": 0, "epoch": 2.7250698974836904, "grad_norm": 1.0136477013942304, "learning_rate": 5.091473938557129e-06, "loss": 1.064, "reason_loss": 0.4633239507675171, "step": 2924, "utility_loss": 0.6006895303726196 }, { "cosine_similarity": 0, "epoch": 2.7260018639328987, "grad_norm": 0.9836035813213818, "learning_rate": 5.074214704867105e-06, "loss": 1.112, "reason_loss": 0.4545404613018036, "step": 2925, "utility_loss": 0.6574375629425049 }, { "cosine_similarity": 0, "epoch": 2.726933830382106, "grad_norm": 0.8665090717729889, "learning_rate": 5.05695547117708e-06, "loss": 0.9809, "reason_loss": 0.47389745712280273, "step": 2926, "utility_loss": 0.5069745779037476 }, { "cosine_similarity": 0, "epoch": 2.7278657968313142, "grad_norm": 1.405820110273388, "learning_rate": 5.039696237487056e-06, "loss": 1.0968, "reason_loss": 0.48610755801200867, "step": 2927, "utility_loss": 0.6107298135757446 }, { "cosine_similarity": 0, "epoch": 2.728797763280522, "grad_norm": 0.8351954591780987, "learning_rate": 5.022437003797032e-06, "loss": 0.9492, "reason_loss": 0.4436931610107422, "step": 2928, "utility_loss": 0.5054859519004822 }, { "cosine_similarity": 0, "epoch": 2.72972972972973, "grad_norm": 0.915168668118793, "learning_rate": 5.005177770107008e-06, "loss": 0.8542, "reason_loss": 0.47566530108451843, "step": 2929, "utility_loss": 0.37852177023887634 }, { "cosine_similarity": 0, "epoch": 2.7306616961789376, "grad_norm": 0.877609437793335, "learning_rate": 4.987918536416984e-06, "loss": 1.0933, "reason_loss": 0.460612416267395, "step": 2930, "utility_loss": 0.6326791644096375 }, { "cosine_similarity": 0, "epoch": 2.7315936626281454, "grad_norm": 1.0270256432507485, "learning_rate": 4.970659302726959e-06, "loss": 1.1683, "reason_loss": 0.4464719891548157, "step": 2931, "utility_loss": 0.7218067646026611 }, { "cosine_similarity": 0, "epoch": 2.732525629077353, "grad_norm": 0.8712115732791071, "learning_rate": 4.953400069036935e-06, "loss": 0.9034, "reason_loss": 0.4670451879501343, "step": 2932, "utility_loss": 0.4363778531551361 }, { "cosine_similarity": 0, "epoch": 2.733457595526561, "grad_norm": 1.5532352882317162, "learning_rate": 4.936140835346911e-06, "loss": 1.294, "reason_loss": 0.4604533612728119, "step": 2933, "utility_loss": 0.8335440158843994 }, { "cosine_similarity": 0, "epoch": 2.7343895619757688, "grad_norm": 1.1200781839211518, "learning_rate": 4.918881601656887e-06, "loss": 1.4655, "reason_loss": 0.47705304622650146, "step": 2934, "utility_loss": 0.9884586334228516 }, { "cosine_similarity": 0, "epoch": 2.7353215284249766, "grad_norm": 0.8256646336932095, "learning_rate": 4.901622367966863e-06, "loss": 0.8288, "reason_loss": 0.43979722261428833, "step": 2935, "utility_loss": 0.38899433612823486 }, { "cosine_similarity": 0, "epoch": 2.736253494874185, "grad_norm": 1.0655555535003396, "learning_rate": 4.884363134276838e-06, "loss": 1.2727, "reason_loss": 0.4647779166698456, "step": 2936, "utility_loss": 0.8079316020011902 }, { "cosine_similarity": 0, "epoch": 2.737185461323392, "grad_norm": 0.7498467351170314, "learning_rate": 4.867103900586814e-06, "loss": 0.9159, "reason_loss": 0.49329352378845215, "step": 2937, "utility_loss": 0.422635555267334 }, { "cosine_similarity": 0, "epoch": 2.7381174277726004, "grad_norm": 1.114236879097446, "learning_rate": 4.84984466689679e-06, "loss": 1.2237, "reason_loss": 0.46553370356559753, "step": 2938, "utility_loss": 0.7582097053527832 }, { "cosine_similarity": 0, "epoch": 2.739049394221808, "grad_norm": 1.0656387050724356, "learning_rate": 4.832585433206766e-06, "loss": 1.1198, "reason_loss": 0.45728904008865356, "step": 2939, "utility_loss": 0.6624687314033508 }, { "cosine_similarity": 0, "epoch": 2.739981360671016, "grad_norm": 0.7314735267532575, "learning_rate": 4.815326199516742e-06, "loss": 0.8718, "reason_loss": 0.479312002658844, "step": 2940, "utility_loss": 0.3925052285194397 }, { "cosine_similarity": 0, "epoch": 2.7409133271202237, "grad_norm": 1.1026865770329368, "learning_rate": 4.798066965826718e-06, "loss": 1.1589, "reason_loss": 0.4612278938293457, "step": 2941, "utility_loss": 0.6976571083068848 }, { "cosine_similarity": 0, "epoch": 2.7418452935694315, "grad_norm": 1.017635653721896, "learning_rate": 4.780807732136693e-06, "loss": 0.9977, "reason_loss": 0.48750513792037964, "step": 2942, "utility_loss": 0.510212242603302 }, { "cosine_similarity": 0, "epoch": 2.7427772600186393, "grad_norm": 0.8836123705192453, "learning_rate": 4.763548498446669e-06, "loss": 1.0313, "reason_loss": 0.444693922996521, "step": 2943, "utility_loss": 0.5866110324859619 }, { "cosine_similarity": 0, "epoch": 2.743709226467847, "grad_norm": 0.9170854517735493, "learning_rate": 4.746289264756645e-06, "loss": 1.0226, "reason_loss": 0.4661048650741577, "step": 2944, "utility_loss": 0.5565201044082642 }, { "cosine_similarity": 0, "epoch": 2.744641192917055, "grad_norm": 0.7866266419085556, "learning_rate": 4.729030031066621e-06, "loss": 0.863, "reason_loss": 0.4458266496658325, "step": 2945, "utility_loss": 0.41714340448379517 }, { "cosine_similarity": 0, "epoch": 2.7455731593662627, "grad_norm": 1.0676535621926546, "learning_rate": 4.711770797376597e-06, "loss": 1.1766, "reason_loss": 0.48196280002593994, "step": 2946, "utility_loss": 0.6946802139282227 }, { "cosine_similarity": 0, "epoch": 2.746505125815471, "grad_norm": 1.0336150637616013, "learning_rate": 4.694511563686572e-06, "loss": 1.021, "reason_loss": 0.4603605270385742, "step": 2947, "utility_loss": 0.5606025457382202 }, { "cosine_similarity": 0, "epoch": 2.7474370922646782, "grad_norm": 0.8698986679670856, "learning_rate": 4.677252329996548e-06, "loss": 0.9139, "reason_loss": 0.45877012610435486, "step": 2948, "utility_loss": 0.45512112975120544 }, { "cosine_similarity": 0, "epoch": 2.7483690587138865, "grad_norm": 0.7894377540523284, "learning_rate": 4.659993096306525e-06, "loss": 0.9646, "reason_loss": 0.4791417121887207, "step": 2949, "utility_loss": 0.48549652099609375 }, { "cosine_similarity": 0, "epoch": 2.7493010251630943, "grad_norm": 0.8068683488150779, "learning_rate": 4.6427338626165e-06, "loss": 0.9759, "reason_loss": 0.4819928705692291, "step": 2950, "utility_loss": 0.4938693046569824 }, { "cosine_similarity": 0, "epoch": 2.750232991612302, "grad_norm": 0.9197843663146054, "learning_rate": 4.625474628926476e-06, "loss": 1.0259, "reason_loss": 0.445919394493103, "step": 2951, "utility_loss": 0.5799909234046936 }, { "cosine_similarity": 0, "epoch": 2.75116495806151, "grad_norm": 0.8582686705186026, "learning_rate": 4.608215395236452e-06, "loss": 0.8768, "reason_loss": 0.4378304183483124, "step": 2952, "utility_loss": 0.43897688388824463 }, { "cosine_similarity": 0, "epoch": 2.7520969245107176, "grad_norm": 0.943988211788624, "learning_rate": 4.590956161546427e-06, "loss": 1.0672, "reason_loss": 0.49449077248573303, "step": 2953, "utility_loss": 0.5727018713951111 }, { "cosine_similarity": 0, "epoch": 2.7530288909599254, "grad_norm": 1.0220091224901922, "learning_rate": 4.573696927856403e-06, "loss": 1.0965, "reason_loss": 0.45604684948921204, "step": 2954, "utility_loss": 0.6404649019241333 }, { "cosine_similarity": 0, "epoch": 2.753960857409133, "grad_norm": 1.1186411368932436, "learning_rate": 4.556437694166379e-06, "loss": 1.1122, "reason_loss": 0.4385278522968292, "step": 2955, "utility_loss": 0.6737094521522522 }, { "cosine_similarity": 0, "epoch": 2.754892823858341, "grad_norm": 0.9676736884035999, "learning_rate": 4.539178460476355e-06, "loss": 1.1237, "reason_loss": 0.46831920742988586, "step": 2956, "utility_loss": 0.6553642153739929 }, { "cosine_similarity": 0, "epoch": 2.755824790307549, "grad_norm": 0.9800848823298765, "learning_rate": 4.521919226786331e-06, "loss": 0.9366, "reason_loss": 0.4514620304107666, "step": 2957, "utility_loss": 0.48518308997154236 }, { "cosine_similarity": 0, "epoch": 2.756756756756757, "grad_norm": 0.9181078251699007, "learning_rate": 4.504659993096306e-06, "loss": 1.1201, "reason_loss": 0.46200764179229736, "step": 2958, "utility_loss": 0.6580764055252075 }, { "cosine_similarity": 0, "epoch": 2.7576887232059644, "grad_norm": 1.1615988644832203, "learning_rate": 4.487400759406282e-06, "loss": 1.1467, "reason_loss": 0.46495676040649414, "step": 2959, "utility_loss": 0.6817494034767151 }, { "cosine_similarity": 0, "epoch": 2.7586206896551726, "grad_norm": 1.1200349723963143, "learning_rate": 4.470141525716258e-06, "loss": 1.0259, "reason_loss": 0.43398773670196533, "step": 2960, "utility_loss": 0.5919268131256104 }, { "cosine_similarity": 0, "epoch": 2.7595526561043804, "grad_norm": 1.1186070424593753, "learning_rate": 4.452882292026234e-06, "loss": 1.2449, "reason_loss": 0.48571354150772095, "step": 2961, "utility_loss": 0.7592071294784546 }, { "cosine_similarity": 0, "epoch": 2.760484622553588, "grad_norm": 1.014517908888548, "learning_rate": 4.43562305833621e-06, "loss": 1.1614, "reason_loss": 0.4790003299713135, "step": 2962, "utility_loss": 0.6824459433555603 }, { "cosine_similarity": 0, "epoch": 2.761416589002796, "grad_norm": 0.9428643454757558, "learning_rate": 4.418363824646185e-06, "loss": 1.0693, "reason_loss": 0.4552971124649048, "step": 2963, "utility_loss": 0.6140044331550598 }, { "cosine_similarity": 0, "epoch": 2.7623485554520038, "grad_norm": 1.015628044756544, "learning_rate": 4.401104590956161e-06, "loss": 1.2261, "reason_loss": 0.43497946858406067, "step": 2964, "utility_loss": 0.7911389470100403 }, { "cosine_similarity": 0, "epoch": 2.7632805219012115, "grad_norm": 0.871172811193024, "learning_rate": 4.383845357266138e-06, "loss": 1.0543, "reason_loss": 0.4524915814399719, "step": 2965, "utility_loss": 0.601842999458313 }, { "cosine_similarity": 0, "epoch": 2.7642124883504193, "grad_norm": 0.8846242481087002, "learning_rate": 4.366586123576113e-06, "loss": 1.1066, "reason_loss": 0.48408347368240356, "step": 2966, "utility_loss": 0.6225087642669678 }, { "cosine_similarity": 0, "epoch": 2.765144454799627, "grad_norm": 0.8543028208741251, "learning_rate": 4.349326889886089e-06, "loss": 0.8299, "reason_loss": 0.4357902705669403, "step": 2967, "utility_loss": 0.39410096406936646 }, { "cosine_similarity": 0, "epoch": 2.766076421248835, "grad_norm": 0.9559877728253553, "learning_rate": 4.332067656196065e-06, "loss": 1.093, "reason_loss": 0.47795674204826355, "step": 2968, "utility_loss": 0.6150211691856384 }, { "cosine_similarity": 0, "epoch": 2.767008387698043, "grad_norm": 0.8294486280214145, "learning_rate": 4.31480842250604e-06, "loss": 1.2244, "reason_loss": 0.4833298623561859, "step": 2969, "utility_loss": 0.7410492897033691 }, { "cosine_similarity": 0, "epoch": 2.7679403541472505, "grad_norm": 0.8371919397200687, "learning_rate": 4.297549188816017e-06, "loss": 0.906, "reason_loss": 0.46386662125587463, "step": 2970, "utility_loss": 0.4421563148498535 }, { "cosine_similarity": 0, "epoch": 2.7688723205964587, "grad_norm": 0.9174862620461508, "learning_rate": 4.280289955125992e-06, "loss": 1.0131, "reason_loss": 0.4635779857635498, "step": 2971, "utility_loss": 0.5495091080665588 }, { "cosine_similarity": 0, "epoch": 2.7698042870456665, "grad_norm": 1.0139001137890267, "learning_rate": 4.263030721435968e-06, "loss": 1.5897, "reason_loss": 0.4626162648200989, "step": 2972, "utility_loss": 1.1271189451217651 }, { "cosine_similarity": 0, "epoch": 2.7707362534948743, "grad_norm": 1.064986883818413, "learning_rate": 4.245771487745944e-06, "loss": 1.2922, "reason_loss": 0.46073198318481445, "step": 2973, "utility_loss": 0.831421971321106 }, { "cosine_similarity": 0, "epoch": 2.771668219944082, "grad_norm": 0.811279656584352, "learning_rate": 4.2285122540559194e-06, "loss": 1.1203, "reason_loss": 0.47649505734443665, "step": 2974, "utility_loss": 0.6438292264938354 }, { "cosine_similarity": 0, "epoch": 2.77260018639329, "grad_norm": 0.8764050452388036, "learning_rate": 4.211253020365896e-06, "loss": 0.9694, "reason_loss": 0.4639824330806732, "step": 2975, "utility_loss": 0.5054471492767334 }, { "cosine_similarity": 0, "epoch": 2.7735321528424977, "grad_norm": 0.8786243378793153, "learning_rate": 4.193993786675872e-06, "loss": 0.9292, "reason_loss": 0.4744065999984741, "step": 2976, "utility_loss": 0.454830139875412 }, { "cosine_similarity": 0, "epoch": 2.7744641192917054, "grad_norm": 0.9108113270739789, "learning_rate": 4.176734552985847e-06, "loss": 1.0952, "reason_loss": 0.45996105670928955, "step": 2977, "utility_loss": 0.6352444291114807 }, { "cosine_similarity": 0, "epoch": 2.7753960857409132, "grad_norm": 1.0784079489101361, "learning_rate": 4.159475319295823e-06, "loss": 1.2448, "reason_loss": 0.4425724446773529, "step": 2978, "utility_loss": 0.8022207021713257 }, { "cosine_similarity": 0, "epoch": 2.776328052190121, "grad_norm": 0.9920771547481799, "learning_rate": 4.142216085605799e-06, "loss": 1.028, "reason_loss": 0.4615200161933899, "step": 2979, "utility_loss": 0.5664541721343994 }, { "cosine_similarity": 0, "epoch": 2.7772600186393293, "grad_norm": 1.0821982440924296, "learning_rate": 4.124956851915775e-06, "loss": 1.1758, "reason_loss": 0.45018529891967773, "step": 2980, "utility_loss": 0.7256089448928833 }, { "cosine_similarity": 0, "epoch": 2.7781919850885366, "grad_norm": 1.0337624728131452, "learning_rate": 4.107697618225751e-06, "loss": 1.1425, "reason_loss": 0.4494451880455017, "step": 2981, "utility_loss": 0.6930344700813293 }, { "cosine_similarity": 0, "epoch": 2.779123951537745, "grad_norm": 1.6048005019613714, "learning_rate": 4.0904383845357265e-06, "loss": 1.5373, "reason_loss": 0.5117236375808716, "step": 2982, "utility_loss": 1.0256208181381226 }, { "cosine_similarity": 0, "epoch": 2.7800559179869526, "grad_norm": 0.7935511136768755, "learning_rate": 4.0731791508457025e-06, "loss": 0.8129, "reason_loss": 0.447958379983902, "step": 2983, "utility_loss": 0.3649137020111084 }, { "cosine_similarity": 0, "epoch": 2.7809878844361604, "grad_norm": 0.8558047824512113, "learning_rate": 4.0559199171556784e-06, "loss": 1.1302, "reason_loss": 0.48142266273498535, "step": 2984, "utility_loss": 0.6488157510757446 }, { "cosine_similarity": 0, "epoch": 2.781919850885368, "grad_norm": 0.9141735946779098, "learning_rate": 4.0386606834656544e-06, "loss": 1.1054, "reason_loss": 0.4635600745677948, "step": 2985, "utility_loss": 0.6418517827987671 }, { "cosine_similarity": 0, "epoch": 2.782851817334576, "grad_norm": 1.079162229308755, "learning_rate": 4.02140144977563e-06, "loss": 0.8856, "reason_loss": 0.480001300573349, "step": 2986, "utility_loss": 0.4056428074836731 }, { "cosine_similarity": 0, "epoch": 2.7837837837837838, "grad_norm": 1.0278795984677727, "learning_rate": 4.0041422160856055e-06, "loss": 0.9752, "reason_loss": 0.47478747367858887, "step": 2987, "utility_loss": 0.5004560947418213 }, { "cosine_similarity": 0, "epoch": 2.7847157502329916, "grad_norm": 0.9285582709415062, "learning_rate": 3.9868829823955815e-06, "loss": 1.191, "reason_loss": 0.4531548321247101, "step": 2988, "utility_loss": 0.737825870513916 }, { "cosine_similarity": 0, "epoch": 2.7856477166821993, "grad_norm": 0.903797643061516, "learning_rate": 3.9696237487055575e-06, "loss": 0.9917, "reason_loss": 0.4488396644592285, "step": 2989, "utility_loss": 0.5428358912467957 }, { "cosine_similarity": 0, "epoch": 2.786579683131407, "grad_norm": 0.9912763610019066, "learning_rate": 3.9523645150155335e-06, "loss": 0.9616, "reason_loss": 0.46279382705688477, "step": 2990, "utility_loss": 0.49885326623916626 }, { "cosine_similarity": 0, "epoch": 2.7875116495806154, "grad_norm": 0.8437297662047566, "learning_rate": 3.9351052813255095e-06, "loss": 0.8301, "reason_loss": 0.4502365291118622, "step": 2991, "utility_loss": 0.37982237339019775 }, { "cosine_similarity": 0, "epoch": 2.7884436160298227, "grad_norm": 0.8889856669723617, "learning_rate": 3.9178460476354855e-06, "loss": 1.0078, "reason_loss": 0.4390002489089966, "step": 2992, "utility_loss": 0.5687587261199951 }, { "cosine_similarity": 0, "epoch": 2.789375582479031, "grad_norm": 0.9136642644555139, "learning_rate": 3.900586813945461e-06, "loss": 1.0788, "reason_loss": 0.47495412826538086, "step": 2993, "utility_loss": 0.6038815975189209 }, { "cosine_similarity": 0, "epoch": 2.7903075489282387, "grad_norm": 1.0462701407072055, "learning_rate": 3.883327580255437e-06, "loss": 1.0071, "reason_loss": 0.4748133718967438, "step": 2994, "utility_loss": 0.5323230028152466 }, { "cosine_similarity": 0, "epoch": 2.7912395153774465, "grad_norm": 0.896629563621684, "learning_rate": 3.8660683465654126e-06, "loss": 1.1282, "reason_loss": 0.45096415281295776, "step": 2995, "utility_loss": 0.6772739887237549 }, { "cosine_similarity": 0, "epoch": 2.7921714818266543, "grad_norm": 0.9492375461946233, "learning_rate": 3.8488091128753886e-06, "loss": 0.8801, "reason_loss": 0.4684450030326843, "step": 2996, "utility_loss": 0.4117037057876587 }, { "cosine_similarity": 0, "epoch": 2.793103448275862, "grad_norm": 0.824124939522337, "learning_rate": 3.8315498791853645e-06, "loss": 0.8786, "reason_loss": 0.4473063051700592, "step": 2997, "utility_loss": 0.4312450885772705 }, { "cosine_similarity": 0, "epoch": 2.79403541472507, "grad_norm": 0.923001392291915, "learning_rate": 3.81429064549534e-06, "loss": 1.103, "reason_loss": 0.4604325294494629, "step": 2998, "utility_loss": 0.6425780653953552 }, { "cosine_similarity": 0, "epoch": 2.7949673811742777, "grad_norm": 0.9122972748535169, "learning_rate": 3.7970314118053157e-06, "loss": 1.2375, "reason_loss": 0.46687135100364685, "step": 2999, "utility_loss": 0.770581841468811 }, { "cosine_similarity": 0, "epoch": 2.7958993476234855, "grad_norm": 0.832499133366245, "learning_rate": 3.779772178115292e-06, "loss": 1.0814, "reason_loss": 0.441769540309906, "step": 3000, "utility_loss": 0.6396691203117371 }, { "cosine_similarity": 0, "epoch": 2.7968313140726933, "grad_norm": 0.9037036781392711, "learning_rate": 3.7625129444252676e-06, "loss": 0.9861, "reason_loss": 0.4721672832965851, "step": 3001, "utility_loss": 0.5139462947845459 }, { "cosine_similarity": 0, "epoch": 2.7977632805219015, "grad_norm": 0.798894570261432, "learning_rate": 3.7452537107352436e-06, "loss": 1.1247, "reason_loss": 0.470725417137146, "step": 3002, "utility_loss": 0.6539466381072998 }, { "cosine_similarity": 0, "epoch": 2.798695246971109, "grad_norm": 0.8725441835047015, "learning_rate": 3.727994477045219e-06, "loss": 0.9633, "reason_loss": 0.44072550535202026, "step": 3003, "utility_loss": 0.5225356221199036 }, { "cosine_similarity": 0, "epoch": 2.799627213420317, "grad_norm": 0.97768251752777, "learning_rate": 3.7107352433551947e-06, "loss": 1.014, "reason_loss": 0.47546181082725525, "step": 3004, "utility_loss": 0.5384907722473145 }, { "cosine_similarity": 0, "epoch": 2.800559179869525, "grad_norm": 0.9446043038102714, "learning_rate": 3.693476009665171e-06, "loss": 0.9799, "reason_loss": 0.4756319522857666, "step": 3005, "utility_loss": 0.5042714476585388 }, { "cosine_similarity": 0, "epoch": 2.8014911463187326, "grad_norm": 0.8509449929464012, "learning_rate": 3.676216775975147e-06, "loss": 1.0428, "reason_loss": 0.509768009185791, "step": 3006, "utility_loss": 0.5329900979995728 }, { "cosine_similarity": 0, "epoch": 2.8024231127679404, "grad_norm": 0.8905947198773057, "learning_rate": 3.6589575422851227e-06, "loss": 1.1385, "reason_loss": 0.5218290686607361, "step": 3007, "utility_loss": 0.6166383624076843 }, { "cosine_similarity": 0, "epoch": 2.803355079217148, "grad_norm": 1.1459301494105103, "learning_rate": 3.6416983085950983e-06, "loss": 1.3885, "reason_loss": 0.458257794380188, "step": 3008, "utility_loss": 0.9302200078964233 }, { "cosine_similarity": 0, "epoch": 2.804287045666356, "grad_norm": 1.0398092391529377, "learning_rate": 3.6244390749050742e-06, "loss": 1.2036, "reason_loss": 0.42973172664642334, "step": 3009, "utility_loss": 0.7739064693450928 }, { "cosine_similarity": 0, "epoch": 2.805219012115564, "grad_norm": 0.976254393752245, "learning_rate": 3.6071798412150506e-06, "loss": 1.0795, "reason_loss": 0.4544910788536072, "step": 3010, "utility_loss": 0.6250523328781128 }, { "cosine_similarity": 0, "epoch": 2.8061509785647716, "grad_norm": 0.9254832449592397, "learning_rate": 3.589920607525026e-06, "loss": 1.0557, "reason_loss": 0.4420658051967621, "step": 3011, "utility_loss": 0.6136704087257385 }, { "cosine_similarity": 0, "epoch": 2.8070829450139794, "grad_norm": 0.6928371671324218, "learning_rate": 3.5726613738350018e-06, "loss": 0.8451, "reason_loss": 0.44233429431915283, "step": 3012, "utility_loss": 0.4027700424194336 }, { "cosine_similarity": 0, "epoch": 2.8080149114631876, "grad_norm": 1.018617010248058, "learning_rate": 3.5554021401449777e-06, "loss": 1.1369, "reason_loss": 0.44241946935653687, "step": 3013, "utility_loss": 0.6944748163223267 }, { "cosine_similarity": 0, "epoch": 2.808946877912395, "grad_norm": 1.031098073285426, "learning_rate": 3.5381429064549533e-06, "loss": 0.9542, "reason_loss": 0.4570646286010742, "step": 3014, "utility_loss": 0.49712252616882324 }, { "cosine_similarity": 0, "epoch": 2.809878844361603, "grad_norm": 0.9899056046347001, "learning_rate": 3.5208836727649297e-06, "loss": 1.1446, "reason_loss": 0.46863168478012085, "step": 3015, "utility_loss": 0.6760154962539673 }, { "cosine_similarity": 0, "epoch": 2.810810810810811, "grad_norm": 0.9607547401674581, "learning_rate": 3.5036244390749053e-06, "loss": 0.8871, "reason_loss": 0.4653913974761963, "step": 3016, "utility_loss": 0.42170199751853943 }, { "cosine_similarity": 0, "epoch": 2.8117427772600188, "grad_norm": 0.955783884584018, "learning_rate": 3.4863652053848813e-06, "loss": 1.0273, "reason_loss": 0.426718533039093, "step": 3017, "utility_loss": 0.6005995273590088 }, { "cosine_similarity": 0, "epoch": 2.8126747437092265, "grad_norm": 1.407236967384187, "learning_rate": 3.469105971694857e-06, "loss": 1.2685, "reason_loss": 0.4550444781780243, "step": 3018, "utility_loss": 0.8134667873382568 }, { "cosine_similarity": 0, "epoch": 2.8136067101584343, "grad_norm": 0.800777704240654, "learning_rate": 3.4518467380048324e-06, "loss": 1.0301, "reason_loss": 0.45598503947257996, "step": 3019, "utility_loss": 0.5741139650344849 }, { "cosine_similarity": 0, "epoch": 2.814538676607642, "grad_norm": 1.0156725647037255, "learning_rate": 3.434587504314809e-06, "loss": 1.0163, "reason_loss": 0.4652462303638458, "step": 3020, "utility_loss": 0.5510981678962708 }, { "cosine_similarity": 0, "epoch": 2.81547064305685, "grad_norm": 1.1190839364448821, "learning_rate": 3.4173282706247848e-06, "loss": 1.1601, "reason_loss": 0.4793165326118469, "step": 3021, "utility_loss": 0.6807788610458374 }, { "cosine_similarity": 0, "epoch": 2.8164026095060577, "grad_norm": 1.0129477816859893, "learning_rate": 3.4000690369347603e-06, "loss": 1.0952, "reason_loss": 0.4759479761123657, "step": 3022, "utility_loss": 0.6192711591720581 }, { "cosine_similarity": 0, "epoch": 2.8173345759552655, "grad_norm": 0.8441495976062422, "learning_rate": 3.382809803244736e-06, "loss": 0.8852, "reason_loss": 0.4524301588535309, "step": 3023, "utility_loss": 0.43275099992752075 }, { "cosine_similarity": 0, "epoch": 2.8182665424044733, "grad_norm": 0.7961245509480788, "learning_rate": 3.365550569554712e-06, "loss": 0.8355, "reason_loss": 0.470296710729599, "step": 3024, "utility_loss": 0.3651812672615051 }, { "cosine_similarity": 0, "epoch": 2.819198508853681, "grad_norm": 1.030830444045222, "learning_rate": 3.348291335864688e-06, "loss": 1.2723, "reason_loss": 0.47644585371017456, "step": 3025, "utility_loss": 0.7958544492721558 }, { "cosine_similarity": 0, "epoch": 2.8201304753028893, "grad_norm": 0.9362095802185896, "learning_rate": 3.331032102174664e-06, "loss": 0.986, "reason_loss": 0.48839572072029114, "step": 3026, "utility_loss": 0.4976394772529602 }, { "cosine_similarity": 0, "epoch": 2.821062441752097, "grad_norm": 1.131006939270791, "learning_rate": 3.3137728684846394e-06, "loss": 1.38, "reason_loss": 0.4643917679786682, "step": 3027, "utility_loss": 0.9156269431114197 }, { "cosine_similarity": 0, "epoch": 2.821994408201305, "grad_norm": 1.033335005464041, "learning_rate": 3.296513634794615e-06, "loss": 1.0951, "reason_loss": 0.5136392712593079, "step": 3028, "utility_loss": 0.5814770460128784 }, { "cosine_similarity": 0, "epoch": 2.8229263746505127, "grad_norm": 0.8134790418608535, "learning_rate": 3.279254401104591e-06, "loss": 0.9916, "reason_loss": 0.4684484601020813, "step": 3029, "utility_loss": 0.5231550931930542 }, { "cosine_similarity": 0, "epoch": 2.8238583410997204, "grad_norm": 0.9631735590086171, "learning_rate": 3.2619951674145674e-06, "loss": 0.9942, "reason_loss": 0.44575244188308716, "step": 3030, "utility_loss": 0.5484734773635864 }, { "cosine_similarity": 0, "epoch": 2.8247903075489282, "grad_norm": 0.9303201424495516, "learning_rate": 3.244735933724543e-06, "loss": 1.018, "reason_loss": 0.47320038080215454, "step": 3031, "utility_loss": 0.5447902083396912 }, { "cosine_similarity": 0, "epoch": 2.825722273998136, "grad_norm": 0.9533589136666193, "learning_rate": 3.2274767000345185e-06, "loss": 1.014, "reason_loss": 0.4626644253730774, "step": 3032, "utility_loss": 0.5513545274734497 }, { "cosine_similarity": 0, "epoch": 2.826654240447344, "grad_norm": 0.9551956496846796, "learning_rate": 3.2102174663444945e-06, "loss": 0.9397, "reason_loss": 0.457547128200531, "step": 3033, "utility_loss": 0.4821985960006714 }, { "cosine_similarity": 0, "epoch": 2.8275862068965516, "grad_norm": 0.9232211581429796, "learning_rate": 3.19295823265447e-06, "loss": 0.9039, "reason_loss": 0.45170727372169495, "step": 3034, "utility_loss": 0.4522312879562378 }, { "cosine_similarity": 0, "epoch": 2.8285181733457594, "grad_norm": 1.0601043554091671, "learning_rate": 3.1756989989644464e-06, "loss": 1.2647, "reason_loss": 0.4858590066432953, "step": 3035, "utility_loss": 0.7788196206092834 }, { "cosine_similarity": 0, "epoch": 2.829450139794967, "grad_norm": 0.9959036131600278, "learning_rate": 3.158439765274422e-06, "loss": 0.9366, "reason_loss": 0.4368252456188202, "step": 3036, "utility_loss": 0.49977636337280273 }, { "cosine_similarity": 0, "epoch": 2.8303821062441754, "grad_norm": 1.6717681919006528, "learning_rate": 3.141180531584398e-06, "loss": 1.299, "reason_loss": 0.468901664018631, "step": 3037, "utility_loss": 0.8300566673278809 }, { "cosine_similarity": 0, "epoch": 2.831314072693383, "grad_norm": 1.00744913710527, "learning_rate": 3.1239212978943735e-06, "loss": 1.1433, "reason_loss": 0.4694341719150543, "step": 3038, "utility_loss": 0.6738861799240112 }, { "cosine_similarity": 0, "epoch": 2.832246039142591, "grad_norm": 1.021215525247517, "learning_rate": 3.1066620642043495e-06, "loss": 1.0672, "reason_loss": 0.4708684980869293, "step": 3039, "utility_loss": 0.5963707566261292 }, { "cosine_similarity": 0, "epoch": 2.8331780055917988, "grad_norm": 0.925139159424529, "learning_rate": 3.089402830514325e-06, "loss": 1.1077, "reason_loss": 0.443734347820282, "step": 3040, "utility_loss": 0.6639842987060547 }, { "cosine_similarity": 0, "epoch": 2.8341099720410066, "grad_norm": 0.8933586205968205, "learning_rate": 3.072143596824301e-06, "loss": 0.9716, "reason_loss": 0.4384438395500183, "step": 3041, "utility_loss": 0.5331867933273315 }, { "cosine_similarity": 0, "epoch": 2.8350419384902144, "grad_norm": 0.8008441498419893, "learning_rate": 3.054884363134277e-06, "loss": 0.8506, "reason_loss": 0.4508257508277893, "step": 3042, "utility_loss": 0.3997976779937744 }, { "cosine_similarity": 0, "epoch": 2.835973904939422, "grad_norm": 0.8165305589034116, "learning_rate": 3.0376251294442526e-06, "loss": 1.0706, "reason_loss": 0.46704405546188354, "step": 3043, "utility_loss": 0.6035162210464478 }, { "cosine_similarity": 0, "epoch": 2.83690587138863, "grad_norm": 1.0453575327590985, "learning_rate": 3.0203658957542286e-06, "loss": 0.9402, "reason_loss": 0.46062803268432617, "step": 3044, "utility_loss": 0.4796138405799866 }, { "cosine_similarity": 0, "epoch": 2.8378378378378377, "grad_norm": 0.9239753260891167, "learning_rate": 3.0031066620642046e-06, "loss": 1.0123, "reason_loss": 0.4601888954639435, "step": 3045, "utility_loss": 0.5521153211593628 }, { "cosine_similarity": 0, "epoch": 2.8387698042870455, "grad_norm": 0.8254801515106752, "learning_rate": 2.98584742837418e-06, "loss": 1.1132, "reason_loss": 0.46228551864624023, "step": 3046, "utility_loss": 0.6508709192276001 }, { "cosine_similarity": 0, "epoch": 2.8397017707362533, "grad_norm": 0.8164138143559628, "learning_rate": 2.968588194684156e-06, "loss": 0.9418, "reason_loss": 0.4526965022087097, "step": 3047, "utility_loss": 0.4890942871570587 }, { "cosine_similarity": 0, "epoch": 2.8406337371854615, "grad_norm": 0.8998901083753961, "learning_rate": 2.951328960994132e-06, "loss": 0.8192, "reason_loss": 0.4440254867076874, "step": 3048, "utility_loss": 0.37516677379608154 }, { "cosine_similarity": 0, "epoch": 2.8415657036346693, "grad_norm": 1.1084023546756554, "learning_rate": 2.934069727304108e-06, "loss": 1.0558, "reason_loss": 0.5028755068778992, "step": 3049, "utility_loss": 0.5529265999794006 }, { "cosine_similarity": 0, "epoch": 2.842497670083877, "grad_norm": 0.9941512549037403, "learning_rate": 2.9168104936140837e-06, "loss": 0.9887, "reason_loss": 0.4650868773460388, "step": 3050, "utility_loss": 0.5236178636550903 }, { "cosine_similarity": 0, "epoch": 2.843429636533085, "grad_norm": 1.1700405650839503, "learning_rate": 2.8995512599240592e-06, "loss": 0.9862, "reason_loss": 0.4551953673362732, "step": 3051, "utility_loss": 0.5310406684875488 }, { "cosine_similarity": 0, "epoch": 2.8443616029822927, "grad_norm": 1.120797300594317, "learning_rate": 2.882292026234035e-06, "loss": 0.9458, "reason_loss": 0.4803621172904968, "step": 3052, "utility_loss": 0.4654480516910553 }, { "cosine_similarity": 0, "epoch": 2.8452935694315005, "grad_norm": 0.9512370430576633, "learning_rate": 2.865032792544011e-06, "loss": 0.9577, "reason_loss": 0.4380106031894684, "step": 3053, "utility_loss": 0.519673764705658 }, { "cosine_similarity": 0, "epoch": 2.8462255358807083, "grad_norm": 1.0402074321620953, "learning_rate": 2.847773558853987e-06, "loss": 1.0067, "reason_loss": 0.45589974522590637, "step": 3054, "utility_loss": 0.5507528185844421 }, { "cosine_similarity": 0, "epoch": 2.847157502329916, "grad_norm": 1.2718135180980303, "learning_rate": 2.8305143251639627e-06, "loss": 1.176, "reason_loss": 0.47890961170196533, "step": 3055, "utility_loss": 0.6970915794372559 }, { "cosine_similarity": 0, "epoch": 2.848089468779124, "grad_norm": 0.9401453663258834, "learning_rate": 2.8132550914739387e-06, "loss": 1.062, "reason_loss": 0.46427425742149353, "step": 3056, "utility_loss": 0.5977587699890137 }, { "cosine_similarity": 0, "epoch": 2.8490214352283316, "grad_norm": 1.0095067904273993, "learning_rate": 2.7959958577839147e-06, "loss": 1.2331, "reason_loss": 0.4601019620895386, "step": 3057, "utility_loss": 0.7730255722999573 }, { "cosine_similarity": 0, "epoch": 2.8499534016775394, "grad_norm": 1.0622725123688064, "learning_rate": 2.7787366240938903e-06, "loss": 1.0249, "reason_loss": 0.4567135274410248, "step": 3058, "utility_loss": 0.5681774616241455 }, { "cosine_similarity": 0, "epoch": 2.8508853681267476, "grad_norm": 1.0583912574396508, "learning_rate": 2.7614773904038662e-06, "loss": 1.1897, "reason_loss": 0.462702214717865, "step": 3059, "utility_loss": 0.7269521951675415 }, { "cosine_similarity": 0, "epoch": 2.8518173345759554, "grad_norm": 1.0586909545029146, "learning_rate": 2.7442181567138422e-06, "loss": 1.1104, "reason_loss": 0.44577062129974365, "step": 3060, "utility_loss": 0.6645985245704651 }, { "cosine_similarity": 0, "epoch": 2.852749301025163, "grad_norm": 0.914140736299721, "learning_rate": 2.726958923023818e-06, "loss": 0.9759, "reason_loss": 0.45881927013397217, "step": 3061, "utility_loss": 0.5170981884002686 }, { "cosine_similarity": 0, "epoch": 2.853681267474371, "grad_norm": 0.957315748944671, "learning_rate": 2.7096996893337938e-06, "loss": 0.9656, "reason_loss": 0.4495736360549927, "step": 3062, "utility_loss": 0.5160250067710876 }, { "cosine_similarity": 0, "epoch": 2.854613233923579, "grad_norm": 1.117153139774324, "learning_rate": 2.6924404556437693e-06, "loss": 1.0731, "reason_loss": 0.46392953395843506, "step": 3063, "utility_loss": 0.6091887950897217 }, { "cosine_similarity": 0, "epoch": 2.8555452003727866, "grad_norm": 0.9276060949191947, "learning_rate": 2.6751812219537453e-06, "loss": 1.0275, "reason_loss": 0.47254931926727295, "step": 3064, "utility_loss": 0.5549206733703613 }, { "cosine_similarity": 0, "epoch": 2.8564771668219944, "grad_norm": 0.9343467798797531, "learning_rate": 2.6579219882637213e-06, "loss": 0.9423, "reason_loss": 0.4525710642337799, "step": 3065, "utility_loss": 0.48974061012268066 }, { "cosine_similarity": 0, "epoch": 2.857409133271202, "grad_norm": 1.2836106102922091, "learning_rate": 2.640662754573697e-06, "loss": 1.2011, "reason_loss": 0.47880756855010986, "step": 3066, "utility_loss": 0.7222995162010193 }, { "cosine_similarity": 0, "epoch": 2.85834109972041, "grad_norm": 1.0302730037184664, "learning_rate": 2.623403520883673e-06, "loss": 1.1713, "reason_loss": 0.4488908052444458, "step": 3067, "utility_loss": 0.7223724126815796 }, { "cosine_similarity": 0, "epoch": 2.8592730661696177, "grad_norm": 0.9640608379704683, "learning_rate": 2.606144287193649e-06, "loss": 0.9886, "reason_loss": 0.4444206953048706, "step": 3068, "utility_loss": 0.5441589951515198 }, { "cosine_similarity": 0, "epoch": 2.8602050326188255, "grad_norm": 0.7586671150385255, "learning_rate": 2.588885053503625e-06, "loss": 0.845, "reason_loss": 0.46866515278816223, "step": 3069, "utility_loss": 0.3763068616390228 }, { "cosine_similarity": 0, "epoch": 2.8611369990680338, "grad_norm": 1.13758474642972, "learning_rate": 2.5716258198136004e-06, "loss": 1.388, "reason_loss": 0.4584062993526459, "step": 3070, "utility_loss": 0.9296420812606812 }, { "cosine_similarity": 0, "epoch": 2.862068965517241, "grad_norm": 1.0650831073685054, "learning_rate": 2.554366586123576e-06, "loss": 1.1767, "reason_loss": 0.4570639133453369, "step": 3071, "utility_loss": 0.7196266651153564 }, { "cosine_similarity": 0, "epoch": 2.8630009319664493, "grad_norm": 0.9710430307906737, "learning_rate": 2.5371073524335523e-06, "loss": 1.1128, "reason_loss": 0.4798569977283478, "step": 3072, "utility_loss": 0.632967472076416 }, { "cosine_similarity": 0, "epoch": 2.863932898415657, "grad_norm": 1.0989863362006749, "learning_rate": 2.519848118743528e-06, "loss": 1.247, "reason_loss": 0.4568016231060028, "step": 3073, "utility_loss": 0.7901898622512817 }, { "cosine_similarity": 0, "epoch": 2.864864864864865, "grad_norm": 0.9776291087263843, "learning_rate": 2.502588885053504e-06, "loss": 0.9782, "reason_loss": 0.46356746554374695, "step": 3074, "utility_loss": 0.5146498084068298 }, { "cosine_similarity": 0, "epoch": 2.8657968313140727, "grad_norm": 0.9161087457377, "learning_rate": 2.4853296513634795e-06, "loss": 1.0241, "reason_loss": 0.4610590934753418, "step": 3075, "utility_loss": 0.563083291053772 }, { "cosine_similarity": 0, "epoch": 2.8667287977632805, "grad_norm": 0.94825928294605, "learning_rate": 2.4680704176734554e-06, "loss": 1.1357, "reason_loss": 0.4577503204345703, "step": 3076, "utility_loss": 0.6779557466506958 }, { "cosine_similarity": 0, "epoch": 2.8676607642124883, "grad_norm": 0.8896689595198948, "learning_rate": 2.4508111839834314e-06, "loss": 1.1177, "reason_loss": 0.460666298866272, "step": 3077, "utility_loss": 0.6570133566856384 }, { "cosine_similarity": 0, "epoch": 2.868592730661696, "grad_norm": 1.0203583680693917, "learning_rate": 2.433551950293407e-06, "loss": 1.312, "reason_loss": 0.4630095958709717, "step": 3078, "utility_loss": 0.8489698767662048 }, { "cosine_similarity": 0, "epoch": 2.869524697110904, "grad_norm": 0.7358448440133282, "learning_rate": 2.416292716603383e-06, "loss": 0.7978, "reason_loss": 0.4845331013202667, "step": 3079, "utility_loss": 0.3132638931274414 }, { "cosine_similarity": 0, "epoch": 2.8704566635601116, "grad_norm": 1.0621704331245212, "learning_rate": 2.399033482913359e-06, "loss": 1.2633, "reason_loss": 0.4746095538139343, "step": 3080, "utility_loss": 0.788672685623169 }, { "cosine_similarity": 0, "epoch": 2.87138863000932, "grad_norm": 1.0164236244049196, "learning_rate": 2.3817742492233345e-06, "loss": 1.12, "reason_loss": 0.4678283929824829, "step": 3081, "utility_loss": 0.6522212624549866 }, { "cosine_similarity": 0, "epoch": 2.872320596458527, "grad_norm": 1.010853115985716, "learning_rate": 2.3645150155333105e-06, "loss": 0.8962, "reason_loss": 0.45258983969688416, "step": 3082, "utility_loss": 0.44356369972229004 }, { "cosine_similarity": 0, "epoch": 2.8732525629077355, "grad_norm": 0.9714986290634071, "learning_rate": 2.347255781843286e-06, "loss": 0.9408, "reason_loss": 0.43555140495300293, "step": 3083, "utility_loss": 0.5052205920219421 }, { "cosine_similarity": 0, "epoch": 2.8741845293569432, "grad_norm": 0.9325554471951354, "learning_rate": 2.3299965481532625e-06, "loss": 1.069, "reason_loss": 0.4674336910247803, "step": 3084, "utility_loss": 0.6015260219573975 }, { "cosine_similarity": 0, "epoch": 2.875116495806151, "grad_norm": 0.8500523530904026, "learning_rate": 2.312737314463238e-06, "loss": 0.8842, "reason_loss": 0.4336865544319153, "step": 3085, "utility_loss": 0.45051074028015137 }, { "cosine_similarity": 0, "epoch": 2.876048462255359, "grad_norm": 1.0794934930402242, "learning_rate": 2.2954780807732136e-06, "loss": 1.1029, "reason_loss": 0.4325379431247711, "step": 3086, "utility_loss": 0.670356273651123 }, { "cosine_similarity": 0, "epoch": 2.8769804287045666, "grad_norm": 0.7960120851087239, "learning_rate": 2.2782188470831896e-06, "loss": 0.9321, "reason_loss": 0.478263795375824, "step": 3087, "utility_loss": 0.4538244605064392 }, { "cosine_similarity": 0, "epoch": 2.8779123951537744, "grad_norm": 0.926133151590105, "learning_rate": 2.2609596133931656e-06, "loss": 1.1701, "reason_loss": 0.4423168897628784, "step": 3088, "utility_loss": 0.7277486324310303 }, { "cosine_similarity": 0, "epoch": 2.878844361602982, "grad_norm": 0.9748425201505811, "learning_rate": 2.243700379703141e-06, "loss": 0.9064, "reason_loss": 0.4628893733024597, "step": 3089, "utility_loss": 0.4434654116630554 }, { "cosine_similarity": 0, "epoch": 2.87977632805219, "grad_norm": 0.8687072139887867, "learning_rate": 2.226441146013117e-06, "loss": 0.9553, "reason_loss": 0.4734022319316864, "step": 3090, "utility_loss": 0.4818898141384125 }, { "cosine_similarity": 0, "epoch": 2.8807082945013978, "grad_norm": 0.9044440087318203, "learning_rate": 2.2091819123230927e-06, "loss": 1.045, "reason_loss": 0.4557688236236572, "step": 3091, "utility_loss": 0.5892547369003296 }, { "cosine_similarity": 0, "epoch": 2.881640260950606, "grad_norm": 1.0332526551642336, "learning_rate": 2.191922678633069e-06, "loss": 0.9155, "reason_loss": 0.45567893981933594, "step": 3092, "utility_loss": 0.45981574058532715 }, { "cosine_similarity": 0, "epoch": 2.8825722273998133, "grad_norm": 0.8433385340861456, "learning_rate": 2.1746634449430446e-06, "loss": 0.9362, "reason_loss": 0.4682399034500122, "step": 3093, "utility_loss": 0.4679200351238251 }, { "cosine_similarity": 0, "epoch": 2.8835041938490216, "grad_norm": 0.8980859480543398, "learning_rate": 2.15740421125302e-06, "loss": 0.9359, "reason_loss": 0.4653862714767456, "step": 3094, "utility_loss": 0.470486044883728 }, { "cosine_similarity": 0, "epoch": 2.8844361602982294, "grad_norm": 0.9181023201539332, "learning_rate": 2.140144977562996e-06, "loss": 1.0731, "reason_loss": 0.44174516201019287, "step": 3095, "utility_loss": 0.6313199400901794 }, { "cosine_similarity": 0, "epoch": 2.885368126747437, "grad_norm": 1.0027075843524411, "learning_rate": 2.122885743872972e-06, "loss": 0.9516, "reason_loss": 0.4743691086769104, "step": 3096, "utility_loss": 0.4771978259086609 }, { "cosine_similarity": 0, "epoch": 2.886300093196645, "grad_norm": 1.047564725847709, "learning_rate": 2.105626510182948e-06, "loss": 1.1037, "reason_loss": 0.47789329290390015, "step": 3097, "utility_loss": 0.6258127689361572 }, { "cosine_similarity": 0, "epoch": 2.8872320596458527, "grad_norm": 0.7637430923365871, "learning_rate": 2.0883672764929237e-06, "loss": 0.9724, "reason_loss": 0.4762330651283264, "step": 3098, "utility_loss": 0.4961407482624054 }, { "cosine_similarity": 0, "epoch": 2.8881640260950605, "grad_norm": 0.954145118984612, "learning_rate": 2.0711080428028997e-06, "loss": 0.9601, "reason_loss": 0.44667670130729675, "step": 3099, "utility_loss": 0.5134152770042419 }, { "cosine_similarity": 0, "epoch": 2.8890959925442683, "grad_norm": 0.9728297643357388, "learning_rate": 2.0538488091128757e-06, "loss": 1.0177, "reason_loss": 0.4578479528427124, "step": 3100, "utility_loss": 0.5598124265670776 }, { "cosine_similarity": 0, "epoch": 2.890027958993476, "grad_norm": 0.891020756693992, "learning_rate": 2.0365895754228512e-06, "loss": 0.8057, "reason_loss": 0.4396228492259979, "step": 3101, "utility_loss": 0.3660958409309387 }, { "cosine_similarity": 0, "epoch": 2.890959925442684, "grad_norm": 1.2687557169193577, "learning_rate": 2.0193303417328272e-06, "loss": 1.0836, "reason_loss": 0.4472969174385071, "step": 3102, "utility_loss": 0.6362907886505127 }, { "cosine_similarity": 0, "epoch": 2.891891891891892, "grad_norm": 0.8836664761808636, "learning_rate": 2.0020711080428028e-06, "loss": 0.9205, "reason_loss": 0.4433653950691223, "step": 3103, "utility_loss": 0.4771035313606262 }, { "cosine_similarity": 0, "epoch": 2.8928238583410995, "grad_norm": 1.0901405456519635, "learning_rate": 1.9848118743527788e-06, "loss": 1.1457, "reason_loss": 0.43953219056129456, "step": 3104, "utility_loss": 0.7061873078346252 }, { "cosine_similarity": 0, "epoch": 2.8937558247903077, "grad_norm": 0.7017801628779983, "learning_rate": 1.9675526406627547e-06, "loss": 0.9614, "reason_loss": 0.48081743717193604, "step": 3105, "utility_loss": 0.4805695414543152 }, { "cosine_similarity": 0, "epoch": 2.8946877912395155, "grad_norm": 0.891524143302647, "learning_rate": 1.9502934069727303e-06, "loss": 1.0127, "reason_loss": 0.468657523393631, "step": 3106, "utility_loss": 0.544029951095581 }, { "cosine_similarity": 0, "epoch": 2.8956197576887233, "grad_norm": 0.7686599575302026, "learning_rate": 1.9330341732827063e-06, "loss": 0.8606, "reason_loss": 0.45451924204826355, "step": 3107, "utility_loss": 0.4060328006744385 }, { "cosine_similarity": 0, "epoch": 2.896551724137931, "grad_norm": 1.1320733076776235, "learning_rate": 1.9157749395926823e-06, "loss": 1.3098, "reason_loss": 0.4830765724182129, "step": 3108, "utility_loss": 0.8266937732696533 }, { "cosine_similarity": 0, "epoch": 2.897483690587139, "grad_norm": 0.9821562378679483, "learning_rate": 1.8985157059026578e-06, "loss": 1.1499, "reason_loss": 0.48543494939804077, "step": 3109, "utility_loss": 0.6644191741943359 }, { "cosine_similarity": 0, "epoch": 2.8984156570363466, "grad_norm": 0.8595799030611673, "learning_rate": 1.8812564722126338e-06, "loss": 1.0959, "reason_loss": 0.466316282749176, "step": 3110, "utility_loss": 0.629546046257019 }, { "cosine_similarity": 0, "epoch": 2.8993476234855544, "grad_norm": 0.8019739168448562, "learning_rate": 1.8639972385226096e-06, "loss": 0.8827, "reason_loss": 0.45999616384506226, "step": 3111, "utility_loss": 0.42267894744873047 }, { "cosine_similarity": 0, "epoch": 2.900279589934762, "grad_norm": 0.6979091233854238, "learning_rate": 1.8467380048325856e-06, "loss": 0.7293, "reason_loss": 0.4506050646305084, "step": 3112, "utility_loss": 0.27866804599761963 }, { "cosine_similarity": 0, "epoch": 2.90121155638397, "grad_norm": 0.991897244446537, "learning_rate": 1.8294787711425613e-06, "loss": 1.1331, "reason_loss": 0.4560296833515167, "step": 3113, "utility_loss": 0.6770362257957458 }, { "cosine_similarity": 0, "epoch": 2.9021435228331782, "grad_norm": 0.9127246520854106, "learning_rate": 1.8122195374525371e-06, "loss": 0.8671, "reason_loss": 0.4116467535495758, "step": 3114, "utility_loss": 0.4554521441459656 }, { "cosine_similarity": 0, "epoch": 2.9030754892823856, "grad_norm": 0.9191892520706194, "learning_rate": 1.794960303762513e-06, "loss": 1.0723, "reason_loss": 0.463455468416214, "step": 3115, "utility_loss": 0.608820378780365 }, { "cosine_similarity": 0, "epoch": 2.904007455731594, "grad_norm": 0.8546533311510036, "learning_rate": 1.7777010700724889e-06, "loss": 1.0819, "reason_loss": 0.46540704369544983, "step": 3116, "utility_loss": 0.616495668888092 }, { "cosine_similarity": 0, "epoch": 2.9049394221808016, "grad_norm": 1.2486810419318195, "learning_rate": 1.7604418363824649e-06, "loss": 0.945, "reason_loss": 0.4645363688468933, "step": 3117, "utility_loss": 0.4805133640766144 }, { "cosine_similarity": 0, "epoch": 2.9058713886300094, "grad_norm": 0.9013722798435699, "learning_rate": 1.7431826026924406e-06, "loss": 1.0425, "reason_loss": 0.4586327075958252, "step": 3118, "utility_loss": 0.5838336944580078 }, { "cosine_similarity": 0, "epoch": 2.906803355079217, "grad_norm": 0.8774197271849373, "learning_rate": 1.7259233690024162e-06, "loss": 1.0204, "reason_loss": 0.4596273601055145, "step": 3119, "utility_loss": 0.5607998371124268 }, { "cosine_similarity": 0, "epoch": 2.907735321528425, "grad_norm": 1.039554399903328, "learning_rate": 1.7086641353123924e-06, "loss": 1.1834, "reason_loss": 0.4693416357040405, "step": 3120, "utility_loss": 0.7140159010887146 }, { "cosine_similarity": 0, "epoch": 2.9086672879776327, "grad_norm": 1.0859901202559983, "learning_rate": 1.691404901622368e-06, "loss": 1.0648, "reason_loss": 0.4425690770149231, "step": 3121, "utility_loss": 0.6222567558288574 }, { "cosine_similarity": 0, "epoch": 2.9095992544268405, "grad_norm": 0.9176943341560125, "learning_rate": 1.674145667932344e-06, "loss": 0.9507, "reason_loss": 0.4673770070075989, "step": 3122, "utility_loss": 0.4833316206932068 }, { "cosine_similarity": 0, "epoch": 2.9105312208760483, "grad_norm": 1.028815142352516, "learning_rate": 1.6568864342423197e-06, "loss": 1.0754, "reason_loss": 0.4534192383289337, "step": 3123, "utility_loss": 0.6220019459724426 }, { "cosine_similarity": 0, "epoch": 2.911463187325256, "grad_norm": 0.9470695875647174, "learning_rate": 1.6396272005522955e-06, "loss": 0.9974, "reason_loss": 0.43859416246414185, "step": 3124, "utility_loss": 0.5587908625602722 }, { "cosine_similarity": 0, "epoch": 2.9123951537744643, "grad_norm": 1.1386891391716312, "learning_rate": 1.6223679668622715e-06, "loss": 0.9807, "reason_loss": 0.4505559206008911, "step": 3125, "utility_loss": 0.5301423072814941 }, { "cosine_similarity": 0, "epoch": 2.9133271202236717, "grad_norm": 0.7876103315671968, "learning_rate": 1.6051087331722472e-06, "loss": 0.8037, "reason_loss": 0.43359825015068054, "step": 3126, "utility_loss": 0.37010639905929565 }, { "cosine_similarity": 0, "epoch": 2.91425908667288, "grad_norm": 1.0949376947146832, "learning_rate": 1.5878494994822232e-06, "loss": 1.1792, "reason_loss": 0.43942081928253174, "step": 3127, "utility_loss": 0.7397453188896179 }, { "cosine_similarity": 0, "epoch": 2.9151910531220877, "grad_norm": 0.9655808767021843, "learning_rate": 1.570590265792199e-06, "loss": 0.9854, "reason_loss": 0.44060879945755005, "step": 3128, "utility_loss": 0.5448251366615295 }, { "cosine_similarity": 0, "epoch": 2.9161230195712955, "grad_norm": 1.0247388639901893, "learning_rate": 1.5533310321021748e-06, "loss": 0.9209, "reason_loss": 0.4507002830505371, "step": 3129, "utility_loss": 0.4702398478984833 }, { "cosine_similarity": 0, "epoch": 2.9170549860205033, "grad_norm": 0.9295172429769623, "learning_rate": 1.5360717984121505e-06, "loss": 1.1088, "reason_loss": 0.47862508893013, "step": 3130, "utility_loss": 0.6301440000534058 }, { "cosine_similarity": 0, "epoch": 2.917986952469711, "grad_norm": 0.9839859336020008, "learning_rate": 1.5188125647221263e-06, "loss": 0.9169, "reason_loss": 0.4305906295776367, "step": 3131, "utility_loss": 0.4863433539867401 }, { "cosine_similarity": 0, "epoch": 2.918918918918919, "grad_norm": 0.9290346570327734, "learning_rate": 1.5015533310321023e-06, "loss": 1.1915, "reason_loss": 0.4838207960128784, "step": 3132, "utility_loss": 0.7076407670974731 }, { "cosine_similarity": 0, "epoch": 2.9198508853681266, "grad_norm": 1.2122649452756948, "learning_rate": 1.484294097342078e-06, "loss": 1.1436, "reason_loss": 0.4673719108104706, "step": 3133, "utility_loss": 0.6762592792510986 }, { "cosine_similarity": 0, "epoch": 2.9207828518173344, "grad_norm": 0.9445554658788411, "learning_rate": 1.467034863652054e-06, "loss": 0.9466, "reason_loss": 0.4619872570037842, "step": 3134, "utility_loss": 0.48459306359291077 }, { "cosine_similarity": 0, "epoch": 2.9217148182665422, "grad_norm": 1.0040599782217217, "learning_rate": 1.4497756299620296e-06, "loss": 1.0633, "reason_loss": 0.47179263830184937, "step": 3135, "utility_loss": 0.5915032029151917 }, { "cosine_similarity": 0, "epoch": 2.9226467847157505, "grad_norm": 0.7370879349068477, "learning_rate": 1.4325163962720056e-06, "loss": 0.7633, "reason_loss": 0.45078569650650024, "step": 3136, "utility_loss": 0.3124915361404419 }, { "cosine_similarity": 0, "epoch": 2.923578751164958, "grad_norm": 0.9077648684555278, "learning_rate": 1.4152571625819814e-06, "loss": 0.812, "reason_loss": 0.44019705057144165, "step": 3137, "utility_loss": 0.37178829312324524 }, { "cosine_similarity": 0, "epoch": 2.924510717614166, "grad_norm": 1.0051175823214904, "learning_rate": 1.3979979288919574e-06, "loss": 1.0164, "reason_loss": 0.44862645864486694, "step": 3138, "utility_loss": 0.567787766456604 }, { "cosine_similarity": 0, "epoch": 2.925442684063374, "grad_norm": 0.7999983286712452, "learning_rate": 1.3807386952019331e-06, "loss": 1.0856, "reason_loss": 0.440654456615448, "step": 3139, "utility_loss": 0.6449127197265625 }, { "cosine_similarity": 0, "epoch": 2.9263746505125816, "grad_norm": 0.8537163770277133, "learning_rate": 1.363479461511909e-06, "loss": 0.8876, "reason_loss": 0.4642489552497864, "step": 3140, "utility_loss": 0.42330390214920044 }, { "cosine_similarity": 0, "epoch": 2.9273066169617894, "grad_norm": 0.9343309849801665, "learning_rate": 1.3462202278218847e-06, "loss": 1.1356, "reason_loss": 0.43220019340515137, "step": 3141, "utility_loss": 0.7033551931381226 }, { "cosine_similarity": 0, "epoch": 2.928238583410997, "grad_norm": 0.9911536428143922, "learning_rate": 1.3289609941318607e-06, "loss": 1.0985, "reason_loss": 0.4567262530326843, "step": 3142, "utility_loss": 0.641778826713562 }, { "cosine_similarity": 0, "epoch": 2.929170549860205, "grad_norm": 0.8248208242305618, "learning_rate": 1.3117017604418364e-06, "loss": 0.9163, "reason_loss": 0.47121769189834595, "step": 3143, "utility_loss": 0.4450676441192627 }, { "cosine_similarity": 0, "epoch": 2.9301025163094128, "grad_norm": 0.9833214381722342, "learning_rate": 1.2944425267518124e-06, "loss": 1.1151, "reason_loss": 0.502205491065979, "step": 3144, "utility_loss": 0.6129385232925415 }, { "cosine_similarity": 0, "epoch": 2.9310344827586206, "grad_norm": 1.1468055123721852, "learning_rate": 1.277183293061788e-06, "loss": 1.1891, "reason_loss": 0.4655199646949768, "step": 3145, "utility_loss": 0.723564863204956 }, { "cosine_similarity": 0, "epoch": 2.9319664492078283, "grad_norm": 1.0010995017199436, "learning_rate": 1.259924059371764e-06, "loss": 1.1997, "reason_loss": 0.4967483878135681, "step": 3146, "utility_loss": 0.7029370069503784 }, { "cosine_similarity": 0, "epoch": 2.9328984156570366, "grad_norm": 0.8609692092776656, "learning_rate": 1.2426648256817397e-06, "loss": 0.9448, "reason_loss": 0.47731828689575195, "step": 3147, "utility_loss": 0.4675039052963257 }, { "cosine_similarity": 0, "epoch": 2.933830382106244, "grad_norm": 0.8528638756209758, "learning_rate": 1.2254055919917157e-06, "loss": 1.2862, "reason_loss": 0.44772815704345703, "step": 3148, "utility_loss": 0.8384741544723511 }, { "cosine_similarity": 0, "epoch": 2.934762348555452, "grad_norm": 0.8475508457877897, "learning_rate": 1.2081463583016915e-06, "loss": 1.1346, "reason_loss": 0.4849068522453308, "step": 3149, "utility_loss": 0.6496725678443909 }, { "cosine_similarity": 0, "epoch": 2.93569431500466, "grad_norm": 1.0867255265991784, "learning_rate": 1.1908871246116673e-06, "loss": 1.1137, "reason_loss": 0.4475167989730835, "step": 3150, "utility_loss": 0.6661360859870911 }, { "cosine_similarity": 0, "epoch": 2.9366262814538677, "grad_norm": 0.8957608070149022, "learning_rate": 1.173627890921643e-06, "loss": 1.0234, "reason_loss": 0.46235722303390503, "step": 3151, "utility_loss": 0.5610154271125793 }, { "cosine_similarity": 0, "epoch": 2.9375582479030755, "grad_norm": 1.0601156555116122, "learning_rate": 1.156368657231619e-06, "loss": 0.936, "reason_loss": 0.4833255410194397, "step": 3152, "utility_loss": 0.4526938199996948 }, { "cosine_similarity": 0, "epoch": 2.9384902143522833, "grad_norm": 1.1915932617985694, "learning_rate": 1.1391094235415948e-06, "loss": 1.1848, "reason_loss": 0.4436776638031006, "step": 3153, "utility_loss": 0.7411578893661499 }, { "cosine_similarity": 0, "epoch": 2.939422180801491, "grad_norm": 0.8124959823653335, "learning_rate": 1.1218501898515706e-06, "loss": 0.7988, "reason_loss": 0.4863015115261078, "step": 3154, "utility_loss": 0.3124670088291168 }, { "cosine_similarity": 0, "epoch": 2.940354147250699, "grad_norm": 0.9437090787671194, "learning_rate": 1.1045909561615463e-06, "loss": 1.2352, "reason_loss": 0.46707862615585327, "step": 3155, "utility_loss": 0.7681113481521606 }, { "cosine_similarity": 0, "epoch": 2.9412861136999067, "grad_norm": 1.0347986297143474, "learning_rate": 1.0873317224715223e-06, "loss": 0.9397, "reason_loss": 0.4578549861907959, "step": 3156, "utility_loss": 0.4818357825279236 }, { "cosine_similarity": 0, "epoch": 2.9422180801491145, "grad_norm": 1.2145048309987294, "learning_rate": 1.070072488781498e-06, "loss": 1.258, "reason_loss": 0.48122575879096985, "step": 3157, "utility_loss": 0.7767968773841858 }, { "cosine_similarity": 0, "epoch": 2.9431500465983227, "grad_norm": 0.8756798896254481, "learning_rate": 1.052813255091474e-06, "loss": 0.983, "reason_loss": 0.4562689960002899, "step": 3158, "utility_loss": 0.5267133116722107 }, { "cosine_similarity": 0, "epoch": 2.94408201304753, "grad_norm": 0.8718031564755876, "learning_rate": 1.0355540214014498e-06, "loss": 1.1794, "reason_loss": 0.4451886713504791, "step": 3159, "utility_loss": 0.7342606782913208 }, { "cosine_similarity": 0, "epoch": 2.9450139794967383, "grad_norm": 1.461237319656289, "learning_rate": 1.0182947877114256e-06, "loss": 0.9755, "reason_loss": 0.4693828225135803, "step": 3160, "utility_loss": 0.50611412525177 }, { "cosine_similarity": 0, "epoch": 2.945945945945946, "grad_norm": 1.0608910582157836, "learning_rate": 1.0010355540214014e-06, "loss": 1.0663, "reason_loss": 0.4489452540874481, "step": 3161, "utility_loss": 0.6174039840698242 }, { "cosine_similarity": 0, "epoch": 2.946877912395154, "grad_norm": 1.0113584652321104, "learning_rate": 9.837763203313774e-07, "loss": 0.99, "reason_loss": 0.4438955783843994, "step": 3162, "utility_loss": 0.546071469783783 }, { "cosine_similarity": 0, "epoch": 2.9478098788443616, "grad_norm": 1.0394161828188255, "learning_rate": 9.665170866413531e-07, "loss": 1.0057, "reason_loss": 0.48236197233200073, "step": 3163, "utility_loss": 0.5233355760574341 }, { "cosine_similarity": 0, "epoch": 2.9487418452935694, "grad_norm": 1.1972369075939657, "learning_rate": 9.492578529513289e-07, "loss": 1.2066, "reason_loss": 0.44452133774757385, "step": 3164, "utility_loss": 0.7621200084686279 }, { "cosine_similarity": 0, "epoch": 2.949673811742777, "grad_norm": 1.0192255326919888, "learning_rate": 9.319986192613048e-07, "loss": 1.0985, "reason_loss": 0.45075029134750366, "step": 3165, "utility_loss": 0.647794246673584 }, { "cosine_similarity": 0, "epoch": 2.950605778191985, "grad_norm": 0.8828013229295896, "learning_rate": 9.147393855712807e-07, "loss": 0.8942, "reason_loss": 0.4515182375907898, "step": 3166, "utility_loss": 0.4426575303077698 }, { "cosine_similarity": 0, "epoch": 2.951537744641193, "grad_norm": 1.0011054180175534, "learning_rate": 8.974801518812566e-07, "loss": 1.0875, "reason_loss": 0.4625788927078247, "step": 3167, "utility_loss": 0.6248745322227478 }, { "cosine_similarity": 0, "epoch": 2.9524697110904006, "grad_norm": 0.8444489915202046, "learning_rate": 8.802209181912324e-07, "loss": 1.1709, "reason_loss": 0.48618024587631226, "step": 3168, "utility_loss": 0.6847126483917236 }, { "cosine_similarity": 0, "epoch": 2.953401677539609, "grad_norm": 1.049664630842679, "learning_rate": 8.629616845012081e-07, "loss": 1.184, "reason_loss": 0.46275031566619873, "step": 3169, "utility_loss": 0.7212235331535339 }, { "cosine_similarity": 0, "epoch": 2.954333643988816, "grad_norm": 0.9325571152427599, "learning_rate": 8.45702450811184e-07, "loss": 1.1629, "reason_loss": 0.46547552943229675, "step": 3170, "utility_loss": 0.6974629163742065 }, { "cosine_similarity": 0, "epoch": 2.9552656104380244, "grad_norm": 0.8283322596207505, "learning_rate": 8.284432171211599e-07, "loss": 1.0114, "reason_loss": 0.4830734133720398, "step": 3171, "utility_loss": 0.5282787680625916 }, { "cosine_similarity": 0, "epoch": 2.956197576887232, "grad_norm": 0.8364310009153416, "learning_rate": 8.111839834311357e-07, "loss": 0.9629, "reason_loss": 0.46869978308677673, "step": 3172, "utility_loss": 0.49422913789749146 }, { "cosine_similarity": 0, "epoch": 2.95712954333644, "grad_norm": 0.8617860571865905, "learning_rate": 7.939247497411116e-07, "loss": 1.1367, "reason_loss": 0.48407989740371704, "step": 3173, "utility_loss": 0.6526117920875549 }, { "cosine_similarity": 0, "epoch": 2.9580615097856477, "grad_norm": 1.0175224036591768, "learning_rate": 7.766655160510874e-07, "loss": 1.2954, "reason_loss": 0.429958701133728, "step": 3174, "utility_loss": 0.8654502630233765 }, { "cosine_similarity": 0, "epoch": 2.9589934762348555, "grad_norm": 0.8019859386179837, "learning_rate": 7.594062823610632e-07, "loss": 0.9127, "reason_loss": 0.4655647873878479, "step": 3175, "utility_loss": 0.44709667563438416 }, { "cosine_similarity": 0, "epoch": 2.9599254426840633, "grad_norm": 0.771483566259842, "learning_rate": 7.42147048671039e-07, "loss": 0.8512, "reason_loss": 0.4474440813064575, "step": 3176, "utility_loss": 0.40380463004112244 }, { "cosine_similarity": 0, "epoch": 2.960857409133271, "grad_norm": 0.8540778330450188, "learning_rate": 7.248878149810148e-07, "loss": 1.0313, "reason_loss": 0.48681724071502686, "step": 3177, "utility_loss": 0.5445016026496887 }, { "cosine_similarity": 0, "epoch": 2.961789375582479, "grad_norm": 1.0952101196639477, "learning_rate": 7.076285812909907e-07, "loss": 1.1528, "reason_loss": 0.4547043442726135, "step": 3178, "utility_loss": 0.6981362104415894 }, { "cosine_similarity": 0, "epoch": 2.9627213420316867, "grad_norm": 1.1855783715503871, "learning_rate": 6.903693476009666e-07, "loss": 1.2197, "reason_loss": 0.45792147517204285, "step": 3179, "utility_loss": 0.7617751359939575 }, { "cosine_similarity": 0, "epoch": 2.963653308480895, "grad_norm": 0.9129814018597542, "learning_rate": 6.731101139109423e-07, "loss": 1.0713, "reason_loss": 0.4783209562301636, "step": 3180, "utility_loss": 0.5930085778236389 }, { "cosine_similarity": 0, "epoch": 2.9645852749301023, "grad_norm": 1.3469266424469186, "learning_rate": 6.558508802209182e-07, "loss": 1.3216, "reason_loss": 0.44825121760368347, "step": 3181, "utility_loss": 0.873397707939148 }, { "cosine_similarity": 0, "epoch": 2.9655172413793105, "grad_norm": 0.8896973683930127, "learning_rate": 6.38591646530894e-07, "loss": 1.1557, "reason_loss": 0.4611002802848816, "step": 3182, "utility_loss": 0.6945793628692627 }, { "cosine_similarity": 0, "epoch": 2.9664492078285183, "grad_norm": 0.9772671564759876, "learning_rate": 6.213324128408699e-07, "loss": 0.9882, "reason_loss": 0.4709814786911011, "step": 3183, "utility_loss": 0.51726233959198 }, { "cosine_similarity": 0, "epoch": 2.967381174277726, "grad_norm": 1.0738280433052338, "learning_rate": 6.040731791508457e-07, "loss": 1.2352, "reason_loss": 0.46587613224983215, "step": 3184, "utility_loss": 0.769321620464325 }, { "cosine_similarity": 0, "epoch": 2.968313140726934, "grad_norm": 0.8276792985977053, "learning_rate": 5.868139454608215e-07, "loss": 0.8864, "reason_loss": 0.44382351636886597, "step": 3185, "utility_loss": 0.44256383180618286 }, { "cosine_similarity": 0, "epoch": 2.9692451071761417, "grad_norm": 1.1580769053010038, "learning_rate": 5.695547117707974e-07, "loss": 1.1166, "reason_loss": 0.45228520035743713, "step": 3186, "utility_loss": 0.6643642783164978 }, { "cosine_similarity": 0, "epoch": 2.9701770736253494, "grad_norm": 1.2063611663601361, "learning_rate": 5.522954780807732e-07, "loss": 1.155, "reason_loss": 0.45306849479675293, "step": 3187, "utility_loss": 0.7019010782241821 }, { "cosine_similarity": 0, "epoch": 2.9711090400745572, "grad_norm": 0.9607580763427619, "learning_rate": 5.35036244390749e-07, "loss": 1.0881, "reason_loss": 0.45175492763519287, "step": 3188, "utility_loss": 0.6363784074783325 }, { "cosine_similarity": 0, "epoch": 2.972041006523765, "grad_norm": 1.0774879566452567, "learning_rate": 5.177770107007249e-07, "loss": 1.0008, "reason_loss": 0.4674168825149536, "step": 3189, "utility_loss": 0.533414363861084 }, { "cosine_similarity": 0, "epoch": 2.972972972972973, "grad_norm": 0.8896173717428915, "learning_rate": 5.005177770107007e-07, "loss": 0.8488, "reason_loss": 0.44970703125, "step": 3190, "utility_loss": 0.3990490436553955 }, { "cosine_similarity": 0, "epoch": 2.973904939422181, "grad_norm": 1.301826355919268, "learning_rate": 4.832585433206766e-07, "loss": 1.1595, "reason_loss": 0.46465831995010376, "step": 3191, "utility_loss": 0.6948108077049255 }, { "cosine_similarity": 0, "epoch": 2.9748369058713884, "grad_norm": 1.0522588435249867, "learning_rate": 4.659993096306524e-07, "loss": 1.1263, "reason_loss": 0.46393853425979614, "step": 3192, "utility_loss": 0.6623862981796265 }, { "cosine_similarity": 0, "epoch": 2.9757688723205966, "grad_norm": 0.907089802513996, "learning_rate": 4.487400759406283e-07, "loss": 1.0869, "reason_loss": 0.45256465673446655, "step": 3193, "utility_loss": 0.6343705654144287 }, { "cosine_similarity": 0, "epoch": 2.9767008387698044, "grad_norm": 0.9391471597690118, "learning_rate": 4.3148084225060405e-07, "loss": 1.1182, "reason_loss": 0.46939218044281006, "step": 3194, "utility_loss": 0.6487709283828735 }, { "cosine_similarity": 0, "epoch": 2.977632805219012, "grad_norm": 0.8960540262315742, "learning_rate": 4.142216085605799e-07, "loss": 0.9127, "reason_loss": 0.4554705023765564, "step": 3195, "utility_loss": 0.4572370648384094 }, { "cosine_similarity": 0, "epoch": 2.97856477166822, "grad_norm": 0.9908420584109984, "learning_rate": 3.969623748705558e-07, "loss": 1.0549, "reason_loss": 0.4689088463783264, "step": 3196, "utility_loss": 0.5859769582748413 }, { "cosine_similarity": 0, "epoch": 2.9794967381174278, "grad_norm": 0.9781760482089061, "learning_rate": 3.797031411805316e-07, "loss": 1.0832, "reason_loss": 0.45174452662467957, "step": 3197, "utility_loss": 0.6315022706985474 }, { "cosine_similarity": 0, "epoch": 2.9804287045666356, "grad_norm": 1.0416558801114169, "learning_rate": 3.624439074905074e-07, "loss": 1.1153, "reason_loss": 0.4751947224140167, "step": 3198, "utility_loss": 0.6400561928749084 }, { "cosine_similarity": 0, "epoch": 2.9813606710158433, "grad_norm": 0.8979897689664054, "learning_rate": 3.451846738004833e-07, "loss": 1.0044, "reason_loss": 0.4882888197898865, "step": 3199, "utility_loss": 0.5161543488502502 }, { "cosine_similarity": 0, "epoch": 2.982292637465051, "grad_norm": 0.9796917968659944, "learning_rate": 3.279254401104591e-07, "loss": 1.0938, "reason_loss": 0.4635624587535858, "step": 3200, "utility_loss": 0.6302542686462402 }, { "cosine_similarity": 0, "epoch": 2.983224603914259, "grad_norm": 0.9250727529378844, "learning_rate": 3.1066620642043493e-07, "loss": 1.0519, "reason_loss": 0.468940794467926, "step": 3201, "utility_loss": 0.5829391479492188 }, { "cosine_similarity": 0, "epoch": 2.984156570363467, "grad_norm": 1.040480016418714, "learning_rate": 2.9340697273041076e-07, "loss": 1.1604, "reason_loss": 0.44625717401504517, "step": 3202, "utility_loss": 0.7141589522361755 }, { "cosine_similarity": 0, "epoch": 2.9850885368126745, "grad_norm": 0.9985462295213419, "learning_rate": 2.761477390403866e-07, "loss": 1.1817, "reason_loss": 0.4525839686393738, "step": 3203, "utility_loss": 0.7291494011878967 }, { "cosine_similarity": 0, "epoch": 2.9860205032618827, "grad_norm": 0.8839627481350555, "learning_rate": 2.5888850535036246e-07, "loss": 0.9592, "reason_loss": 0.47169581055641174, "step": 3204, "utility_loss": 0.48751160502433777 }, { "cosine_similarity": 0, "epoch": 2.9869524697110905, "grad_norm": 1.207063172713228, "learning_rate": 2.416292716603383e-07, "loss": 1.1944, "reason_loss": 0.45199674367904663, "step": 3205, "utility_loss": 0.7424492835998535 }, { "cosine_similarity": 0, "epoch": 2.9878844361602983, "grad_norm": 0.8745755192126891, "learning_rate": 2.2437003797031414e-07, "loss": 0.8576, "reason_loss": 0.4535302519798279, "step": 3206, "utility_loss": 0.4040886163711548 }, { "cosine_similarity": 0, "epoch": 2.988816402609506, "grad_norm": 0.7662556012812808, "learning_rate": 2.0711080428028996e-07, "loss": 0.9632, "reason_loss": 0.4910391867160797, "step": 3207, "utility_loss": 0.4721655249595642 }, { "cosine_similarity": 0, "epoch": 2.989748369058714, "grad_norm": 1.0300799549636523, "learning_rate": 1.898515705902658e-07, "loss": 1.1071, "reason_loss": 0.42923077940940857, "step": 3208, "utility_loss": 0.677864670753479 }, { "cosine_similarity": 0, "epoch": 2.9906803355079217, "grad_norm": 1.0153381984509793, "learning_rate": 1.7259233690024164e-07, "loss": 1.0659, "reason_loss": 0.47450491786003113, "step": 3209, "utility_loss": 0.5914134383201599 }, { "cosine_similarity": 0, "epoch": 2.9916123019571295, "grad_norm": 0.8058197337509435, "learning_rate": 1.5533310321021747e-07, "loss": 1.037, "reason_loss": 0.4548601806163788, "step": 3210, "utility_loss": 0.582105815410614 }, { "cosine_similarity": 0, "epoch": 2.9925442684063372, "grad_norm": 1.0953445507231954, "learning_rate": 1.380738695201933e-07, "loss": 1.2941, "reason_loss": 0.46360236406326294, "step": 3211, "utility_loss": 0.8304613828659058 }, { "cosine_similarity": 0, "epoch": 2.993476234855545, "grad_norm": 1.0929326228944236, "learning_rate": 1.2081463583016914e-07, "loss": 0.9808, "reason_loss": 0.48088473081588745, "step": 3212, "utility_loss": 0.4999399185180664 }, { "cosine_similarity": 0, "epoch": 2.9944082013047533, "grad_norm": 0.9988159143339709, "learning_rate": 1.0355540214014498e-07, "loss": 1.1316, "reason_loss": 0.4665939211845398, "step": 3213, "utility_loss": 0.6649887561798096 }, { "cosine_similarity": 0, "epoch": 2.9953401677539606, "grad_norm": 0.9172200396819983, "learning_rate": 8.629616845012082e-08, "loss": 1.1289, "reason_loss": 0.4767378568649292, "step": 3214, "utility_loss": 0.6521244049072266 }, { "cosine_similarity": 0, "epoch": 2.996272134203169, "grad_norm": 0.8029996970574904, "learning_rate": 6.903693476009665e-08, "loss": 0.8611, "reason_loss": 0.4568973183631897, "step": 3215, "utility_loss": 0.40416577458381653 }, { "cosine_similarity": 0, "epoch": 2.9972041006523766, "grad_norm": 0.9257716626573358, "learning_rate": 5.177770107007249e-08, "loss": 0.8632, "reason_loss": 0.47078436613082886, "step": 3216, "utility_loss": 0.39236879348754883 }, { "cosine_similarity": 0, "epoch": 2.9981360671015844, "grad_norm": 0.90711596679424, "learning_rate": 3.451846738004832e-08, "loss": 0.9661, "reason_loss": 0.46590572595596313, "step": 3217, "utility_loss": 0.5001872777938843 }, { "cosine_similarity": 0, "epoch": 2.999068033550792, "grad_norm": 0.93527118248604, "learning_rate": 1.725923369002416e-08, "loss": 1.0524, "reason_loss": 0.43764275312423706, "step": 3218, "utility_loss": 0.614737331867218 }, { "cosine_similarity": 0, "epoch": 3.0, "grad_norm": 0.7157698986992822, "learning_rate": 0.0, "loss": 0.7602, "reason_loss": 0.4803256690502167, "step": 3219, "utility_loss": 0.279866099357605 }, { "epoch": 3.0, "step": 3219, "total_flos": 0.0, "train_loss": 1.648494730254218, "train_runtime": 123617.2982, "train_samples_per_second": 0.417, "train_steps_per_second": 0.026 } ], "logging_steps": 1, "max_steps": 3219, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }