{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.820723262374968, "eval_steps": 250, "global_step": 800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0010259040779687098, "grad_norm": 9.798241135332411, "learning_rate": 3.3333333333333334e-08, "loss": 1.5793, "step": 1 }, { "epoch": 0.0020518081559374197, "grad_norm": 9.598717132793555, "learning_rate": 6.666666666666667e-08, "loss": 1.7229, "step": 2 }, { "epoch": 0.00307771223390613, "grad_norm": 10.260768981836485, "learning_rate": 1e-07, "loss": 1.6317, "step": 3 }, { "epoch": 0.004103616311874839, "grad_norm": 9.059955288981689, "learning_rate": 1.3333333333333334e-07, "loss": 1.6176, "step": 4 }, { "epoch": 0.005129520389843549, "grad_norm": 8.709116139461566, "learning_rate": 1.6666666666666665e-07, "loss": 1.5098, "step": 5 }, { "epoch": 0.00615542446781226, "grad_norm": 9.86995805736169, "learning_rate": 2e-07, "loss": 1.5554, "step": 6 }, { "epoch": 0.00718132854578097, "grad_norm": 9.269724208142758, "learning_rate": 2.3333333333333333e-07, "loss": 1.5685, "step": 7 }, { "epoch": 0.008207232623749679, "grad_norm": 9.788683578279787, "learning_rate": 2.6666666666666667e-07, "loss": 1.6332, "step": 8 }, { "epoch": 0.00923313670171839, "grad_norm": 9.329733080585035, "learning_rate": 3e-07, "loss": 1.5211, "step": 9 }, { "epoch": 0.010259040779687098, "grad_norm": 8.603455933686602, "learning_rate": 3.333333333333333e-07, "loss": 1.531, "step": 10 }, { "epoch": 0.011284944857655809, "grad_norm": 9.703453172271272, "learning_rate": 3.666666666666666e-07, "loss": 1.5336, "step": 11 }, { "epoch": 0.01231084893562452, "grad_norm": 8.366416586343489, "learning_rate": 4e-07, "loss": 1.5916, "step": 12 }, { "epoch": 0.013336753013593229, "grad_norm": 8.816992612572204, "learning_rate": 4.3333333333333335e-07, "loss": 1.6659, "step": 13 }, { "epoch": 0.01436265709156194, "grad_norm": 9.162032804849005, "learning_rate": 4.6666666666666666e-07, "loss": 1.5406, "step": 14 }, { "epoch": 0.015388561169530648, "grad_norm": 8.732450364954722, "learning_rate": 5e-07, "loss": 1.5083, "step": 15 }, { "epoch": 0.016414465247499357, "grad_norm": 8.622668586551894, "learning_rate": 5.333333333333333e-07, "loss": 1.5596, "step": 16 }, { "epoch": 0.01744036932546807, "grad_norm": 7.133974565680819, "learning_rate": 5.666666666666666e-07, "loss": 1.5639, "step": 17 }, { "epoch": 0.01846627340343678, "grad_norm": 6.991354736869125, "learning_rate": 6e-07, "loss": 1.4727, "step": 18 }, { "epoch": 0.019492177481405488, "grad_norm": 6.779705570699807, "learning_rate": 6.333333333333332e-07, "loss": 1.5018, "step": 19 }, { "epoch": 0.020518081559374197, "grad_norm": 6.980027846638078, "learning_rate": 6.666666666666666e-07, "loss": 1.5915, "step": 20 }, { "epoch": 0.02154398563734291, "grad_norm": 6.45230816051997, "learning_rate": 7e-07, "loss": 1.4633, "step": 21 }, { "epoch": 0.022569889715311618, "grad_norm": 5.935560467510461, "learning_rate": 7.333333333333332e-07, "loss": 1.519, "step": 22 }, { "epoch": 0.023595793793280327, "grad_norm": 5.412080062014559, "learning_rate": 7.666666666666667e-07, "loss": 1.5106, "step": 23 }, { "epoch": 0.02462169787124904, "grad_norm": 4.643500965809458, "learning_rate": 8e-07, "loss": 1.3979, "step": 24 }, { "epoch": 0.02564760194921775, "grad_norm": 4.425426929208595, "learning_rate": 8.333333333333333e-07, "loss": 1.3665, "step": 25 }, { "epoch": 0.026673506027186458, "grad_norm": 4.222303529390681, "learning_rate": 8.666666666666667e-07, "loss": 1.4494, "step": 26 }, { "epoch": 0.027699410105155167, "grad_norm": 4.369459716992631, "learning_rate": 9e-07, "loss": 1.419, "step": 27 }, { "epoch": 0.02872531418312388, "grad_norm": 4.039114576068017, "learning_rate": 9.333333333333333e-07, "loss": 1.4929, "step": 28 }, { "epoch": 0.029751218261092588, "grad_norm": 3.7836259253536135, "learning_rate": 9.666666666666666e-07, "loss": 1.3719, "step": 29 }, { "epoch": 0.030777122339061297, "grad_norm": 3.706293156036854, "learning_rate": 1e-06, "loss": 1.4779, "step": 30 }, { "epoch": 0.031803026417030006, "grad_norm": 3.460814827864659, "learning_rate": 9.999972311759113e-07, "loss": 1.3536, "step": 31 }, { "epoch": 0.032828930494998715, "grad_norm": 3.3017900915165086, "learning_rate": 9.99988924734311e-07, "loss": 1.3524, "step": 32 }, { "epoch": 0.03385483457296743, "grad_norm": 3.455224010733439, "learning_rate": 9.999750807671956e-07, "loss": 1.4297, "step": 33 }, { "epoch": 0.03488073865093614, "grad_norm": 3.63256540031341, "learning_rate": 9.999556994278908e-07, "loss": 1.4719, "step": 34 }, { "epoch": 0.03590664272890485, "grad_norm": 3.8514951366560823, "learning_rate": 9.999307809310508e-07, "loss": 1.3979, "step": 35 }, { "epoch": 0.03693254680687356, "grad_norm": 3.9586226136352396, "learning_rate": 9.999003255526553e-07, "loss": 1.3285, "step": 36 }, { "epoch": 0.03795845088484227, "grad_norm": 3.806464587016546, "learning_rate": 9.998643336300069e-07, "loss": 1.4278, "step": 37 }, { "epoch": 0.038984354962810976, "grad_norm": 3.153561780968849, "learning_rate": 9.998228055617262e-07, "loss": 1.4004, "step": 38 }, { "epoch": 0.040010259040779685, "grad_norm": 3.281790653235281, "learning_rate": 9.997757418077494e-07, "loss": 1.3469, "step": 39 }, { "epoch": 0.041036163118748394, "grad_norm": 3.1079807809050792, "learning_rate": 9.997231428893215e-07, "loss": 1.2929, "step": 40 }, { "epoch": 0.04206206719671711, "grad_norm": 2.940384594816459, "learning_rate": 9.99665009388991e-07, "loss": 1.2889, "step": 41 }, { "epoch": 0.04308797127468582, "grad_norm": 2.9910864286989036, "learning_rate": 9.996013419506033e-07, "loss": 1.3704, "step": 42 }, { "epoch": 0.04411387535265453, "grad_norm": 3.097652651590371, "learning_rate": 9.995321412792947e-07, "loss": 1.4157, "step": 43 }, { "epoch": 0.045139779430623236, "grad_norm": 2.9908105852033384, "learning_rate": 9.994574081414829e-07, "loss": 1.3665, "step": 44 }, { "epoch": 0.046165683508591945, "grad_norm": 3.1050321703200847, "learning_rate": 9.993771433648598e-07, "loss": 1.5106, "step": 45 }, { "epoch": 0.047191587586560654, "grad_norm": 3.0440366145712945, "learning_rate": 9.992913478383809e-07, "loss": 1.4094, "step": 46 }, { "epoch": 0.04821749166452936, "grad_norm": 2.6336951924027123, "learning_rate": 9.992000225122578e-07, "loss": 1.2391, "step": 47 }, { "epoch": 0.04924339574249808, "grad_norm": 2.9411301278665816, "learning_rate": 9.991031683979451e-07, "loss": 1.4004, "step": 48 }, { "epoch": 0.05026929982046679, "grad_norm": 2.588518890678191, "learning_rate": 9.990007865681312e-07, "loss": 1.4288, "step": 49 }, { "epoch": 0.0512952038984355, "grad_norm": 3.0977479141233384, "learning_rate": 9.98892878156725e-07, "loss": 1.2934, "step": 50 }, { "epoch": 0.052321107976404206, "grad_norm": 2.891254887655398, "learning_rate": 9.98779444358844e-07, "loss": 1.2993, "step": 51 }, { "epoch": 0.053347012054372915, "grad_norm": 2.8843613231387346, "learning_rate": 9.986604864308015e-07, "loss": 1.3691, "step": 52 }, { "epoch": 0.054372916132341624, "grad_norm": 3.058804698335043, "learning_rate": 9.985360056900914e-07, "loss": 1.3385, "step": 53 }, { "epoch": 0.05539882021031033, "grad_norm": 2.908824401209796, "learning_rate": 9.98406003515375e-07, "loss": 1.4562, "step": 54 }, { "epoch": 0.05642472428827905, "grad_norm": 3.5082197663137977, "learning_rate": 9.98270481346465e-07, "loss": 1.3692, "step": 55 }, { "epoch": 0.05745062836624776, "grad_norm": 2.94858359371842, "learning_rate": 9.981294406843093e-07, "loss": 1.3498, "step": 56 }, { "epoch": 0.05847653244421647, "grad_norm": 2.6795852716884374, "learning_rate": 9.979828830909754e-07, "loss": 1.3321, "step": 57 }, { "epoch": 0.059502436522185176, "grad_norm": 2.9978152584208053, "learning_rate": 9.978308101896316e-07, "loss": 1.3412, "step": 58 }, { "epoch": 0.060528340600153885, "grad_norm": 2.7552572011179497, "learning_rate": 9.97673223664531e-07, "loss": 1.4021, "step": 59 }, { "epoch": 0.061554244678122594, "grad_norm": 2.637599483243523, "learning_rate": 9.975101252609903e-07, "loss": 1.3292, "step": 60 }, { "epoch": 0.0625801487560913, "grad_norm": 2.938536066745437, "learning_rate": 9.973415167853734e-07, "loss": 1.2372, "step": 61 }, { "epoch": 0.06360605283406001, "grad_norm": 2.7530123525727466, "learning_rate": 9.971674001050686e-07, "loss": 1.3277, "step": 62 }, { "epoch": 0.06463195691202872, "grad_norm": 2.753702797656271, "learning_rate": 9.969877771484698e-07, "loss": 1.2756, "step": 63 }, { "epoch": 0.06565786098999743, "grad_norm": 2.707817535821633, "learning_rate": 9.968026499049549e-07, "loss": 1.2586, "step": 64 }, { "epoch": 0.06668376506796614, "grad_norm": 2.7234356724029567, "learning_rate": 9.966120204248625e-07, "loss": 1.3249, "step": 65 }, { "epoch": 0.06770966914593486, "grad_norm": 2.92461615737126, "learning_rate": 9.964158908194706e-07, "loss": 1.3265, "step": 66 }, { "epoch": 0.06873557322390357, "grad_norm": 2.593339469946713, "learning_rate": 9.962142632609732e-07, "loss": 1.3534, "step": 67 }, { "epoch": 0.06976147730187228, "grad_norm": 2.9860559274993466, "learning_rate": 9.960071399824547e-07, "loss": 1.3433, "step": 68 }, { "epoch": 0.07078738137984099, "grad_norm": 2.6875123035895796, "learning_rate": 9.957945232778672e-07, "loss": 1.3273, "step": 69 }, { "epoch": 0.0718132854578097, "grad_norm": 2.870657819908334, "learning_rate": 9.955764155020035e-07, "loss": 1.2982, "step": 70 }, { "epoch": 0.0728391895357784, "grad_norm": 3.0078112960966137, "learning_rate": 9.95352819070472e-07, "loss": 1.2967, "step": 71 }, { "epoch": 0.07386509361374712, "grad_norm": 2.6631173635617307, "learning_rate": 9.951237364596692e-07, "loss": 1.3039, "step": 72 }, { "epoch": 0.07489099769171582, "grad_norm": 2.903289343918584, "learning_rate": 9.94889170206753e-07, "loss": 1.2931, "step": 73 }, { "epoch": 0.07591690176968453, "grad_norm": 2.5803057970449594, "learning_rate": 9.946491229096141e-07, "loss": 1.3029, "step": 74 }, { "epoch": 0.07694280584765324, "grad_norm": 2.587911044453264, "learning_rate": 9.94403597226848e-07, "loss": 1.233, "step": 75 }, { "epoch": 0.07796870992562195, "grad_norm": 2.7832901812089923, "learning_rate": 9.941525958777235e-07, "loss": 1.2965, "step": 76 }, { "epoch": 0.07899461400359066, "grad_norm": 2.681414225480931, "learning_rate": 9.938961216421557e-07, "loss": 1.274, "step": 77 }, { "epoch": 0.08002051808155937, "grad_norm": 2.749129791384422, "learning_rate": 9.936341773606722e-07, "loss": 1.3564, "step": 78 }, { "epoch": 0.08104642215952808, "grad_norm": 2.615169662982429, "learning_rate": 9.93366765934384e-07, "loss": 1.3874, "step": 79 }, { "epoch": 0.08207232623749679, "grad_norm": 2.7336596773654627, "learning_rate": 9.930938903249516e-07, "loss": 1.2904, "step": 80 }, { "epoch": 0.08309823031546551, "grad_norm": 2.695518831245502, "learning_rate": 9.928155535545534e-07, "loss": 1.351, "step": 81 }, { "epoch": 0.08412413439343422, "grad_norm": 2.475635111783967, "learning_rate": 9.925317587058514e-07, "loss": 1.2317, "step": 82 }, { "epoch": 0.08515003847140293, "grad_norm": 2.8880852942092607, "learning_rate": 9.922425089219581e-07, "loss": 1.225, "step": 83 }, { "epoch": 0.08617594254937164, "grad_norm": 2.8478287390427353, "learning_rate": 9.919478074064001e-07, "loss": 1.261, "step": 84 }, { "epoch": 0.08720184662734035, "grad_norm": 2.637223722117266, "learning_rate": 9.91647657423084e-07, "loss": 1.3015, "step": 85 }, { "epoch": 0.08822775070530905, "grad_norm": 2.654960461830896, "learning_rate": 9.913420622962604e-07, "loss": 1.3029, "step": 86 }, { "epoch": 0.08925365478327776, "grad_norm": 3.075422859212528, "learning_rate": 9.910310254104854e-07, "loss": 1.2842, "step": 87 }, { "epoch": 0.09027955886124647, "grad_norm": 2.7924836932599635, "learning_rate": 9.907145502105846e-07, "loss": 1.2982, "step": 88 }, { "epoch": 0.09130546293921518, "grad_norm": 2.6508581949070016, "learning_rate": 9.90392640201615e-07, "loss": 1.3225, "step": 89 }, { "epoch": 0.09233136701718389, "grad_norm": 2.7887285958862904, "learning_rate": 9.900652989488253e-07, "loss": 1.3516, "step": 90 }, { "epoch": 0.0933572710951526, "grad_norm": 2.778310945122497, "learning_rate": 9.897325300776167e-07, "loss": 1.2409, "step": 91 }, { "epoch": 0.09438317517312131, "grad_norm": 2.771955143672296, "learning_rate": 9.893943372735032e-07, "loss": 1.3554, "step": 92 }, { "epoch": 0.09540907925109002, "grad_norm": 2.641330715037395, "learning_rate": 9.8905072428207e-07, "loss": 1.2767, "step": 93 }, { "epoch": 0.09643498332905873, "grad_norm": 2.7999670492066673, "learning_rate": 9.887016949089332e-07, "loss": 1.2957, "step": 94 }, { "epoch": 0.09746088740702745, "grad_norm": 2.9949645451613134, "learning_rate": 9.883472530196966e-07, "loss": 1.3359, "step": 95 }, { "epoch": 0.09848679148499616, "grad_norm": 2.497849004552429, "learning_rate": 9.879874025399087e-07, "loss": 1.3566, "step": 96 }, { "epoch": 0.09951269556296487, "grad_norm": 2.722740998468592, "learning_rate": 9.876221474550207e-07, "loss": 1.3512, "step": 97 }, { "epoch": 0.10053859964093358, "grad_norm": 2.953223838054628, "learning_rate": 9.872514918103405e-07, "loss": 1.3452, "step": 98 }, { "epoch": 0.10156450371890229, "grad_norm": 2.5063398128121417, "learning_rate": 9.868754397109895e-07, "loss": 1.3567, "step": 99 }, { "epoch": 0.102590407796871, "grad_norm": 2.5885598718043066, "learning_rate": 9.864939953218561e-07, "loss": 1.2673, "step": 100 }, { "epoch": 0.1036163118748397, "grad_norm": 2.620316954154948, "learning_rate": 9.8610716286755e-07, "loss": 1.3432, "step": 101 }, { "epoch": 0.10464221595280841, "grad_norm": 2.7576659564287995, "learning_rate": 9.85714946632355e-07, "loss": 1.2954, "step": 102 }, { "epoch": 0.10566812003077712, "grad_norm": 2.6107378359741147, "learning_rate": 9.853173509601823e-07, "loss": 1.3089, "step": 103 }, { "epoch": 0.10669402410874583, "grad_norm": 2.72531292391567, "learning_rate": 9.84914380254522e-07, "loss": 1.2754, "step": 104 }, { "epoch": 0.10771992818671454, "grad_norm": 2.6204968346720197, "learning_rate": 9.845060389783937e-07, "loss": 1.311, "step": 105 }, { "epoch": 0.10874583226468325, "grad_norm": 2.403778961837896, "learning_rate": 9.840923316542983e-07, "loss": 1.2194, "step": 106 }, { "epoch": 0.10977173634265196, "grad_norm": 2.841225529794086, "learning_rate": 9.83673262864167e-07, "loss": 1.2714, "step": 107 }, { "epoch": 0.11079764042062067, "grad_norm": 2.680466409381784, "learning_rate": 9.832488372493108e-07, "loss": 1.3238, "step": 108 }, { "epoch": 0.11182354449858938, "grad_norm": 2.7899613949194393, "learning_rate": 9.82819059510369e-07, "loss": 1.3607, "step": 109 }, { "epoch": 0.1128494485765581, "grad_norm": 2.741199596943549, "learning_rate": 9.82383934407258e-07, "loss": 1.2373, "step": 110 }, { "epoch": 0.1138753526545268, "grad_norm": 2.680900338879034, "learning_rate": 9.819434667591166e-07, "loss": 1.332, "step": 111 }, { "epoch": 0.11490125673249552, "grad_norm": 2.6867928214379266, "learning_rate": 9.814976614442547e-07, "loss": 1.2636, "step": 112 }, { "epoch": 0.11592716081046422, "grad_norm": 2.530746336531516, "learning_rate": 9.810465234000987e-07, "loss": 1.2427, "step": 113 }, { "epoch": 0.11695306488843293, "grad_norm": 3.079117319825233, "learning_rate": 9.805900576231357e-07, "loss": 1.3352, "step": 114 }, { "epoch": 0.11797896896640164, "grad_norm": 2.705858226495669, "learning_rate": 9.801282691688595e-07, "loss": 1.2462, "step": 115 }, { "epoch": 0.11900487304437035, "grad_norm": 2.642585557515256, "learning_rate": 9.796611631517141e-07, "loss": 1.3102, "step": 116 }, { "epoch": 0.12003077712233906, "grad_norm": 2.591744882697605, "learning_rate": 9.791887447450374e-07, "loss": 1.3265, "step": 117 }, { "epoch": 0.12105668120030777, "grad_norm": 2.5766253217072137, "learning_rate": 9.787110191810026e-07, "loss": 1.3054, "step": 118 }, { "epoch": 0.12208258527827648, "grad_norm": 2.77284938502092, "learning_rate": 9.782279917505627e-07, "loss": 1.245, "step": 119 }, { "epoch": 0.12310848935624519, "grad_norm": 2.8563203008600837, "learning_rate": 9.77739667803389e-07, "loss": 1.3478, "step": 120 }, { "epoch": 0.1241343934342139, "grad_norm": 2.565482050398409, "learning_rate": 9.772460527478142e-07, "loss": 1.2775, "step": 121 }, { "epoch": 0.1251602975121826, "grad_norm": 2.7054539949137615, "learning_rate": 9.76747152050771e-07, "loss": 1.3187, "step": 122 }, { "epoch": 0.12618620159015131, "grad_norm": 2.5989084819509967, "learning_rate": 9.762429712377331e-07, "loss": 1.2934, "step": 123 }, { "epoch": 0.12721210566812002, "grad_norm": 2.510970489271258, "learning_rate": 9.75733515892652e-07, "loss": 1.2603, "step": 124 }, { "epoch": 0.12823800974608873, "grad_norm": 2.5595730950106925, "learning_rate": 9.752187916578967e-07, "loss": 1.2883, "step": 125 }, { "epoch": 0.12926391382405744, "grad_norm": 2.67442136336687, "learning_rate": 9.746988042341907e-07, "loss": 1.2817, "step": 126 }, { "epoch": 0.13028981790202615, "grad_norm": 2.708509570780934, "learning_rate": 9.741735593805486e-07, "loss": 1.2078, "step": 127 }, { "epoch": 0.13131572197999486, "grad_norm": 2.678010158167899, "learning_rate": 9.736430629142128e-07, "loss": 1.3796, "step": 128 }, { "epoch": 0.13234162605796357, "grad_norm": 2.844285265615473, "learning_rate": 9.731073207105896e-07, "loss": 1.3091, "step": 129 }, { "epoch": 0.13336753013593228, "grad_norm": 2.7814463008959334, "learning_rate": 9.725663387031816e-07, "loss": 1.3682, "step": 130 }, { "epoch": 0.13439343421390101, "grad_norm": 2.6513652068190403, "learning_rate": 9.720201228835256e-07, "loss": 1.2806, "step": 131 }, { "epoch": 0.13541933829186972, "grad_norm": 2.808046184764404, "learning_rate": 9.714686793011235e-07, "loss": 1.1529, "step": 132 }, { "epoch": 0.13644524236983843, "grad_norm": 2.6158983121591004, "learning_rate": 9.709120140633763e-07, "loss": 1.2301, "step": 133 }, { "epoch": 0.13747114644780714, "grad_norm": 2.6087976837956406, "learning_rate": 9.703501333355166e-07, "loss": 1.2512, "step": 134 }, { "epoch": 0.13849705052577585, "grad_norm": 2.3938681704241818, "learning_rate": 9.697830433405399e-07, "loss": 1.3158, "step": 135 }, { "epoch": 0.13952295460374456, "grad_norm": 2.5859416655464216, "learning_rate": 9.692107503591358e-07, "loss": 1.1996, "step": 136 }, { "epoch": 0.14054885868171327, "grad_norm": 2.814296800240032, "learning_rate": 9.68633260729619e-07, "loss": 1.3123, "step": 137 }, { "epoch": 0.14157476275968198, "grad_norm": 2.4891637404994125, "learning_rate": 9.680505808478581e-07, "loss": 1.2231, "step": 138 }, { "epoch": 0.14260066683765069, "grad_norm": 2.863453159988515, "learning_rate": 9.674627171672054e-07, "loss": 1.2959, "step": 139 }, { "epoch": 0.1436265709156194, "grad_norm": 2.538454344691091, "learning_rate": 9.668696761984254e-07, "loss": 1.2674, "step": 140 }, { "epoch": 0.1446524749935881, "grad_norm": 2.5004273929320395, "learning_rate": 9.662714645096229e-07, "loss": 1.2116, "step": 141 }, { "epoch": 0.1456783790715568, "grad_norm": 2.6371352778169657, "learning_rate": 9.656680887261692e-07, "loss": 1.2476, "step": 142 }, { "epoch": 0.14670428314952552, "grad_norm": 2.683569477616342, "learning_rate": 9.650595555306302e-07, "loss": 1.3046, "step": 143 }, { "epoch": 0.14773018722749423, "grad_norm": 2.912553498396746, "learning_rate": 9.644458716626911e-07, "loss": 1.2318, "step": 144 }, { "epoch": 0.14875609130546294, "grad_norm": 2.5818573358818266, "learning_rate": 9.63827043919083e-07, "loss": 1.3017, "step": 145 }, { "epoch": 0.14978199538343165, "grad_norm": 2.6673972834050543, "learning_rate": 9.63203079153506e-07, "loss": 1.2966, "step": 146 }, { "epoch": 0.15080789946140036, "grad_norm": 2.8672403287450523, "learning_rate": 9.625739842765556e-07, "loss": 1.3346, "step": 147 }, { "epoch": 0.15183380353936907, "grad_norm": 2.5970617764295496, "learning_rate": 9.619397662556433e-07, "loss": 1.2169, "step": 148 }, { "epoch": 0.15285970761733778, "grad_norm": 2.4979249789960556, "learning_rate": 9.61300432114922e-07, "loss": 1.3695, "step": 149 }, { "epoch": 0.15388561169530648, "grad_norm": 2.479601711066903, "learning_rate": 9.606559889352063e-07, "loss": 1.2008, "step": 150 }, { "epoch": 0.1549115157732752, "grad_norm": 2.5898796136386144, "learning_rate": 9.600064438538961e-07, "loss": 1.328, "step": 151 }, { "epoch": 0.1559374198512439, "grad_norm": 2.798734600781089, "learning_rate": 9.593518040648952e-07, "loss": 1.244, "step": 152 }, { "epoch": 0.1569633239292126, "grad_norm": 2.4882934421406837, "learning_rate": 9.586920768185333e-07, "loss": 1.2426, "step": 153 }, { "epoch": 0.15798922800718132, "grad_norm": 2.802602250049014, "learning_rate": 9.580272694214854e-07, "loss": 1.2628, "step": 154 }, { "epoch": 0.15901513208515003, "grad_norm": 2.930136632494125, "learning_rate": 9.573573892366903e-07, "loss": 1.2351, "step": 155 }, { "epoch": 0.16004103616311874, "grad_norm": 2.3313868113568446, "learning_rate": 9.566824436832695e-07, "loss": 1.303, "step": 156 }, { "epoch": 0.16106694024108745, "grad_norm": 2.616895617747635, "learning_rate": 9.56002440236445e-07, "loss": 1.3128, "step": 157 }, { "epoch": 0.16209284431905616, "grad_norm": 2.5932182189076647, "learning_rate": 9.553173864274566e-07, "loss": 1.3312, "step": 158 }, { "epoch": 0.16311874839702487, "grad_norm": 2.6881235109044184, "learning_rate": 9.54627289843478e-07, "loss": 1.2495, "step": 159 }, { "epoch": 0.16414465247499357, "grad_norm": 2.876970939352412, "learning_rate": 9.539321581275342e-07, "loss": 1.3992, "step": 160 }, { "epoch": 0.1651705565529623, "grad_norm": 2.762056056919623, "learning_rate": 9.532319989784139e-07, "loss": 1.3294, "step": 161 }, { "epoch": 0.16619646063093102, "grad_norm": 2.5107504330050636, "learning_rate": 9.525268201505878e-07, "loss": 1.331, "step": 162 }, { "epoch": 0.16722236470889973, "grad_norm": 2.7655428881842754, "learning_rate": 9.518166294541203e-07, "loss": 1.3166, "step": 163 }, { "epoch": 0.16824826878686844, "grad_norm": 2.5472143589800678, "learning_rate": 9.511014347545837e-07, "loss": 1.2435, "step": 164 }, { "epoch": 0.16927417286483715, "grad_norm": 2.9317558173367084, "learning_rate": 9.503812439729714e-07, "loss": 1.2583, "step": 165 }, { "epoch": 0.17030007694280586, "grad_norm": 2.723414624661715, "learning_rate": 9.496560650856096e-07, "loss": 1.2796, "step": 166 }, { "epoch": 0.17132598102077456, "grad_norm": 2.5426652630538418, "learning_rate": 9.489259061240695e-07, "loss": 1.2991, "step": 167 }, { "epoch": 0.17235188509874327, "grad_norm": 2.3609000901953716, "learning_rate": 9.481907751750779e-07, "loss": 1.2761, "step": 168 }, { "epoch": 0.17337778917671198, "grad_norm": 2.6737934941791885, "learning_rate": 9.474506803804279e-07, "loss": 1.2034, "step": 169 }, { "epoch": 0.1744036932546807, "grad_norm": 2.7034503852388494, "learning_rate": 9.467056299368887e-07, "loss": 1.2918, "step": 170 }, { "epoch": 0.1754295973326494, "grad_norm": 2.6495133759903857, "learning_rate": 9.459556320961151e-07, "loss": 1.268, "step": 171 }, { "epoch": 0.1764555014106181, "grad_norm": 2.6876955044079542, "learning_rate": 9.452006951645548e-07, "loss": 1.1678, "step": 172 }, { "epoch": 0.17748140548858682, "grad_norm": 2.5152653942206045, "learning_rate": 9.444408275033586e-07, "loss": 1.1947, "step": 173 }, { "epoch": 0.17850730956655553, "grad_norm": 2.802330037908763, "learning_rate": 9.436760375282857e-07, "loss": 1.3259, "step": 174 }, { "epoch": 0.17953321364452424, "grad_norm": 2.7422708968039116, "learning_rate": 9.429063337096119e-07, "loss": 1.2465, "step": 175 }, { "epoch": 0.18055911772249295, "grad_norm": 2.826066535856552, "learning_rate": 9.421317245720352e-07, "loss": 1.3284, "step": 176 }, { "epoch": 0.18158502180046165, "grad_norm": 2.466040916453339, "learning_rate": 9.41352218694581e-07, "loss": 1.2419, "step": 177 }, { "epoch": 0.18261092587843036, "grad_norm": 2.837491368634976, "learning_rate": 9.405678247105082e-07, "loss": 1.2956, "step": 178 }, { "epoch": 0.18363682995639907, "grad_norm": 2.704111429861623, "learning_rate": 9.397785513072126e-07, "loss": 1.2389, "step": 179 }, { "epoch": 0.18466273403436778, "grad_norm": 2.427829411975647, "learning_rate": 9.38984407226131e-07, "loss": 1.2553, "step": 180 }, { "epoch": 0.1856886381123365, "grad_norm": 2.6889270088014343, "learning_rate": 9.381854012626443e-07, "loss": 1.2249, "step": 181 }, { "epoch": 0.1867145421903052, "grad_norm": 3.0319367008791813, "learning_rate": 9.373815422659805e-07, "loss": 1.2405, "step": 182 }, { "epoch": 0.1877404462682739, "grad_norm": 2.7686852678293485, "learning_rate": 9.365728391391164e-07, "loss": 1.2309, "step": 183 }, { "epoch": 0.18876635034624262, "grad_norm": 2.6725011393667577, "learning_rate": 9.357593008386784e-07, "loss": 1.2986, "step": 184 }, { "epoch": 0.18979225442421133, "grad_norm": 2.4605351470645083, "learning_rate": 9.349409363748444e-07, "loss": 1.1675, "step": 185 }, { "epoch": 0.19081815850218004, "grad_norm": 2.592275913149399, "learning_rate": 9.341177548112436e-07, "loss": 1.3206, "step": 186 }, { "epoch": 0.19184406258014874, "grad_norm": 2.941294249687017, "learning_rate": 9.332897652648555e-07, "loss": 1.2446, "step": 187 }, { "epoch": 0.19286996665811745, "grad_norm": 2.741142925639391, "learning_rate": 9.324569769059096e-07, "loss": 1.3082, "step": 188 }, { "epoch": 0.19389587073608616, "grad_norm": 2.6064903500939747, "learning_rate": 9.316193989577843e-07, "loss": 1.2599, "step": 189 }, { "epoch": 0.1949217748140549, "grad_norm": 2.6477370525921966, "learning_rate": 9.30777040696903e-07, "loss": 1.2219, "step": 190 }, { "epoch": 0.1959476788920236, "grad_norm": 2.758046928790272, "learning_rate": 9.299299114526334e-07, "loss": 1.2352, "step": 191 }, { "epoch": 0.19697358296999232, "grad_norm": 2.6839436039460907, "learning_rate": 9.29078020607183e-07, "loss": 1.2706, "step": 192 }, { "epoch": 0.19799948704796103, "grad_norm": 2.4457645270891444, "learning_rate": 9.28221377595495e-07, "loss": 1.2831, "step": 193 }, { "epoch": 0.19902539112592973, "grad_norm": 2.547455908474382, "learning_rate": 9.273599919051452e-07, "loss": 1.3182, "step": 194 }, { "epoch": 0.20005129520389844, "grad_norm": 2.777434855703004, "learning_rate": 9.264938730762348e-07, "loss": 1.3255, "step": 195 }, { "epoch": 0.20107719928186715, "grad_norm": 2.615909597544814, "learning_rate": 9.256230307012869e-07, "loss": 1.1474, "step": 196 }, { "epoch": 0.20210310335983586, "grad_norm": 3.06791169964388, "learning_rate": 9.247474744251387e-07, "loss": 1.2645, "step": 197 }, { "epoch": 0.20312900743780457, "grad_norm": 2.6202584863601976, "learning_rate": 9.238672139448353e-07, "loss": 1.275, "step": 198 }, { "epoch": 0.20415491151577328, "grad_norm": 2.601162569382904, "learning_rate": 9.229822590095229e-07, "loss": 1.274, "step": 199 }, { "epoch": 0.205180815593742, "grad_norm": 2.6008093073665877, "learning_rate": 9.220926194203392e-07, "loss": 1.3114, "step": 200 }, { "epoch": 0.2062067196717107, "grad_norm": 2.7639300809591765, "learning_rate": 9.211983050303065e-07, "loss": 1.3243, "step": 201 }, { "epoch": 0.2072326237496794, "grad_norm": 2.414680199787791, "learning_rate": 9.202993257442216e-07, "loss": 1.2389, "step": 202 }, { "epoch": 0.20825852782764812, "grad_norm": 2.6254293412284158, "learning_rate": 9.193956915185465e-07, "loss": 1.2058, "step": 203 }, { "epoch": 0.20928443190561682, "grad_norm": 2.448512350031076, "learning_rate": 9.184874123612981e-07, "loss": 1.2387, "step": 204 }, { "epoch": 0.21031033598358553, "grad_norm": 2.5690470438213406, "learning_rate": 9.175744983319373e-07, "loss": 1.26, "step": 205 }, { "epoch": 0.21133624006155424, "grad_norm": 2.754390048080123, "learning_rate": 9.166569595412574e-07, "loss": 1.1595, "step": 206 }, { "epoch": 0.21236214413952295, "grad_norm": 2.774729015636808, "learning_rate": 9.157348061512726e-07, "loss": 1.2775, "step": 207 }, { "epoch": 0.21338804821749166, "grad_norm": 2.7682686071100853, "learning_rate": 9.148080483751048e-07, "loss": 1.3079, "step": 208 }, { "epoch": 0.21441395229546037, "grad_norm": 2.782086823919593, "learning_rate": 9.138766964768711e-07, "loss": 1.2042, "step": 209 }, { "epoch": 0.21543985637342908, "grad_norm": 2.6465176185194674, "learning_rate": 9.129407607715696e-07, "loss": 1.3243, "step": 210 }, { "epoch": 0.2164657604513978, "grad_norm": 2.435336652939164, "learning_rate": 9.12000251624966e-07, "loss": 1.2992, "step": 211 }, { "epoch": 0.2174916645293665, "grad_norm": 2.598474192950846, "learning_rate": 9.110551794534775e-07, "loss": 1.2117, "step": 212 }, { "epoch": 0.2185175686073352, "grad_norm": 2.6249002298839756, "learning_rate": 9.101055547240586e-07, "loss": 1.305, "step": 213 }, { "epoch": 0.21954347268530391, "grad_norm": 2.82690930390355, "learning_rate": 9.091513879540844e-07, "loss": 1.3207, "step": 214 }, { "epoch": 0.22056937676327262, "grad_norm": 2.436101223347268, "learning_rate": 9.08192689711235e-07, "loss": 1.1575, "step": 215 }, { "epoch": 0.22159528084124133, "grad_norm": 2.3958033529310088, "learning_rate": 9.072294706133774e-07, "loss": 1.244, "step": 216 }, { "epoch": 0.22262118491921004, "grad_norm": 2.6527085410686078, "learning_rate": 9.062617413284485e-07, "loss": 1.172, "step": 217 }, { "epoch": 0.22364708899717875, "grad_norm": 2.648011195059752, "learning_rate": 9.052895125743369e-07, "loss": 1.3308, "step": 218 }, { "epoch": 0.2246729930751475, "grad_norm": 2.568920128652391, "learning_rate": 9.043127951187642e-07, "loss": 1.1837, "step": 219 }, { "epoch": 0.2256988971531162, "grad_norm": 2.5261344724437045, "learning_rate": 9.033315997791659e-07, "loss": 1.3071, "step": 220 }, { "epoch": 0.2267248012310849, "grad_norm": 2.482972261372125, "learning_rate": 9.023459374225708e-07, "loss": 1.2766, "step": 221 }, { "epoch": 0.2277507053090536, "grad_norm": 2.728013709536128, "learning_rate": 9.013558189654817e-07, "loss": 1.2879, "step": 222 }, { "epoch": 0.22877660938702232, "grad_norm": 2.509526243928691, "learning_rate": 9.003612553737543e-07, "loss": 1.2326, "step": 223 }, { "epoch": 0.22980251346499103, "grad_norm": 2.600184311388481, "learning_rate": 8.993622576624746e-07, "loss": 1.2468, "step": 224 }, { "epoch": 0.23082841754295974, "grad_norm": 2.996100810613903, "learning_rate": 8.983588368958387e-07, "loss": 1.2802, "step": 225 }, { "epoch": 0.23185432162092845, "grad_norm": 3.145733520640035, "learning_rate": 8.973510041870287e-07, "loss": 1.3561, "step": 226 }, { "epoch": 0.23288022569889716, "grad_norm": 2.677928117781213, "learning_rate": 8.963387706980907e-07, "loss": 1.2575, "step": 227 }, { "epoch": 0.23390612977686587, "grad_norm": 2.3983433304776542, "learning_rate": 8.953221476398105e-07, "loss": 1.3267, "step": 228 }, { "epoch": 0.23493203385483458, "grad_norm": 2.5880313443096146, "learning_rate": 8.943011462715897e-07, "loss": 1.3981, "step": 229 }, { "epoch": 0.23595793793280329, "grad_norm": 2.8392528250107882, "learning_rate": 8.932757779013213e-07, "loss": 1.2559, "step": 230 }, { "epoch": 0.236983842010772, "grad_norm": 2.5686226393466525, "learning_rate": 8.922460538852634e-07, "loss": 1.2202, "step": 231 }, { "epoch": 0.2380097460887407, "grad_norm": 2.509078780435471, "learning_rate": 8.912119856279149e-07, "loss": 1.1649, "step": 232 }, { "epoch": 0.2390356501667094, "grad_norm": 2.8313736339242257, "learning_rate": 8.901735845818884e-07, "loss": 1.303, "step": 233 }, { "epoch": 0.24006155424467812, "grad_norm": 2.63535412753487, "learning_rate": 8.891308622477829e-07, "loss": 1.2393, "step": 234 }, { "epoch": 0.24108745832264683, "grad_norm": 2.357470458300626, "learning_rate": 8.880838301740575e-07, "loss": 1.2319, "step": 235 }, { "epoch": 0.24211336240061554, "grad_norm": 2.633624233788043, "learning_rate": 8.870324999569024e-07, "loss": 1.3239, "step": 236 }, { "epoch": 0.24313926647858425, "grad_norm": 2.541409949570618, "learning_rate": 8.859768832401117e-07, "loss": 1.2875, "step": 237 }, { "epoch": 0.24416517055655296, "grad_norm": 2.7389024190179123, "learning_rate": 8.849169917149531e-07, "loss": 1.2948, "step": 238 }, { "epoch": 0.24519107463452167, "grad_norm": 2.7594743544624536, "learning_rate": 8.838528371200394e-07, "loss": 1.3199, "step": 239 }, { "epoch": 0.24621697871249038, "grad_norm": 2.553894247097559, "learning_rate": 8.827844312411982e-07, "loss": 1.3282, "step": 240 }, { "epoch": 0.24724288279045908, "grad_norm": 2.41692947947688, "learning_rate": 8.817117859113412e-07, "loss": 1.242, "step": 241 }, { "epoch": 0.2482687868684278, "grad_norm": 2.723064556658783, "learning_rate": 8.806349130103332e-07, "loss": 1.1648, "step": 242 }, { "epoch": 0.2492946909463965, "grad_norm": 2.643873563364823, "learning_rate": 8.795538244648609e-07, "loss": 1.2753, "step": 243 }, { "epoch": 0.2503205950243652, "grad_norm": 2.6856636444384385, "learning_rate": 8.784685322483003e-07, "loss": 1.2634, "step": 244 }, { "epoch": 0.2513464991023339, "grad_norm": 2.960535082783671, "learning_rate": 8.77379048380584e-07, "loss": 1.1963, "step": 245 }, { "epoch": 0.25237240318030263, "grad_norm": 3.2907999278170066, "learning_rate": 8.762853849280691e-07, "loss": 1.2236, "step": 246 }, { "epoch": 0.25339830725827134, "grad_norm": 2.706088121459989, "learning_rate": 8.751875540034025e-07, "loss": 1.2284, "step": 247 }, { "epoch": 0.25442421133624005, "grad_norm": 2.8342758112128656, "learning_rate": 8.740855677653867e-07, "loss": 1.3529, "step": 248 }, { "epoch": 0.25545011541420876, "grad_norm": 2.657409992434949, "learning_rate": 8.72979438418846e-07, "loss": 1.2265, "step": 249 }, { "epoch": 0.25647601949217746, "grad_norm": 2.733820536997817, "learning_rate": 8.718691782144907e-07, "loss": 1.3085, "step": 250 }, { "epoch": 0.25647601949217746, "eval_uground_MCTS_chains_SFT_val_loss": 1.2850571870803833, "eval_uground_MCTS_chains_SFT_val_runtime": 234.9849, "eval_uground_MCTS_chains_SFT_val_samples_per_second": 7.741, "eval_uground_MCTS_chains_SFT_val_steps_per_second": 0.97, "step": 250 }, { "epoch": 0.2575019235701462, "grad_norm": 2.6848225964360886, "learning_rate": 8.707547994487817e-07, "loss": 1.2179, "step": 251 }, { "epoch": 0.2585278276481149, "grad_norm": 2.6683129178340717, "learning_rate": 8.69636314463794e-07, "loss": 1.2185, "step": 252 }, { "epoch": 0.2595537317260836, "grad_norm": 2.7296939230032047, "learning_rate": 8.685137356470802e-07, "loss": 1.2974, "step": 253 }, { "epoch": 0.2605796358040523, "grad_norm": 2.5819217942943387, "learning_rate": 8.673870754315336e-07, "loss": 1.2832, "step": 254 }, { "epoch": 0.261605539882021, "grad_norm": 2.7286375807681202, "learning_rate": 8.662563462952498e-07, "loss": 1.1838, "step": 255 }, { "epoch": 0.2626314439599897, "grad_norm": 2.8389791464688825, "learning_rate": 8.651215607613891e-07, "loss": 1.2626, "step": 256 }, { "epoch": 0.26365734803795843, "grad_norm": 2.667042240854244, "learning_rate": 8.639827313980377e-07, "loss": 1.2649, "step": 257 }, { "epoch": 0.26468325211592714, "grad_norm": 2.6062798312002853, "learning_rate": 8.628398708180679e-07, "loss": 1.2032, "step": 258 }, { "epoch": 0.26570915619389585, "grad_norm": 2.8969301485628893, "learning_rate": 8.61692991679e-07, "loss": 1.2119, "step": 259 }, { "epoch": 0.26673506027186455, "grad_norm": 2.6804950419511164, "learning_rate": 8.605421066828598e-07, "loss": 1.2812, "step": 260 }, { "epoch": 0.26776096434983326, "grad_norm": 2.8172486497816807, "learning_rate": 8.593872285760399e-07, "loss": 1.2254, "step": 261 }, { "epoch": 0.26878686842780203, "grad_norm": 2.5633187420692307, "learning_rate": 8.582283701491575e-07, "loss": 1.2842, "step": 262 }, { "epoch": 0.26981277250577074, "grad_norm": 2.67558705929831, "learning_rate": 8.570655442369133e-07, "loss": 1.2636, "step": 263 }, { "epoch": 0.27083867658373945, "grad_norm": 2.8468850830803425, "learning_rate": 8.558987637179487e-07, "loss": 1.214, "step": 264 }, { "epoch": 0.27186458066170816, "grad_norm": 2.6831972490521965, "learning_rate": 8.547280415147037e-07, "loss": 1.2931, "step": 265 }, { "epoch": 0.27289048473967686, "grad_norm": 2.6462503105531896, "learning_rate": 8.535533905932737e-07, "loss": 1.2655, "step": 266 }, { "epoch": 0.2739163888176456, "grad_norm": 2.3586365587982967, "learning_rate": 8.523748239632659e-07, "loss": 1.2476, "step": 267 }, { "epoch": 0.2749422928956143, "grad_norm": 2.54829905356015, "learning_rate": 8.51192354677655e-07, "loss": 1.1603, "step": 268 }, { "epoch": 0.275968196973583, "grad_norm": 2.5266352307165705, "learning_rate": 8.500059958326384e-07, "loss": 1.2981, "step": 269 }, { "epoch": 0.2769941010515517, "grad_norm": 2.6922548930973167, "learning_rate": 8.488157605674924e-07, "loss": 1.2659, "step": 270 }, { "epoch": 0.2780200051295204, "grad_norm": 2.664526394721468, "learning_rate": 8.47621662064425e-07, "loss": 1.2074, "step": 271 }, { "epoch": 0.2790459092074891, "grad_norm": 2.6008818489590477, "learning_rate": 8.464237135484309e-07, "loss": 1.312, "step": 272 }, { "epoch": 0.2800718132854578, "grad_norm": 2.5513968093248733, "learning_rate": 8.452219282871451e-07, "loss": 1.2602, "step": 273 }, { "epoch": 0.28109771736342654, "grad_norm": 2.645895710122177, "learning_rate": 8.440163195906958e-07, "loss": 1.2199, "step": 274 }, { "epoch": 0.28212362144139524, "grad_norm": 2.555562047484234, "learning_rate": 8.42806900811556e-07, "loss": 1.2419, "step": 275 }, { "epoch": 0.28314952551936395, "grad_norm": 2.668319098287759, "learning_rate": 8.415936853443974e-07, "loss": 1.2866, "step": 276 }, { "epoch": 0.28417542959733266, "grad_norm": 2.5876812386597856, "learning_rate": 8.40376686625941e-07, "loss": 1.2495, "step": 277 }, { "epoch": 0.28520133367530137, "grad_norm": 2.761080694836167, "learning_rate": 8.391559181348081e-07, "loss": 1.3327, "step": 278 }, { "epoch": 0.2862272377532701, "grad_norm": 2.5148075285885323, "learning_rate": 8.379313933913714e-07, "loss": 1.2356, "step": 279 }, { "epoch": 0.2872531418312388, "grad_norm": 2.8048853074405913, "learning_rate": 8.367031259576056e-07, "loss": 1.3033, "step": 280 }, { "epoch": 0.2882790459092075, "grad_norm": 2.5855751773469, "learning_rate": 8.354711294369362e-07, "loss": 1.2116, "step": 281 }, { "epoch": 0.2893049499871762, "grad_norm": 2.6409683165441833, "learning_rate": 8.342354174740902e-07, "loss": 1.291, "step": 282 }, { "epoch": 0.2903308540651449, "grad_norm": 2.5609773198767583, "learning_rate": 8.329960037549433e-07, "loss": 1.1966, "step": 283 }, { "epoch": 0.2913567581431136, "grad_norm": 2.454732765461205, "learning_rate": 8.317529020063703e-07, "loss": 1.2519, "step": 284 }, { "epoch": 0.29238266222108233, "grad_norm": 2.5134417168975127, "learning_rate": 8.305061259960909e-07, "loss": 1.3021, "step": 285 }, { "epoch": 0.29340856629905104, "grad_norm": 2.7770733269324683, "learning_rate": 8.292556895325194e-07, "loss": 1.2724, "step": 286 }, { "epoch": 0.29443447037701975, "grad_norm": 2.7282701405098226, "learning_rate": 8.280016064646098e-07, "loss": 1.3078, "step": 287 }, { "epoch": 0.29546037445498846, "grad_norm": 2.9061694081822815, "learning_rate": 8.267438906817039e-07, "loss": 1.3072, "step": 288 }, { "epoch": 0.29648627853295717, "grad_norm": 2.789078860545301, "learning_rate": 8.25482556113377e-07, "loss": 1.2562, "step": 289 }, { "epoch": 0.2975121826109259, "grad_norm": 2.5716595982006147, "learning_rate": 8.242176167292826e-07, "loss": 1.242, "step": 290 }, { "epoch": 0.2985380866888946, "grad_norm": 2.639971746858166, "learning_rate": 8.229490865389998e-07, "loss": 1.2395, "step": 291 }, { "epoch": 0.2995639907668633, "grad_norm": 2.478159221268824, "learning_rate": 8.216769795918762e-07, "loss": 1.1481, "step": 292 }, { "epoch": 0.300589894844832, "grad_norm": 2.5242932402585376, "learning_rate": 8.204013099768732e-07, "loss": 1.2662, "step": 293 }, { "epoch": 0.3016157989228007, "grad_norm": 2.598195840538781, "learning_rate": 8.1912209182241e-07, "loss": 1.2947, "step": 294 }, { "epoch": 0.3026417030007694, "grad_norm": 2.7173961511536264, "learning_rate": 8.178393392962066e-07, "loss": 1.2896, "step": 295 }, { "epoch": 0.30366760707873813, "grad_norm": 2.7206884235512527, "learning_rate": 8.165530666051275e-07, "loss": 1.2786, "step": 296 }, { "epoch": 0.30469351115670684, "grad_norm": 2.7552783724459706, "learning_rate": 8.152632879950238e-07, "loss": 1.2365, "step": 297 }, { "epoch": 0.30571941523467555, "grad_norm": 2.6147443498413954, "learning_rate": 8.139700177505759e-07, "loss": 1.2179, "step": 298 }, { "epoch": 0.30674531931264426, "grad_norm": 2.606663274306208, "learning_rate": 8.126732701951351e-07, "loss": 1.3034, "step": 299 }, { "epoch": 0.30777122339061297, "grad_norm": 2.648199256292807, "learning_rate": 8.113730596905648e-07, "loss": 1.1252, "step": 300 }, { "epoch": 0.3087971274685817, "grad_norm": 2.4523122279421417, "learning_rate": 8.100694006370816e-07, "loss": 1.2627, "step": 301 }, { "epoch": 0.3098230315465504, "grad_norm": 2.966417644370533, "learning_rate": 8.087623074730959e-07, "loss": 1.2693, "step": 302 }, { "epoch": 0.3108489356245191, "grad_norm": 2.7468880750816647, "learning_rate": 8.07451794675052e-07, "loss": 1.2192, "step": 303 }, { "epoch": 0.3118748397024878, "grad_norm": 2.7445599934088327, "learning_rate": 8.061378767572673e-07, "loss": 1.2482, "step": 304 }, { "epoch": 0.3129007437804565, "grad_norm": 2.377263362356011, "learning_rate": 8.048205682717724e-07, "loss": 1.22, "step": 305 }, { "epoch": 0.3139266478584252, "grad_norm": 2.682567930380336, "learning_rate": 8.034998838081489e-07, "loss": 1.3312, "step": 306 }, { "epoch": 0.31495255193639393, "grad_norm": 2.6542762724022717, "learning_rate": 8.021758379933686e-07, "loss": 1.2426, "step": 307 }, { "epoch": 0.31597845601436264, "grad_norm": 2.691345504635165, "learning_rate": 8.008484454916316e-07, "loss": 1.195, "step": 308 }, { "epoch": 0.31700436009233135, "grad_norm": 2.490433627351295, "learning_rate": 7.995177210042028e-07, "loss": 1.1885, "step": 309 }, { "epoch": 0.31803026417030006, "grad_norm": 2.6751579195821242, "learning_rate": 7.981836792692507e-07, "loss": 1.201, "step": 310 }, { "epoch": 0.31905616824826877, "grad_norm": 2.670875208828667, "learning_rate": 7.968463350616825e-07, "loss": 1.2289, "step": 311 }, { "epoch": 0.3200820723262375, "grad_norm": 2.745027291047488, "learning_rate": 7.955057031929819e-07, "loss": 1.2896, "step": 312 }, { "epoch": 0.3211079764042062, "grad_norm": 2.561763884212081, "learning_rate": 7.941617985110442e-07, "loss": 1.3243, "step": 313 }, { "epoch": 0.3221338804821749, "grad_norm": 2.6240454275926344, "learning_rate": 7.928146359000117e-07, "loss": 1.3075, "step": 314 }, { "epoch": 0.3231597845601436, "grad_norm": 2.6273454218750434, "learning_rate": 7.914642302801097e-07, "loss": 1.3616, "step": 315 }, { "epoch": 0.3241856886381123, "grad_norm": 2.563951932815581, "learning_rate": 7.901105966074806e-07, "loss": 1.2714, "step": 316 }, { "epoch": 0.325211592716081, "grad_norm": 2.5822875744818563, "learning_rate": 7.887537498740187e-07, "loss": 1.3283, "step": 317 }, { "epoch": 0.32623749679404973, "grad_norm": 2.6143885876617627, "learning_rate": 7.873937051072035e-07, "loss": 1.253, "step": 318 }, { "epoch": 0.32726340087201844, "grad_norm": 2.647031579928053, "learning_rate": 7.860304773699338e-07, "loss": 1.3734, "step": 319 }, { "epoch": 0.32828930494998715, "grad_norm": 2.6072419451698625, "learning_rate": 7.846640817603607e-07, "loss": 1.2476, "step": 320 }, { "epoch": 0.3293152090279559, "grad_norm": 2.6913716325413737, "learning_rate": 7.83294533411721e-07, "loss": 1.2436, "step": 321 }, { "epoch": 0.3303411131059246, "grad_norm": 2.5669118480954913, "learning_rate": 7.819218474921679e-07, "loss": 1.2327, "step": 322 }, { "epoch": 0.33136701718389333, "grad_norm": 2.4680065253742063, "learning_rate": 7.805460392046053e-07, "loss": 1.2094, "step": 323 }, { "epoch": 0.33239292126186204, "grad_norm": 2.791361144884489, "learning_rate": 7.791671237865174e-07, "loss": 1.2787, "step": 324 }, { "epoch": 0.33341882533983075, "grad_norm": 2.3530874149345355, "learning_rate": 7.777851165098011e-07, "loss": 1.2706, "step": 325 }, { "epoch": 0.33444472941779946, "grad_norm": 2.7864142928485442, "learning_rate": 7.764000326805966e-07, "loss": 1.2568, "step": 326 }, { "epoch": 0.33547063349576817, "grad_norm": 2.744024330823426, "learning_rate": 7.75011887639118e-07, "loss": 1.1573, "step": 327 }, { "epoch": 0.3364965375737369, "grad_norm": 2.4274620710022696, "learning_rate": 7.736206967594827e-07, "loss": 1.2552, "step": 328 }, { "epoch": 0.3375224416517056, "grad_norm": 2.433652617983981, "learning_rate": 7.722264754495421e-07, "loss": 1.1821, "step": 329 }, { "epoch": 0.3385483457296743, "grad_norm": 2.6471120476788963, "learning_rate": 7.708292391507105e-07, "loss": 1.2633, "step": 330 }, { "epoch": 0.339574249807643, "grad_norm": 2.4674613719486542, "learning_rate": 7.694290033377938e-07, "loss": 1.1524, "step": 331 }, { "epoch": 0.3406001538856117, "grad_norm": 2.7466941946417003, "learning_rate": 7.680257835188186e-07, "loss": 1.2265, "step": 332 }, { "epoch": 0.3416260579635804, "grad_norm": 2.5000486774159514, "learning_rate": 7.666195952348606e-07, "loss": 1.2611, "step": 333 }, { "epoch": 0.34265196204154913, "grad_norm": 2.567371463839335, "learning_rate": 7.652104540598712e-07, "loss": 1.2303, "step": 334 }, { "epoch": 0.34367786611951784, "grad_norm": 2.5729629715661266, "learning_rate": 7.63798375600507e-07, "loss": 1.2566, "step": 335 }, { "epoch": 0.34470377019748655, "grad_norm": 2.647727174139296, "learning_rate": 7.623833754959551e-07, "loss": 1.2118, "step": 336 }, { "epoch": 0.34572967427545526, "grad_norm": 2.8141439244256894, "learning_rate": 7.609654694177612e-07, "loss": 1.2393, "step": 337 }, { "epoch": 0.34675557835342397, "grad_norm": 2.5047983163137335, "learning_rate": 7.595446730696553e-07, "loss": 1.2848, "step": 338 }, { "epoch": 0.3477814824313927, "grad_norm": 2.8575885947114954, "learning_rate": 7.581210021873778e-07, "loss": 1.2739, "step": 339 }, { "epoch": 0.3488073865093614, "grad_norm": 2.5879263426142303, "learning_rate": 7.56694472538506e-07, "loss": 1.2399, "step": 340 }, { "epoch": 0.3498332905873301, "grad_norm": 2.634462986237627, "learning_rate": 7.552650999222783e-07, "loss": 1.1886, "step": 341 }, { "epoch": 0.3508591946652988, "grad_norm": 2.644196413288821, "learning_rate": 7.538329001694199e-07, "loss": 1.189, "step": 342 }, { "epoch": 0.3518850987432675, "grad_norm": 2.46112345657162, "learning_rate": 7.523978891419678e-07, "loss": 1.3348, "step": 343 }, { "epoch": 0.3529110028212362, "grad_norm": 2.749240231393701, "learning_rate": 7.509600827330942e-07, "loss": 1.3394, "step": 344 }, { "epoch": 0.35393690689920493, "grad_norm": 2.690306971315971, "learning_rate": 7.495194968669311e-07, "loss": 1.1576, "step": 345 }, { "epoch": 0.35496281097717364, "grad_norm": 2.539756877499731, "learning_rate": 7.480761474983943e-07, "loss": 1.2425, "step": 346 }, { "epoch": 0.35598871505514235, "grad_norm": 2.794783648579977, "learning_rate": 7.466300506130052e-07, "loss": 1.2182, "step": 347 }, { "epoch": 0.35701461913311106, "grad_norm": 2.824643687827716, "learning_rate": 7.451812222267157e-07, "loss": 1.218, "step": 348 }, { "epoch": 0.35804052321107976, "grad_norm": 2.667873083406682, "learning_rate": 7.437296783857296e-07, "loss": 1.1692, "step": 349 }, { "epoch": 0.3590664272890485, "grad_norm": 2.671390395878333, "learning_rate": 7.422754351663251e-07, "loss": 1.1613, "step": 350 }, { "epoch": 0.3600923313670172, "grad_norm": 2.8795003735438813, "learning_rate": 7.408185086746766e-07, "loss": 1.2302, "step": 351 }, { "epoch": 0.3611182354449859, "grad_norm": 2.6611306550789684, "learning_rate": 7.39358915046677e-07, "loss": 1.3027, "step": 352 }, { "epoch": 0.3621441395229546, "grad_norm": 2.6035638980444933, "learning_rate": 7.378966704477584e-07, "loss": 1.2342, "step": 353 }, { "epoch": 0.3631700436009233, "grad_norm": 2.4479505443923197, "learning_rate": 7.364317910727127e-07, "loss": 1.2259, "step": 354 }, { "epoch": 0.364195947678892, "grad_norm": 2.795523604698995, "learning_rate": 7.349642931455131e-07, "loss": 1.1781, "step": 355 }, { "epoch": 0.3652218517568607, "grad_norm": 2.5499819208366006, "learning_rate": 7.334941929191343e-07, "loss": 1.2789, "step": 356 }, { "epoch": 0.36624775583482944, "grad_norm": 2.7893689673549686, "learning_rate": 7.320215066753722e-07, "loss": 1.2962, "step": 357 }, { "epoch": 0.36727365991279814, "grad_norm": 2.644819217736545, "learning_rate": 7.305462507246629e-07, "loss": 1.2008, "step": 358 }, { "epoch": 0.36829956399076685, "grad_norm": 2.7769763671178795, "learning_rate": 7.290684414059034e-07, "loss": 1.2561, "step": 359 }, { "epoch": 0.36932546806873556, "grad_norm": 2.7516891619949715, "learning_rate": 7.2758809508627e-07, "loss": 1.1101, "step": 360 }, { "epoch": 0.37035137214670427, "grad_norm": 2.426378655817816, "learning_rate": 7.261052281610367e-07, "loss": 1.2136, "step": 361 }, { "epoch": 0.371377276224673, "grad_norm": 2.6154616984297414, "learning_rate": 7.246198570533944e-07, "loss": 1.2197, "step": 362 }, { "epoch": 0.3724031803026417, "grad_norm": 2.5360984469645467, "learning_rate": 7.231319982142679e-07, "loss": 1.1605, "step": 363 }, { "epoch": 0.3734290843806104, "grad_norm": 2.825045376287288, "learning_rate": 7.216416681221353e-07, "loss": 1.2416, "step": 364 }, { "epoch": 0.3744549884585791, "grad_norm": 2.5276250456119844, "learning_rate": 7.201488832828438e-07, "loss": 1.19, "step": 365 }, { "epoch": 0.3754808925365478, "grad_norm": 2.4928208511714627, "learning_rate": 7.186536602294278e-07, "loss": 1.2168, "step": 366 }, { "epoch": 0.3765067966145165, "grad_norm": 2.6143894885373102, "learning_rate": 7.171560155219256e-07, "loss": 1.2404, "step": 367 }, { "epoch": 0.37753270069248523, "grad_norm": 2.60017799652903, "learning_rate": 7.156559657471966e-07, "loss": 1.2826, "step": 368 }, { "epoch": 0.37855860477045394, "grad_norm": 2.6824928604628564, "learning_rate": 7.141535275187363e-07, "loss": 1.1865, "step": 369 }, { "epoch": 0.37958450884842265, "grad_norm": 2.603460949625, "learning_rate": 7.126487174764935e-07, "loss": 1.1988, "step": 370 }, { "epoch": 0.38061041292639136, "grad_norm": 2.679412906737572, "learning_rate": 7.11141552286685e-07, "loss": 1.2827, "step": 371 }, { "epoch": 0.38163631700436007, "grad_norm": 2.9611932859479055, "learning_rate": 7.096320486416124e-07, "loss": 1.2008, "step": 372 }, { "epoch": 0.3826622210823288, "grad_norm": 2.748214027279323, "learning_rate": 7.081202232594758e-07, "loss": 1.2062, "step": 373 }, { "epoch": 0.3836881251602975, "grad_norm": 2.610256542655334, "learning_rate": 7.06606092884189e-07, "loss": 1.2218, "step": 374 }, { "epoch": 0.3847140292382662, "grad_norm": 2.568752573794102, "learning_rate": 7.050896742851952e-07, "loss": 1.259, "step": 375 }, { "epoch": 0.3857399333162349, "grad_norm": 2.729994050097171, "learning_rate": 7.035709842572792e-07, "loss": 1.1736, "step": 376 }, { "epoch": 0.3867658373942036, "grad_norm": 2.865709205643247, "learning_rate": 7.020500396203837e-07, "loss": 1.2853, "step": 377 }, { "epoch": 0.3877917414721723, "grad_norm": 2.6383244403757065, "learning_rate": 7.005268572194207e-07, "loss": 1.2394, "step": 378 }, { "epoch": 0.3888176455501411, "grad_norm": 2.756577015435794, "learning_rate": 6.990014539240873e-07, "loss": 1.1993, "step": 379 }, { "epoch": 0.3898435496281098, "grad_norm": 2.5090775530327805, "learning_rate": 6.974738466286765e-07, "loss": 1.2351, "step": 380 }, { "epoch": 0.3908694537060785, "grad_norm": 2.7812691019728093, "learning_rate": 6.959440522518923e-07, "loss": 1.199, "step": 381 }, { "epoch": 0.3918953577840472, "grad_norm": 2.546674174824266, "learning_rate": 6.944120877366604e-07, "loss": 1.154, "step": 382 }, { "epoch": 0.3929212618620159, "grad_norm": 3.0015148003584557, "learning_rate": 6.928779700499419e-07, "loss": 1.2702, "step": 383 }, { "epoch": 0.39394716593998463, "grad_norm": 2.5008648231818627, "learning_rate": 6.913417161825449e-07, "loss": 1.2137, "step": 384 }, { "epoch": 0.39497307001795334, "grad_norm": 2.4588983698528772, "learning_rate": 6.898033431489361e-07, "loss": 1.1406, "step": 385 }, { "epoch": 0.39599897409592205, "grad_norm": 2.527134330865664, "learning_rate": 6.882628679870531e-07, "loss": 1.3066, "step": 386 }, { "epoch": 0.39702487817389076, "grad_norm": 2.6472686614664225, "learning_rate": 6.867203077581145e-07, "loss": 1.3113, "step": 387 }, { "epoch": 0.39805078225185947, "grad_norm": 2.764798950791151, "learning_rate": 6.851756795464323e-07, "loss": 1.2906, "step": 388 }, { "epoch": 0.3990766863298282, "grad_norm": 2.7405389624240653, "learning_rate": 6.836290004592213e-07, "loss": 1.2001, "step": 389 }, { "epoch": 0.4001025904077969, "grad_norm": 2.631861163892539, "learning_rate": 6.820802876264111e-07, "loss": 1.1641, "step": 390 }, { "epoch": 0.4011284944857656, "grad_norm": 2.6174482541499136, "learning_rate": 6.805295582004551e-07, "loss": 1.2187, "step": 391 }, { "epoch": 0.4021543985637343, "grad_norm": 2.852360371699908, "learning_rate": 6.789768293561413e-07, "loss": 1.308, "step": 392 }, { "epoch": 0.403180302641703, "grad_norm": 2.5465668197579743, "learning_rate": 6.774221182904017e-07, "loss": 1.2074, "step": 393 }, { "epoch": 0.4042062067196717, "grad_norm": 2.864528470906807, "learning_rate": 6.758654422221224e-07, "loss": 1.1637, "step": 394 }, { "epoch": 0.40523211079764043, "grad_norm": 2.8379197072181612, "learning_rate": 6.743068183919519e-07, "loss": 1.222, "step": 395 }, { "epoch": 0.40625801487560914, "grad_norm": 2.62449855028073, "learning_rate": 6.727462640621112e-07, "loss": 1.279, "step": 396 }, { "epoch": 0.40728391895357785, "grad_norm": 2.5226829698942397, "learning_rate": 6.711837965162019e-07, "loss": 1.2924, "step": 397 }, { "epoch": 0.40830982303154656, "grad_norm": 2.6680293582268715, "learning_rate": 6.69619433059015e-07, "loss": 1.203, "step": 398 }, { "epoch": 0.40933572710951527, "grad_norm": 2.6267175358172272, "learning_rate": 6.680531910163398e-07, "loss": 1.2044, "step": 399 }, { "epoch": 0.410361631187484, "grad_norm": 2.798539704369877, "learning_rate": 6.664850877347705e-07, "loss": 1.2283, "step": 400 }, { "epoch": 0.4113875352654527, "grad_norm": 2.706328663367393, "learning_rate": 6.649151405815161e-07, "loss": 1.2072, "step": 401 }, { "epoch": 0.4124134393434214, "grad_norm": 2.5865368220778735, "learning_rate": 6.633433669442064e-07, "loss": 1.2156, "step": 402 }, { "epoch": 0.4134393434213901, "grad_norm": 2.57901369585107, "learning_rate": 6.617697842307004e-07, "loss": 1.1958, "step": 403 }, { "epoch": 0.4144652474993588, "grad_norm": 2.7016388144398267, "learning_rate": 6.601944098688927e-07, "loss": 1.195, "step": 404 }, { "epoch": 0.4154911515773275, "grad_norm": 2.556707832936113, "learning_rate": 6.586172613065215e-07, "loss": 1.1804, "step": 405 }, { "epoch": 0.41651705565529623, "grad_norm": 2.636733912969758, "learning_rate": 6.570383560109745e-07, "loss": 1.2457, "step": 406 }, { "epoch": 0.41754295973326494, "grad_norm": 2.807001379119985, "learning_rate": 6.554577114690955e-07, "loss": 1.3283, "step": 407 }, { "epoch": 0.41856886381123365, "grad_norm": 2.5257169644640998, "learning_rate": 6.538753451869913e-07, "loss": 1.2884, "step": 408 }, { "epoch": 0.41959476788920236, "grad_norm": 2.5327516189035206, "learning_rate": 6.522912746898379e-07, "loss": 1.2235, "step": 409 }, { "epoch": 0.42062067196717107, "grad_norm": 2.90569340656041, "learning_rate": 6.507055175216849e-07, "loss": 1.1792, "step": 410 }, { "epoch": 0.4216465760451398, "grad_norm": 2.5114377824774317, "learning_rate": 6.491180912452631e-07, "loss": 1.3252, "step": 411 }, { "epoch": 0.4226724801231085, "grad_norm": 2.8201576436584963, "learning_rate": 6.475290134417891e-07, "loss": 1.2259, "step": 412 }, { "epoch": 0.4236983842010772, "grad_norm": 2.5655086880724065, "learning_rate": 6.459383017107703e-07, "loss": 1.2522, "step": 413 }, { "epoch": 0.4247242882790459, "grad_norm": 2.7241886166530165, "learning_rate": 6.443459736698105e-07, "loss": 1.1883, "step": 414 }, { "epoch": 0.4257501923570146, "grad_norm": 2.777112809092128, "learning_rate": 6.427520469544147e-07, "loss": 1.2589, "step": 415 }, { "epoch": 0.4267760964349833, "grad_norm": 2.5197028770911274, "learning_rate": 6.41156539217794e-07, "loss": 1.1807, "step": 416 }, { "epoch": 0.42780200051295203, "grad_norm": 2.7452576494954166, "learning_rate": 6.395594681306688e-07, "loss": 1.1969, "step": 417 }, { "epoch": 0.42882790459092074, "grad_norm": 2.419748401721261, "learning_rate": 6.379608513810753e-07, "loss": 1.2156, "step": 418 }, { "epoch": 0.42985380866888945, "grad_norm": 2.6007536361681347, "learning_rate": 6.363607066741672e-07, "loss": 1.1682, "step": 419 }, { "epoch": 0.43087971274685816, "grad_norm": 2.7690218693992614, "learning_rate": 6.347590517320217e-07, "loss": 1.2596, "step": 420 }, { "epoch": 0.43190561682482687, "grad_norm": 2.8021339088053887, "learning_rate": 6.331559042934418e-07, "loss": 1.2384, "step": 421 }, { "epoch": 0.4329315209027956, "grad_norm": 2.5407498593535345, "learning_rate": 6.315512821137606e-07, "loss": 1.1624, "step": 422 }, { "epoch": 0.4339574249807643, "grad_norm": 2.669181018356921, "learning_rate": 6.299452029646442e-07, "loss": 1.1831, "step": 423 }, { "epoch": 0.434983329058733, "grad_norm": 2.6902513257905567, "learning_rate": 6.28337684633895e-07, "loss": 1.1293, "step": 424 }, { "epoch": 0.4360092331367017, "grad_norm": 2.600350864738873, "learning_rate": 6.267287449252552e-07, "loss": 1.1606, "step": 425 }, { "epoch": 0.4370351372146704, "grad_norm": 2.646078066207826, "learning_rate": 6.251184016582088e-07, "loss": 1.2734, "step": 426 }, { "epoch": 0.4380610412926391, "grad_norm": 2.7517987447445904, "learning_rate": 6.235066726677845e-07, "loss": 1.2104, "step": 427 }, { "epoch": 0.43908694537060783, "grad_norm": 2.7451282679247764, "learning_rate": 6.218935758043586e-07, "loss": 1.1119, "step": 428 }, { "epoch": 0.44011284944857654, "grad_norm": 2.5657396333316105, "learning_rate": 6.202791289334571e-07, "loss": 1.2813, "step": 429 }, { "epoch": 0.44113875352654525, "grad_norm": 2.521858145927954, "learning_rate": 6.186633499355575e-07, "loss": 1.3072, "step": 430 }, { "epoch": 0.44216465760451396, "grad_norm": 2.7999275422555523, "learning_rate": 6.170462567058908e-07, "loss": 1.1443, "step": 431 }, { "epoch": 0.44319056168248266, "grad_norm": 2.8231518599718988, "learning_rate": 6.15427867154244e-07, "loss": 1.2586, "step": 432 }, { "epoch": 0.4442164657604514, "grad_norm": 2.6163010604612174, "learning_rate": 6.138081992047609e-07, "loss": 1.2694, "step": 433 }, { "epoch": 0.4452423698384201, "grad_norm": 2.4133746282742075, "learning_rate": 6.121872707957441e-07, "loss": 1.212, "step": 434 }, { "epoch": 0.4462682739163888, "grad_norm": 2.6353803046715405, "learning_rate": 6.105650998794559e-07, "loss": 1.2294, "step": 435 }, { "epoch": 0.4472941779943575, "grad_norm": 2.6224808581968744, "learning_rate": 6.089417044219201e-07, "loss": 1.2076, "step": 436 }, { "epoch": 0.44832008207232626, "grad_norm": 2.273912209414138, "learning_rate": 6.073171024027226e-07, "loss": 1.2212, "step": 437 }, { "epoch": 0.449345986150295, "grad_norm": 2.5146410294881605, "learning_rate": 6.056913118148121e-07, "loss": 1.2513, "step": 438 }, { "epoch": 0.4503718902282637, "grad_norm": 2.513841938933036, "learning_rate": 6.040643506643012e-07, "loss": 1.153, "step": 439 }, { "epoch": 0.4513977943062324, "grad_norm": 2.644906674789187, "learning_rate": 6.024362369702668e-07, "loss": 1.1479, "step": 440 }, { "epoch": 0.4524236983842011, "grad_norm": 2.7660556586821246, "learning_rate": 6.008069887645503e-07, "loss": 1.2301, "step": 441 }, { "epoch": 0.4534496024621698, "grad_norm": 2.4452998667055135, "learning_rate": 5.991766240915589e-07, "loss": 1.1561, "step": 442 }, { "epoch": 0.4544755065401385, "grad_norm": 2.5637907822415196, "learning_rate": 5.975451610080642e-07, "loss": 1.2151, "step": 443 }, { "epoch": 0.4555014106181072, "grad_norm": 2.634126808482155, "learning_rate": 5.959126175830033e-07, "loss": 1.2055, "step": 444 }, { "epoch": 0.45652731469607594, "grad_norm": 2.6995738192233323, "learning_rate": 5.942790118972786e-07, "loss": 1.1872, "step": 445 }, { "epoch": 0.45755321877404465, "grad_norm": 2.600607297977979, "learning_rate": 5.926443620435571e-07, "loss": 1.1858, "step": 446 }, { "epoch": 0.45857912285201335, "grad_norm": 2.7529683044640203, "learning_rate": 5.910086861260706e-07, "loss": 1.3244, "step": 447 }, { "epoch": 0.45960502692998206, "grad_norm": 2.4542539077385106, "learning_rate": 5.893720022604142e-07, "loss": 1.1912, "step": 448 }, { "epoch": 0.4606309310079508, "grad_norm": 2.7213636554229996, "learning_rate": 5.877343285733472e-07, "loss": 1.2338, "step": 449 }, { "epoch": 0.4616568350859195, "grad_norm": 2.5317246559946627, "learning_rate": 5.860956832025906e-07, "loss": 1.2244, "step": 450 }, { "epoch": 0.4626827391638882, "grad_norm": 2.7263289087726186, "learning_rate": 5.844560842966278e-07, "loss": 1.2377, "step": 451 }, { "epoch": 0.4637086432418569, "grad_norm": 2.5974508387893436, "learning_rate": 5.828155500145024e-07, "loss": 1.2219, "step": 452 }, { "epoch": 0.4647345473198256, "grad_norm": 2.492480904755007, "learning_rate": 5.811740985256179e-07, "loss": 1.2269, "step": 453 }, { "epoch": 0.4657604513977943, "grad_norm": 2.713260026567852, "learning_rate": 5.79531748009536e-07, "loss": 1.2114, "step": 454 }, { "epoch": 0.466786355475763, "grad_norm": 2.567056191750939, "learning_rate": 5.778885166557752e-07, "loss": 1.1719, "step": 455 }, { "epoch": 0.46781225955373174, "grad_norm": 2.2856765271337007, "learning_rate": 5.7624442266361e-07, "loss": 1.144, "step": 456 }, { "epoch": 0.46883816363170044, "grad_norm": 2.4580143502304552, "learning_rate": 5.745994842418683e-07, "loss": 1.2335, "step": 457 }, { "epoch": 0.46986406770966915, "grad_norm": 2.343512884988581, "learning_rate": 5.729537196087308e-07, "loss": 1.1682, "step": 458 }, { "epoch": 0.47088997178763786, "grad_norm": 2.5556051709696335, "learning_rate": 5.713071469915285e-07, "loss": 1.2655, "step": 459 }, { "epoch": 0.47191587586560657, "grad_norm": 2.4824036483855143, "learning_rate": 5.696597846265411e-07, "loss": 1.1478, "step": 460 }, { "epoch": 0.4729417799435753, "grad_norm": 2.6655990103099634, "learning_rate": 5.680116507587949e-07, "loss": 1.3385, "step": 461 }, { "epoch": 0.473967684021544, "grad_norm": 2.8244078669617014, "learning_rate": 5.663627636418609e-07, "loss": 1.19, "step": 462 }, { "epoch": 0.4749935880995127, "grad_norm": 2.4429409282936567, "learning_rate": 5.647131415376528e-07, "loss": 1.3615, "step": 463 }, { "epoch": 0.4760194921774814, "grad_norm": 2.9996249497285703, "learning_rate": 5.630628027162243e-07, "loss": 1.1419, "step": 464 }, { "epoch": 0.4770453962554501, "grad_norm": 2.938422500891204, "learning_rate": 5.614117654555666e-07, "loss": 1.232, "step": 465 }, { "epoch": 0.4780713003334188, "grad_norm": 2.7670713948237218, "learning_rate": 5.597600480414068e-07, "loss": 1.2346, "step": 466 }, { "epoch": 0.47909720441138753, "grad_norm": 2.794141488694239, "learning_rate": 5.58107668767005e-07, "loss": 1.2621, "step": 467 }, { "epoch": 0.48012310848935624, "grad_norm": 3.239639832399456, "learning_rate": 5.564546459329509e-07, "loss": 1.1259, "step": 468 }, { "epoch": 0.48114901256732495, "grad_norm": 2.557225226114097, "learning_rate": 5.548009978469626e-07, "loss": 1.2987, "step": 469 }, { "epoch": 0.48217491664529366, "grad_norm": 2.683440752301172, "learning_rate": 5.531467428236827e-07, "loss": 1.2546, "step": 470 }, { "epoch": 0.48320082072326237, "grad_norm": 2.508947166543078, "learning_rate": 5.514918991844758e-07, "loss": 1.2695, "step": 471 }, { "epoch": 0.4842267248012311, "grad_norm": 2.648393508091393, "learning_rate": 5.498364852572255e-07, "loss": 1.2868, "step": 472 }, { "epoch": 0.4852526288791998, "grad_norm": 2.302753325306599, "learning_rate": 5.481805193761315e-07, "loss": 1.1813, "step": 473 }, { "epoch": 0.4862785329571685, "grad_norm": 2.529547436028065, "learning_rate": 5.465240198815072e-07, "loss": 1.1878, "step": 474 }, { "epoch": 0.4873044370351372, "grad_norm": 2.4916972010131286, "learning_rate": 5.448670051195751e-07, "loss": 1.2886, "step": 475 }, { "epoch": 0.4883303411131059, "grad_norm": 2.5311295821586954, "learning_rate": 5.432094934422648e-07, "loss": 1.1612, "step": 476 }, { "epoch": 0.4893562451910746, "grad_norm": 2.5635956429429174, "learning_rate": 5.415515032070091e-07, "loss": 1.2791, "step": 477 }, { "epoch": 0.49038214926904333, "grad_norm": 2.500059103965476, "learning_rate": 5.398930527765415e-07, "loss": 1.1808, "step": 478 }, { "epoch": 0.49140805334701204, "grad_norm": 2.5521862908107154, "learning_rate": 5.38234160518692e-07, "loss": 1.2029, "step": 479 }, { "epoch": 0.49243395742498075, "grad_norm": 2.643620076098173, "learning_rate": 5.365748448061837e-07, "loss": 1.1923, "step": 480 }, { "epoch": 0.49345986150294946, "grad_norm": 2.5902182995299294, "learning_rate": 5.349151240164303e-07, "loss": 1.2226, "step": 481 }, { "epoch": 0.49448576558091817, "grad_norm": 2.768401067358843, "learning_rate": 5.332550165313312e-07, "loss": 1.174, "step": 482 }, { "epoch": 0.4955116696588869, "grad_norm": 2.557685695100021, "learning_rate": 5.315945407370686e-07, "loss": 1.1896, "step": 483 }, { "epoch": 0.4965375737368556, "grad_norm": 2.775440468842976, "learning_rate": 5.299337150239041e-07, "loss": 1.2212, "step": 484 }, { "epoch": 0.4975634778148243, "grad_norm": 2.9224945088224135, "learning_rate": 5.282725577859748e-07, "loss": 1.2949, "step": 485 }, { "epoch": 0.498589381892793, "grad_norm": 2.802770455656179, "learning_rate": 5.266110874210892e-07, "loss": 1.2073, "step": 486 }, { "epoch": 0.4996152859707617, "grad_norm": 2.824244076395358, "learning_rate": 5.249493223305244e-07, "loss": 1.1947, "step": 487 }, { "epoch": 0.5006411900487304, "grad_norm": 2.6923948351193507, "learning_rate": 5.232872809188208e-07, "loss": 1.2629, "step": 488 }, { "epoch": 0.5016670941266992, "grad_norm": 2.3968822015650386, "learning_rate": 5.216249815935797e-07, "loss": 1.2121, "step": 489 }, { "epoch": 0.5026929982046678, "grad_norm": 2.560325439840742, "learning_rate": 5.199624427652588e-07, "loss": 1.1595, "step": 490 }, { "epoch": 0.5037189022826366, "grad_norm": 2.506058680784885, "learning_rate": 5.182996828469683e-07, "loss": 1.3033, "step": 491 }, { "epoch": 0.5047448063606053, "grad_norm": 2.607500294914118, "learning_rate": 5.166367202542671e-07, "loss": 1.2084, "step": 492 }, { "epoch": 0.505770710438574, "grad_norm": 2.641008107839817, "learning_rate": 5.149735734049587e-07, "loss": 1.2329, "step": 493 }, { "epoch": 0.5067966145165427, "grad_norm": 2.541793580004398, "learning_rate": 5.133102607188874e-07, "loss": 1.2498, "step": 494 }, { "epoch": 0.5078225185945114, "grad_norm": 2.5646122609963187, "learning_rate": 5.11646800617734e-07, "loss": 1.1542, "step": 495 }, { "epoch": 0.5088484226724801, "grad_norm": 2.486587957501608, "learning_rate": 5.099832115248123e-07, "loss": 1.217, "step": 496 }, { "epoch": 0.5098743267504489, "grad_norm": 2.675452370610441, "learning_rate": 5.083195118648643e-07, "loss": 1.2943, "step": 497 }, { "epoch": 0.5109002308284175, "grad_norm": 2.5503918368054745, "learning_rate": 5.066557200638569e-07, "loss": 1.2284, "step": 498 }, { "epoch": 0.5119261349063863, "grad_norm": 2.6266723080797028, "learning_rate": 5.049918545487774e-07, "loss": 1.299, "step": 499 }, { "epoch": 0.5129520389843549, "grad_norm": 2.6465331754588055, "learning_rate": 5.033279337474294e-07, "loss": 1.252, "step": 500 }, { "epoch": 0.5129520389843549, "eval_uground_MCTS_chains_SFT_val_loss": 1.2743316888809204, "eval_uground_MCTS_chains_SFT_val_runtime": 211.067, "eval_uground_MCTS_chains_SFT_val_samples_per_second": 8.618, "eval_uground_MCTS_chains_SFT_val_steps_per_second": 1.08, "step": 500 }, { "epoch": 0.5139779430623237, "grad_norm": 3.0116752114032743, "learning_rate": 5.016639760882288e-07, "loss": 1.2094, "step": 501 }, { "epoch": 0.5150038471402923, "grad_norm": 2.5726062219813244, "learning_rate": 5e-07, "loss": 1.2787, "step": 502 }, { "epoch": 0.5160297512182611, "grad_norm": 2.5559171539739585, "learning_rate": 4.98336023911771e-07, "loss": 1.1275, "step": 503 }, { "epoch": 0.5170556552962298, "grad_norm": 2.652830973325525, "learning_rate": 4.966720662525707e-07, "loss": 1.3168, "step": 504 }, { "epoch": 0.5180815593741985, "grad_norm": 2.700794383275604, "learning_rate": 4.950081454512225e-07, "loss": 1.299, "step": 505 }, { "epoch": 0.5191074634521672, "grad_norm": 2.784294521735747, "learning_rate": 4.933442799361431e-07, "loss": 1.2955, "step": 506 }, { "epoch": 0.520133367530136, "grad_norm": 2.4208374454069337, "learning_rate": 4.916804881351357e-07, "loss": 1.1661, "step": 507 }, { "epoch": 0.5211592716081046, "grad_norm": 2.569371008392117, "learning_rate": 4.900167884751877e-07, "loss": 1.2028, "step": 508 }, { "epoch": 0.5221851756860734, "grad_norm": 2.7873211845718027, "learning_rate": 4.883531993822659e-07, "loss": 1.2652, "step": 509 }, { "epoch": 0.523211079764042, "grad_norm": 2.6113158053359333, "learning_rate": 4.866897392811126e-07, "loss": 1.3185, "step": 510 }, { "epoch": 0.5242369838420108, "grad_norm": 2.3767956668686385, "learning_rate": 4.850264265950413e-07, "loss": 1.2051, "step": 511 }, { "epoch": 0.5252628879199794, "grad_norm": 2.884877451969795, "learning_rate": 4.833632797457331e-07, "loss": 1.1469, "step": 512 }, { "epoch": 0.5262887919979482, "grad_norm": 2.571309281671942, "learning_rate": 4.817003171530317e-07, "loss": 1.2533, "step": 513 }, { "epoch": 0.5273146960759169, "grad_norm": 2.8580251389569633, "learning_rate": 4.800375572347413e-07, "loss": 1.2175, "step": 514 }, { "epoch": 0.5283406001538856, "grad_norm": 2.657024380838802, "learning_rate": 4.783750184064203e-07, "loss": 1.2324, "step": 515 }, { "epoch": 0.5293665042318543, "grad_norm": 2.4907546697657095, "learning_rate": 4.767127190811793e-07, "loss": 1.1992, "step": 516 }, { "epoch": 0.530392408309823, "grad_norm": 2.782096492852977, "learning_rate": 4.750506776694757e-07, "loss": 1.2924, "step": 517 }, { "epoch": 0.5314183123877917, "grad_norm": 2.427383520783994, "learning_rate": 4.7338891257891076e-07, "loss": 1.1579, "step": 518 }, { "epoch": 0.5324442164657605, "grad_norm": 2.5770073682499484, "learning_rate": 4.717274422140252e-07, "loss": 1.2246, "step": 519 }, { "epoch": 0.5334701205437291, "grad_norm": 2.4572503599210576, "learning_rate": 4.7006628497609604e-07, "loss": 1.2358, "step": 520 }, { "epoch": 0.5344960246216979, "grad_norm": 2.6448246950333254, "learning_rate": 4.6840545926293146e-07, "loss": 1.2461, "step": 521 }, { "epoch": 0.5355219286996665, "grad_norm": 2.5891258589659247, "learning_rate": 4.6674498346866887e-07, "loss": 1.1114, "step": 522 }, { "epoch": 0.5365478327776353, "grad_norm": 2.6559153198303678, "learning_rate": 4.650848759835697e-07, "loss": 1.2646, "step": 523 }, { "epoch": 0.5375737368556041, "grad_norm": 2.673133305478356, "learning_rate": 4.634251551938161e-07, "loss": 1.2122, "step": 524 }, { "epoch": 0.5385996409335727, "grad_norm": 2.68538453093342, "learning_rate": 4.6176583948130803e-07, "loss": 1.2235, "step": 525 }, { "epoch": 0.5396255450115415, "grad_norm": 2.440546575305456, "learning_rate": 4.601069472234584e-07, "loss": 1.2358, "step": 526 }, { "epoch": 0.5406514490895101, "grad_norm": 2.6044073884926924, "learning_rate": 4.584484967929909e-07, "loss": 1.2677, "step": 527 }, { "epoch": 0.5416773531674789, "grad_norm": 2.5476463792153305, "learning_rate": 4.5679050655773534e-07, "loss": 1.1461, "step": 528 }, { "epoch": 0.5427032572454475, "grad_norm": 2.436273544492887, "learning_rate": 4.5513299488042487e-07, "loss": 1.2116, "step": 529 }, { "epoch": 0.5437291613234163, "grad_norm": 2.5753873910087, "learning_rate": 4.5347598011849275e-07, "loss": 1.1925, "step": 530 }, { "epoch": 0.544755065401385, "grad_norm": 2.522315717983984, "learning_rate": 4.5181948062386846e-07, "loss": 1.2488, "step": 531 }, { "epoch": 0.5457809694793537, "grad_norm": 2.4561592505381777, "learning_rate": 4.501635147427745e-07, "loss": 1.1357, "step": 532 }, { "epoch": 0.5468068735573224, "grad_norm": 2.6167475341297375, "learning_rate": 4.485081008155243e-07, "loss": 1.1628, "step": 533 }, { "epoch": 0.5478327776352911, "grad_norm": 2.318388883687291, "learning_rate": 4.4685325717631734e-07, "loss": 1.1911, "step": 534 }, { "epoch": 0.5488586817132598, "grad_norm": 2.551623209073283, "learning_rate": 4.4519900215303733e-07, "loss": 1.2293, "step": 535 }, { "epoch": 0.5498845857912286, "grad_norm": 3.0020726476405875, "learning_rate": 4.4354535406704907e-07, "loss": 1.1972, "step": 536 }, { "epoch": 0.5509104898691972, "grad_norm": 2.4079569221490393, "learning_rate": 4.418923312329952e-07, "loss": 1.126, "step": 537 }, { "epoch": 0.551936393947166, "grad_norm": 2.6031632771818893, "learning_rate": 4.4023995195859313e-07, "loss": 1.2331, "step": 538 }, { "epoch": 0.5529622980251346, "grad_norm": 2.6258547925667353, "learning_rate": 4.385882345444335e-07, "loss": 1.2573, "step": 539 }, { "epoch": 0.5539882021031034, "grad_norm": 2.510150905831169, "learning_rate": 4.369371972837757e-07, "loss": 1.26, "step": 540 }, { "epoch": 0.555014106181072, "grad_norm": 2.7544197615747406, "learning_rate": 4.352868584623471e-07, "loss": 1.2229, "step": 541 }, { "epoch": 0.5560400102590408, "grad_norm": 2.6505858678051863, "learning_rate": 4.33637236358139e-07, "loss": 1.1452, "step": 542 }, { "epoch": 0.5570659143370095, "grad_norm": 2.61741589849154, "learning_rate": 4.319883492412051e-07, "loss": 1.2923, "step": 543 }, { "epoch": 0.5580918184149782, "grad_norm": 2.7258410404480182, "learning_rate": 4.30340215373459e-07, "loss": 1.1754, "step": 544 }, { "epoch": 0.5591177224929469, "grad_norm": 2.551873559715973, "learning_rate": 4.286928530084714e-07, "loss": 1.2079, "step": 545 }, { "epoch": 0.5601436265709157, "grad_norm": 2.7405100803593885, "learning_rate": 4.2704628039126914e-07, "loss": 1.1972, "step": 546 }, { "epoch": 0.5611695306488843, "grad_norm": 2.8909985376686684, "learning_rate": 4.2540051575813165e-07, "loss": 1.2332, "step": 547 }, { "epoch": 0.5621954347268531, "grad_norm": 2.78196270770764, "learning_rate": 4.2375557733639006e-07, "loss": 1.197, "step": 548 }, { "epoch": 0.5632213388048217, "grad_norm": 2.5697422125069784, "learning_rate": 4.2211148334422465e-07, "loss": 1.2079, "step": 549 }, { "epoch": 0.5642472428827905, "grad_norm": 2.6354651408048966, "learning_rate": 4.20468251990464e-07, "loss": 1.2577, "step": 550 }, { "epoch": 0.5652731469607591, "grad_norm": 2.5724042590969938, "learning_rate": 4.1882590147438197e-07, "loss": 1.2442, "step": 551 }, { "epoch": 0.5662990510387279, "grad_norm": 2.830434169158481, "learning_rate": 4.1718444998549756e-07, "loss": 1.2195, "step": 552 }, { "epoch": 0.5673249551166966, "grad_norm": 2.663741290125766, "learning_rate": 4.1554391570337223e-07, "loss": 1.3125, "step": 553 }, { "epoch": 0.5683508591946653, "grad_norm": 2.5970031739016846, "learning_rate": 4.1390431679740953e-07, "loss": 1.1242, "step": 554 }, { "epoch": 0.569376763272634, "grad_norm": 2.768091392231377, "learning_rate": 4.1226567142665287e-07, "loss": 1.2086, "step": 555 }, { "epoch": 0.5704026673506027, "grad_norm": 2.6640670169831173, "learning_rate": 4.106279977395858e-07, "loss": 1.2559, "step": 556 }, { "epoch": 0.5714285714285714, "grad_norm": 2.53235756296073, "learning_rate": 4.0899131387392943e-07, "loss": 1.1748, "step": 557 }, { "epoch": 0.5724544755065402, "grad_norm": 2.682901944624015, "learning_rate": 4.073556379564429e-07, "loss": 1.1939, "step": 558 }, { "epoch": 0.5734803795845088, "grad_norm": 2.5999213234263534, "learning_rate": 4.057209881027214e-07, "loss": 1.1405, "step": 559 }, { "epoch": 0.5745062836624776, "grad_norm": 2.8359536761627755, "learning_rate": 4.0408738241699685e-07, "loss": 1.2553, "step": 560 }, { "epoch": 0.5755321877404462, "grad_norm": 2.478862534452602, "learning_rate": 4.0245483899193586e-07, "loss": 1.1883, "step": 561 }, { "epoch": 0.576558091818415, "grad_norm": 2.9624404297867297, "learning_rate": 4.00823375908441e-07, "loss": 1.3241, "step": 562 }, { "epoch": 0.5775839958963837, "grad_norm": 2.7214306290037467, "learning_rate": 3.991930112354496e-07, "loss": 1.1486, "step": 563 }, { "epoch": 0.5786098999743524, "grad_norm": 2.5096893205743283, "learning_rate": 3.9756376302973325e-07, "loss": 1.2576, "step": 564 }, { "epoch": 0.5796358040523211, "grad_norm": 2.571283901595518, "learning_rate": 3.959356493356989e-07, "loss": 1.1346, "step": 565 }, { "epoch": 0.5806617081302898, "grad_norm": 2.644842706151376, "learning_rate": 3.943086881851878e-07, "loss": 1.1977, "step": 566 }, { "epoch": 0.5816876122082585, "grad_norm": 2.807481417987691, "learning_rate": 3.9268289759727736e-07, "loss": 1.2563, "step": 567 }, { "epoch": 0.5827135162862273, "grad_norm": 2.864431606411388, "learning_rate": 3.9105829557807973e-07, "loss": 1.2385, "step": 568 }, { "epoch": 0.5837394203641959, "grad_norm": 2.7536651973511312, "learning_rate": 3.894349001205441e-07, "loss": 1.175, "step": 569 }, { "epoch": 0.5847653244421647, "grad_norm": 2.557792248642305, "learning_rate": 3.87812729204256e-07, "loss": 1.263, "step": 570 }, { "epoch": 0.5857912285201333, "grad_norm": 2.6951777375825947, "learning_rate": 3.861918007952393e-07, "loss": 1.1879, "step": 571 }, { "epoch": 0.5868171325981021, "grad_norm": 2.5039948130909595, "learning_rate": 3.84572132845756e-07, "loss": 1.1872, "step": 572 }, { "epoch": 0.5878430366760707, "grad_norm": 2.744912722045592, "learning_rate": 3.8295374329410926e-07, "loss": 1.0991, "step": 573 }, { "epoch": 0.5888689407540395, "grad_norm": 2.7544422679030127, "learning_rate": 3.8133665006444255e-07, "loss": 1.2738, "step": 574 }, { "epoch": 0.5898948448320082, "grad_norm": 2.696877329145256, "learning_rate": 3.7972087106654296e-07, "loss": 1.1795, "step": 575 }, { "epoch": 0.5909207489099769, "grad_norm": 2.6792688172902386, "learning_rate": 3.781064241956414e-07, "loss": 1.2181, "step": 576 }, { "epoch": 0.5919466529879456, "grad_norm": 2.7459246071356636, "learning_rate": 3.7649332733221575e-07, "loss": 1.2205, "step": 577 }, { "epoch": 0.5929725570659143, "grad_norm": 2.5648768805482374, "learning_rate": 3.7488159834179135e-07, "loss": 1.1505, "step": 578 }, { "epoch": 0.593998461143883, "grad_norm": 2.430751747310971, "learning_rate": 3.732712550747449e-07, "loss": 1.1854, "step": 579 }, { "epoch": 0.5950243652218518, "grad_norm": 2.705026052502069, "learning_rate": 3.716623153661049e-07, "loss": 1.2227, "step": 580 }, { "epoch": 0.5960502692998204, "grad_norm": 2.7017035978712545, "learning_rate": 3.700547970353558e-07, "loss": 1.1729, "step": 581 }, { "epoch": 0.5970761733777892, "grad_norm": 2.5019196552045972, "learning_rate": 3.6844871788623945e-07, "loss": 1.1364, "step": 582 }, { "epoch": 0.5981020774557579, "grad_norm": 2.8882315979569886, "learning_rate": 3.6684409570655804e-07, "loss": 1.1923, "step": 583 }, { "epoch": 0.5991279815337266, "grad_norm": 2.6337103485212334, "learning_rate": 3.652409482679783e-07, "loss": 1.2562, "step": 584 }, { "epoch": 0.6001538856116954, "grad_norm": 2.8149166619473287, "learning_rate": 3.6363929332583263e-07, "loss": 1.2216, "step": 585 }, { "epoch": 0.601179789689664, "grad_norm": 2.5198337744735, "learning_rate": 3.6203914861892476e-07, "loss": 1.2763, "step": 586 }, { "epoch": 0.6022056937676328, "grad_norm": 2.5771491568943645, "learning_rate": 3.604405318693311e-07, "loss": 1.2214, "step": 587 }, { "epoch": 0.6032315978456014, "grad_norm": 2.6091632070488426, "learning_rate": 3.588434607822061e-07, "loss": 1.1978, "step": 588 }, { "epoch": 0.6042575019235702, "grad_norm": 2.5250674602170537, "learning_rate": 3.5724795304558513e-07, "loss": 1.1383, "step": 589 }, { "epoch": 0.6052834060015388, "grad_norm": 2.818153148170644, "learning_rate": 3.5565402633018957e-07, "loss": 1.2158, "step": 590 }, { "epoch": 0.6063093100795076, "grad_norm": 2.4250679915698643, "learning_rate": 3.540616982892297e-07, "loss": 1.2316, "step": 591 }, { "epoch": 0.6073352141574763, "grad_norm": 2.6768142400804544, "learning_rate": 3.5247098655821103e-07, "loss": 1.2056, "step": 592 }, { "epoch": 0.608361118235445, "grad_norm": 2.6144828178712514, "learning_rate": 3.5088190875473686e-07, "loss": 1.2977, "step": 593 }, { "epoch": 0.6093870223134137, "grad_norm": 2.4216338604546146, "learning_rate": 3.4929448247831514e-07, "loss": 1.2196, "step": 594 }, { "epoch": 0.6104129263913824, "grad_norm": 2.4463464458523814, "learning_rate": 3.4770872531016215e-07, "loss": 1.2126, "step": 595 }, { "epoch": 0.6114388304693511, "grad_norm": 2.746300393158143, "learning_rate": 3.4612465481300867e-07, "loss": 1.2965, "step": 596 }, { "epoch": 0.6124647345473199, "grad_norm": 2.550377036020753, "learning_rate": 3.445422885309045e-07, "loss": 1.255, "step": 597 }, { "epoch": 0.6134906386252885, "grad_norm": 2.5581073815113515, "learning_rate": 3.429616439890257e-07, "loss": 1.2221, "step": 598 }, { "epoch": 0.6145165427032573, "grad_norm": 2.6287106761798764, "learning_rate": 3.4138273869347846e-07, "loss": 1.1759, "step": 599 }, { "epoch": 0.6155424467812259, "grad_norm": 2.7064703602194236, "learning_rate": 3.398055901311073e-07, "loss": 1.2003, "step": 600 }, { "epoch": 0.6165683508591947, "grad_norm": 2.670960915963785, "learning_rate": 3.3823021576929964e-07, "loss": 1.1922, "step": 601 }, { "epoch": 0.6175942549371634, "grad_norm": 2.5857309156591604, "learning_rate": 3.3665663305579344e-07, "loss": 1.2312, "step": 602 }, { "epoch": 0.6186201590151321, "grad_norm": 2.6327880988339194, "learning_rate": 3.3508485941848383e-07, "loss": 1.1737, "step": 603 }, { "epoch": 0.6196460630931008, "grad_norm": 2.6453978254038133, "learning_rate": 3.335149122652293e-07, "loss": 1.1843, "step": 604 }, { "epoch": 0.6206719671710695, "grad_norm": 2.5723820192230598, "learning_rate": 3.3194680898366023e-07, "loss": 1.2656, "step": 605 }, { "epoch": 0.6216978712490382, "grad_norm": 2.5714268383189047, "learning_rate": 3.303805669409848e-07, "loss": 1.1766, "step": 606 }, { "epoch": 0.622723775327007, "grad_norm": 2.545114874785041, "learning_rate": 3.288162034837981e-07, "loss": 1.2778, "step": 607 }, { "epoch": 0.6237496794049756, "grad_norm": 2.3647835710703435, "learning_rate": 3.272537359378887e-07, "loss": 1.2307, "step": 608 }, { "epoch": 0.6247755834829444, "grad_norm": 2.971636171732171, "learning_rate": 3.2569318160804805e-07, "loss": 1.163, "step": 609 }, { "epoch": 0.625801487560913, "grad_norm": 2.4541682678346795, "learning_rate": 3.2413455777787746e-07, "loss": 1.216, "step": 610 }, { "epoch": 0.6268273916388818, "grad_norm": 2.3227177006306423, "learning_rate": 3.225778817095982e-07, "loss": 1.1738, "step": 611 }, { "epoch": 0.6278532957168504, "grad_norm": 2.5261842954294402, "learning_rate": 3.2102317064385876e-07, "loss": 1.2311, "step": 612 }, { "epoch": 0.6288791997948192, "grad_norm": 2.397049474115395, "learning_rate": 3.19470441799545e-07, "loss": 1.1192, "step": 613 }, { "epoch": 0.6299051038727879, "grad_norm": 2.593266135696937, "learning_rate": 3.179197123735889e-07, "loss": 1.2214, "step": 614 }, { "epoch": 0.6309310079507566, "grad_norm": 2.590251241187722, "learning_rate": 3.1637099954077875e-07, "loss": 1.1793, "step": 615 }, { "epoch": 0.6319569120287253, "grad_norm": 2.771110361934025, "learning_rate": 3.148243204535677e-07, "loss": 1.1597, "step": 616 }, { "epoch": 0.632982816106694, "grad_norm": 2.506372299796566, "learning_rate": 3.1327969224188546e-07, "loss": 1.2679, "step": 617 }, { "epoch": 0.6340087201846627, "grad_norm": 2.6050212777549, "learning_rate": 3.117371320129469e-07, "loss": 1.2335, "step": 618 }, { "epoch": 0.6350346242626315, "grad_norm": 2.6519984662810825, "learning_rate": 3.101966568510639e-07, "loss": 1.207, "step": 619 }, { "epoch": 0.6360605283406001, "grad_norm": 2.4414939383947947, "learning_rate": 3.086582838174551e-07, "loss": 1.1293, "step": 620 }, { "epoch": 0.6370864324185689, "grad_norm": 2.4498656223793933, "learning_rate": 3.0712202995005806e-07, "loss": 1.1637, "step": 621 }, { "epoch": 0.6381123364965375, "grad_norm": 2.4980445735258368, "learning_rate": 3.055879122633397e-07, "loss": 1.2441, "step": 622 }, { "epoch": 0.6391382405745063, "grad_norm": 2.770776748857224, "learning_rate": 3.040559477481077e-07, "loss": 1.2255, "step": 623 }, { "epoch": 0.640164144652475, "grad_norm": 2.722646125854624, "learning_rate": 3.025261533713235e-07, "loss": 1.164, "step": 624 }, { "epoch": 0.6411900487304437, "grad_norm": 2.6111255488966827, "learning_rate": 3.009985460759127e-07, "loss": 1.155, "step": 625 }, { "epoch": 0.6422159528084124, "grad_norm": 2.774808082342546, "learning_rate": 2.994731427805792e-07, "loss": 1.1843, "step": 626 }, { "epoch": 0.6432418568863811, "grad_norm": 2.729046492958925, "learning_rate": 2.979499603796163e-07, "loss": 1.1878, "step": 627 }, { "epoch": 0.6442677609643498, "grad_norm": 2.3088017207024105, "learning_rate": 2.964290157427207e-07, "loss": 1.107, "step": 628 }, { "epoch": 0.6452936650423186, "grad_norm": 2.769640462107753, "learning_rate": 2.9491032571480486e-07, "loss": 1.3354, "step": 629 }, { "epoch": 0.6463195691202872, "grad_norm": 2.6586300568678087, "learning_rate": 2.9339390711581105e-07, "loss": 1.2318, "step": 630 }, { "epoch": 0.647345473198256, "grad_norm": 2.8192282998223304, "learning_rate": 2.9187977674052424e-07, "loss": 1.2764, "step": 631 }, { "epoch": 0.6483713772762246, "grad_norm": 2.8997868168822056, "learning_rate": 2.9036795135838764e-07, "loss": 1.2166, "step": 632 }, { "epoch": 0.6493972813541934, "grad_norm": 2.708571116756967, "learning_rate": 2.88858447713315e-07, "loss": 1.2067, "step": 633 }, { "epoch": 0.650423185432162, "grad_norm": 2.620361247323107, "learning_rate": 2.8735128252350674e-07, "loss": 1.1406, "step": 634 }, { "epoch": 0.6514490895101308, "grad_norm": 2.532649091877776, "learning_rate": 2.858464724812638e-07, "loss": 1.2767, "step": 635 }, { "epoch": 0.6524749935880995, "grad_norm": 2.654953427048768, "learning_rate": 2.843440342528035e-07, "loss": 1.1596, "step": 636 }, { "epoch": 0.6535008976660682, "grad_norm": 2.639809330039523, "learning_rate": 2.8284398447807435e-07, "loss": 1.1958, "step": 637 }, { "epoch": 0.6545268017440369, "grad_norm": 2.539389618176952, "learning_rate": 2.813463397705723e-07, "loss": 1.2032, "step": 638 }, { "epoch": 0.6555527058220056, "grad_norm": 2.427603600782808, "learning_rate": 2.798511167171562e-07, "loss": 1.2111, "step": 639 }, { "epoch": 0.6565786098999743, "grad_norm": 2.522000223511891, "learning_rate": 2.783583318778646e-07, "loss": 1.1634, "step": 640 }, { "epoch": 0.6576045139779431, "grad_norm": 2.5130918357831455, "learning_rate": 2.768680017857319e-07, "loss": 1.2287, "step": 641 }, { "epoch": 0.6586304180559118, "grad_norm": 2.7221180506008285, "learning_rate": 2.753801429466056e-07, "loss": 1.2837, "step": 642 }, { "epoch": 0.6596563221338805, "grad_norm": 2.7920485812373403, "learning_rate": 2.7389477183896313e-07, "loss": 1.0884, "step": 643 }, { "epoch": 0.6606822262118492, "grad_norm": 2.4167742978657145, "learning_rate": 2.7241190491372987e-07, "loss": 1.1931, "step": 644 }, { "epoch": 0.6617081302898179, "grad_norm": 2.6876746385015586, "learning_rate": 2.7093155859409667e-07, "loss": 1.263, "step": 645 }, { "epoch": 0.6627340343677867, "grad_norm": 2.544082336181996, "learning_rate": 2.6945374927533697e-07, "loss": 1.1986, "step": 646 }, { "epoch": 0.6637599384457553, "grad_norm": 2.5769749930127177, "learning_rate": 2.6797849332462785e-07, "loss": 1.3002, "step": 647 }, { "epoch": 0.6647858425237241, "grad_norm": 2.765429290049575, "learning_rate": 2.665058070808654e-07, "loss": 1.293, "step": 648 }, { "epoch": 0.6658117466016927, "grad_norm": 2.7225782297173216, "learning_rate": 2.650357068544869e-07, "loss": 1.22, "step": 649 }, { "epoch": 0.6668376506796615, "grad_norm": 2.4495687580818433, "learning_rate": 2.635682089272875e-07, "loss": 1.1719, "step": 650 }, { "epoch": 0.6678635547576302, "grad_norm": 2.515217933274136, "learning_rate": 2.621033295522417e-07, "loss": 1.3224, "step": 651 }, { "epoch": 0.6688894588355989, "grad_norm": 2.6424207426144815, "learning_rate": 2.6064108495332293e-07, "loss": 1.1448, "step": 652 }, { "epoch": 0.6699153629135676, "grad_norm": 2.5858491438102056, "learning_rate": 2.5918149132532336e-07, "loss": 1.1662, "step": 653 }, { "epoch": 0.6709412669915363, "grad_norm": 2.6381268455957594, "learning_rate": 2.5772456483367497e-07, "loss": 1.1797, "step": 654 }, { "epoch": 0.671967171069505, "grad_norm": 2.6228781987684364, "learning_rate": 2.5627032161427036e-07, "loss": 1.2355, "step": 655 }, { "epoch": 0.6729930751474738, "grad_norm": 2.615456043233998, "learning_rate": 2.5481877777328424e-07, "loss": 1.2654, "step": 656 }, { "epoch": 0.6740189792254424, "grad_norm": 2.416561917193722, "learning_rate": 2.5336994938699503e-07, "loss": 1.12, "step": 657 }, { "epoch": 0.6750448833034112, "grad_norm": 2.642465270297251, "learning_rate": 2.5192385250160586e-07, "loss": 1.1921, "step": 658 }, { "epoch": 0.6760707873813798, "grad_norm": 2.7480834455937866, "learning_rate": 2.50480503133069e-07, "loss": 1.241, "step": 659 }, { "epoch": 0.6770966914593486, "grad_norm": 2.709971968793763, "learning_rate": 2.4903991726690583e-07, "loss": 1.1892, "step": 660 }, { "epoch": 0.6781225955373172, "grad_norm": 2.5423357822987125, "learning_rate": 2.4760211085803214e-07, "loss": 1.061, "step": 661 }, { "epoch": 0.679148499615286, "grad_norm": 2.5471809431180947, "learning_rate": 2.461670998305801e-07, "loss": 1.1406, "step": 662 }, { "epoch": 0.6801744036932547, "grad_norm": 2.2614394749490865, "learning_rate": 2.447349000777216e-07, "loss": 1.1914, "step": 663 }, { "epoch": 0.6812003077712234, "grad_norm": 2.5975786485688745, "learning_rate": 2.4330552746149404e-07, "loss": 1.1927, "step": 664 }, { "epoch": 0.6822262118491921, "grad_norm": 2.5643004029139362, "learning_rate": 2.418789978126219e-07, "loss": 1.2564, "step": 665 }, { "epoch": 0.6832521159271608, "grad_norm": 2.5694519450290922, "learning_rate": 2.4045532693034474e-07, "loss": 1.287, "step": 666 }, { "epoch": 0.6842780200051295, "grad_norm": 2.583217401294905, "learning_rate": 2.3903453058223876e-07, "loss": 1.2259, "step": 667 }, { "epoch": 0.6853039240830983, "grad_norm": 2.7713992830326264, "learning_rate": 2.3761662450404492e-07, "loss": 1.2404, "step": 668 }, { "epoch": 0.6863298281610669, "grad_norm": 2.683261081042423, "learning_rate": 2.3620162439949303e-07, "loss": 1.2424, "step": 669 }, { "epoch": 0.6873557322390357, "grad_norm": 2.6813466710719274, "learning_rate": 2.347895459401288e-07, "loss": 1.1871, "step": 670 }, { "epoch": 0.6883816363170043, "grad_norm": 2.4980969369442985, "learning_rate": 2.3338040476513947e-07, "loss": 1.196, "step": 671 }, { "epoch": 0.6894075403949731, "grad_norm": 2.6573309673650116, "learning_rate": 2.319742164811813e-07, "loss": 1.221, "step": 672 }, { "epoch": 0.6904334444729417, "grad_norm": 2.7446852008084224, "learning_rate": 2.305709966622062e-07, "loss": 1.2488, "step": 673 }, { "epoch": 0.6914593485509105, "grad_norm": 2.5674080914108384, "learning_rate": 2.2917076084928948e-07, "loss": 1.2597, "step": 674 }, { "epoch": 0.6924852526288792, "grad_norm": 2.811651191362524, "learning_rate": 2.2777352455045784e-07, "loss": 1.2413, "step": 675 }, { "epoch": 0.6935111567068479, "grad_norm": 2.572728259688939, "learning_rate": 2.2637930324051747e-07, "loss": 1.2547, "step": 676 }, { "epoch": 0.6945370607848166, "grad_norm": 2.700506373166308, "learning_rate": 2.2498811236088195e-07, "loss": 1.1831, "step": 677 }, { "epoch": 0.6955629648627853, "grad_norm": 2.5836554498566864, "learning_rate": 2.2359996731940345e-07, "loss": 1.1322, "step": 678 }, { "epoch": 0.696588868940754, "grad_norm": 2.4909146663935204, "learning_rate": 2.2221488349019902e-07, "loss": 1.1805, "step": 679 }, { "epoch": 0.6976147730187228, "grad_norm": 2.380457276679437, "learning_rate": 2.2083287621348256e-07, "loss": 1.1997, "step": 680 }, { "epoch": 0.6986406770966914, "grad_norm": 2.78505809062204, "learning_rate": 2.194539607953948e-07, "loss": 1.2253, "step": 681 }, { "epoch": 0.6996665811746602, "grad_norm": 2.5665582250329257, "learning_rate": 2.180781525078319e-07, "loss": 1.1259, "step": 682 }, { "epoch": 0.7006924852526288, "grad_norm": 2.555086105402453, "learning_rate": 2.167054665882791e-07, "loss": 1.1908, "step": 683 }, { "epoch": 0.7017183893305976, "grad_norm": 2.6561979962463154, "learning_rate": 2.1533591823963926e-07, "loss": 1.1986, "step": 684 }, { "epoch": 0.7027442934085663, "grad_norm": 2.527387762301175, "learning_rate": 2.139695226300663e-07, "loss": 1.2669, "step": 685 }, { "epoch": 0.703770197486535, "grad_norm": 2.7391799488802575, "learning_rate": 2.1260629489279657e-07, "loss": 1.1581, "step": 686 }, { "epoch": 0.7047961015645037, "grad_norm": 2.4323409695512495, "learning_rate": 2.1124625012598135e-07, "loss": 1.2396, "step": 687 }, { "epoch": 0.7058220056424724, "grad_norm": 2.599314144274195, "learning_rate": 2.0988940339251937e-07, "loss": 1.1927, "step": 688 }, { "epoch": 0.7068479097204411, "grad_norm": 2.7206436512698797, "learning_rate": 2.0853576971989035e-07, "loss": 1.1842, "step": 689 }, { "epoch": 0.7078738137984099, "grad_norm": 2.438206997849408, "learning_rate": 2.0718536409998833e-07, "loss": 1.1859, "step": 690 }, { "epoch": 0.7088997178763785, "grad_norm": 2.425979251612811, "learning_rate": 2.058382014889558e-07, "loss": 1.2472, "step": 691 }, { "epoch": 0.7099256219543473, "grad_norm": 3.1059613200905507, "learning_rate": 2.0449429680701797e-07, "loss": 1.1009, "step": 692 }, { "epoch": 0.7109515260323159, "grad_norm": 2.549687762265441, "learning_rate": 2.0315366493831755e-07, "loss": 1.2296, "step": 693 }, { "epoch": 0.7119774301102847, "grad_norm": 2.5370967790110126, "learning_rate": 2.0181632073074923e-07, "loss": 1.2098, "step": 694 }, { "epoch": 0.7130033341882533, "grad_norm": 2.4536439877324883, "learning_rate": 2.0048227899579728e-07, "loss": 1.1183, "step": 695 }, { "epoch": 0.7140292382662221, "grad_norm": 2.55316768712919, "learning_rate": 1.991515545083684e-07, "loss": 1.1191, "step": 696 }, { "epoch": 0.7150551423441908, "grad_norm": 2.4788318107618124, "learning_rate": 1.978241620066315e-07, "loss": 1.1502, "step": 697 }, { "epoch": 0.7160810464221595, "grad_norm": 2.6045581314247253, "learning_rate": 1.9650011619185126e-07, "loss": 1.1872, "step": 698 }, { "epoch": 0.7171069505001283, "grad_norm": 2.6627567620404107, "learning_rate": 1.9517943172822753e-07, "loss": 1.2355, "step": 699 }, { "epoch": 0.718132854578097, "grad_norm": 2.730504463070005, "learning_rate": 1.938621232427327e-07, "loss": 1.2131, "step": 700 }, { "epoch": 0.7191587586560657, "grad_norm": 2.4953047796323498, "learning_rate": 1.9254820532494787e-07, "loss": 1.2416, "step": 701 }, { "epoch": 0.7201846627340344, "grad_norm": 2.5640586712114986, "learning_rate": 1.9123769252690407e-07, "loss": 1.2348, "step": 702 }, { "epoch": 0.7212105668120031, "grad_norm": 2.301017245817361, "learning_rate": 1.8993059936291845e-07, "loss": 1.1246, "step": 703 }, { "epoch": 0.7222364708899718, "grad_norm": 2.560508237614933, "learning_rate": 1.8862694030943528e-07, "loss": 1.1593, "step": 704 }, { "epoch": 0.7232623749679405, "grad_norm": 2.5384108530663667, "learning_rate": 1.873267298048649e-07, "loss": 1.177, "step": 705 }, { "epoch": 0.7242882790459092, "grad_norm": 2.6017567089602784, "learning_rate": 1.8602998224942406e-07, "loss": 1.1913, "step": 706 }, { "epoch": 0.725314183123878, "grad_norm": 2.563942704168396, "learning_rate": 1.847367120049762e-07, "loss": 1.1599, "step": 707 }, { "epoch": 0.7263400872018466, "grad_norm": 2.477017573966492, "learning_rate": 1.834469333948725e-07, "loss": 1.3285, "step": 708 }, { "epoch": 0.7273659912798154, "grad_norm": 2.606062790037432, "learning_rate": 1.8216066070379332e-07, "loss": 1.1228, "step": 709 }, { "epoch": 0.728391895357784, "grad_norm": 2.627021153673398, "learning_rate": 1.808779081775901e-07, "loss": 1.254, "step": 710 }, { "epoch": 0.7294177994357528, "grad_norm": 2.732112582668254, "learning_rate": 1.7959869002312665e-07, "loss": 1.1785, "step": 711 }, { "epoch": 0.7304437035137215, "grad_norm": 2.7648989482089794, "learning_rate": 1.7832302040812392e-07, "loss": 1.2254, "step": 712 }, { "epoch": 0.7314696075916902, "grad_norm": 2.5265344506070226, "learning_rate": 1.7705091346100016e-07, "loss": 1.248, "step": 713 }, { "epoch": 0.7324955116696589, "grad_norm": 2.487884119195698, "learning_rate": 1.757823832707175e-07, "loss": 1.2019, "step": 714 }, { "epoch": 0.7335214157476276, "grad_norm": 2.7066983532459763, "learning_rate": 1.7451744388662326e-07, "loss": 1.1771, "step": 715 }, { "epoch": 0.7345473198255963, "grad_norm": 2.496085937161839, "learning_rate": 1.7325610931829616e-07, "loss": 1.1365, "step": 716 }, { "epoch": 0.735573223903565, "grad_norm": 2.5310750605865966, "learning_rate": 1.719983935353903e-07, "loss": 1.2424, "step": 717 }, { "epoch": 0.7365991279815337, "grad_norm": 2.6323596452399727, "learning_rate": 1.7074431046748074e-07, "loss": 1.2113, "step": 718 }, { "epoch": 0.7376250320595025, "grad_norm": 2.773768814333001, "learning_rate": 1.6949387400390912e-07, "loss": 1.2481, "step": 719 }, { "epoch": 0.7386509361374711, "grad_norm": 2.6027832225629637, "learning_rate": 1.682470979936298e-07, "loss": 1.1914, "step": 720 }, { "epoch": 0.7396768402154399, "grad_norm": 2.5140857014311084, "learning_rate": 1.6700399624505663e-07, "loss": 1.1656, "step": 721 }, { "epoch": 0.7407027442934085, "grad_norm": 2.6602123194953835, "learning_rate": 1.6576458252590986e-07, "loss": 1.2504, "step": 722 }, { "epoch": 0.7417286483713773, "grad_norm": 2.481197593104395, "learning_rate": 1.6452887056306376e-07, "loss": 1.1759, "step": 723 }, { "epoch": 0.742754552449346, "grad_norm": 2.6801836727829804, "learning_rate": 1.6329687404239445e-07, "loss": 1.3156, "step": 724 }, { "epoch": 0.7437804565273147, "grad_norm": 2.5124931482279105, "learning_rate": 1.6206860660862858e-07, "loss": 1.2196, "step": 725 }, { "epoch": 0.7448063606052834, "grad_norm": 2.664235589191919, "learning_rate": 1.6084408186519194e-07, "loss": 1.1395, "step": 726 }, { "epoch": 0.7458322646832521, "grad_norm": 2.327075273677125, "learning_rate": 1.5962331337405914e-07, "loss": 1.2351, "step": 727 }, { "epoch": 0.7468581687612208, "grad_norm": 2.412427852412854, "learning_rate": 1.584063146556025e-07, "loss": 1.2697, "step": 728 }, { "epoch": 0.7478840728391896, "grad_norm": 2.5543949822741987, "learning_rate": 1.5719309918844414e-07, "loss": 1.23, "step": 729 }, { "epoch": 0.7489099769171582, "grad_norm": 4.7369349678689066, "learning_rate": 1.5598368040930427e-07, "loss": 1.1512, "step": 730 }, { "epoch": 0.749935880995127, "grad_norm": 2.6739445766939745, "learning_rate": 1.5477807171285489e-07, "loss": 1.2274, "step": 731 }, { "epoch": 0.7509617850730956, "grad_norm": 2.8907340492672535, "learning_rate": 1.5357628645156918e-07, "loss": 1.1342, "step": 732 }, { "epoch": 0.7519876891510644, "grad_norm": 2.725694829583633, "learning_rate": 1.5237833793557515e-07, "loss": 1.2117, "step": 733 }, { "epoch": 0.753013593229033, "grad_norm": 2.4983300901122916, "learning_rate": 1.5118423943250768e-07, "loss": 1.1886, "step": 734 }, { "epoch": 0.7540394973070018, "grad_norm": 2.97591503230041, "learning_rate": 1.499940041673616e-07, "loss": 1.1669, "step": 735 }, { "epoch": 0.7550654013849705, "grad_norm": 2.4033448682516942, "learning_rate": 1.4880764532234514e-07, "loss": 1.0508, "step": 736 }, { "epoch": 0.7560913054629392, "grad_norm": 2.508957495407041, "learning_rate": 1.4762517603673408e-07, "loss": 1.1536, "step": 737 }, { "epoch": 0.7571172095409079, "grad_norm": 2.782391609960307, "learning_rate": 1.4644660940672627e-07, "loss": 1.1624, "step": 738 }, { "epoch": 0.7581431136188767, "grad_norm": 2.43794343363202, "learning_rate": 1.4527195848529634e-07, "loss": 1.2295, "step": 739 }, { "epoch": 0.7591690176968453, "grad_norm": 2.566694597309234, "learning_rate": 1.4410123628205134e-07, "loss": 1.1343, "step": 740 }, { "epoch": 0.7601949217748141, "grad_norm": 2.6362502212431576, "learning_rate": 1.4293445576308673e-07, "loss": 1.2362, "step": 741 }, { "epoch": 0.7612208258527827, "grad_norm": 2.4532291147501653, "learning_rate": 1.417716298508424e-07, "loss": 1.1928, "step": 742 }, { "epoch": 0.7622467299307515, "grad_norm": 2.595969156532276, "learning_rate": 1.4061277142396006e-07, "loss": 1.2755, "step": 743 }, { "epoch": 0.7632726340087201, "grad_norm": 2.517605616531437, "learning_rate": 1.3945789331714013e-07, "loss": 1.1298, "step": 744 }, { "epoch": 0.7642985380866889, "grad_norm": 2.5082145361741315, "learning_rate": 1.3830700832099994e-07, "loss": 1.1012, "step": 745 }, { "epoch": 0.7653244421646576, "grad_norm": 2.5628568172056925, "learning_rate": 1.3716012918193205e-07, "loss": 1.2271, "step": 746 }, { "epoch": 0.7663503462426263, "grad_norm": 2.4325845713405387, "learning_rate": 1.3601726860196227e-07, "loss": 1.1876, "step": 747 }, { "epoch": 0.767376250320595, "grad_norm": 2.507778203499113, "learning_rate": 1.3487843923861098e-07, "loss": 1.1841, "step": 748 }, { "epoch": 0.7684021543985637, "grad_norm": 2.492682324587268, "learning_rate": 1.337436537047501e-07, "loss": 1.2382, "step": 749 }, { "epoch": 0.7694280584765324, "grad_norm": 2.695111243135506, "learning_rate": 1.3261292456846646e-07, "loss": 1.192, "step": 750 }, { "epoch": 0.7694280584765324, "eval_uground_MCTS_chains_SFT_val_loss": 1.2723054885864258, "eval_uground_MCTS_chains_SFT_val_runtime": 210.8326, "eval_uground_MCTS_chains_SFT_val_samples_per_second": 8.628, "eval_uground_MCTS_chains_SFT_val_steps_per_second": 1.081, "step": 750 }, { "epoch": 0.7704539625545012, "grad_norm": 2.4499636226982187, "learning_rate": 1.3148626435291977e-07, "loss": 1.2008, "step": 751 }, { "epoch": 0.7714798666324698, "grad_norm": 2.749941205607747, "learning_rate": 1.30363685536206e-07, "loss": 1.1632, "step": 752 }, { "epoch": 0.7725057707104386, "grad_norm": 2.6515417658669325, "learning_rate": 1.2924520055121834e-07, "loss": 1.2075, "step": 753 }, { "epoch": 0.7735316747884072, "grad_norm": 2.668556743002617, "learning_rate": 1.2813082178550928e-07, "loss": 1.2162, "step": 754 }, { "epoch": 0.774557578866376, "grad_norm": 2.393774949444245, "learning_rate": 1.2702056158115405e-07, "loss": 1.1836, "step": 755 }, { "epoch": 0.7755834829443446, "grad_norm": 2.383685066946397, "learning_rate": 1.2591443223461333e-07, "loss": 1.2551, "step": 756 }, { "epoch": 0.7766093870223134, "grad_norm": 2.5950121153810164, "learning_rate": 1.2481244599659752e-07, "loss": 1.1199, "step": 757 }, { "epoch": 0.7776352911002822, "grad_norm": 2.683432845893599, "learning_rate": 1.2371461507193075e-07, "loss": 1.2948, "step": 758 }, { "epoch": 0.7786611951782508, "grad_norm": 2.5444949929602236, "learning_rate": 1.2262095161941588e-07, "loss": 1.1559, "step": 759 }, { "epoch": 0.7796870992562196, "grad_norm": 2.825484071880231, "learning_rate": 1.215314677516997e-07, "loss": 1.1641, "step": 760 }, { "epoch": 0.7807130033341882, "grad_norm": 2.7865772894604017, "learning_rate": 1.2044617553513897e-07, "loss": 1.1873, "step": 761 }, { "epoch": 0.781738907412157, "grad_norm": 2.5228443815683224, "learning_rate": 1.1936508698966663e-07, "loss": 1.2681, "step": 762 }, { "epoch": 0.7827648114901257, "grad_norm": 2.8999815031168636, "learning_rate": 1.1828821408865886e-07, "loss": 1.2122, "step": 763 }, { "epoch": 0.7837907155680944, "grad_norm": 2.5689794885209563, "learning_rate": 1.1721556875880167e-07, "loss": 1.2239, "step": 764 }, { "epoch": 0.7848166196460631, "grad_norm": 2.507318965085582, "learning_rate": 1.1614716287996063e-07, "loss": 1.1625, "step": 765 }, { "epoch": 0.7858425237240318, "grad_norm": 2.650879555302289, "learning_rate": 1.150830082850468e-07, "loss": 1.2091, "step": 766 }, { "epoch": 0.7868684278020005, "grad_norm": 2.644278727926541, "learning_rate": 1.1402311675988834e-07, "loss": 1.273, "step": 767 }, { "epoch": 0.7878943318799693, "grad_norm": 2.780296372251445, "learning_rate": 1.1296750004309757e-07, "loss": 1.236, "step": 768 }, { "epoch": 0.7889202359579379, "grad_norm": 2.5188724688574213, "learning_rate": 1.1191616982594259e-07, "loss": 1.1625, "step": 769 }, { "epoch": 0.7899461400359067, "grad_norm": 2.523287196036484, "learning_rate": 1.1086913775221706e-07, "loss": 1.1488, "step": 770 }, { "epoch": 0.7909720441138753, "grad_norm": 2.6829517454733716, "learning_rate": 1.0982641541811161e-07, "loss": 1.2307, "step": 771 }, { "epoch": 0.7919979481918441, "grad_norm": 2.6511117351470697, "learning_rate": 1.0878801437208496e-07, "loss": 1.229, "step": 772 }, { "epoch": 0.7930238522698128, "grad_norm": 2.9646303582070157, "learning_rate": 1.077539461147366e-07, "loss": 1.1874, "step": 773 }, { "epoch": 0.7940497563477815, "grad_norm": 2.7202486187139097, "learning_rate": 1.0672422209867876e-07, "loss": 1.154, "step": 774 }, { "epoch": 0.7950756604257502, "grad_norm": 2.5478407063833424, "learning_rate": 1.056988537284103e-07, "loss": 1.1646, "step": 775 }, { "epoch": 0.7961015645037189, "grad_norm": 2.6329035656681943, "learning_rate": 1.0467785236018944e-07, "loss": 1.284, "step": 776 }, { "epoch": 0.7971274685816876, "grad_norm": 2.581763231489404, "learning_rate": 1.0366122930190934e-07, "loss": 1.1138, "step": 777 }, { "epoch": 0.7981533726596564, "grad_norm": 2.509180799267851, "learning_rate": 1.026489958129712e-07, "loss": 1.2524, "step": 778 }, { "epoch": 0.799179276737625, "grad_norm": 2.277115810698049, "learning_rate": 1.0164116310416127e-07, "loss": 1.2171, "step": 779 }, { "epoch": 0.8002051808155938, "grad_norm": 2.5306432670776386, "learning_rate": 1.0063774233752542e-07, "loss": 1.2668, "step": 780 }, { "epoch": 0.8012310848935624, "grad_norm": 2.7565443335969966, "learning_rate": 9.963874462624566e-08, "loss": 1.2028, "step": 781 }, { "epoch": 0.8022569889715312, "grad_norm": 2.717640654108823, "learning_rate": 9.864418103451827e-08, "loss": 1.1879, "step": 782 }, { "epoch": 0.8032828930494998, "grad_norm": 2.428371028068662, "learning_rate": 9.765406257742915e-08, "loss": 1.2411, "step": 783 }, { "epoch": 0.8043087971274686, "grad_norm": 2.7349523704191516, "learning_rate": 9.666840022083422e-08, "loss": 1.1647, "step": 784 }, { "epoch": 0.8053347012054373, "grad_norm": 2.5706570443073224, "learning_rate": 9.568720488123577e-08, "loss": 1.2041, "step": 785 }, { "epoch": 0.806360605283406, "grad_norm": 2.5312663092030974, "learning_rate": 9.471048742566312e-08, "loss": 1.2376, "step": 786 }, { "epoch": 0.8073865093613747, "grad_norm": 2.567303045700278, "learning_rate": 9.373825867155155e-08, "loss": 1.2416, "step": 787 }, { "epoch": 0.8084124134393434, "grad_norm": 2.5607337841796842, "learning_rate": 9.27705293866226e-08, "loss": 1.1564, "step": 788 }, { "epoch": 0.8094383175173121, "grad_norm": 2.455235264261172, "learning_rate": 9.180731028876492e-08, "loss": 1.1916, "step": 789 }, { "epoch": 0.8104642215952809, "grad_norm": 2.3169514617874594, "learning_rate": 9.084861204591549e-08, "loss": 1.2042, "step": 790 }, { "epoch": 0.8114901256732495, "grad_norm": 2.4016974955821593, "learning_rate": 8.989444527594143e-08, "loss": 1.0985, "step": 791 }, { "epoch": 0.8125160297512183, "grad_norm": 2.682553790748834, "learning_rate": 8.894482054652247e-08, "loss": 1.1976, "step": 792 }, { "epoch": 0.8135419338291869, "grad_norm": 2.5902122938763843, "learning_rate": 8.7999748375034e-08, "loss": 1.2057, "step": 793 }, { "epoch": 0.8145678379071557, "grad_norm": 2.4236373303760272, "learning_rate": 8.705923922843039e-08, "loss": 1.2162, "step": 794 }, { "epoch": 0.8155937419851244, "grad_norm": 2.453178416086071, "learning_rate": 8.612330352312891e-08, "loss": 1.1699, "step": 795 }, { "epoch": 0.8166196460630931, "grad_norm": 2.332974640258587, "learning_rate": 8.519195162489528e-08, "loss": 1.1845, "step": 796 }, { "epoch": 0.8176455501410618, "grad_norm": 2.7398187199253767, "learning_rate": 8.426519384872732e-08, "loss": 1.2276, "step": 797 }, { "epoch": 0.8186714542190305, "grad_norm": 2.799522945659722, "learning_rate": 8.334304045874246e-08, "loss": 1.1882, "step": 798 }, { "epoch": 0.8196973582969992, "grad_norm": 2.597041968467517, "learning_rate": 8.242550166806272e-08, "loss": 1.21, "step": 799 }, { "epoch": 0.820723262374968, "grad_norm": 2.922217189437273, "learning_rate": 8.151258763870177e-08, "loss": 1.2113, "step": 800 } ], "logging_steps": 1, "max_steps": 974, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 247351370907648.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }