| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.820723262374968, | |
| "eval_steps": 250, | |
| "global_step": 800, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0010259040779687098, | |
| "grad_norm": 9.798241135332411, | |
| "learning_rate": 3.3333333333333334e-08, | |
| "loss": 1.5793, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0020518081559374197, | |
| "grad_norm": 9.598717132793555, | |
| "learning_rate": 6.666666666666667e-08, | |
| "loss": 1.7229, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.00307771223390613, | |
| "grad_norm": 10.260768981836485, | |
| "learning_rate": 1e-07, | |
| "loss": 1.6317, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.004103616311874839, | |
| "grad_norm": 9.059955288981689, | |
| "learning_rate": 1.3333333333333334e-07, | |
| "loss": 1.6176, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.005129520389843549, | |
| "grad_norm": 8.709116139461566, | |
| "learning_rate": 1.6666666666666665e-07, | |
| "loss": 1.5098, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.00615542446781226, | |
| "grad_norm": 9.86995805736169, | |
| "learning_rate": 2e-07, | |
| "loss": 1.5554, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.00718132854578097, | |
| "grad_norm": 9.269724208142758, | |
| "learning_rate": 2.3333333333333333e-07, | |
| "loss": 1.5685, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.008207232623749679, | |
| "grad_norm": 9.788683578279787, | |
| "learning_rate": 2.6666666666666667e-07, | |
| "loss": 1.6332, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.00923313670171839, | |
| "grad_norm": 9.329733080585035, | |
| "learning_rate": 3e-07, | |
| "loss": 1.5211, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.010259040779687098, | |
| "grad_norm": 8.603455933686602, | |
| "learning_rate": 3.333333333333333e-07, | |
| "loss": 1.531, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.011284944857655809, | |
| "grad_norm": 9.703453172271272, | |
| "learning_rate": 3.666666666666666e-07, | |
| "loss": 1.5336, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.01231084893562452, | |
| "grad_norm": 8.366416586343489, | |
| "learning_rate": 4e-07, | |
| "loss": 1.5916, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.013336753013593229, | |
| "grad_norm": 8.816992612572204, | |
| "learning_rate": 4.3333333333333335e-07, | |
| "loss": 1.6659, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.01436265709156194, | |
| "grad_norm": 9.162032804849005, | |
| "learning_rate": 4.6666666666666666e-07, | |
| "loss": 1.5406, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.015388561169530648, | |
| "grad_norm": 8.732450364954722, | |
| "learning_rate": 5e-07, | |
| "loss": 1.5083, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.016414465247499357, | |
| "grad_norm": 8.622668586551894, | |
| "learning_rate": 5.333333333333333e-07, | |
| "loss": 1.5596, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.01744036932546807, | |
| "grad_norm": 7.133974565680819, | |
| "learning_rate": 5.666666666666666e-07, | |
| "loss": 1.5639, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.01846627340343678, | |
| "grad_norm": 6.991354736869125, | |
| "learning_rate": 6e-07, | |
| "loss": 1.4727, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.019492177481405488, | |
| "grad_norm": 6.779705570699807, | |
| "learning_rate": 6.333333333333332e-07, | |
| "loss": 1.5018, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.020518081559374197, | |
| "grad_norm": 6.980027846638078, | |
| "learning_rate": 6.666666666666666e-07, | |
| "loss": 1.5915, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.02154398563734291, | |
| "grad_norm": 6.45230816051997, | |
| "learning_rate": 7e-07, | |
| "loss": 1.4633, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.022569889715311618, | |
| "grad_norm": 5.935560467510461, | |
| "learning_rate": 7.333333333333332e-07, | |
| "loss": 1.519, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.023595793793280327, | |
| "grad_norm": 5.412080062014559, | |
| "learning_rate": 7.666666666666667e-07, | |
| "loss": 1.5106, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.02462169787124904, | |
| "grad_norm": 4.643500965809458, | |
| "learning_rate": 8e-07, | |
| "loss": 1.3979, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.02564760194921775, | |
| "grad_norm": 4.425426929208595, | |
| "learning_rate": 8.333333333333333e-07, | |
| "loss": 1.3665, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.026673506027186458, | |
| "grad_norm": 4.222303529390681, | |
| "learning_rate": 8.666666666666667e-07, | |
| "loss": 1.4494, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.027699410105155167, | |
| "grad_norm": 4.369459716992631, | |
| "learning_rate": 9e-07, | |
| "loss": 1.419, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.02872531418312388, | |
| "grad_norm": 4.039114576068017, | |
| "learning_rate": 9.333333333333333e-07, | |
| "loss": 1.4929, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.029751218261092588, | |
| "grad_norm": 3.7836259253536135, | |
| "learning_rate": 9.666666666666666e-07, | |
| "loss": 1.3719, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.030777122339061297, | |
| "grad_norm": 3.706293156036854, | |
| "learning_rate": 1e-06, | |
| "loss": 1.4779, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.031803026417030006, | |
| "grad_norm": 3.460814827864659, | |
| "learning_rate": 9.999972311759113e-07, | |
| "loss": 1.3536, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.032828930494998715, | |
| "grad_norm": 3.3017900915165086, | |
| "learning_rate": 9.99988924734311e-07, | |
| "loss": 1.3524, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.03385483457296743, | |
| "grad_norm": 3.455224010733439, | |
| "learning_rate": 9.999750807671956e-07, | |
| "loss": 1.4297, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.03488073865093614, | |
| "grad_norm": 3.63256540031341, | |
| "learning_rate": 9.999556994278908e-07, | |
| "loss": 1.4719, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.03590664272890485, | |
| "grad_norm": 3.8514951366560823, | |
| "learning_rate": 9.999307809310508e-07, | |
| "loss": 1.3979, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.03693254680687356, | |
| "grad_norm": 3.9586226136352396, | |
| "learning_rate": 9.999003255526553e-07, | |
| "loss": 1.3285, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.03795845088484227, | |
| "grad_norm": 3.806464587016546, | |
| "learning_rate": 9.998643336300069e-07, | |
| "loss": 1.4278, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.038984354962810976, | |
| "grad_norm": 3.153561780968849, | |
| "learning_rate": 9.998228055617262e-07, | |
| "loss": 1.4004, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.040010259040779685, | |
| "grad_norm": 3.281790653235281, | |
| "learning_rate": 9.997757418077494e-07, | |
| "loss": 1.3469, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.041036163118748394, | |
| "grad_norm": 3.1079807809050792, | |
| "learning_rate": 9.997231428893215e-07, | |
| "loss": 1.2929, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.04206206719671711, | |
| "grad_norm": 2.940384594816459, | |
| "learning_rate": 9.99665009388991e-07, | |
| "loss": 1.2889, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.04308797127468582, | |
| "grad_norm": 2.9910864286989036, | |
| "learning_rate": 9.996013419506033e-07, | |
| "loss": 1.3704, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.04411387535265453, | |
| "grad_norm": 3.097652651590371, | |
| "learning_rate": 9.995321412792947e-07, | |
| "loss": 1.4157, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.045139779430623236, | |
| "grad_norm": 2.9908105852033384, | |
| "learning_rate": 9.994574081414829e-07, | |
| "loss": 1.3665, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.046165683508591945, | |
| "grad_norm": 3.1050321703200847, | |
| "learning_rate": 9.993771433648598e-07, | |
| "loss": 1.5106, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.047191587586560654, | |
| "grad_norm": 3.0440366145712945, | |
| "learning_rate": 9.992913478383809e-07, | |
| "loss": 1.4094, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.04821749166452936, | |
| "grad_norm": 2.6336951924027123, | |
| "learning_rate": 9.992000225122578e-07, | |
| "loss": 1.2391, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.04924339574249808, | |
| "grad_norm": 2.9411301278665816, | |
| "learning_rate": 9.991031683979451e-07, | |
| "loss": 1.4004, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.05026929982046679, | |
| "grad_norm": 2.588518890678191, | |
| "learning_rate": 9.990007865681312e-07, | |
| "loss": 1.4288, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.0512952038984355, | |
| "grad_norm": 3.0977479141233384, | |
| "learning_rate": 9.98892878156725e-07, | |
| "loss": 1.2934, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.052321107976404206, | |
| "grad_norm": 2.891254887655398, | |
| "learning_rate": 9.98779444358844e-07, | |
| "loss": 1.2993, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.053347012054372915, | |
| "grad_norm": 2.8843613231387346, | |
| "learning_rate": 9.986604864308015e-07, | |
| "loss": 1.3691, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.054372916132341624, | |
| "grad_norm": 3.058804698335043, | |
| "learning_rate": 9.985360056900914e-07, | |
| "loss": 1.3385, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.05539882021031033, | |
| "grad_norm": 2.908824401209796, | |
| "learning_rate": 9.98406003515375e-07, | |
| "loss": 1.4562, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.05642472428827905, | |
| "grad_norm": 3.5082197663137977, | |
| "learning_rate": 9.98270481346465e-07, | |
| "loss": 1.3692, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.05745062836624776, | |
| "grad_norm": 2.94858359371842, | |
| "learning_rate": 9.981294406843093e-07, | |
| "loss": 1.3498, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.05847653244421647, | |
| "grad_norm": 2.6795852716884374, | |
| "learning_rate": 9.979828830909754e-07, | |
| "loss": 1.3321, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.059502436522185176, | |
| "grad_norm": 2.9978152584208053, | |
| "learning_rate": 9.978308101896316e-07, | |
| "loss": 1.3412, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.060528340600153885, | |
| "grad_norm": 2.7552572011179497, | |
| "learning_rate": 9.97673223664531e-07, | |
| "loss": 1.4021, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.061554244678122594, | |
| "grad_norm": 2.637599483243523, | |
| "learning_rate": 9.975101252609903e-07, | |
| "loss": 1.3292, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.0625801487560913, | |
| "grad_norm": 2.938536066745437, | |
| "learning_rate": 9.973415167853734e-07, | |
| "loss": 1.2372, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.06360605283406001, | |
| "grad_norm": 2.7530123525727466, | |
| "learning_rate": 9.971674001050686e-07, | |
| "loss": 1.3277, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.06463195691202872, | |
| "grad_norm": 2.753702797656271, | |
| "learning_rate": 9.969877771484698e-07, | |
| "loss": 1.2756, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.06565786098999743, | |
| "grad_norm": 2.707817535821633, | |
| "learning_rate": 9.968026499049549e-07, | |
| "loss": 1.2586, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.06668376506796614, | |
| "grad_norm": 2.7234356724029567, | |
| "learning_rate": 9.966120204248625e-07, | |
| "loss": 1.3249, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.06770966914593486, | |
| "grad_norm": 2.92461615737126, | |
| "learning_rate": 9.964158908194706e-07, | |
| "loss": 1.3265, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.06873557322390357, | |
| "grad_norm": 2.593339469946713, | |
| "learning_rate": 9.962142632609732e-07, | |
| "loss": 1.3534, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.06976147730187228, | |
| "grad_norm": 2.9860559274993466, | |
| "learning_rate": 9.960071399824547e-07, | |
| "loss": 1.3433, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.07078738137984099, | |
| "grad_norm": 2.6875123035895796, | |
| "learning_rate": 9.957945232778672e-07, | |
| "loss": 1.3273, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.0718132854578097, | |
| "grad_norm": 2.870657819908334, | |
| "learning_rate": 9.955764155020035e-07, | |
| "loss": 1.2982, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.0728391895357784, | |
| "grad_norm": 3.0078112960966137, | |
| "learning_rate": 9.95352819070472e-07, | |
| "loss": 1.2967, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.07386509361374712, | |
| "grad_norm": 2.6631173635617307, | |
| "learning_rate": 9.951237364596692e-07, | |
| "loss": 1.3039, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.07489099769171582, | |
| "grad_norm": 2.903289343918584, | |
| "learning_rate": 9.94889170206753e-07, | |
| "loss": 1.2931, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.07591690176968453, | |
| "grad_norm": 2.5803057970449594, | |
| "learning_rate": 9.946491229096141e-07, | |
| "loss": 1.3029, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.07694280584765324, | |
| "grad_norm": 2.587911044453264, | |
| "learning_rate": 9.94403597226848e-07, | |
| "loss": 1.233, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.07796870992562195, | |
| "grad_norm": 2.7832901812089923, | |
| "learning_rate": 9.941525958777235e-07, | |
| "loss": 1.2965, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.07899461400359066, | |
| "grad_norm": 2.681414225480931, | |
| "learning_rate": 9.938961216421557e-07, | |
| "loss": 1.274, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.08002051808155937, | |
| "grad_norm": 2.749129791384422, | |
| "learning_rate": 9.936341773606722e-07, | |
| "loss": 1.3564, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.08104642215952808, | |
| "grad_norm": 2.615169662982429, | |
| "learning_rate": 9.93366765934384e-07, | |
| "loss": 1.3874, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.08207232623749679, | |
| "grad_norm": 2.7336596773654627, | |
| "learning_rate": 9.930938903249516e-07, | |
| "loss": 1.2904, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.08309823031546551, | |
| "grad_norm": 2.695518831245502, | |
| "learning_rate": 9.928155535545534e-07, | |
| "loss": 1.351, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.08412413439343422, | |
| "grad_norm": 2.475635111783967, | |
| "learning_rate": 9.925317587058514e-07, | |
| "loss": 1.2317, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.08515003847140293, | |
| "grad_norm": 2.8880852942092607, | |
| "learning_rate": 9.922425089219581e-07, | |
| "loss": 1.225, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.08617594254937164, | |
| "grad_norm": 2.8478287390427353, | |
| "learning_rate": 9.919478074064001e-07, | |
| "loss": 1.261, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.08720184662734035, | |
| "grad_norm": 2.637223722117266, | |
| "learning_rate": 9.91647657423084e-07, | |
| "loss": 1.3015, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.08822775070530905, | |
| "grad_norm": 2.654960461830896, | |
| "learning_rate": 9.913420622962604e-07, | |
| "loss": 1.3029, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.08925365478327776, | |
| "grad_norm": 3.075422859212528, | |
| "learning_rate": 9.910310254104854e-07, | |
| "loss": 1.2842, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.09027955886124647, | |
| "grad_norm": 2.7924836932599635, | |
| "learning_rate": 9.907145502105846e-07, | |
| "loss": 1.2982, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.09130546293921518, | |
| "grad_norm": 2.6508581949070016, | |
| "learning_rate": 9.90392640201615e-07, | |
| "loss": 1.3225, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.09233136701718389, | |
| "grad_norm": 2.7887285958862904, | |
| "learning_rate": 9.900652989488253e-07, | |
| "loss": 1.3516, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.0933572710951526, | |
| "grad_norm": 2.778310945122497, | |
| "learning_rate": 9.897325300776167e-07, | |
| "loss": 1.2409, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.09438317517312131, | |
| "grad_norm": 2.771955143672296, | |
| "learning_rate": 9.893943372735032e-07, | |
| "loss": 1.3554, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.09540907925109002, | |
| "grad_norm": 2.641330715037395, | |
| "learning_rate": 9.8905072428207e-07, | |
| "loss": 1.2767, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.09643498332905873, | |
| "grad_norm": 2.7999670492066673, | |
| "learning_rate": 9.887016949089332e-07, | |
| "loss": 1.2957, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.09746088740702745, | |
| "grad_norm": 2.9949645451613134, | |
| "learning_rate": 9.883472530196966e-07, | |
| "loss": 1.3359, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.09848679148499616, | |
| "grad_norm": 2.497849004552429, | |
| "learning_rate": 9.879874025399087e-07, | |
| "loss": 1.3566, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.09951269556296487, | |
| "grad_norm": 2.722740998468592, | |
| "learning_rate": 9.876221474550207e-07, | |
| "loss": 1.3512, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.10053859964093358, | |
| "grad_norm": 2.953223838054628, | |
| "learning_rate": 9.872514918103405e-07, | |
| "loss": 1.3452, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.10156450371890229, | |
| "grad_norm": 2.5063398128121417, | |
| "learning_rate": 9.868754397109895e-07, | |
| "loss": 1.3567, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.102590407796871, | |
| "grad_norm": 2.5885598718043066, | |
| "learning_rate": 9.864939953218561e-07, | |
| "loss": 1.2673, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.1036163118748397, | |
| "grad_norm": 2.620316954154948, | |
| "learning_rate": 9.8610716286755e-07, | |
| "loss": 1.3432, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.10464221595280841, | |
| "grad_norm": 2.7576659564287995, | |
| "learning_rate": 9.85714946632355e-07, | |
| "loss": 1.2954, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.10566812003077712, | |
| "grad_norm": 2.6107378359741147, | |
| "learning_rate": 9.853173509601823e-07, | |
| "loss": 1.3089, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.10669402410874583, | |
| "grad_norm": 2.72531292391567, | |
| "learning_rate": 9.84914380254522e-07, | |
| "loss": 1.2754, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.10771992818671454, | |
| "grad_norm": 2.6204968346720197, | |
| "learning_rate": 9.845060389783937e-07, | |
| "loss": 1.311, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.10874583226468325, | |
| "grad_norm": 2.403778961837896, | |
| "learning_rate": 9.840923316542983e-07, | |
| "loss": 1.2194, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.10977173634265196, | |
| "grad_norm": 2.841225529794086, | |
| "learning_rate": 9.83673262864167e-07, | |
| "loss": 1.2714, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.11079764042062067, | |
| "grad_norm": 2.680466409381784, | |
| "learning_rate": 9.832488372493108e-07, | |
| "loss": 1.3238, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.11182354449858938, | |
| "grad_norm": 2.7899613949194393, | |
| "learning_rate": 9.82819059510369e-07, | |
| "loss": 1.3607, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.1128494485765581, | |
| "grad_norm": 2.741199596943549, | |
| "learning_rate": 9.82383934407258e-07, | |
| "loss": 1.2373, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.1138753526545268, | |
| "grad_norm": 2.680900338879034, | |
| "learning_rate": 9.819434667591166e-07, | |
| "loss": 1.332, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.11490125673249552, | |
| "grad_norm": 2.6867928214379266, | |
| "learning_rate": 9.814976614442547e-07, | |
| "loss": 1.2636, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.11592716081046422, | |
| "grad_norm": 2.530746336531516, | |
| "learning_rate": 9.810465234000987e-07, | |
| "loss": 1.2427, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.11695306488843293, | |
| "grad_norm": 3.079117319825233, | |
| "learning_rate": 9.805900576231357e-07, | |
| "loss": 1.3352, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.11797896896640164, | |
| "grad_norm": 2.705858226495669, | |
| "learning_rate": 9.801282691688595e-07, | |
| "loss": 1.2462, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.11900487304437035, | |
| "grad_norm": 2.642585557515256, | |
| "learning_rate": 9.796611631517141e-07, | |
| "loss": 1.3102, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.12003077712233906, | |
| "grad_norm": 2.591744882697605, | |
| "learning_rate": 9.791887447450374e-07, | |
| "loss": 1.3265, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.12105668120030777, | |
| "grad_norm": 2.5766253217072137, | |
| "learning_rate": 9.787110191810026e-07, | |
| "loss": 1.3054, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.12208258527827648, | |
| "grad_norm": 2.77284938502092, | |
| "learning_rate": 9.782279917505627e-07, | |
| "loss": 1.245, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.12310848935624519, | |
| "grad_norm": 2.8563203008600837, | |
| "learning_rate": 9.77739667803389e-07, | |
| "loss": 1.3478, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.1241343934342139, | |
| "grad_norm": 2.565482050398409, | |
| "learning_rate": 9.772460527478142e-07, | |
| "loss": 1.2775, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.1251602975121826, | |
| "grad_norm": 2.7054539949137615, | |
| "learning_rate": 9.76747152050771e-07, | |
| "loss": 1.3187, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.12618620159015131, | |
| "grad_norm": 2.5989084819509967, | |
| "learning_rate": 9.762429712377331e-07, | |
| "loss": 1.2934, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.12721210566812002, | |
| "grad_norm": 2.510970489271258, | |
| "learning_rate": 9.75733515892652e-07, | |
| "loss": 1.2603, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.12823800974608873, | |
| "grad_norm": 2.5595730950106925, | |
| "learning_rate": 9.752187916578967e-07, | |
| "loss": 1.2883, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.12926391382405744, | |
| "grad_norm": 2.67442136336687, | |
| "learning_rate": 9.746988042341907e-07, | |
| "loss": 1.2817, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.13028981790202615, | |
| "grad_norm": 2.708509570780934, | |
| "learning_rate": 9.741735593805486e-07, | |
| "loss": 1.2078, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.13131572197999486, | |
| "grad_norm": 2.678010158167899, | |
| "learning_rate": 9.736430629142128e-07, | |
| "loss": 1.3796, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.13234162605796357, | |
| "grad_norm": 2.844285265615473, | |
| "learning_rate": 9.731073207105896e-07, | |
| "loss": 1.3091, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.13336753013593228, | |
| "grad_norm": 2.7814463008959334, | |
| "learning_rate": 9.725663387031816e-07, | |
| "loss": 1.3682, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.13439343421390101, | |
| "grad_norm": 2.6513652068190403, | |
| "learning_rate": 9.720201228835256e-07, | |
| "loss": 1.2806, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.13541933829186972, | |
| "grad_norm": 2.808046184764404, | |
| "learning_rate": 9.714686793011235e-07, | |
| "loss": 1.1529, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.13644524236983843, | |
| "grad_norm": 2.6158983121591004, | |
| "learning_rate": 9.709120140633763e-07, | |
| "loss": 1.2301, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.13747114644780714, | |
| "grad_norm": 2.6087976837956406, | |
| "learning_rate": 9.703501333355166e-07, | |
| "loss": 1.2512, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.13849705052577585, | |
| "grad_norm": 2.3938681704241818, | |
| "learning_rate": 9.697830433405399e-07, | |
| "loss": 1.3158, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.13952295460374456, | |
| "grad_norm": 2.5859416655464216, | |
| "learning_rate": 9.692107503591358e-07, | |
| "loss": 1.1996, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.14054885868171327, | |
| "grad_norm": 2.814296800240032, | |
| "learning_rate": 9.68633260729619e-07, | |
| "loss": 1.3123, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.14157476275968198, | |
| "grad_norm": 2.4891637404994125, | |
| "learning_rate": 9.680505808478581e-07, | |
| "loss": 1.2231, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.14260066683765069, | |
| "grad_norm": 2.863453159988515, | |
| "learning_rate": 9.674627171672054e-07, | |
| "loss": 1.2959, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.1436265709156194, | |
| "grad_norm": 2.538454344691091, | |
| "learning_rate": 9.668696761984254e-07, | |
| "loss": 1.2674, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.1446524749935881, | |
| "grad_norm": 2.5004273929320395, | |
| "learning_rate": 9.662714645096229e-07, | |
| "loss": 1.2116, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.1456783790715568, | |
| "grad_norm": 2.6371352778169657, | |
| "learning_rate": 9.656680887261692e-07, | |
| "loss": 1.2476, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.14670428314952552, | |
| "grad_norm": 2.683569477616342, | |
| "learning_rate": 9.650595555306302e-07, | |
| "loss": 1.3046, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.14773018722749423, | |
| "grad_norm": 2.912553498396746, | |
| "learning_rate": 9.644458716626911e-07, | |
| "loss": 1.2318, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.14875609130546294, | |
| "grad_norm": 2.5818573358818266, | |
| "learning_rate": 9.63827043919083e-07, | |
| "loss": 1.3017, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.14978199538343165, | |
| "grad_norm": 2.6673972834050543, | |
| "learning_rate": 9.63203079153506e-07, | |
| "loss": 1.2966, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.15080789946140036, | |
| "grad_norm": 2.8672403287450523, | |
| "learning_rate": 9.625739842765556e-07, | |
| "loss": 1.3346, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.15183380353936907, | |
| "grad_norm": 2.5970617764295496, | |
| "learning_rate": 9.619397662556433e-07, | |
| "loss": 1.2169, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.15285970761733778, | |
| "grad_norm": 2.4979249789960556, | |
| "learning_rate": 9.61300432114922e-07, | |
| "loss": 1.3695, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.15388561169530648, | |
| "grad_norm": 2.479601711066903, | |
| "learning_rate": 9.606559889352063e-07, | |
| "loss": 1.2008, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.1549115157732752, | |
| "grad_norm": 2.5898796136386144, | |
| "learning_rate": 9.600064438538961e-07, | |
| "loss": 1.328, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.1559374198512439, | |
| "grad_norm": 2.798734600781089, | |
| "learning_rate": 9.593518040648952e-07, | |
| "loss": 1.244, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.1569633239292126, | |
| "grad_norm": 2.4882934421406837, | |
| "learning_rate": 9.586920768185333e-07, | |
| "loss": 1.2426, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.15798922800718132, | |
| "grad_norm": 2.802602250049014, | |
| "learning_rate": 9.580272694214854e-07, | |
| "loss": 1.2628, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.15901513208515003, | |
| "grad_norm": 2.930136632494125, | |
| "learning_rate": 9.573573892366903e-07, | |
| "loss": 1.2351, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.16004103616311874, | |
| "grad_norm": 2.3313868113568446, | |
| "learning_rate": 9.566824436832695e-07, | |
| "loss": 1.303, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.16106694024108745, | |
| "grad_norm": 2.616895617747635, | |
| "learning_rate": 9.56002440236445e-07, | |
| "loss": 1.3128, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.16209284431905616, | |
| "grad_norm": 2.5932182189076647, | |
| "learning_rate": 9.553173864274566e-07, | |
| "loss": 1.3312, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.16311874839702487, | |
| "grad_norm": 2.6881235109044184, | |
| "learning_rate": 9.54627289843478e-07, | |
| "loss": 1.2495, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.16414465247499357, | |
| "grad_norm": 2.876970939352412, | |
| "learning_rate": 9.539321581275342e-07, | |
| "loss": 1.3992, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.1651705565529623, | |
| "grad_norm": 2.762056056919623, | |
| "learning_rate": 9.532319989784139e-07, | |
| "loss": 1.3294, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.16619646063093102, | |
| "grad_norm": 2.5107504330050636, | |
| "learning_rate": 9.525268201505878e-07, | |
| "loss": 1.331, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.16722236470889973, | |
| "grad_norm": 2.7655428881842754, | |
| "learning_rate": 9.518166294541203e-07, | |
| "loss": 1.3166, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.16824826878686844, | |
| "grad_norm": 2.5472143589800678, | |
| "learning_rate": 9.511014347545837e-07, | |
| "loss": 1.2435, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.16927417286483715, | |
| "grad_norm": 2.9317558173367084, | |
| "learning_rate": 9.503812439729714e-07, | |
| "loss": 1.2583, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.17030007694280586, | |
| "grad_norm": 2.723414624661715, | |
| "learning_rate": 9.496560650856096e-07, | |
| "loss": 1.2796, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.17132598102077456, | |
| "grad_norm": 2.5426652630538418, | |
| "learning_rate": 9.489259061240695e-07, | |
| "loss": 1.2991, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.17235188509874327, | |
| "grad_norm": 2.3609000901953716, | |
| "learning_rate": 9.481907751750779e-07, | |
| "loss": 1.2761, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.17337778917671198, | |
| "grad_norm": 2.6737934941791885, | |
| "learning_rate": 9.474506803804279e-07, | |
| "loss": 1.2034, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.1744036932546807, | |
| "grad_norm": 2.7034503852388494, | |
| "learning_rate": 9.467056299368887e-07, | |
| "loss": 1.2918, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.1754295973326494, | |
| "grad_norm": 2.6495133759903857, | |
| "learning_rate": 9.459556320961151e-07, | |
| "loss": 1.268, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.1764555014106181, | |
| "grad_norm": 2.6876955044079542, | |
| "learning_rate": 9.452006951645548e-07, | |
| "loss": 1.1678, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.17748140548858682, | |
| "grad_norm": 2.5152653942206045, | |
| "learning_rate": 9.444408275033586e-07, | |
| "loss": 1.1947, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.17850730956655553, | |
| "grad_norm": 2.802330037908763, | |
| "learning_rate": 9.436760375282857e-07, | |
| "loss": 1.3259, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.17953321364452424, | |
| "grad_norm": 2.7422708968039116, | |
| "learning_rate": 9.429063337096119e-07, | |
| "loss": 1.2465, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.18055911772249295, | |
| "grad_norm": 2.826066535856552, | |
| "learning_rate": 9.421317245720352e-07, | |
| "loss": 1.3284, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.18158502180046165, | |
| "grad_norm": 2.466040916453339, | |
| "learning_rate": 9.41352218694581e-07, | |
| "loss": 1.2419, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.18261092587843036, | |
| "grad_norm": 2.837491368634976, | |
| "learning_rate": 9.405678247105082e-07, | |
| "loss": 1.2956, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.18363682995639907, | |
| "grad_norm": 2.704111429861623, | |
| "learning_rate": 9.397785513072126e-07, | |
| "loss": 1.2389, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.18466273403436778, | |
| "grad_norm": 2.427829411975647, | |
| "learning_rate": 9.38984407226131e-07, | |
| "loss": 1.2553, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.1856886381123365, | |
| "grad_norm": 2.6889270088014343, | |
| "learning_rate": 9.381854012626443e-07, | |
| "loss": 1.2249, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.1867145421903052, | |
| "grad_norm": 3.0319367008791813, | |
| "learning_rate": 9.373815422659805e-07, | |
| "loss": 1.2405, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.1877404462682739, | |
| "grad_norm": 2.7686852678293485, | |
| "learning_rate": 9.365728391391164e-07, | |
| "loss": 1.2309, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.18876635034624262, | |
| "grad_norm": 2.6725011393667577, | |
| "learning_rate": 9.357593008386784e-07, | |
| "loss": 1.2986, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.18979225442421133, | |
| "grad_norm": 2.4605351470645083, | |
| "learning_rate": 9.349409363748444e-07, | |
| "loss": 1.1675, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.19081815850218004, | |
| "grad_norm": 2.592275913149399, | |
| "learning_rate": 9.341177548112436e-07, | |
| "loss": 1.3206, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.19184406258014874, | |
| "grad_norm": 2.941294249687017, | |
| "learning_rate": 9.332897652648555e-07, | |
| "loss": 1.2446, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.19286996665811745, | |
| "grad_norm": 2.741142925639391, | |
| "learning_rate": 9.324569769059096e-07, | |
| "loss": 1.3082, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.19389587073608616, | |
| "grad_norm": 2.6064903500939747, | |
| "learning_rate": 9.316193989577843e-07, | |
| "loss": 1.2599, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.1949217748140549, | |
| "grad_norm": 2.6477370525921966, | |
| "learning_rate": 9.30777040696903e-07, | |
| "loss": 1.2219, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.1959476788920236, | |
| "grad_norm": 2.758046928790272, | |
| "learning_rate": 9.299299114526334e-07, | |
| "loss": 1.2352, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.19697358296999232, | |
| "grad_norm": 2.6839436039460907, | |
| "learning_rate": 9.29078020607183e-07, | |
| "loss": 1.2706, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.19799948704796103, | |
| "grad_norm": 2.4457645270891444, | |
| "learning_rate": 9.28221377595495e-07, | |
| "loss": 1.2831, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.19902539112592973, | |
| "grad_norm": 2.547455908474382, | |
| "learning_rate": 9.273599919051452e-07, | |
| "loss": 1.3182, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.20005129520389844, | |
| "grad_norm": 2.777434855703004, | |
| "learning_rate": 9.264938730762348e-07, | |
| "loss": 1.3255, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.20107719928186715, | |
| "grad_norm": 2.615909597544814, | |
| "learning_rate": 9.256230307012869e-07, | |
| "loss": 1.1474, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.20210310335983586, | |
| "grad_norm": 3.06791169964388, | |
| "learning_rate": 9.247474744251387e-07, | |
| "loss": 1.2645, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.20312900743780457, | |
| "grad_norm": 2.6202584863601976, | |
| "learning_rate": 9.238672139448353e-07, | |
| "loss": 1.275, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.20415491151577328, | |
| "grad_norm": 2.601162569382904, | |
| "learning_rate": 9.229822590095229e-07, | |
| "loss": 1.274, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.205180815593742, | |
| "grad_norm": 2.6008093073665877, | |
| "learning_rate": 9.220926194203392e-07, | |
| "loss": 1.3114, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.2062067196717107, | |
| "grad_norm": 2.7639300809591765, | |
| "learning_rate": 9.211983050303065e-07, | |
| "loss": 1.3243, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.2072326237496794, | |
| "grad_norm": 2.414680199787791, | |
| "learning_rate": 9.202993257442216e-07, | |
| "loss": 1.2389, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.20825852782764812, | |
| "grad_norm": 2.6254293412284158, | |
| "learning_rate": 9.193956915185465e-07, | |
| "loss": 1.2058, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.20928443190561682, | |
| "grad_norm": 2.448512350031076, | |
| "learning_rate": 9.184874123612981e-07, | |
| "loss": 1.2387, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.21031033598358553, | |
| "grad_norm": 2.5690470438213406, | |
| "learning_rate": 9.175744983319373e-07, | |
| "loss": 1.26, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.21133624006155424, | |
| "grad_norm": 2.754390048080123, | |
| "learning_rate": 9.166569595412574e-07, | |
| "loss": 1.1595, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.21236214413952295, | |
| "grad_norm": 2.774729015636808, | |
| "learning_rate": 9.157348061512726e-07, | |
| "loss": 1.2775, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.21338804821749166, | |
| "grad_norm": 2.7682686071100853, | |
| "learning_rate": 9.148080483751048e-07, | |
| "loss": 1.3079, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.21441395229546037, | |
| "grad_norm": 2.782086823919593, | |
| "learning_rate": 9.138766964768711e-07, | |
| "loss": 1.2042, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.21543985637342908, | |
| "grad_norm": 2.6465176185194674, | |
| "learning_rate": 9.129407607715696e-07, | |
| "loss": 1.3243, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.2164657604513978, | |
| "grad_norm": 2.435336652939164, | |
| "learning_rate": 9.12000251624966e-07, | |
| "loss": 1.2992, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.2174916645293665, | |
| "grad_norm": 2.598474192950846, | |
| "learning_rate": 9.110551794534775e-07, | |
| "loss": 1.2117, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.2185175686073352, | |
| "grad_norm": 2.6249002298839756, | |
| "learning_rate": 9.101055547240586e-07, | |
| "loss": 1.305, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.21954347268530391, | |
| "grad_norm": 2.82690930390355, | |
| "learning_rate": 9.091513879540844e-07, | |
| "loss": 1.3207, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.22056937676327262, | |
| "grad_norm": 2.436101223347268, | |
| "learning_rate": 9.08192689711235e-07, | |
| "loss": 1.1575, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.22159528084124133, | |
| "grad_norm": 2.3958033529310088, | |
| "learning_rate": 9.072294706133774e-07, | |
| "loss": 1.244, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.22262118491921004, | |
| "grad_norm": 2.6527085410686078, | |
| "learning_rate": 9.062617413284485e-07, | |
| "loss": 1.172, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.22364708899717875, | |
| "grad_norm": 2.648011195059752, | |
| "learning_rate": 9.052895125743369e-07, | |
| "loss": 1.3308, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.2246729930751475, | |
| "grad_norm": 2.568920128652391, | |
| "learning_rate": 9.043127951187642e-07, | |
| "loss": 1.1837, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.2256988971531162, | |
| "grad_norm": 2.5261344724437045, | |
| "learning_rate": 9.033315997791659e-07, | |
| "loss": 1.3071, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.2267248012310849, | |
| "grad_norm": 2.482972261372125, | |
| "learning_rate": 9.023459374225708e-07, | |
| "loss": 1.2766, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.2277507053090536, | |
| "grad_norm": 2.728013709536128, | |
| "learning_rate": 9.013558189654817e-07, | |
| "loss": 1.2879, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.22877660938702232, | |
| "grad_norm": 2.509526243928691, | |
| "learning_rate": 9.003612553737543e-07, | |
| "loss": 1.2326, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.22980251346499103, | |
| "grad_norm": 2.600184311388481, | |
| "learning_rate": 8.993622576624746e-07, | |
| "loss": 1.2468, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.23082841754295974, | |
| "grad_norm": 2.996100810613903, | |
| "learning_rate": 8.983588368958387e-07, | |
| "loss": 1.2802, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.23185432162092845, | |
| "grad_norm": 3.145733520640035, | |
| "learning_rate": 8.973510041870287e-07, | |
| "loss": 1.3561, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.23288022569889716, | |
| "grad_norm": 2.677928117781213, | |
| "learning_rate": 8.963387706980907e-07, | |
| "loss": 1.2575, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.23390612977686587, | |
| "grad_norm": 2.3983433304776542, | |
| "learning_rate": 8.953221476398105e-07, | |
| "loss": 1.3267, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.23493203385483458, | |
| "grad_norm": 2.5880313443096146, | |
| "learning_rate": 8.943011462715897e-07, | |
| "loss": 1.3981, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.23595793793280329, | |
| "grad_norm": 2.8392528250107882, | |
| "learning_rate": 8.932757779013213e-07, | |
| "loss": 1.2559, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.236983842010772, | |
| "grad_norm": 2.5686226393466525, | |
| "learning_rate": 8.922460538852634e-07, | |
| "loss": 1.2202, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.2380097460887407, | |
| "grad_norm": 2.509078780435471, | |
| "learning_rate": 8.912119856279149e-07, | |
| "loss": 1.1649, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.2390356501667094, | |
| "grad_norm": 2.8313736339242257, | |
| "learning_rate": 8.901735845818884e-07, | |
| "loss": 1.303, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.24006155424467812, | |
| "grad_norm": 2.63535412753487, | |
| "learning_rate": 8.891308622477829e-07, | |
| "loss": 1.2393, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.24108745832264683, | |
| "grad_norm": 2.357470458300626, | |
| "learning_rate": 8.880838301740575e-07, | |
| "loss": 1.2319, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.24211336240061554, | |
| "grad_norm": 2.633624233788043, | |
| "learning_rate": 8.870324999569024e-07, | |
| "loss": 1.3239, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.24313926647858425, | |
| "grad_norm": 2.541409949570618, | |
| "learning_rate": 8.859768832401117e-07, | |
| "loss": 1.2875, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.24416517055655296, | |
| "grad_norm": 2.7389024190179123, | |
| "learning_rate": 8.849169917149531e-07, | |
| "loss": 1.2948, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.24519107463452167, | |
| "grad_norm": 2.7594743544624536, | |
| "learning_rate": 8.838528371200394e-07, | |
| "loss": 1.3199, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.24621697871249038, | |
| "grad_norm": 2.553894247097559, | |
| "learning_rate": 8.827844312411982e-07, | |
| "loss": 1.3282, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.24724288279045908, | |
| "grad_norm": 2.41692947947688, | |
| "learning_rate": 8.817117859113412e-07, | |
| "loss": 1.242, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.2482687868684278, | |
| "grad_norm": 2.723064556658783, | |
| "learning_rate": 8.806349130103332e-07, | |
| "loss": 1.1648, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.2492946909463965, | |
| "grad_norm": 2.643873563364823, | |
| "learning_rate": 8.795538244648609e-07, | |
| "loss": 1.2753, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.2503205950243652, | |
| "grad_norm": 2.6856636444384385, | |
| "learning_rate": 8.784685322483003e-07, | |
| "loss": 1.2634, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.2513464991023339, | |
| "grad_norm": 2.960535082783671, | |
| "learning_rate": 8.77379048380584e-07, | |
| "loss": 1.1963, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.25237240318030263, | |
| "grad_norm": 3.2907999278170066, | |
| "learning_rate": 8.762853849280691e-07, | |
| "loss": 1.2236, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.25339830725827134, | |
| "grad_norm": 2.706088121459989, | |
| "learning_rate": 8.751875540034025e-07, | |
| "loss": 1.2284, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.25442421133624005, | |
| "grad_norm": 2.8342758112128656, | |
| "learning_rate": 8.740855677653867e-07, | |
| "loss": 1.3529, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.25545011541420876, | |
| "grad_norm": 2.657409992434949, | |
| "learning_rate": 8.72979438418846e-07, | |
| "loss": 1.2265, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.25647601949217746, | |
| "grad_norm": 2.733820536997817, | |
| "learning_rate": 8.718691782144907e-07, | |
| "loss": 1.3085, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.25647601949217746, | |
| "eval_uground_MCTS_chains_SFT_val_loss": 1.2850571870803833, | |
| "eval_uground_MCTS_chains_SFT_val_runtime": 234.9849, | |
| "eval_uground_MCTS_chains_SFT_val_samples_per_second": 7.741, | |
| "eval_uground_MCTS_chains_SFT_val_steps_per_second": 0.97, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.2575019235701462, | |
| "grad_norm": 2.6848225964360886, | |
| "learning_rate": 8.707547994487817e-07, | |
| "loss": 1.2179, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.2585278276481149, | |
| "grad_norm": 2.6683129178340717, | |
| "learning_rate": 8.69636314463794e-07, | |
| "loss": 1.2185, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.2595537317260836, | |
| "grad_norm": 2.7296939230032047, | |
| "learning_rate": 8.685137356470802e-07, | |
| "loss": 1.2974, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.2605796358040523, | |
| "grad_norm": 2.5819217942943387, | |
| "learning_rate": 8.673870754315336e-07, | |
| "loss": 1.2832, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.261605539882021, | |
| "grad_norm": 2.7286375807681202, | |
| "learning_rate": 8.662563462952498e-07, | |
| "loss": 1.1838, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.2626314439599897, | |
| "grad_norm": 2.8389791464688825, | |
| "learning_rate": 8.651215607613891e-07, | |
| "loss": 1.2626, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.26365734803795843, | |
| "grad_norm": 2.667042240854244, | |
| "learning_rate": 8.639827313980377e-07, | |
| "loss": 1.2649, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.26468325211592714, | |
| "grad_norm": 2.6062798312002853, | |
| "learning_rate": 8.628398708180679e-07, | |
| "loss": 1.2032, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.26570915619389585, | |
| "grad_norm": 2.8969301485628893, | |
| "learning_rate": 8.61692991679e-07, | |
| "loss": 1.2119, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.26673506027186455, | |
| "grad_norm": 2.6804950419511164, | |
| "learning_rate": 8.605421066828598e-07, | |
| "loss": 1.2812, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.26776096434983326, | |
| "grad_norm": 2.8172486497816807, | |
| "learning_rate": 8.593872285760399e-07, | |
| "loss": 1.2254, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.26878686842780203, | |
| "grad_norm": 2.5633187420692307, | |
| "learning_rate": 8.582283701491575e-07, | |
| "loss": 1.2842, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.26981277250577074, | |
| "grad_norm": 2.67558705929831, | |
| "learning_rate": 8.570655442369133e-07, | |
| "loss": 1.2636, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.27083867658373945, | |
| "grad_norm": 2.8468850830803425, | |
| "learning_rate": 8.558987637179487e-07, | |
| "loss": 1.214, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.27186458066170816, | |
| "grad_norm": 2.6831972490521965, | |
| "learning_rate": 8.547280415147037e-07, | |
| "loss": 1.2931, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.27289048473967686, | |
| "grad_norm": 2.6462503105531896, | |
| "learning_rate": 8.535533905932737e-07, | |
| "loss": 1.2655, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.2739163888176456, | |
| "grad_norm": 2.3586365587982967, | |
| "learning_rate": 8.523748239632659e-07, | |
| "loss": 1.2476, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.2749422928956143, | |
| "grad_norm": 2.54829905356015, | |
| "learning_rate": 8.51192354677655e-07, | |
| "loss": 1.1603, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.275968196973583, | |
| "grad_norm": 2.5266352307165705, | |
| "learning_rate": 8.500059958326384e-07, | |
| "loss": 1.2981, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.2769941010515517, | |
| "grad_norm": 2.6922548930973167, | |
| "learning_rate": 8.488157605674924e-07, | |
| "loss": 1.2659, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.2780200051295204, | |
| "grad_norm": 2.664526394721468, | |
| "learning_rate": 8.47621662064425e-07, | |
| "loss": 1.2074, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.2790459092074891, | |
| "grad_norm": 2.6008818489590477, | |
| "learning_rate": 8.464237135484309e-07, | |
| "loss": 1.312, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.2800718132854578, | |
| "grad_norm": 2.5513968093248733, | |
| "learning_rate": 8.452219282871451e-07, | |
| "loss": 1.2602, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.28109771736342654, | |
| "grad_norm": 2.645895710122177, | |
| "learning_rate": 8.440163195906958e-07, | |
| "loss": 1.2199, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.28212362144139524, | |
| "grad_norm": 2.555562047484234, | |
| "learning_rate": 8.42806900811556e-07, | |
| "loss": 1.2419, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.28314952551936395, | |
| "grad_norm": 2.668319098287759, | |
| "learning_rate": 8.415936853443974e-07, | |
| "loss": 1.2866, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.28417542959733266, | |
| "grad_norm": 2.5876812386597856, | |
| "learning_rate": 8.40376686625941e-07, | |
| "loss": 1.2495, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.28520133367530137, | |
| "grad_norm": 2.761080694836167, | |
| "learning_rate": 8.391559181348081e-07, | |
| "loss": 1.3327, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.2862272377532701, | |
| "grad_norm": 2.5148075285885323, | |
| "learning_rate": 8.379313933913714e-07, | |
| "loss": 1.2356, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.2872531418312388, | |
| "grad_norm": 2.8048853074405913, | |
| "learning_rate": 8.367031259576056e-07, | |
| "loss": 1.3033, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.2882790459092075, | |
| "grad_norm": 2.5855751773469, | |
| "learning_rate": 8.354711294369362e-07, | |
| "loss": 1.2116, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.2893049499871762, | |
| "grad_norm": 2.6409683165441833, | |
| "learning_rate": 8.342354174740902e-07, | |
| "loss": 1.291, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.2903308540651449, | |
| "grad_norm": 2.5609773198767583, | |
| "learning_rate": 8.329960037549433e-07, | |
| "loss": 1.1966, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.2913567581431136, | |
| "grad_norm": 2.454732765461205, | |
| "learning_rate": 8.317529020063703e-07, | |
| "loss": 1.2519, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.29238266222108233, | |
| "grad_norm": 2.5134417168975127, | |
| "learning_rate": 8.305061259960909e-07, | |
| "loss": 1.3021, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.29340856629905104, | |
| "grad_norm": 2.7770733269324683, | |
| "learning_rate": 8.292556895325194e-07, | |
| "loss": 1.2724, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.29443447037701975, | |
| "grad_norm": 2.7282701405098226, | |
| "learning_rate": 8.280016064646098e-07, | |
| "loss": 1.3078, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.29546037445498846, | |
| "grad_norm": 2.9061694081822815, | |
| "learning_rate": 8.267438906817039e-07, | |
| "loss": 1.3072, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.29648627853295717, | |
| "grad_norm": 2.789078860545301, | |
| "learning_rate": 8.25482556113377e-07, | |
| "loss": 1.2562, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.2975121826109259, | |
| "grad_norm": 2.5716595982006147, | |
| "learning_rate": 8.242176167292826e-07, | |
| "loss": 1.242, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.2985380866888946, | |
| "grad_norm": 2.639971746858166, | |
| "learning_rate": 8.229490865389998e-07, | |
| "loss": 1.2395, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.2995639907668633, | |
| "grad_norm": 2.478159221268824, | |
| "learning_rate": 8.216769795918762e-07, | |
| "loss": 1.1481, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.300589894844832, | |
| "grad_norm": 2.5242932402585376, | |
| "learning_rate": 8.204013099768732e-07, | |
| "loss": 1.2662, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.3016157989228007, | |
| "grad_norm": 2.598195840538781, | |
| "learning_rate": 8.1912209182241e-07, | |
| "loss": 1.2947, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.3026417030007694, | |
| "grad_norm": 2.7173961511536264, | |
| "learning_rate": 8.178393392962066e-07, | |
| "loss": 1.2896, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.30366760707873813, | |
| "grad_norm": 2.7206884235512527, | |
| "learning_rate": 8.165530666051275e-07, | |
| "loss": 1.2786, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.30469351115670684, | |
| "grad_norm": 2.7552783724459706, | |
| "learning_rate": 8.152632879950238e-07, | |
| "loss": 1.2365, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.30571941523467555, | |
| "grad_norm": 2.6147443498413954, | |
| "learning_rate": 8.139700177505759e-07, | |
| "loss": 1.2179, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.30674531931264426, | |
| "grad_norm": 2.606663274306208, | |
| "learning_rate": 8.126732701951351e-07, | |
| "loss": 1.3034, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.30777122339061297, | |
| "grad_norm": 2.648199256292807, | |
| "learning_rate": 8.113730596905648e-07, | |
| "loss": 1.1252, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.3087971274685817, | |
| "grad_norm": 2.4523122279421417, | |
| "learning_rate": 8.100694006370816e-07, | |
| "loss": 1.2627, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.3098230315465504, | |
| "grad_norm": 2.966417644370533, | |
| "learning_rate": 8.087623074730959e-07, | |
| "loss": 1.2693, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.3108489356245191, | |
| "grad_norm": 2.7468880750816647, | |
| "learning_rate": 8.07451794675052e-07, | |
| "loss": 1.2192, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.3118748397024878, | |
| "grad_norm": 2.7445599934088327, | |
| "learning_rate": 8.061378767572673e-07, | |
| "loss": 1.2482, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.3129007437804565, | |
| "grad_norm": 2.377263362356011, | |
| "learning_rate": 8.048205682717724e-07, | |
| "loss": 1.22, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.3139266478584252, | |
| "grad_norm": 2.682567930380336, | |
| "learning_rate": 8.034998838081489e-07, | |
| "loss": 1.3312, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.31495255193639393, | |
| "grad_norm": 2.6542762724022717, | |
| "learning_rate": 8.021758379933686e-07, | |
| "loss": 1.2426, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.31597845601436264, | |
| "grad_norm": 2.691345504635165, | |
| "learning_rate": 8.008484454916316e-07, | |
| "loss": 1.195, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.31700436009233135, | |
| "grad_norm": 2.490433627351295, | |
| "learning_rate": 7.995177210042028e-07, | |
| "loss": 1.1885, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.31803026417030006, | |
| "grad_norm": 2.6751579195821242, | |
| "learning_rate": 7.981836792692507e-07, | |
| "loss": 1.201, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.31905616824826877, | |
| "grad_norm": 2.670875208828667, | |
| "learning_rate": 7.968463350616825e-07, | |
| "loss": 1.2289, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.3200820723262375, | |
| "grad_norm": 2.745027291047488, | |
| "learning_rate": 7.955057031929819e-07, | |
| "loss": 1.2896, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.3211079764042062, | |
| "grad_norm": 2.561763884212081, | |
| "learning_rate": 7.941617985110442e-07, | |
| "loss": 1.3243, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.3221338804821749, | |
| "grad_norm": 2.6240454275926344, | |
| "learning_rate": 7.928146359000117e-07, | |
| "loss": 1.3075, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.3231597845601436, | |
| "grad_norm": 2.6273454218750434, | |
| "learning_rate": 7.914642302801097e-07, | |
| "loss": 1.3616, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.3241856886381123, | |
| "grad_norm": 2.563951932815581, | |
| "learning_rate": 7.901105966074806e-07, | |
| "loss": 1.2714, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.325211592716081, | |
| "grad_norm": 2.5822875744818563, | |
| "learning_rate": 7.887537498740187e-07, | |
| "loss": 1.3283, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.32623749679404973, | |
| "grad_norm": 2.6143885876617627, | |
| "learning_rate": 7.873937051072035e-07, | |
| "loss": 1.253, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.32726340087201844, | |
| "grad_norm": 2.647031579928053, | |
| "learning_rate": 7.860304773699338e-07, | |
| "loss": 1.3734, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.32828930494998715, | |
| "grad_norm": 2.6072419451698625, | |
| "learning_rate": 7.846640817603607e-07, | |
| "loss": 1.2476, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.3293152090279559, | |
| "grad_norm": 2.6913716325413737, | |
| "learning_rate": 7.83294533411721e-07, | |
| "loss": 1.2436, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.3303411131059246, | |
| "grad_norm": 2.5669118480954913, | |
| "learning_rate": 7.819218474921679e-07, | |
| "loss": 1.2327, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.33136701718389333, | |
| "grad_norm": 2.4680065253742063, | |
| "learning_rate": 7.805460392046053e-07, | |
| "loss": 1.2094, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.33239292126186204, | |
| "grad_norm": 2.791361144884489, | |
| "learning_rate": 7.791671237865174e-07, | |
| "loss": 1.2787, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.33341882533983075, | |
| "grad_norm": 2.3530874149345355, | |
| "learning_rate": 7.777851165098011e-07, | |
| "loss": 1.2706, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.33444472941779946, | |
| "grad_norm": 2.7864142928485442, | |
| "learning_rate": 7.764000326805966e-07, | |
| "loss": 1.2568, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.33547063349576817, | |
| "grad_norm": 2.744024330823426, | |
| "learning_rate": 7.75011887639118e-07, | |
| "loss": 1.1573, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.3364965375737369, | |
| "grad_norm": 2.4274620710022696, | |
| "learning_rate": 7.736206967594827e-07, | |
| "loss": 1.2552, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.3375224416517056, | |
| "grad_norm": 2.433652617983981, | |
| "learning_rate": 7.722264754495421e-07, | |
| "loss": 1.1821, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.3385483457296743, | |
| "grad_norm": 2.6471120476788963, | |
| "learning_rate": 7.708292391507105e-07, | |
| "loss": 1.2633, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.339574249807643, | |
| "grad_norm": 2.4674613719486542, | |
| "learning_rate": 7.694290033377938e-07, | |
| "loss": 1.1524, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.3406001538856117, | |
| "grad_norm": 2.7466941946417003, | |
| "learning_rate": 7.680257835188186e-07, | |
| "loss": 1.2265, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.3416260579635804, | |
| "grad_norm": 2.5000486774159514, | |
| "learning_rate": 7.666195952348606e-07, | |
| "loss": 1.2611, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.34265196204154913, | |
| "grad_norm": 2.567371463839335, | |
| "learning_rate": 7.652104540598712e-07, | |
| "loss": 1.2303, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.34367786611951784, | |
| "grad_norm": 2.5729629715661266, | |
| "learning_rate": 7.63798375600507e-07, | |
| "loss": 1.2566, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.34470377019748655, | |
| "grad_norm": 2.647727174139296, | |
| "learning_rate": 7.623833754959551e-07, | |
| "loss": 1.2118, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.34572967427545526, | |
| "grad_norm": 2.8141439244256894, | |
| "learning_rate": 7.609654694177612e-07, | |
| "loss": 1.2393, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.34675557835342397, | |
| "grad_norm": 2.5047983163137335, | |
| "learning_rate": 7.595446730696553e-07, | |
| "loss": 1.2848, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.3477814824313927, | |
| "grad_norm": 2.8575885947114954, | |
| "learning_rate": 7.581210021873778e-07, | |
| "loss": 1.2739, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.3488073865093614, | |
| "grad_norm": 2.5879263426142303, | |
| "learning_rate": 7.56694472538506e-07, | |
| "loss": 1.2399, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.3498332905873301, | |
| "grad_norm": 2.634462986237627, | |
| "learning_rate": 7.552650999222783e-07, | |
| "loss": 1.1886, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.3508591946652988, | |
| "grad_norm": 2.644196413288821, | |
| "learning_rate": 7.538329001694199e-07, | |
| "loss": 1.189, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.3518850987432675, | |
| "grad_norm": 2.46112345657162, | |
| "learning_rate": 7.523978891419678e-07, | |
| "loss": 1.3348, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.3529110028212362, | |
| "grad_norm": 2.749240231393701, | |
| "learning_rate": 7.509600827330942e-07, | |
| "loss": 1.3394, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.35393690689920493, | |
| "grad_norm": 2.690306971315971, | |
| "learning_rate": 7.495194968669311e-07, | |
| "loss": 1.1576, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.35496281097717364, | |
| "grad_norm": 2.539756877499731, | |
| "learning_rate": 7.480761474983943e-07, | |
| "loss": 1.2425, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.35598871505514235, | |
| "grad_norm": 2.794783648579977, | |
| "learning_rate": 7.466300506130052e-07, | |
| "loss": 1.2182, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.35701461913311106, | |
| "grad_norm": 2.824643687827716, | |
| "learning_rate": 7.451812222267157e-07, | |
| "loss": 1.218, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.35804052321107976, | |
| "grad_norm": 2.667873083406682, | |
| "learning_rate": 7.437296783857296e-07, | |
| "loss": 1.1692, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.3590664272890485, | |
| "grad_norm": 2.671390395878333, | |
| "learning_rate": 7.422754351663251e-07, | |
| "loss": 1.1613, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.3600923313670172, | |
| "grad_norm": 2.8795003735438813, | |
| "learning_rate": 7.408185086746766e-07, | |
| "loss": 1.2302, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.3611182354449859, | |
| "grad_norm": 2.6611306550789684, | |
| "learning_rate": 7.39358915046677e-07, | |
| "loss": 1.3027, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.3621441395229546, | |
| "grad_norm": 2.6035638980444933, | |
| "learning_rate": 7.378966704477584e-07, | |
| "loss": 1.2342, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.3631700436009233, | |
| "grad_norm": 2.4479505443923197, | |
| "learning_rate": 7.364317910727127e-07, | |
| "loss": 1.2259, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.364195947678892, | |
| "grad_norm": 2.795523604698995, | |
| "learning_rate": 7.349642931455131e-07, | |
| "loss": 1.1781, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.3652218517568607, | |
| "grad_norm": 2.5499819208366006, | |
| "learning_rate": 7.334941929191343e-07, | |
| "loss": 1.2789, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.36624775583482944, | |
| "grad_norm": 2.7893689673549686, | |
| "learning_rate": 7.320215066753722e-07, | |
| "loss": 1.2962, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.36727365991279814, | |
| "grad_norm": 2.644819217736545, | |
| "learning_rate": 7.305462507246629e-07, | |
| "loss": 1.2008, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.36829956399076685, | |
| "grad_norm": 2.7769763671178795, | |
| "learning_rate": 7.290684414059034e-07, | |
| "loss": 1.2561, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.36932546806873556, | |
| "grad_norm": 2.7516891619949715, | |
| "learning_rate": 7.2758809508627e-07, | |
| "loss": 1.1101, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.37035137214670427, | |
| "grad_norm": 2.426378655817816, | |
| "learning_rate": 7.261052281610367e-07, | |
| "loss": 1.2136, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.371377276224673, | |
| "grad_norm": 2.6154616984297414, | |
| "learning_rate": 7.246198570533944e-07, | |
| "loss": 1.2197, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.3724031803026417, | |
| "grad_norm": 2.5360984469645467, | |
| "learning_rate": 7.231319982142679e-07, | |
| "loss": 1.1605, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.3734290843806104, | |
| "grad_norm": 2.825045376287288, | |
| "learning_rate": 7.216416681221353e-07, | |
| "loss": 1.2416, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.3744549884585791, | |
| "grad_norm": 2.5276250456119844, | |
| "learning_rate": 7.201488832828438e-07, | |
| "loss": 1.19, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.3754808925365478, | |
| "grad_norm": 2.4928208511714627, | |
| "learning_rate": 7.186536602294278e-07, | |
| "loss": 1.2168, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.3765067966145165, | |
| "grad_norm": 2.6143894885373102, | |
| "learning_rate": 7.171560155219256e-07, | |
| "loss": 1.2404, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.37753270069248523, | |
| "grad_norm": 2.60017799652903, | |
| "learning_rate": 7.156559657471966e-07, | |
| "loss": 1.2826, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.37855860477045394, | |
| "grad_norm": 2.6824928604628564, | |
| "learning_rate": 7.141535275187363e-07, | |
| "loss": 1.1865, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.37958450884842265, | |
| "grad_norm": 2.603460949625, | |
| "learning_rate": 7.126487174764935e-07, | |
| "loss": 1.1988, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.38061041292639136, | |
| "grad_norm": 2.679412906737572, | |
| "learning_rate": 7.11141552286685e-07, | |
| "loss": 1.2827, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.38163631700436007, | |
| "grad_norm": 2.9611932859479055, | |
| "learning_rate": 7.096320486416124e-07, | |
| "loss": 1.2008, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.3826622210823288, | |
| "grad_norm": 2.748214027279323, | |
| "learning_rate": 7.081202232594758e-07, | |
| "loss": 1.2062, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.3836881251602975, | |
| "grad_norm": 2.610256542655334, | |
| "learning_rate": 7.06606092884189e-07, | |
| "loss": 1.2218, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.3847140292382662, | |
| "grad_norm": 2.568752573794102, | |
| "learning_rate": 7.050896742851952e-07, | |
| "loss": 1.259, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.3857399333162349, | |
| "grad_norm": 2.729994050097171, | |
| "learning_rate": 7.035709842572792e-07, | |
| "loss": 1.1736, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.3867658373942036, | |
| "grad_norm": 2.865709205643247, | |
| "learning_rate": 7.020500396203837e-07, | |
| "loss": 1.2853, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.3877917414721723, | |
| "grad_norm": 2.6383244403757065, | |
| "learning_rate": 7.005268572194207e-07, | |
| "loss": 1.2394, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.3888176455501411, | |
| "grad_norm": 2.756577015435794, | |
| "learning_rate": 6.990014539240873e-07, | |
| "loss": 1.1993, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.3898435496281098, | |
| "grad_norm": 2.5090775530327805, | |
| "learning_rate": 6.974738466286765e-07, | |
| "loss": 1.2351, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.3908694537060785, | |
| "grad_norm": 2.7812691019728093, | |
| "learning_rate": 6.959440522518923e-07, | |
| "loss": 1.199, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.3918953577840472, | |
| "grad_norm": 2.546674174824266, | |
| "learning_rate": 6.944120877366604e-07, | |
| "loss": 1.154, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.3929212618620159, | |
| "grad_norm": 3.0015148003584557, | |
| "learning_rate": 6.928779700499419e-07, | |
| "loss": 1.2702, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.39394716593998463, | |
| "grad_norm": 2.5008648231818627, | |
| "learning_rate": 6.913417161825449e-07, | |
| "loss": 1.2137, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.39497307001795334, | |
| "grad_norm": 2.4588983698528772, | |
| "learning_rate": 6.898033431489361e-07, | |
| "loss": 1.1406, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.39599897409592205, | |
| "grad_norm": 2.527134330865664, | |
| "learning_rate": 6.882628679870531e-07, | |
| "loss": 1.3066, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.39702487817389076, | |
| "grad_norm": 2.6472686614664225, | |
| "learning_rate": 6.867203077581145e-07, | |
| "loss": 1.3113, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.39805078225185947, | |
| "grad_norm": 2.764798950791151, | |
| "learning_rate": 6.851756795464323e-07, | |
| "loss": 1.2906, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.3990766863298282, | |
| "grad_norm": 2.7405389624240653, | |
| "learning_rate": 6.836290004592213e-07, | |
| "loss": 1.2001, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.4001025904077969, | |
| "grad_norm": 2.631861163892539, | |
| "learning_rate": 6.820802876264111e-07, | |
| "loss": 1.1641, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.4011284944857656, | |
| "grad_norm": 2.6174482541499136, | |
| "learning_rate": 6.805295582004551e-07, | |
| "loss": 1.2187, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.4021543985637343, | |
| "grad_norm": 2.852360371699908, | |
| "learning_rate": 6.789768293561413e-07, | |
| "loss": 1.308, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.403180302641703, | |
| "grad_norm": 2.5465668197579743, | |
| "learning_rate": 6.774221182904017e-07, | |
| "loss": 1.2074, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.4042062067196717, | |
| "grad_norm": 2.864528470906807, | |
| "learning_rate": 6.758654422221224e-07, | |
| "loss": 1.1637, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.40523211079764043, | |
| "grad_norm": 2.8379197072181612, | |
| "learning_rate": 6.743068183919519e-07, | |
| "loss": 1.222, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.40625801487560914, | |
| "grad_norm": 2.62449855028073, | |
| "learning_rate": 6.727462640621112e-07, | |
| "loss": 1.279, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.40728391895357785, | |
| "grad_norm": 2.5226829698942397, | |
| "learning_rate": 6.711837965162019e-07, | |
| "loss": 1.2924, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.40830982303154656, | |
| "grad_norm": 2.6680293582268715, | |
| "learning_rate": 6.69619433059015e-07, | |
| "loss": 1.203, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.40933572710951527, | |
| "grad_norm": 2.6267175358172272, | |
| "learning_rate": 6.680531910163398e-07, | |
| "loss": 1.2044, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.410361631187484, | |
| "grad_norm": 2.798539704369877, | |
| "learning_rate": 6.664850877347705e-07, | |
| "loss": 1.2283, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.4113875352654527, | |
| "grad_norm": 2.706328663367393, | |
| "learning_rate": 6.649151405815161e-07, | |
| "loss": 1.2072, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.4124134393434214, | |
| "grad_norm": 2.5865368220778735, | |
| "learning_rate": 6.633433669442064e-07, | |
| "loss": 1.2156, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.4134393434213901, | |
| "grad_norm": 2.57901369585107, | |
| "learning_rate": 6.617697842307004e-07, | |
| "loss": 1.1958, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.4144652474993588, | |
| "grad_norm": 2.7016388144398267, | |
| "learning_rate": 6.601944098688927e-07, | |
| "loss": 1.195, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.4154911515773275, | |
| "grad_norm": 2.556707832936113, | |
| "learning_rate": 6.586172613065215e-07, | |
| "loss": 1.1804, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.41651705565529623, | |
| "grad_norm": 2.636733912969758, | |
| "learning_rate": 6.570383560109745e-07, | |
| "loss": 1.2457, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.41754295973326494, | |
| "grad_norm": 2.807001379119985, | |
| "learning_rate": 6.554577114690955e-07, | |
| "loss": 1.3283, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.41856886381123365, | |
| "grad_norm": 2.5257169644640998, | |
| "learning_rate": 6.538753451869913e-07, | |
| "loss": 1.2884, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.41959476788920236, | |
| "grad_norm": 2.5327516189035206, | |
| "learning_rate": 6.522912746898379e-07, | |
| "loss": 1.2235, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.42062067196717107, | |
| "grad_norm": 2.90569340656041, | |
| "learning_rate": 6.507055175216849e-07, | |
| "loss": 1.1792, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.4216465760451398, | |
| "grad_norm": 2.5114377824774317, | |
| "learning_rate": 6.491180912452631e-07, | |
| "loss": 1.3252, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.4226724801231085, | |
| "grad_norm": 2.8201576436584963, | |
| "learning_rate": 6.475290134417891e-07, | |
| "loss": 1.2259, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.4236983842010772, | |
| "grad_norm": 2.5655086880724065, | |
| "learning_rate": 6.459383017107703e-07, | |
| "loss": 1.2522, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.4247242882790459, | |
| "grad_norm": 2.7241886166530165, | |
| "learning_rate": 6.443459736698105e-07, | |
| "loss": 1.1883, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.4257501923570146, | |
| "grad_norm": 2.777112809092128, | |
| "learning_rate": 6.427520469544147e-07, | |
| "loss": 1.2589, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.4267760964349833, | |
| "grad_norm": 2.5197028770911274, | |
| "learning_rate": 6.41156539217794e-07, | |
| "loss": 1.1807, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.42780200051295203, | |
| "grad_norm": 2.7452576494954166, | |
| "learning_rate": 6.395594681306688e-07, | |
| "loss": 1.1969, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.42882790459092074, | |
| "grad_norm": 2.419748401721261, | |
| "learning_rate": 6.379608513810753e-07, | |
| "loss": 1.2156, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.42985380866888945, | |
| "grad_norm": 2.6007536361681347, | |
| "learning_rate": 6.363607066741672e-07, | |
| "loss": 1.1682, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.43087971274685816, | |
| "grad_norm": 2.7690218693992614, | |
| "learning_rate": 6.347590517320217e-07, | |
| "loss": 1.2596, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.43190561682482687, | |
| "grad_norm": 2.8021339088053887, | |
| "learning_rate": 6.331559042934418e-07, | |
| "loss": 1.2384, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.4329315209027956, | |
| "grad_norm": 2.5407498593535345, | |
| "learning_rate": 6.315512821137606e-07, | |
| "loss": 1.1624, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.4339574249807643, | |
| "grad_norm": 2.669181018356921, | |
| "learning_rate": 6.299452029646442e-07, | |
| "loss": 1.1831, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.434983329058733, | |
| "grad_norm": 2.6902513257905567, | |
| "learning_rate": 6.28337684633895e-07, | |
| "loss": 1.1293, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.4360092331367017, | |
| "grad_norm": 2.600350864738873, | |
| "learning_rate": 6.267287449252552e-07, | |
| "loss": 1.1606, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.4370351372146704, | |
| "grad_norm": 2.646078066207826, | |
| "learning_rate": 6.251184016582088e-07, | |
| "loss": 1.2734, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.4380610412926391, | |
| "grad_norm": 2.7517987447445904, | |
| "learning_rate": 6.235066726677845e-07, | |
| "loss": 1.2104, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.43908694537060783, | |
| "grad_norm": 2.7451282679247764, | |
| "learning_rate": 6.218935758043586e-07, | |
| "loss": 1.1119, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.44011284944857654, | |
| "grad_norm": 2.5657396333316105, | |
| "learning_rate": 6.202791289334571e-07, | |
| "loss": 1.2813, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.44113875352654525, | |
| "grad_norm": 2.521858145927954, | |
| "learning_rate": 6.186633499355575e-07, | |
| "loss": 1.3072, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.44216465760451396, | |
| "grad_norm": 2.7999275422555523, | |
| "learning_rate": 6.170462567058908e-07, | |
| "loss": 1.1443, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.44319056168248266, | |
| "grad_norm": 2.8231518599718988, | |
| "learning_rate": 6.15427867154244e-07, | |
| "loss": 1.2586, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.4442164657604514, | |
| "grad_norm": 2.6163010604612174, | |
| "learning_rate": 6.138081992047609e-07, | |
| "loss": 1.2694, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.4452423698384201, | |
| "grad_norm": 2.4133746282742075, | |
| "learning_rate": 6.121872707957441e-07, | |
| "loss": 1.212, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.4462682739163888, | |
| "grad_norm": 2.6353803046715405, | |
| "learning_rate": 6.105650998794559e-07, | |
| "loss": 1.2294, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.4472941779943575, | |
| "grad_norm": 2.6224808581968744, | |
| "learning_rate": 6.089417044219201e-07, | |
| "loss": 1.2076, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.44832008207232626, | |
| "grad_norm": 2.273912209414138, | |
| "learning_rate": 6.073171024027226e-07, | |
| "loss": 1.2212, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.449345986150295, | |
| "grad_norm": 2.5146410294881605, | |
| "learning_rate": 6.056913118148121e-07, | |
| "loss": 1.2513, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.4503718902282637, | |
| "grad_norm": 2.513841938933036, | |
| "learning_rate": 6.040643506643012e-07, | |
| "loss": 1.153, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.4513977943062324, | |
| "grad_norm": 2.644906674789187, | |
| "learning_rate": 6.024362369702668e-07, | |
| "loss": 1.1479, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.4524236983842011, | |
| "grad_norm": 2.7660556586821246, | |
| "learning_rate": 6.008069887645503e-07, | |
| "loss": 1.2301, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.4534496024621698, | |
| "grad_norm": 2.4452998667055135, | |
| "learning_rate": 5.991766240915589e-07, | |
| "loss": 1.1561, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.4544755065401385, | |
| "grad_norm": 2.5637907822415196, | |
| "learning_rate": 5.975451610080642e-07, | |
| "loss": 1.2151, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.4555014106181072, | |
| "grad_norm": 2.634126808482155, | |
| "learning_rate": 5.959126175830033e-07, | |
| "loss": 1.2055, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.45652731469607594, | |
| "grad_norm": 2.6995738192233323, | |
| "learning_rate": 5.942790118972786e-07, | |
| "loss": 1.1872, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.45755321877404465, | |
| "grad_norm": 2.600607297977979, | |
| "learning_rate": 5.926443620435571e-07, | |
| "loss": 1.1858, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.45857912285201335, | |
| "grad_norm": 2.7529683044640203, | |
| "learning_rate": 5.910086861260706e-07, | |
| "loss": 1.3244, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.45960502692998206, | |
| "grad_norm": 2.4542539077385106, | |
| "learning_rate": 5.893720022604142e-07, | |
| "loss": 1.1912, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.4606309310079508, | |
| "grad_norm": 2.7213636554229996, | |
| "learning_rate": 5.877343285733472e-07, | |
| "loss": 1.2338, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.4616568350859195, | |
| "grad_norm": 2.5317246559946627, | |
| "learning_rate": 5.860956832025906e-07, | |
| "loss": 1.2244, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.4626827391638882, | |
| "grad_norm": 2.7263289087726186, | |
| "learning_rate": 5.844560842966278e-07, | |
| "loss": 1.2377, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.4637086432418569, | |
| "grad_norm": 2.5974508387893436, | |
| "learning_rate": 5.828155500145024e-07, | |
| "loss": 1.2219, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.4647345473198256, | |
| "grad_norm": 2.492480904755007, | |
| "learning_rate": 5.811740985256179e-07, | |
| "loss": 1.2269, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.4657604513977943, | |
| "grad_norm": 2.713260026567852, | |
| "learning_rate": 5.79531748009536e-07, | |
| "loss": 1.2114, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.466786355475763, | |
| "grad_norm": 2.567056191750939, | |
| "learning_rate": 5.778885166557752e-07, | |
| "loss": 1.1719, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.46781225955373174, | |
| "grad_norm": 2.2856765271337007, | |
| "learning_rate": 5.7624442266361e-07, | |
| "loss": 1.144, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.46883816363170044, | |
| "grad_norm": 2.4580143502304552, | |
| "learning_rate": 5.745994842418683e-07, | |
| "loss": 1.2335, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.46986406770966915, | |
| "grad_norm": 2.343512884988581, | |
| "learning_rate": 5.729537196087308e-07, | |
| "loss": 1.1682, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.47088997178763786, | |
| "grad_norm": 2.5556051709696335, | |
| "learning_rate": 5.713071469915285e-07, | |
| "loss": 1.2655, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.47191587586560657, | |
| "grad_norm": 2.4824036483855143, | |
| "learning_rate": 5.696597846265411e-07, | |
| "loss": 1.1478, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.4729417799435753, | |
| "grad_norm": 2.6655990103099634, | |
| "learning_rate": 5.680116507587949e-07, | |
| "loss": 1.3385, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.473967684021544, | |
| "grad_norm": 2.8244078669617014, | |
| "learning_rate": 5.663627636418609e-07, | |
| "loss": 1.19, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.4749935880995127, | |
| "grad_norm": 2.4429409282936567, | |
| "learning_rate": 5.647131415376528e-07, | |
| "loss": 1.3615, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.4760194921774814, | |
| "grad_norm": 2.9996249497285703, | |
| "learning_rate": 5.630628027162243e-07, | |
| "loss": 1.1419, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.4770453962554501, | |
| "grad_norm": 2.938422500891204, | |
| "learning_rate": 5.614117654555666e-07, | |
| "loss": 1.232, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.4780713003334188, | |
| "grad_norm": 2.7670713948237218, | |
| "learning_rate": 5.597600480414068e-07, | |
| "loss": 1.2346, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.47909720441138753, | |
| "grad_norm": 2.794141488694239, | |
| "learning_rate": 5.58107668767005e-07, | |
| "loss": 1.2621, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.48012310848935624, | |
| "grad_norm": 3.239639832399456, | |
| "learning_rate": 5.564546459329509e-07, | |
| "loss": 1.1259, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.48114901256732495, | |
| "grad_norm": 2.557225226114097, | |
| "learning_rate": 5.548009978469626e-07, | |
| "loss": 1.2987, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.48217491664529366, | |
| "grad_norm": 2.683440752301172, | |
| "learning_rate": 5.531467428236827e-07, | |
| "loss": 1.2546, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.48320082072326237, | |
| "grad_norm": 2.508947166543078, | |
| "learning_rate": 5.514918991844758e-07, | |
| "loss": 1.2695, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.4842267248012311, | |
| "grad_norm": 2.648393508091393, | |
| "learning_rate": 5.498364852572255e-07, | |
| "loss": 1.2868, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.4852526288791998, | |
| "grad_norm": 2.302753325306599, | |
| "learning_rate": 5.481805193761315e-07, | |
| "loss": 1.1813, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.4862785329571685, | |
| "grad_norm": 2.529547436028065, | |
| "learning_rate": 5.465240198815072e-07, | |
| "loss": 1.1878, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.4873044370351372, | |
| "grad_norm": 2.4916972010131286, | |
| "learning_rate": 5.448670051195751e-07, | |
| "loss": 1.2886, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.4883303411131059, | |
| "grad_norm": 2.5311295821586954, | |
| "learning_rate": 5.432094934422648e-07, | |
| "loss": 1.1612, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.4893562451910746, | |
| "grad_norm": 2.5635956429429174, | |
| "learning_rate": 5.415515032070091e-07, | |
| "loss": 1.2791, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.49038214926904333, | |
| "grad_norm": 2.500059103965476, | |
| "learning_rate": 5.398930527765415e-07, | |
| "loss": 1.1808, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.49140805334701204, | |
| "grad_norm": 2.5521862908107154, | |
| "learning_rate": 5.38234160518692e-07, | |
| "loss": 1.2029, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.49243395742498075, | |
| "grad_norm": 2.643620076098173, | |
| "learning_rate": 5.365748448061837e-07, | |
| "loss": 1.1923, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.49345986150294946, | |
| "grad_norm": 2.5902182995299294, | |
| "learning_rate": 5.349151240164303e-07, | |
| "loss": 1.2226, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.49448576558091817, | |
| "grad_norm": 2.768401067358843, | |
| "learning_rate": 5.332550165313312e-07, | |
| "loss": 1.174, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.4955116696588869, | |
| "grad_norm": 2.557685695100021, | |
| "learning_rate": 5.315945407370686e-07, | |
| "loss": 1.1896, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.4965375737368556, | |
| "grad_norm": 2.775440468842976, | |
| "learning_rate": 5.299337150239041e-07, | |
| "loss": 1.2212, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.4975634778148243, | |
| "grad_norm": 2.9224945088224135, | |
| "learning_rate": 5.282725577859748e-07, | |
| "loss": 1.2949, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.498589381892793, | |
| "grad_norm": 2.802770455656179, | |
| "learning_rate": 5.266110874210892e-07, | |
| "loss": 1.2073, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.4996152859707617, | |
| "grad_norm": 2.824244076395358, | |
| "learning_rate": 5.249493223305244e-07, | |
| "loss": 1.1947, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.5006411900487304, | |
| "grad_norm": 2.6923948351193507, | |
| "learning_rate": 5.232872809188208e-07, | |
| "loss": 1.2629, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.5016670941266992, | |
| "grad_norm": 2.3968822015650386, | |
| "learning_rate": 5.216249815935797e-07, | |
| "loss": 1.2121, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.5026929982046678, | |
| "grad_norm": 2.560325439840742, | |
| "learning_rate": 5.199624427652588e-07, | |
| "loss": 1.1595, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.5037189022826366, | |
| "grad_norm": 2.506058680784885, | |
| "learning_rate": 5.182996828469683e-07, | |
| "loss": 1.3033, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.5047448063606053, | |
| "grad_norm": 2.607500294914118, | |
| "learning_rate": 5.166367202542671e-07, | |
| "loss": 1.2084, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.505770710438574, | |
| "grad_norm": 2.641008107839817, | |
| "learning_rate": 5.149735734049587e-07, | |
| "loss": 1.2329, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.5067966145165427, | |
| "grad_norm": 2.541793580004398, | |
| "learning_rate": 5.133102607188874e-07, | |
| "loss": 1.2498, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.5078225185945114, | |
| "grad_norm": 2.5646122609963187, | |
| "learning_rate": 5.11646800617734e-07, | |
| "loss": 1.1542, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.5088484226724801, | |
| "grad_norm": 2.486587957501608, | |
| "learning_rate": 5.099832115248123e-07, | |
| "loss": 1.217, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.5098743267504489, | |
| "grad_norm": 2.675452370610441, | |
| "learning_rate": 5.083195118648643e-07, | |
| "loss": 1.2943, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.5109002308284175, | |
| "grad_norm": 2.5503918368054745, | |
| "learning_rate": 5.066557200638569e-07, | |
| "loss": 1.2284, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.5119261349063863, | |
| "grad_norm": 2.6266723080797028, | |
| "learning_rate": 5.049918545487774e-07, | |
| "loss": 1.299, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.5129520389843549, | |
| "grad_norm": 2.6465331754588055, | |
| "learning_rate": 5.033279337474294e-07, | |
| "loss": 1.252, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.5129520389843549, | |
| "eval_uground_MCTS_chains_SFT_val_loss": 1.2743316888809204, | |
| "eval_uground_MCTS_chains_SFT_val_runtime": 211.067, | |
| "eval_uground_MCTS_chains_SFT_val_samples_per_second": 8.618, | |
| "eval_uground_MCTS_chains_SFT_val_steps_per_second": 1.08, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.5139779430623237, | |
| "grad_norm": 3.0116752114032743, | |
| "learning_rate": 5.016639760882288e-07, | |
| "loss": 1.2094, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.5150038471402923, | |
| "grad_norm": 2.5726062219813244, | |
| "learning_rate": 5e-07, | |
| "loss": 1.2787, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.5160297512182611, | |
| "grad_norm": 2.5559171539739585, | |
| "learning_rate": 4.98336023911771e-07, | |
| "loss": 1.1275, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.5170556552962298, | |
| "grad_norm": 2.652830973325525, | |
| "learning_rate": 4.966720662525707e-07, | |
| "loss": 1.3168, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.5180815593741985, | |
| "grad_norm": 2.700794383275604, | |
| "learning_rate": 4.950081454512225e-07, | |
| "loss": 1.299, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.5191074634521672, | |
| "grad_norm": 2.784294521735747, | |
| "learning_rate": 4.933442799361431e-07, | |
| "loss": 1.2955, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.520133367530136, | |
| "grad_norm": 2.4208374454069337, | |
| "learning_rate": 4.916804881351357e-07, | |
| "loss": 1.1661, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.5211592716081046, | |
| "grad_norm": 2.569371008392117, | |
| "learning_rate": 4.900167884751877e-07, | |
| "loss": 1.2028, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.5221851756860734, | |
| "grad_norm": 2.7873211845718027, | |
| "learning_rate": 4.883531993822659e-07, | |
| "loss": 1.2652, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.523211079764042, | |
| "grad_norm": 2.6113158053359333, | |
| "learning_rate": 4.866897392811126e-07, | |
| "loss": 1.3185, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.5242369838420108, | |
| "grad_norm": 2.3767956668686385, | |
| "learning_rate": 4.850264265950413e-07, | |
| "loss": 1.2051, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.5252628879199794, | |
| "grad_norm": 2.884877451969795, | |
| "learning_rate": 4.833632797457331e-07, | |
| "loss": 1.1469, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.5262887919979482, | |
| "grad_norm": 2.571309281671942, | |
| "learning_rate": 4.817003171530317e-07, | |
| "loss": 1.2533, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.5273146960759169, | |
| "grad_norm": 2.8580251389569633, | |
| "learning_rate": 4.800375572347413e-07, | |
| "loss": 1.2175, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.5283406001538856, | |
| "grad_norm": 2.657024380838802, | |
| "learning_rate": 4.783750184064203e-07, | |
| "loss": 1.2324, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.5293665042318543, | |
| "grad_norm": 2.4907546697657095, | |
| "learning_rate": 4.767127190811793e-07, | |
| "loss": 1.1992, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.530392408309823, | |
| "grad_norm": 2.782096492852977, | |
| "learning_rate": 4.750506776694757e-07, | |
| "loss": 1.2924, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.5314183123877917, | |
| "grad_norm": 2.427383520783994, | |
| "learning_rate": 4.7338891257891076e-07, | |
| "loss": 1.1579, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.5324442164657605, | |
| "grad_norm": 2.5770073682499484, | |
| "learning_rate": 4.717274422140252e-07, | |
| "loss": 1.2246, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.5334701205437291, | |
| "grad_norm": 2.4572503599210576, | |
| "learning_rate": 4.7006628497609604e-07, | |
| "loss": 1.2358, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.5344960246216979, | |
| "grad_norm": 2.6448246950333254, | |
| "learning_rate": 4.6840545926293146e-07, | |
| "loss": 1.2461, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.5355219286996665, | |
| "grad_norm": 2.5891258589659247, | |
| "learning_rate": 4.6674498346866887e-07, | |
| "loss": 1.1114, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.5365478327776353, | |
| "grad_norm": 2.6559153198303678, | |
| "learning_rate": 4.650848759835697e-07, | |
| "loss": 1.2646, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.5375737368556041, | |
| "grad_norm": 2.673133305478356, | |
| "learning_rate": 4.634251551938161e-07, | |
| "loss": 1.2122, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.5385996409335727, | |
| "grad_norm": 2.68538453093342, | |
| "learning_rate": 4.6176583948130803e-07, | |
| "loss": 1.2235, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.5396255450115415, | |
| "grad_norm": 2.440546575305456, | |
| "learning_rate": 4.601069472234584e-07, | |
| "loss": 1.2358, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.5406514490895101, | |
| "grad_norm": 2.6044073884926924, | |
| "learning_rate": 4.584484967929909e-07, | |
| "loss": 1.2677, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.5416773531674789, | |
| "grad_norm": 2.5476463792153305, | |
| "learning_rate": 4.5679050655773534e-07, | |
| "loss": 1.1461, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.5427032572454475, | |
| "grad_norm": 2.436273544492887, | |
| "learning_rate": 4.5513299488042487e-07, | |
| "loss": 1.2116, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.5437291613234163, | |
| "grad_norm": 2.5753873910087, | |
| "learning_rate": 4.5347598011849275e-07, | |
| "loss": 1.1925, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.544755065401385, | |
| "grad_norm": 2.522315717983984, | |
| "learning_rate": 4.5181948062386846e-07, | |
| "loss": 1.2488, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.5457809694793537, | |
| "grad_norm": 2.4561592505381777, | |
| "learning_rate": 4.501635147427745e-07, | |
| "loss": 1.1357, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.5468068735573224, | |
| "grad_norm": 2.6167475341297375, | |
| "learning_rate": 4.485081008155243e-07, | |
| "loss": 1.1628, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.5478327776352911, | |
| "grad_norm": 2.318388883687291, | |
| "learning_rate": 4.4685325717631734e-07, | |
| "loss": 1.1911, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.5488586817132598, | |
| "grad_norm": 2.551623209073283, | |
| "learning_rate": 4.4519900215303733e-07, | |
| "loss": 1.2293, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.5498845857912286, | |
| "grad_norm": 3.0020726476405875, | |
| "learning_rate": 4.4354535406704907e-07, | |
| "loss": 1.1972, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.5509104898691972, | |
| "grad_norm": 2.4079569221490393, | |
| "learning_rate": 4.418923312329952e-07, | |
| "loss": 1.126, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.551936393947166, | |
| "grad_norm": 2.6031632771818893, | |
| "learning_rate": 4.4023995195859313e-07, | |
| "loss": 1.2331, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.5529622980251346, | |
| "grad_norm": 2.6258547925667353, | |
| "learning_rate": 4.385882345444335e-07, | |
| "loss": 1.2573, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.5539882021031034, | |
| "grad_norm": 2.510150905831169, | |
| "learning_rate": 4.369371972837757e-07, | |
| "loss": 1.26, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.555014106181072, | |
| "grad_norm": 2.7544197615747406, | |
| "learning_rate": 4.352868584623471e-07, | |
| "loss": 1.2229, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.5560400102590408, | |
| "grad_norm": 2.6505858678051863, | |
| "learning_rate": 4.33637236358139e-07, | |
| "loss": 1.1452, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.5570659143370095, | |
| "grad_norm": 2.61741589849154, | |
| "learning_rate": 4.319883492412051e-07, | |
| "loss": 1.2923, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.5580918184149782, | |
| "grad_norm": 2.7258410404480182, | |
| "learning_rate": 4.30340215373459e-07, | |
| "loss": 1.1754, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.5591177224929469, | |
| "grad_norm": 2.551873559715973, | |
| "learning_rate": 4.286928530084714e-07, | |
| "loss": 1.2079, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.5601436265709157, | |
| "grad_norm": 2.7405100803593885, | |
| "learning_rate": 4.2704628039126914e-07, | |
| "loss": 1.1972, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.5611695306488843, | |
| "grad_norm": 2.8909985376686684, | |
| "learning_rate": 4.2540051575813165e-07, | |
| "loss": 1.2332, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.5621954347268531, | |
| "grad_norm": 2.78196270770764, | |
| "learning_rate": 4.2375557733639006e-07, | |
| "loss": 1.197, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.5632213388048217, | |
| "grad_norm": 2.5697422125069784, | |
| "learning_rate": 4.2211148334422465e-07, | |
| "loss": 1.2079, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.5642472428827905, | |
| "grad_norm": 2.6354651408048966, | |
| "learning_rate": 4.20468251990464e-07, | |
| "loss": 1.2577, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.5652731469607591, | |
| "grad_norm": 2.5724042590969938, | |
| "learning_rate": 4.1882590147438197e-07, | |
| "loss": 1.2442, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.5662990510387279, | |
| "grad_norm": 2.830434169158481, | |
| "learning_rate": 4.1718444998549756e-07, | |
| "loss": 1.2195, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.5673249551166966, | |
| "grad_norm": 2.663741290125766, | |
| "learning_rate": 4.1554391570337223e-07, | |
| "loss": 1.3125, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.5683508591946653, | |
| "grad_norm": 2.5970031739016846, | |
| "learning_rate": 4.1390431679740953e-07, | |
| "loss": 1.1242, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.569376763272634, | |
| "grad_norm": 2.768091392231377, | |
| "learning_rate": 4.1226567142665287e-07, | |
| "loss": 1.2086, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.5704026673506027, | |
| "grad_norm": 2.6640670169831173, | |
| "learning_rate": 4.106279977395858e-07, | |
| "loss": 1.2559, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.5714285714285714, | |
| "grad_norm": 2.53235756296073, | |
| "learning_rate": 4.0899131387392943e-07, | |
| "loss": 1.1748, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.5724544755065402, | |
| "grad_norm": 2.682901944624015, | |
| "learning_rate": 4.073556379564429e-07, | |
| "loss": 1.1939, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.5734803795845088, | |
| "grad_norm": 2.5999213234263534, | |
| "learning_rate": 4.057209881027214e-07, | |
| "loss": 1.1405, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.5745062836624776, | |
| "grad_norm": 2.8359536761627755, | |
| "learning_rate": 4.0408738241699685e-07, | |
| "loss": 1.2553, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.5755321877404462, | |
| "grad_norm": 2.478862534452602, | |
| "learning_rate": 4.0245483899193586e-07, | |
| "loss": 1.1883, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.576558091818415, | |
| "grad_norm": 2.9624404297867297, | |
| "learning_rate": 4.00823375908441e-07, | |
| "loss": 1.3241, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.5775839958963837, | |
| "grad_norm": 2.7214306290037467, | |
| "learning_rate": 3.991930112354496e-07, | |
| "loss": 1.1486, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.5786098999743524, | |
| "grad_norm": 2.5096893205743283, | |
| "learning_rate": 3.9756376302973325e-07, | |
| "loss": 1.2576, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.5796358040523211, | |
| "grad_norm": 2.571283901595518, | |
| "learning_rate": 3.959356493356989e-07, | |
| "loss": 1.1346, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.5806617081302898, | |
| "grad_norm": 2.644842706151376, | |
| "learning_rate": 3.943086881851878e-07, | |
| "loss": 1.1977, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.5816876122082585, | |
| "grad_norm": 2.807481417987691, | |
| "learning_rate": 3.9268289759727736e-07, | |
| "loss": 1.2563, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.5827135162862273, | |
| "grad_norm": 2.864431606411388, | |
| "learning_rate": 3.9105829557807973e-07, | |
| "loss": 1.2385, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.5837394203641959, | |
| "grad_norm": 2.7536651973511312, | |
| "learning_rate": 3.894349001205441e-07, | |
| "loss": 1.175, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.5847653244421647, | |
| "grad_norm": 2.557792248642305, | |
| "learning_rate": 3.87812729204256e-07, | |
| "loss": 1.263, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.5857912285201333, | |
| "grad_norm": 2.6951777375825947, | |
| "learning_rate": 3.861918007952393e-07, | |
| "loss": 1.1879, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.5868171325981021, | |
| "grad_norm": 2.5039948130909595, | |
| "learning_rate": 3.84572132845756e-07, | |
| "loss": 1.1872, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.5878430366760707, | |
| "grad_norm": 2.744912722045592, | |
| "learning_rate": 3.8295374329410926e-07, | |
| "loss": 1.0991, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.5888689407540395, | |
| "grad_norm": 2.7544422679030127, | |
| "learning_rate": 3.8133665006444255e-07, | |
| "loss": 1.2738, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.5898948448320082, | |
| "grad_norm": 2.696877329145256, | |
| "learning_rate": 3.7972087106654296e-07, | |
| "loss": 1.1795, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.5909207489099769, | |
| "grad_norm": 2.6792688172902386, | |
| "learning_rate": 3.781064241956414e-07, | |
| "loss": 1.2181, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.5919466529879456, | |
| "grad_norm": 2.7459246071356636, | |
| "learning_rate": 3.7649332733221575e-07, | |
| "loss": 1.2205, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.5929725570659143, | |
| "grad_norm": 2.5648768805482374, | |
| "learning_rate": 3.7488159834179135e-07, | |
| "loss": 1.1505, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.593998461143883, | |
| "grad_norm": 2.430751747310971, | |
| "learning_rate": 3.732712550747449e-07, | |
| "loss": 1.1854, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.5950243652218518, | |
| "grad_norm": 2.705026052502069, | |
| "learning_rate": 3.716623153661049e-07, | |
| "loss": 1.2227, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.5960502692998204, | |
| "grad_norm": 2.7017035978712545, | |
| "learning_rate": 3.700547970353558e-07, | |
| "loss": 1.1729, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.5970761733777892, | |
| "grad_norm": 2.5019196552045972, | |
| "learning_rate": 3.6844871788623945e-07, | |
| "loss": 1.1364, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.5981020774557579, | |
| "grad_norm": 2.8882315979569886, | |
| "learning_rate": 3.6684409570655804e-07, | |
| "loss": 1.1923, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.5991279815337266, | |
| "grad_norm": 2.6337103485212334, | |
| "learning_rate": 3.652409482679783e-07, | |
| "loss": 1.2562, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.6001538856116954, | |
| "grad_norm": 2.8149166619473287, | |
| "learning_rate": 3.6363929332583263e-07, | |
| "loss": 1.2216, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.601179789689664, | |
| "grad_norm": 2.5198337744735, | |
| "learning_rate": 3.6203914861892476e-07, | |
| "loss": 1.2763, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.6022056937676328, | |
| "grad_norm": 2.5771491568943645, | |
| "learning_rate": 3.604405318693311e-07, | |
| "loss": 1.2214, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.6032315978456014, | |
| "grad_norm": 2.6091632070488426, | |
| "learning_rate": 3.588434607822061e-07, | |
| "loss": 1.1978, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.6042575019235702, | |
| "grad_norm": 2.5250674602170537, | |
| "learning_rate": 3.5724795304558513e-07, | |
| "loss": 1.1383, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.6052834060015388, | |
| "grad_norm": 2.818153148170644, | |
| "learning_rate": 3.5565402633018957e-07, | |
| "loss": 1.2158, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.6063093100795076, | |
| "grad_norm": 2.4250679915698643, | |
| "learning_rate": 3.540616982892297e-07, | |
| "loss": 1.2316, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.6073352141574763, | |
| "grad_norm": 2.6768142400804544, | |
| "learning_rate": 3.5247098655821103e-07, | |
| "loss": 1.2056, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.608361118235445, | |
| "grad_norm": 2.6144828178712514, | |
| "learning_rate": 3.5088190875473686e-07, | |
| "loss": 1.2977, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.6093870223134137, | |
| "grad_norm": 2.4216338604546146, | |
| "learning_rate": 3.4929448247831514e-07, | |
| "loss": 1.2196, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.6104129263913824, | |
| "grad_norm": 2.4463464458523814, | |
| "learning_rate": 3.4770872531016215e-07, | |
| "loss": 1.2126, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.6114388304693511, | |
| "grad_norm": 2.746300393158143, | |
| "learning_rate": 3.4612465481300867e-07, | |
| "loss": 1.2965, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.6124647345473199, | |
| "grad_norm": 2.550377036020753, | |
| "learning_rate": 3.445422885309045e-07, | |
| "loss": 1.255, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.6134906386252885, | |
| "grad_norm": 2.5581073815113515, | |
| "learning_rate": 3.429616439890257e-07, | |
| "loss": 1.2221, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.6145165427032573, | |
| "grad_norm": 2.6287106761798764, | |
| "learning_rate": 3.4138273869347846e-07, | |
| "loss": 1.1759, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.6155424467812259, | |
| "grad_norm": 2.7064703602194236, | |
| "learning_rate": 3.398055901311073e-07, | |
| "loss": 1.2003, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.6165683508591947, | |
| "grad_norm": 2.670960915963785, | |
| "learning_rate": 3.3823021576929964e-07, | |
| "loss": 1.1922, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.6175942549371634, | |
| "grad_norm": 2.5857309156591604, | |
| "learning_rate": 3.3665663305579344e-07, | |
| "loss": 1.2312, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.6186201590151321, | |
| "grad_norm": 2.6327880988339194, | |
| "learning_rate": 3.3508485941848383e-07, | |
| "loss": 1.1737, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.6196460630931008, | |
| "grad_norm": 2.6453978254038133, | |
| "learning_rate": 3.335149122652293e-07, | |
| "loss": 1.1843, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.6206719671710695, | |
| "grad_norm": 2.5723820192230598, | |
| "learning_rate": 3.3194680898366023e-07, | |
| "loss": 1.2656, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.6216978712490382, | |
| "grad_norm": 2.5714268383189047, | |
| "learning_rate": 3.303805669409848e-07, | |
| "loss": 1.1766, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.622723775327007, | |
| "grad_norm": 2.545114874785041, | |
| "learning_rate": 3.288162034837981e-07, | |
| "loss": 1.2778, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.6237496794049756, | |
| "grad_norm": 2.3647835710703435, | |
| "learning_rate": 3.272537359378887e-07, | |
| "loss": 1.2307, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.6247755834829444, | |
| "grad_norm": 2.971636171732171, | |
| "learning_rate": 3.2569318160804805e-07, | |
| "loss": 1.163, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.625801487560913, | |
| "grad_norm": 2.4541682678346795, | |
| "learning_rate": 3.2413455777787746e-07, | |
| "loss": 1.216, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.6268273916388818, | |
| "grad_norm": 2.3227177006306423, | |
| "learning_rate": 3.225778817095982e-07, | |
| "loss": 1.1738, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.6278532957168504, | |
| "grad_norm": 2.5261842954294402, | |
| "learning_rate": 3.2102317064385876e-07, | |
| "loss": 1.2311, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.6288791997948192, | |
| "grad_norm": 2.397049474115395, | |
| "learning_rate": 3.19470441799545e-07, | |
| "loss": 1.1192, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.6299051038727879, | |
| "grad_norm": 2.593266135696937, | |
| "learning_rate": 3.179197123735889e-07, | |
| "loss": 1.2214, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.6309310079507566, | |
| "grad_norm": 2.590251241187722, | |
| "learning_rate": 3.1637099954077875e-07, | |
| "loss": 1.1793, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.6319569120287253, | |
| "grad_norm": 2.771110361934025, | |
| "learning_rate": 3.148243204535677e-07, | |
| "loss": 1.1597, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.632982816106694, | |
| "grad_norm": 2.506372299796566, | |
| "learning_rate": 3.1327969224188546e-07, | |
| "loss": 1.2679, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.6340087201846627, | |
| "grad_norm": 2.6050212777549, | |
| "learning_rate": 3.117371320129469e-07, | |
| "loss": 1.2335, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.6350346242626315, | |
| "grad_norm": 2.6519984662810825, | |
| "learning_rate": 3.101966568510639e-07, | |
| "loss": 1.207, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.6360605283406001, | |
| "grad_norm": 2.4414939383947947, | |
| "learning_rate": 3.086582838174551e-07, | |
| "loss": 1.1293, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.6370864324185689, | |
| "grad_norm": 2.4498656223793933, | |
| "learning_rate": 3.0712202995005806e-07, | |
| "loss": 1.1637, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.6381123364965375, | |
| "grad_norm": 2.4980445735258368, | |
| "learning_rate": 3.055879122633397e-07, | |
| "loss": 1.2441, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.6391382405745063, | |
| "grad_norm": 2.770776748857224, | |
| "learning_rate": 3.040559477481077e-07, | |
| "loss": 1.2255, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.640164144652475, | |
| "grad_norm": 2.722646125854624, | |
| "learning_rate": 3.025261533713235e-07, | |
| "loss": 1.164, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.6411900487304437, | |
| "grad_norm": 2.6111255488966827, | |
| "learning_rate": 3.009985460759127e-07, | |
| "loss": 1.155, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.6422159528084124, | |
| "grad_norm": 2.774808082342546, | |
| "learning_rate": 2.994731427805792e-07, | |
| "loss": 1.1843, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.6432418568863811, | |
| "grad_norm": 2.729046492958925, | |
| "learning_rate": 2.979499603796163e-07, | |
| "loss": 1.1878, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.6442677609643498, | |
| "grad_norm": 2.3088017207024105, | |
| "learning_rate": 2.964290157427207e-07, | |
| "loss": 1.107, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.6452936650423186, | |
| "grad_norm": 2.769640462107753, | |
| "learning_rate": 2.9491032571480486e-07, | |
| "loss": 1.3354, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.6463195691202872, | |
| "grad_norm": 2.6586300568678087, | |
| "learning_rate": 2.9339390711581105e-07, | |
| "loss": 1.2318, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.647345473198256, | |
| "grad_norm": 2.8192282998223304, | |
| "learning_rate": 2.9187977674052424e-07, | |
| "loss": 1.2764, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.6483713772762246, | |
| "grad_norm": 2.8997868168822056, | |
| "learning_rate": 2.9036795135838764e-07, | |
| "loss": 1.2166, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.6493972813541934, | |
| "grad_norm": 2.708571116756967, | |
| "learning_rate": 2.88858447713315e-07, | |
| "loss": 1.2067, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.650423185432162, | |
| "grad_norm": 2.620361247323107, | |
| "learning_rate": 2.8735128252350674e-07, | |
| "loss": 1.1406, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.6514490895101308, | |
| "grad_norm": 2.532649091877776, | |
| "learning_rate": 2.858464724812638e-07, | |
| "loss": 1.2767, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.6524749935880995, | |
| "grad_norm": 2.654953427048768, | |
| "learning_rate": 2.843440342528035e-07, | |
| "loss": 1.1596, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.6535008976660682, | |
| "grad_norm": 2.639809330039523, | |
| "learning_rate": 2.8284398447807435e-07, | |
| "loss": 1.1958, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.6545268017440369, | |
| "grad_norm": 2.539389618176952, | |
| "learning_rate": 2.813463397705723e-07, | |
| "loss": 1.2032, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.6555527058220056, | |
| "grad_norm": 2.427603600782808, | |
| "learning_rate": 2.798511167171562e-07, | |
| "loss": 1.2111, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.6565786098999743, | |
| "grad_norm": 2.522000223511891, | |
| "learning_rate": 2.783583318778646e-07, | |
| "loss": 1.1634, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.6576045139779431, | |
| "grad_norm": 2.5130918357831455, | |
| "learning_rate": 2.768680017857319e-07, | |
| "loss": 1.2287, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.6586304180559118, | |
| "grad_norm": 2.7221180506008285, | |
| "learning_rate": 2.753801429466056e-07, | |
| "loss": 1.2837, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.6596563221338805, | |
| "grad_norm": 2.7920485812373403, | |
| "learning_rate": 2.7389477183896313e-07, | |
| "loss": 1.0884, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.6606822262118492, | |
| "grad_norm": 2.4167742978657145, | |
| "learning_rate": 2.7241190491372987e-07, | |
| "loss": 1.1931, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.6617081302898179, | |
| "grad_norm": 2.6876746385015586, | |
| "learning_rate": 2.7093155859409667e-07, | |
| "loss": 1.263, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.6627340343677867, | |
| "grad_norm": 2.544082336181996, | |
| "learning_rate": 2.6945374927533697e-07, | |
| "loss": 1.1986, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.6637599384457553, | |
| "grad_norm": 2.5769749930127177, | |
| "learning_rate": 2.6797849332462785e-07, | |
| "loss": 1.3002, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.6647858425237241, | |
| "grad_norm": 2.765429290049575, | |
| "learning_rate": 2.665058070808654e-07, | |
| "loss": 1.293, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.6658117466016927, | |
| "grad_norm": 2.7225782297173216, | |
| "learning_rate": 2.650357068544869e-07, | |
| "loss": 1.22, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.6668376506796615, | |
| "grad_norm": 2.4495687580818433, | |
| "learning_rate": 2.635682089272875e-07, | |
| "loss": 1.1719, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.6678635547576302, | |
| "grad_norm": 2.515217933274136, | |
| "learning_rate": 2.621033295522417e-07, | |
| "loss": 1.3224, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.6688894588355989, | |
| "grad_norm": 2.6424207426144815, | |
| "learning_rate": 2.6064108495332293e-07, | |
| "loss": 1.1448, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.6699153629135676, | |
| "grad_norm": 2.5858491438102056, | |
| "learning_rate": 2.5918149132532336e-07, | |
| "loss": 1.1662, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.6709412669915363, | |
| "grad_norm": 2.6381268455957594, | |
| "learning_rate": 2.5772456483367497e-07, | |
| "loss": 1.1797, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.671967171069505, | |
| "grad_norm": 2.6228781987684364, | |
| "learning_rate": 2.5627032161427036e-07, | |
| "loss": 1.2355, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.6729930751474738, | |
| "grad_norm": 2.615456043233998, | |
| "learning_rate": 2.5481877777328424e-07, | |
| "loss": 1.2654, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.6740189792254424, | |
| "grad_norm": 2.416561917193722, | |
| "learning_rate": 2.5336994938699503e-07, | |
| "loss": 1.12, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.6750448833034112, | |
| "grad_norm": 2.642465270297251, | |
| "learning_rate": 2.5192385250160586e-07, | |
| "loss": 1.1921, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.6760707873813798, | |
| "grad_norm": 2.7480834455937866, | |
| "learning_rate": 2.50480503133069e-07, | |
| "loss": 1.241, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.6770966914593486, | |
| "grad_norm": 2.709971968793763, | |
| "learning_rate": 2.4903991726690583e-07, | |
| "loss": 1.1892, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.6781225955373172, | |
| "grad_norm": 2.5423357822987125, | |
| "learning_rate": 2.4760211085803214e-07, | |
| "loss": 1.061, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.679148499615286, | |
| "grad_norm": 2.5471809431180947, | |
| "learning_rate": 2.461670998305801e-07, | |
| "loss": 1.1406, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.6801744036932547, | |
| "grad_norm": 2.2614394749490865, | |
| "learning_rate": 2.447349000777216e-07, | |
| "loss": 1.1914, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.6812003077712234, | |
| "grad_norm": 2.5975786485688745, | |
| "learning_rate": 2.4330552746149404e-07, | |
| "loss": 1.1927, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.6822262118491921, | |
| "grad_norm": 2.5643004029139362, | |
| "learning_rate": 2.418789978126219e-07, | |
| "loss": 1.2564, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.6832521159271608, | |
| "grad_norm": 2.5694519450290922, | |
| "learning_rate": 2.4045532693034474e-07, | |
| "loss": 1.287, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.6842780200051295, | |
| "grad_norm": 2.583217401294905, | |
| "learning_rate": 2.3903453058223876e-07, | |
| "loss": 1.2259, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.6853039240830983, | |
| "grad_norm": 2.7713992830326264, | |
| "learning_rate": 2.3761662450404492e-07, | |
| "loss": 1.2404, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.6863298281610669, | |
| "grad_norm": 2.683261081042423, | |
| "learning_rate": 2.3620162439949303e-07, | |
| "loss": 1.2424, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.6873557322390357, | |
| "grad_norm": 2.6813466710719274, | |
| "learning_rate": 2.347895459401288e-07, | |
| "loss": 1.1871, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.6883816363170043, | |
| "grad_norm": 2.4980969369442985, | |
| "learning_rate": 2.3338040476513947e-07, | |
| "loss": 1.196, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.6894075403949731, | |
| "grad_norm": 2.6573309673650116, | |
| "learning_rate": 2.319742164811813e-07, | |
| "loss": 1.221, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.6904334444729417, | |
| "grad_norm": 2.7446852008084224, | |
| "learning_rate": 2.305709966622062e-07, | |
| "loss": 1.2488, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 0.6914593485509105, | |
| "grad_norm": 2.5674080914108384, | |
| "learning_rate": 2.2917076084928948e-07, | |
| "loss": 1.2597, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.6924852526288792, | |
| "grad_norm": 2.811651191362524, | |
| "learning_rate": 2.2777352455045784e-07, | |
| "loss": 1.2413, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.6935111567068479, | |
| "grad_norm": 2.572728259688939, | |
| "learning_rate": 2.2637930324051747e-07, | |
| "loss": 1.2547, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.6945370607848166, | |
| "grad_norm": 2.700506373166308, | |
| "learning_rate": 2.2498811236088195e-07, | |
| "loss": 1.1831, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.6955629648627853, | |
| "grad_norm": 2.5836554498566864, | |
| "learning_rate": 2.2359996731940345e-07, | |
| "loss": 1.1322, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.696588868940754, | |
| "grad_norm": 2.4909146663935204, | |
| "learning_rate": 2.2221488349019902e-07, | |
| "loss": 1.1805, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.6976147730187228, | |
| "grad_norm": 2.380457276679437, | |
| "learning_rate": 2.2083287621348256e-07, | |
| "loss": 1.1997, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.6986406770966914, | |
| "grad_norm": 2.78505809062204, | |
| "learning_rate": 2.194539607953948e-07, | |
| "loss": 1.2253, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.6996665811746602, | |
| "grad_norm": 2.5665582250329257, | |
| "learning_rate": 2.180781525078319e-07, | |
| "loss": 1.1259, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.7006924852526288, | |
| "grad_norm": 2.555086105402453, | |
| "learning_rate": 2.167054665882791e-07, | |
| "loss": 1.1908, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.7017183893305976, | |
| "grad_norm": 2.6561979962463154, | |
| "learning_rate": 2.1533591823963926e-07, | |
| "loss": 1.1986, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.7027442934085663, | |
| "grad_norm": 2.527387762301175, | |
| "learning_rate": 2.139695226300663e-07, | |
| "loss": 1.2669, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.703770197486535, | |
| "grad_norm": 2.7391799488802575, | |
| "learning_rate": 2.1260629489279657e-07, | |
| "loss": 1.1581, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.7047961015645037, | |
| "grad_norm": 2.4323409695512495, | |
| "learning_rate": 2.1124625012598135e-07, | |
| "loss": 1.2396, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.7058220056424724, | |
| "grad_norm": 2.599314144274195, | |
| "learning_rate": 2.0988940339251937e-07, | |
| "loss": 1.1927, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.7068479097204411, | |
| "grad_norm": 2.7206436512698797, | |
| "learning_rate": 2.0853576971989035e-07, | |
| "loss": 1.1842, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.7078738137984099, | |
| "grad_norm": 2.438206997849408, | |
| "learning_rate": 2.0718536409998833e-07, | |
| "loss": 1.1859, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.7088997178763785, | |
| "grad_norm": 2.425979251612811, | |
| "learning_rate": 2.058382014889558e-07, | |
| "loss": 1.2472, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.7099256219543473, | |
| "grad_norm": 3.1059613200905507, | |
| "learning_rate": 2.0449429680701797e-07, | |
| "loss": 1.1009, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.7109515260323159, | |
| "grad_norm": 2.549687762265441, | |
| "learning_rate": 2.0315366493831755e-07, | |
| "loss": 1.2296, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 0.7119774301102847, | |
| "grad_norm": 2.5370967790110126, | |
| "learning_rate": 2.0181632073074923e-07, | |
| "loss": 1.2098, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.7130033341882533, | |
| "grad_norm": 2.4536439877324883, | |
| "learning_rate": 2.0048227899579728e-07, | |
| "loss": 1.1183, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.7140292382662221, | |
| "grad_norm": 2.55316768712919, | |
| "learning_rate": 1.991515545083684e-07, | |
| "loss": 1.1191, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.7150551423441908, | |
| "grad_norm": 2.4788318107618124, | |
| "learning_rate": 1.978241620066315e-07, | |
| "loss": 1.1502, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 0.7160810464221595, | |
| "grad_norm": 2.6045581314247253, | |
| "learning_rate": 1.9650011619185126e-07, | |
| "loss": 1.1872, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.7171069505001283, | |
| "grad_norm": 2.6627567620404107, | |
| "learning_rate": 1.9517943172822753e-07, | |
| "loss": 1.2355, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 0.718132854578097, | |
| "grad_norm": 2.730504463070005, | |
| "learning_rate": 1.938621232427327e-07, | |
| "loss": 1.2131, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.7191587586560657, | |
| "grad_norm": 2.4953047796323498, | |
| "learning_rate": 1.9254820532494787e-07, | |
| "loss": 1.2416, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 0.7201846627340344, | |
| "grad_norm": 2.5640586712114986, | |
| "learning_rate": 1.9123769252690407e-07, | |
| "loss": 1.2348, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.7212105668120031, | |
| "grad_norm": 2.301017245817361, | |
| "learning_rate": 1.8993059936291845e-07, | |
| "loss": 1.1246, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 0.7222364708899718, | |
| "grad_norm": 2.560508237614933, | |
| "learning_rate": 1.8862694030943528e-07, | |
| "loss": 1.1593, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.7232623749679405, | |
| "grad_norm": 2.5384108530663667, | |
| "learning_rate": 1.873267298048649e-07, | |
| "loss": 1.177, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.7242882790459092, | |
| "grad_norm": 2.6017567089602784, | |
| "learning_rate": 1.8602998224942406e-07, | |
| "loss": 1.1913, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.725314183123878, | |
| "grad_norm": 2.563942704168396, | |
| "learning_rate": 1.847367120049762e-07, | |
| "loss": 1.1599, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 0.7263400872018466, | |
| "grad_norm": 2.477017573966492, | |
| "learning_rate": 1.834469333948725e-07, | |
| "loss": 1.3285, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.7273659912798154, | |
| "grad_norm": 2.606062790037432, | |
| "learning_rate": 1.8216066070379332e-07, | |
| "loss": 1.1228, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 0.728391895357784, | |
| "grad_norm": 2.627021153673398, | |
| "learning_rate": 1.808779081775901e-07, | |
| "loss": 1.254, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.7294177994357528, | |
| "grad_norm": 2.732112582668254, | |
| "learning_rate": 1.7959869002312665e-07, | |
| "loss": 1.1785, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 0.7304437035137215, | |
| "grad_norm": 2.7648989482089794, | |
| "learning_rate": 1.7832302040812392e-07, | |
| "loss": 1.2254, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.7314696075916902, | |
| "grad_norm": 2.5265344506070226, | |
| "learning_rate": 1.7705091346100016e-07, | |
| "loss": 1.248, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 0.7324955116696589, | |
| "grad_norm": 2.487884119195698, | |
| "learning_rate": 1.757823832707175e-07, | |
| "loss": 1.2019, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.7335214157476276, | |
| "grad_norm": 2.7066983532459763, | |
| "learning_rate": 1.7451744388662326e-07, | |
| "loss": 1.1771, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.7345473198255963, | |
| "grad_norm": 2.496085937161839, | |
| "learning_rate": 1.7325610931829616e-07, | |
| "loss": 1.1365, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.735573223903565, | |
| "grad_norm": 2.5310750605865966, | |
| "learning_rate": 1.719983935353903e-07, | |
| "loss": 1.2424, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 0.7365991279815337, | |
| "grad_norm": 2.6323596452399727, | |
| "learning_rate": 1.7074431046748074e-07, | |
| "loss": 1.2113, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.7376250320595025, | |
| "grad_norm": 2.773768814333001, | |
| "learning_rate": 1.6949387400390912e-07, | |
| "loss": 1.2481, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 0.7386509361374711, | |
| "grad_norm": 2.6027832225629637, | |
| "learning_rate": 1.682470979936298e-07, | |
| "loss": 1.1914, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.7396768402154399, | |
| "grad_norm": 2.5140857014311084, | |
| "learning_rate": 1.6700399624505663e-07, | |
| "loss": 1.1656, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 0.7407027442934085, | |
| "grad_norm": 2.6602123194953835, | |
| "learning_rate": 1.6576458252590986e-07, | |
| "loss": 1.2504, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.7417286483713773, | |
| "grad_norm": 2.481197593104395, | |
| "learning_rate": 1.6452887056306376e-07, | |
| "loss": 1.1759, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 0.742754552449346, | |
| "grad_norm": 2.6801836727829804, | |
| "learning_rate": 1.6329687404239445e-07, | |
| "loss": 1.3156, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.7437804565273147, | |
| "grad_norm": 2.5124931482279105, | |
| "learning_rate": 1.6206860660862858e-07, | |
| "loss": 1.2196, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.7448063606052834, | |
| "grad_norm": 2.664235589191919, | |
| "learning_rate": 1.6084408186519194e-07, | |
| "loss": 1.1395, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.7458322646832521, | |
| "grad_norm": 2.327075273677125, | |
| "learning_rate": 1.5962331337405914e-07, | |
| "loss": 1.2351, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 0.7468581687612208, | |
| "grad_norm": 2.412427852412854, | |
| "learning_rate": 1.584063146556025e-07, | |
| "loss": 1.2697, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.7478840728391896, | |
| "grad_norm": 2.5543949822741987, | |
| "learning_rate": 1.5719309918844414e-07, | |
| "loss": 1.23, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 0.7489099769171582, | |
| "grad_norm": 4.7369349678689066, | |
| "learning_rate": 1.5598368040930427e-07, | |
| "loss": 1.1512, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.749935880995127, | |
| "grad_norm": 2.6739445766939745, | |
| "learning_rate": 1.5477807171285489e-07, | |
| "loss": 1.2274, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 0.7509617850730956, | |
| "grad_norm": 2.8907340492672535, | |
| "learning_rate": 1.5357628645156918e-07, | |
| "loss": 1.1342, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.7519876891510644, | |
| "grad_norm": 2.725694829583633, | |
| "learning_rate": 1.5237833793557515e-07, | |
| "loss": 1.2117, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 0.753013593229033, | |
| "grad_norm": 2.4983300901122916, | |
| "learning_rate": 1.5118423943250768e-07, | |
| "loss": 1.1886, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.7540394973070018, | |
| "grad_norm": 2.97591503230041, | |
| "learning_rate": 1.499940041673616e-07, | |
| "loss": 1.1669, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.7550654013849705, | |
| "grad_norm": 2.4033448682516942, | |
| "learning_rate": 1.4880764532234514e-07, | |
| "loss": 1.0508, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.7560913054629392, | |
| "grad_norm": 2.508957495407041, | |
| "learning_rate": 1.4762517603673408e-07, | |
| "loss": 1.1536, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 0.7571172095409079, | |
| "grad_norm": 2.782391609960307, | |
| "learning_rate": 1.4644660940672627e-07, | |
| "loss": 1.1624, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.7581431136188767, | |
| "grad_norm": 2.43794343363202, | |
| "learning_rate": 1.4527195848529634e-07, | |
| "loss": 1.2295, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 0.7591690176968453, | |
| "grad_norm": 2.566694597309234, | |
| "learning_rate": 1.4410123628205134e-07, | |
| "loss": 1.1343, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.7601949217748141, | |
| "grad_norm": 2.6362502212431576, | |
| "learning_rate": 1.4293445576308673e-07, | |
| "loss": 1.2362, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 0.7612208258527827, | |
| "grad_norm": 2.4532291147501653, | |
| "learning_rate": 1.417716298508424e-07, | |
| "loss": 1.1928, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.7622467299307515, | |
| "grad_norm": 2.595969156532276, | |
| "learning_rate": 1.4061277142396006e-07, | |
| "loss": 1.2755, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 0.7632726340087201, | |
| "grad_norm": 2.517605616531437, | |
| "learning_rate": 1.3945789331714013e-07, | |
| "loss": 1.1298, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.7642985380866889, | |
| "grad_norm": 2.5082145361741315, | |
| "learning_rate": 1.3830700832099994e-07, | |
| "loss": 1.1012, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.7653244421646576, | |
| "grad_norm": 2.5628568172056925, | |
| "learning_rate": 1.3716012918193205e-07, | |
| "loss": 1.2271, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.7663503462426263, | |
| "grad_norm": 2.4325845713405387, | |
| "learning_rate": 1.3601726860196227e-07, | |
| "loss": 1.1876, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 0.767376250320595, | |
| "grad_norm": 2.507778203499113, | |
| "learning_rate": 1.3487843923861098e-07, | |
| "loss": 1.1841, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.7684021543985637, | |
| "grad_norm": 2.492682324587268, | |
| "learning_rate": 1.337436537047501e-07, | |
| "loss": 1.2382, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 0.7694280584765324, | |
| "grad_norm": 2.695111243135506, | |
| "learning_rate": 1.3261292456846646e-07, | |
| "loss": 1.192, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.7694280584765324, | |
| "eval_uground_MCTS_chains_SFT_val_loss": 1.2723054885864258, | |
| "eval_uground_MCTS_chains_SFT_val_runtime": 210.8326, | |
| "eval_uground_MCTS_chains_SFT_val_samples_per_second": 8.628, | |
| "eval_uground_MCTS_chains_SFT_val_steps_per_second": 1.081, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.7704539625545012, | |
| "grad_norm": 2.4499636226982187, | |
| "learning_rate": 1.3148626435291977e-07, | |
| "loss": 1.2008, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 0.7714798666324698, | |
| "grad_norm": 2.749941205607747, | |
| "learning_rate": 1.30363685536206e-07, | |
| "loss": 1.1632, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.7725057707104386, | |
| "grad_norm": 2.6515417658669325, | |
| "learning_rate": 1.2924520055121834e-07, | |
| "loss": 1.2075, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 0.7735316747884072, | |
| "grad_norm": 2.668556743002617, | |
| "learning_rate": 1.2813082178550928e-07, | |
| "loss": 1.2162, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.774557578866376, | |
| "grad_norm": 2.393774949444245, | |
| "learning_rate": 1.2702056158115405e-07, | |
| "loss": 1.1836, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.7755834829443446, | |
| "grad_norm": 2.383685066946397, | |
| "learning_rate": 1.2591443223461333e-07, | |
| "loss": 1.2551, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.7766093870223134, | |
| "grad_norm": 2.5950121153810164, | |
| "learning_rate": 1.2481244599659752e-07, | |
| "loss": 1.1199, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 0.7776352911002822, | |
| "grad_norm": 2.683432845893599, | |
| "learning_rate": 1.2371461507193075e-07, | |
| "loss": 1.2948, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.7786611951782508, | |
| "grad_norm": 2.5444949929602236, | |
| "learning_rate": 1.2262095161941588e-07, | |
| "loss": 1.1559, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 0.7796870992562196, | |
| "grad_norm": 2.825484071880231, | |
| "learning_rate": 1.215314677516997e-07, | |
| "loss": 1.1641, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.7807130033341882, | |
| "grad_norm": 2.7865772894604017, | |
| "learning_rate": 1.2044617553513897e-07, | |
| "loss": 1.1873, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 0.781738907412157, | |
| "grad_norm": 2.5228443815683224, | |
| "learning_rate": 1.1936508698966663e-07, | |
| "loss": 1.2681, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.7827648114901257, | |
| "grad_norm": 2.8999815031168636, | |
| "learning_rate": 1.1828821408865886e-07, | |
| "loss": 1.2122, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 0.7837907155680944, | |
| "grad_norm": 2.5689794885209563, | |
| "learning_rate": 1.1721556875880167e-07, | |
| "loss": 1.2239, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.7848166196460631, | |
| "grad_norm": 2.507318965085582, | |
| "learning_rate": 1.1614716287996063e-07, | |
| "loss": 1.1625, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.7858425237240318, | |
| "grad_norm": 2.650879555302289, | |
| "learning_rate": 1.150830082850468e-07, | |
| "loss": 1.2091, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.7868684278020005, | |
| "grad_norm": 2.644278727926541, | |
| "learning_rate": 1.1402311675988834e-07, | |
| "loss": 1.273, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 0.7878943318799693, | |
| "grad_norm": 2.780296372251445, | |
| "learning_rate": 1.1296750004309757e-07, | |
| "loss": 1.236, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.7889202359579379, | |
| "grad_norm": 2.5188724688574213, | |
| "learning_rate": 1.1191616982594259e-07, | |
| "loss": 1.1625, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 0.7899461400359067, | |
| "grad_norm": 2.523287196036484, | |
| "learning_rate": 1.1086913775221706e-07, | |
| "loss": 1.1488, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.7909720441138753, | |
| "grad_norm": 2.6829517454733716, | |
| "learning_rate": 1.0982641541811161e-07, | |
| "loss": 1.2307, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 0.7919979481918441, | |
| "grad_norm": 2.6511117351470697, | |
| "learning_rate": 1.0878801437208496e-07, | |
| "loss": 1.229, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.7930238522698128, | |
| "grad_norm": 2.9646303582070157, | |
| "learning_rate": 1.077539461147366e-07, | |
| "loss": 1.1874, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 0.7940497563477815, | |
| "grad_norm": 2.7202486187139097, | |
| "learning_rate": 1.0672422209867876e-07, | |
| "loss": 1.154, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.7950756604257502, | |
| "grad_norm": 2.5478407063833424, | |
| "learning_rate": 1.056988537284103e-07, | |
| "loss": 1.1646, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.7961015645037189, | |
| "grad_norm": 2.6329035656681943, | |
| "learning_rate": 1.0467785236018944e-07, | |
| "loss": 1.284, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.7971274685816876, | |
| "grad_norm": 2.581763231489404, | |
| "learning_rate": 1.0366122930190934e-07, | |
| "loss": 1.1138, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 0.7981533726596564, | |
| "grad_norm": 2.509180799267851, | |
| "learning_rate": 1.026489958129712e-07, | |
| "loss": 1.2524, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.799179276737625, | |
| "grad_norm": 2.277115810698049, | |
| "learning_rate": 1.0164116310416127e-07, | |
| "loss": 1.2171, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 0.8002051808155938, | |
| "grad_norm": 2.5306432670776386, | |
| "learning_rate": 1.0063774233752542e-07, | |
| "loss": 1.2668, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.8012310848935624, | |
| "grad_norm": 2.7565443335969966, | |
| "learning_rate": 9.963874462624566e-08, | |
| "loss": 1.2028, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 0.8022569889715312, | |
| "grad_norm": 2.717640654108823, | |
| "learning_rate": 9.864418103451827e-08, | |
| "loss": 1.1879, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.8032828930494998, | |
| "grad_norm": 2.428371028068662, | |
| "learning_rate": 9.765406257742915e-08, | |
| "loss": 1.2411, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 0.8043087971274686, | |
| "grad_norm": 2.7349523704191516, | |
| "learning_rate": 9.666840022083422e-08, | |
| "loss": 1.1647, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.8053347012054373, | |
| "grad_norm": 2.5706570443073224, | |
| "learning_rate": 9.568720488123577e-08, | |
| "loss": 1.2041, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.806360605283406, | |
| "grad_norm": 2.5312663092030974, | |
| "learning_rate": 9.471048742566312e-08, | |
| "loss": 1.2376, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.8073865093613747, | |
| "grad_norm": 2.567303045700278, | |
| "learning_rate": 9.373825867155155e-08, | |
| "loss": 1.2416, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 0.8084124134393434, | |
| "grad_norm": 2.5607337841796842, | |
| "learning_rate": 9.27705293866226e-08, | |
| "loss": 1.1564, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.8094383175173121, | |
| "grad_norm": 2.455235264261172, | |
| "learning_rate": 9.180731028876492e-08, | |
| "loss": 1.1916, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 0.8104642215952809, | |
| "grad_norm": 2.3169514617874594, | |
| "learning_rate": 9.084861204591549e-08, | |
| "loss": 1.2042, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.8114901256732495, | |
| "grad_norm": 2.4016974955821593, | |
| "learning_rate": 8.989444527594143e-08, | |
| "loss": 1.0985, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 0.8125160297512183, | |
| "grad_norm": 2.682553790748834, | |
| "learning_rate": 8.894482054652247e-08, | |
| "loss": 1.1976, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.8135419338291869, | |
| "grad_norm": 2.5902122938763843, | |
| "learning_rate": 8.7999748375034e-08, | |
| "loss": 1.2057, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 0.8145678379071557, | |
| "grad_norm": 2.4236373303760272, | |
| "learning_rate": 8.705923922843039e-08, | |
| "loss": 1.2162, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.8155937419851244, | |
| "grad_norm": 2.453178416086071, | |
| "learning_rate": 8.612330352312891e-08, | |
| "loss": 1.1699, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.8166196460630931, | |
| "grad_norm": 2.332974640258587, | |
| "learning_rate": 8.519195162489528e-08, | |
| "loss": 1.1845, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.8176455501410618, | |
| "grad_norm": 2.7398187199253767, | |
| "learning_rate": 8.426519384872732e-08, | |
| "loss": 1.2276, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 0.8186714542190305, | |
| "grad_norm": 2.799522945659722, | |
| "learning_rate": 8.334304045874246e-08, | |
| "loss": 1.1882, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.8196973582969992, | |
| "grad_norm": 2.597041968467517, | |
| "learning_rate": 8.242550166806272e-08, | |
| "loss": 1.21, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 0.820723262374968, | |
| "grad_norm": 2.922217189437273, | |
| "learning_rate": 8.151258763870177e-08, | |
| "loss": 1.2113, | |
| "step": 800 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 974, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 247351370907648.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |