| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 4.995051138238205, |
| "eval_steps": 500, |
| "global_step": 945, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.005278785879247773, |
| "grad_norm": 8.559222684223487, |
| "learning_rate": 8.421052631578948e-07, |
| "loss": 1.7821, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.010557571758495546, |
| "grad_norm": 8.542811150657618, |
| "learning_rate": 1.6842105263157895e-06, |
| "loss": 1.7757, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.01583635763774332, |
| "grad_norm": 8.625119441824685, |
| "learning_rate": 2.5263157894736844e-06, |
| "loss": 1.7844, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.02111514351699109, |
| "grad_norm": 7.911430505391723, |
| "learning_rate": 3.368421052631579e-06, |
| "loss": 1.7579, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.026393929396238865, |
| "grad_norm": 6.143963490700802, |
| "learning_rate": 4.210526315789474e-06, |
| "loss": 1.7134, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.03167271527548664, |
| "grad_norm": 3.3865160264777097, |
| "learning_rate": 5.052631578947369e-06, |
| "loss": 1.6629, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.03695150115473441, |
| "grad_norm": 2.76282727616314, |
| "learning_rate": 5.8947368421052634e-06, |
| "loss": 1.6585, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.04223028703398218, |
| "grad_norm": 6.322449790932474, |
| "learning_rate": 6.736842105263158e-06, |
| "loss": 1.6616, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.047509072913229956, |
| "grad_norm": 6.279706082200225, |
| "learning_rate": 7.578947368421054e-06, |
| "loss": 1.6681, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.05278785879247773, |
| "grad_norm": 6.066649896759186, |
| "learning_rate": 8.421052631578948e-06, |
| "loss": 1.6635, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0580666446717255, |
| "grad_norm": 4.720492660185518, |
| "learning_rate": 9.263157894736842e-06, |
| "loss": 1.6048, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.06334543055097328, |
| "grad_norm": 4.038725392859273, |
| "learning_rate": 1.0105263157894738e-05, |
| "loss": 1.5823, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.06862421643022105, |
| "grad_norm": 2.675236418624382, |
| "learning_rate": 1.0947368421052633e-05, |
| "loss": 1.5534, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.07390300230946882, |
| "grad_norm": 1.9761478647451622, |
| "learning_rate": 1.1789473684210527e-05, |
| "loss": 1.5445, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.0791817881887166, |
| "grad_norm": 2.1964197263869054, |
| "learning_rate": 1.263157894736842e-05, |
| "loss": 1.5301, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.08446057406796437, |
| "grad_norm": 2.0364335919963903, |
| "learning_rate": 1.3473684210526316e-05, |
| "loss": 1.5004, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.08973935994721215, |
| "grad_norm": 1.8037582738302078, |
| "learning_rate": 1.4315789473684212e-05, |
| "loss": 1.4781, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.09501814582645991, |
| "grad_norm": 1.4875183659443978, |
| "learning_rate": 1.5157894736842107e-05, |
| "loss": 1.4884, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.10029693170570769, |
| "grad_norm": 1.4977012844942934, |
| "learning_rate": 1.6000000000000003e-05, |
| "loss": 1.463, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.10557571758495546, |
| "grad_norm": 1.2341554749307428, |
| "learning_rate": 1.6842105263157896e-05, |
| "loss": 1.4694, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.11085450346420324, |
| "grad_norm": 1.2543656515326747, |
| "learning_rate": 1.768421052631579e-05, |
| "loss": 1.4472, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.116133289343451, |
| "grad_norm": 1.277673973994405, |
| "learning_rate": 1.8526315789473684e-05, |
| "loss": 1.4194, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.12141207522269878, |
| "grad_norm": 1.0554281233550042, |
| "learning_rate": 1.936842105263158e-05, |
| "loss": 1.4259, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.12669086110194655, |
| "grad_norm": 0.9059715768012179, |
| "learning_rate": 2.0210526315789475e-05, |
| "loss": 1.4112, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.13196964698119432, |
| "grad_norm": 1.150687529393131, |
| "learning_rate": 2.105263157894737e-05, |
| "loss": 1.4208, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.1372484328604421, |
| "grad_norm": 0.8259556273498903, |
| "learning_rate": 2.1894736842105266e-05, |
| "loss": 1.4158, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.14252721873968988, |
| "grad_norm": 1.080990078765424, |
| "learning_rate": 2.273684210526316e-05, |
| "loss": 1.4037, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.14780600461893764, |
| "grad_norm": 1.731100113222458, |
| "learning_rate": 2.3578947368421054e-05, |
| "loss": 1.4024, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.1530847904981854, |
| "grad_norm": 0.8596609684641758, |
| "learning_rate": 2.442105263157895e-05, |
| "loss": 1.3998, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.1583635763774332, |
| "grad_norm": 1.9119682716812114, |
| "learning_rate": 2.526315789473684e-05, |
| "loss": 1.402, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.16364236225668097, |
| "grad_norm": 1.2533125345082186, |
| "learning_rate": 2.610526315789474e-05, |
| "loss": 1.39, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.16892114813592873, |
| "grad_norm": 1.489553834805727, |
| "learning_rate": 2.6947368421052632e-05, |
| "loss": 1.3828, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.1741999340151765, |
| "grad_norm": 1.6513868341059252, |
| "learning_rate": 2.778947368421053e-05, |
| "loss": 1.3678, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.1794787198944243, |
| "grad_norm": 1.2742436351081012, |
| "learning_rate": 2.8631578947368423e-05, |
| "loss": 1.3535, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.18475750577367206, |
| "grad_norm": 1.9331697766270215, |
| "learning_rate": 2.9473684210526317e-05, |
| "loss": 1.3837, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.19003629165291983, |
| "grad_norm": 1.3601512530638553, |
| "learning_rate": 3.0315789473684214e-05, |
| "loss": 1.3722, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.1953150775321676, |
| "grad_norm": 1.7311302669544857, |
| "learning_rate": 3.1157894736842105e-05, |
| "loss": 1.3686, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.20059386341141539, |
| "grad_norm": 1.48755699155441, |
| "learning_rate": 3.2000000000000005e-05, |
| "loss": 1.3671, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.20587264929066315, |
| "grad_norm": 1.8327357288689288, |
| "learning_rate": 3.28421052631579e-05, |
| "loss": 1.3735, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.21115143516991092, |
| "grad_norm": 1.5083133000259623, |
| "learning_rate": 3.368421052631579e-05, |
| "loss": 1.3674, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.21643022104915868, |
| "grad_norm": 1.993160855595939, |
| "learning_rate": 3.452631578947369e-05, |
| "loss": 1.3699, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.22170900692840648, |
| "grad_norm": 1.5856686533566766, |
| "learning_rate": 3.536842105263158e-05, |
| "loss": 1.3518, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.22698779280765424, |
| "grad_norm": 1.9065067719811546, |
| "learning_rate": 3.621052631578948e-05, |
| "loss": 1.3633, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.232266578686902, |
| "grad_norm": 1.506131578841278, |
| "learning_rate": 3.705263157894737e-05, |
| "loss": 1.3497, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.23754536456614977, |
| "grad_norm": 2.121087638106546, |
| "learning_rate": 3.789473684210526e-05, |
| "loss": 1.3367, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.24282415044539757, |
| "grad_norm": 4.67404248196046, |
| "learning_rate": 3.873684210526316e-05, |
| "loss": 1.3672, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.24810293632464533, |
| "grad_norm": 1.2448680931118623, |
| "learning_rate": 3.9578947368421056e-05, |
| "loss": 1.3534, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.2533817222038931, |
| "grad_norm": 2.1014651081767455, |
| "learning_rate": 4.042105263157895e-05, |
| "loss": 1.3559, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.2586605080831409, |
| "grad_norm": 2.1083248107539814, |
| "learning_rate": 4.126315789473685e-05, |
| "loss": 1.3562, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.26393929396238863, |
| "grad_norm": 1.0081628250168648, |
| "learning_rate": 4.210526315789474e-05, |
| "loss": 1.3478, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.2692180798416364, |
| "grad_norm": 2.245552893796406, |
| "learning_rate": 4.294736842105264e-05, |
| "loss": 1.3425, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.2744968657208842, |
| "grad_norm": 1.847721270053161, |
| "learning_rate": 4.378947368421053e-05, |
| "loss": 1.3484, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.27977565160013196, |
| "grad_norm": 2.07621002532574, |
| "learning_rate": 4.463157894736842e-05, |
| "loss": 1.3524, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.28505443747937975, |
| "grad_norm": 2.0573435626549674, |
| "learning_rate": 4.547368421052632e-05, |
| "loss": 1.3375, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.2903332233586275, |
| "grad_norm": 2.2804125611624744, |
| "learning_rate": 4.6315789473684214e-05, |
| "loss": 1.3537, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.2956120092378753, |
| "grad_norm": 1.614092007514655, |
| "learning_rate": 4.715789473684211e-05, |
| "loss": 1.3221, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.3008907951171231, |
| "grad_norm": 1.8239392632361, |
| "learning_rate": 4.8e-05, |
| "loss": 1.3551, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.3061695809963708, |
| "grad_norm": 1.892353404743652, |
| "learning_rate": 4.88421052631579e-05, |
| "loss": 1.3422, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.3114483668756186, |
| "grad_norm": 1.5626211808165376, |
| "learning_rate": 4.9684210526315796e-05, |
| "loss": 1.3388, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.3167271527548664, |
| "grad_norm": 2.2886161978053416, |
| "learning_rate": 5.052631578947368e-05, |
| "loss": 1.3519, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.32200593863411414, |
| "grad_norm": 1.2771706641759382, |
| "learning_rate": 5.136842105263158e-05, |
| "loss": 1.3285, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.32728472451336194, |
| "grad_norm": 1.8850893921105571, |
| "learning_rate": 5.221052631578948e-05, |
| "loss": 1.3285, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.3325635103926097, |
| "grad_norm": 2.836538683465238, |
| "learning_rate": 5.305263157894737e-05, |
| "loss": 1.3352, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.33784229627185747, |
| "grad_norm": 1.0161609306010637, |
| "learning_rate": 5.3894736842105265e-05, |
| "loss": 1.3303, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.34312108215110526, |
| "grad_norm": 3.418607300631356, |
| "learning_rate": 5.4736842105263165e-05, |
| "loss": 1.3568, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.348399868030353, |
| "grad_norm": 2.1951718649280716, |
| "learning_rate": 5.557894736842106e-05, |
| "loss": 1.3578, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.3536786539096008, |
| "grad_norm": 2.1129578266236697, |
| "learning_rate": 5.642105263157895e-05, |
| "loss": 1.362, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.3589574397888486, |
| "grad_norm": 2.6017181194163013, |
| "learning_rate": 5.726315789473685e-05, |
| "loss": 1.3446, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.3642362256680963, |
| "grad_norm": 2.0929961143676095, |
| "learning_rate": 5.810526315789475e-05, |
| "loss": 1.3476, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.3695150115473441, |
| "grad_norm": 2.9426854876810236, |
| "learning_rate": 5.8947368421052634e-05, |
| "loss": 1.361, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.37479379742659186, |
| "grad_norm": 2.047235372498259, |
| "learning_rate": 5.978947368421053e-05, |
| "loss": 1.3295, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.38007258330583965, |
| "grad_norm": 2.3163564645661605, |
| "learning_rate": 6.063157894736843e-05, |
| "loss": 1.3236, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.38535136918508744, |
| "grad_norm": 1.7708571795906927, |
| "learning_rate": 6.147368421052632e-05, |
| "loss": 1.3282, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.3906301550643352, |
| "grad_norm": 2.873431951917759, |
| "learning_rate": 6.231578947368421e-05, |
| "loss": 1.3124, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.395908940943583, |
| "grad_norm": 2.1608485069854138, |
| "learning_rate": 6.315789473684212e-05, |
| "loss": 1.3259, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.40118772682283077, |
| "grad_norm": 2.641406113572487, |
| "learning_rate": 6.400000000000001e-05, |
| "loss": 1.3073, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.4064665127020785, |
| "grad_norm": 2.134077803951715, |
| "learning_rate": 6.484210526315789e-05, |
| "loss": 1.332, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.4117452985813263, |
| "grad_norm": 2.548230794420582, |
| "learning_rate": 6.56842105263158e-05, |
| "loss": 1.3264, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.41702408446057404, |
| "grad_norm": 1.8584772908735328, |
| "learning_rate": 6.652631578947369e-05, |
| "loss": 1.3206, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.42230287033982183, |
| "grad_norm": 1.7329721515220284, |
| "learning_rate": 6.736842105263159e-05, |
| "loss": 1.3233, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.42758165621906963, |
| "grad_norm": 1.3798188484446696, |
| "learning_rate": 6.821052631578948e-05, |
| "loss": 1.3215, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.43286044209831737, |
| "grad_norm": 2.3304870562288627, |
| "learning_rate": 6.905263157894737e-05, |
| "loss": 1.3271, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.43813922797756516, |
| "grad_norm": 1.9536589639033466, |
| "learning_rate": 6.989473684210527e-05, |
| "loss": 1.3262, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.44341801385681295, |
| "grad_norm": 1.9444021198857042, |
| "learning_rate": 7.073684210526316e-05, |
| "loss": 1.3339, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.4486967997360607, |
| "grad_norm": 3.192423879350558, |
| "learning_rate": 7.157894736842105e-05, |
| "loss": 1.3214, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.4539755856153085, |
| "grad_norm": 1.5397227502548587, |
| "learning_rate": 7.242105263157896e-05, |
| "loss": 1.3166, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.4592543714945563, |
| "grad_norm": 4.745719843797305, |
| "learning_rate": 7.326315789473684e-05, |
| "loss": 1.3401, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.464533157373804, |
| "grad_norm": 2.9655787260096185, |
| "learning_rate": 7.410526315789474e-05, |
| "loss": 1.3378, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.4698119432530518, |
| "grad_norm": 4.46812982024058, |
| "learning_rate": 7.494736842105264e-05, |
| "loss": 1.3488, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.47509072913229955, |
| "grad_norm": 3.302230770830571, |
| "learning_rate": 7.578947368421052e-05, |
| "loss": 1.3212, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.48036951501154734, |
| "grad_norm": 3.240353930955636, |
| "learning_rate": 7.663157894736843e-05, |
| "loss": 1.3339, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.48564830089079514, |
| "grad_norm": 2.6043118638655134, |
| "learning_rate": 7.747368421052633e-05, |
| "loss": 1.3326, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.4909270867700429, |
| "grad_norm": 2.7735037196407433, |
| "learning_rate": 7.831578947368422e-05, |
| "loss": 1.3094, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.49620587264929067, |
| "grad_norm": 1.869194117660677, |
| "learning_rate": 7.915789473684211e-05, |
| "loss": 1.3337, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.5014846585285384, |
| "grad_norm": 1.486935743621699, |
| "learning_rate": 8e-05, |
| "loss": 1.3209, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.5067634444077862, |
| "grad_norm": 3.97176574114269, |
| "learning_rate": 7.999972679326877e-05, |
| "loss": 1.3097, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.512042230287034, |
| "grad_norm": 2.909020423931811, |
| "learning_rate": 7.999890717680716e-05, |
| "loss": 1.3255, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.5173210161662818, |
| "grad_norm": 3.266690230691967, |
| "learning_rate": 7.999754116181141e-05, |
| "loss": 1.322, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.5225998020455296, |
| "grad_norm": 2.9736870692736477, |
| "learning_rate": 7.999562876694173e-05, |
| "loss": 1.3241, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.5278785879247773, |
| "grad_norm": 2.029766896419903, |
| "learning_rate": 7.999317001832211e-05, |
| "loss": 1.2977, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.5331573738040251, |
| "grad_norm": 1.8003805642877855, |
| "learning_rate": 7.999016494953987e-05, |
| "loss": 1.3131, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.5384361596832729, |
| "grad_norm": 1.6793075691245238, |
| "learning_rate": 7.998661360164525e-05, |
| "loss": 1.3121, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.5437149455625206, |
| "grad_norm": 1.986719183571971, |
| "learning_rate": 7.998251602315085e-05, |
| "loss": 1.3249, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.5489937314417684, |
| "grad_norm": 2.195586309730319, |
| "learning_rate": 7.997787227003101e-05, |
| "loss": 1.3059, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.5542725173210161, |
| "grad_norm": 1.461828111029453, |
| "learning_rate": 7.997268240572093e-05, |
| "loss": 1.3118, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.5595513032002639, |
| "grad_norm": 2.148695235144627, |
| "learning_rate": 7.99669465011159e-05, |
| "loss": 1.3273, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.5648300890795117, |
| "grad_norm": 1.6724341025412792, |
| "learning_rate": 7.996066463457032e-05, |
| "loss": 1.3228, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.5701088749587595, |
| "grad_norm": 1.705259901076724, |
| "learning_rate": 7.99538368918966e-05, |
| "loss": 1.3189, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.5753876608380073, |
| "grad_norm": 1.469080880590235, |
| "learning_rate": 7.9946463366364e-05, |
| "loss": 1.316, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.580666446717255, |
| "grad_norm": 1.8098027619124655, |
| "learning_rate": 7.993854415869737e-05, |
| "loss": 1.3488, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.5859452325965028, |
| "grad_norm": 2.031226749468162, |
| "learning_rate": 7.993007937707573e-05, |
| "loss": 1.3186, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.5912240184757506, |
| "grad_norm": 1.676093917612674, |
| "learning_rate": 7.992106913713087e-05, |
| "loss": 1.3093, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.5965028043549984, |
| "grad_norm": 1.6468968483278041, |
| "learning_rate": 7.991151356194568e-05, |
| "loss": 1.3087, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.6017815902342462, |
| "grad_norm": 1.7040887005478604, |
| "learning_rate": 7.990141278205255e-05, |
| "loss": 1.305, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.607060376113494, |
| "grad_norm": 1.5442416545556625, |
| "learning_rate": 7.989076693543153e-05, |
| "loss": 1.3011, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.6123391619927416, |
| "grad_norm": 1.8397675930137192, |
| "learning_rate": 7.987957616750845e-05, |
| "loss": 1.3169, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.6176179478719894, |
| "grad_norm": 1.6758088273809235, |
| "learning_rate": 7.9867840631153e-05, |
| "loss": 1.3019, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.6228967337512372, |
| "grad_norm": 1.8403825263907043, |
| "learning_rate": 7.985556048667652e-05, |
| "loss": 1.3218, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.628175519630485, |
| "grad_norm": 1.6809344973155804, |
| "learning_rate": 7.984273590182992e-05, |
| "loss": 1.3122, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.6334543055097328, |
| "grad_norm": 1.6249704090599522, |
| "learning_rate": 7.982936705180139e-05, |
| "loss": 1.2886, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.6387330913889805, |
| "grad_norm": 1.2705796545495383, |
| "learning_rate": 7.981545411921387e-05, |
| "loss": 1.3048, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.6440118772682283, |
| "grad_norm": 2.30022576461904, |
| "learning_rate": 7.980099729412272e-05, |
| "loss": 1.3203, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.6492906631474761, |
| "grad_norm": 1.3889473327754434, |
| "learning_rate": 7.978599677401304e-05, |
| "loss": 1.303, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.6545694490267239, |
| "grad_norm": 1.016655411214257, |
| "learning_rate": 7.977045276379698e-05, |
| "loss": 1.3, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.6598482349059717, |
| "grad_norm": 1.9260332196274916, |
| "learning_rate": 7.975436547581096e-05, |
| "loss": 1.2993, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.6651270207852193, |
| "grad_norm": 1.0922760502085274, |
| "learning_rate": 7.973773512981272e-05, |
| "loss": 1.2976, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.6704058066644671, |
| "grad_norm": 1.5276232239033796, |
| "learning_rate": 7.972056195297842e-05, |
| "loss": 1.3096, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.6756845925437149, |
| "grad_norm": 1.109799139624589, |
| "learning_rate": 7.97028461798994e-05, |
| "loss": 1.2976, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.6809633784229627, |
| "grad_norm": 2.2229397086392124, |
| "learning_rate": 7.968458805257913e-05, |
| "loss": 1.3206, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.6862421643022105, |
| "grad_norm": 1.9303859724176848, |
| "learning_rate": 7.966578782042972e-05, |
| "loss": 1.3084, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.6915209501814583, |
| "grad_norm": 1.3809636210427785, |
| "learning_rate": 7.964644574026869e-05, |
| "loss": 1.3148, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.696799736060706, |
| "grad_norm": 2.744998744916968, |
| "learning_rate": 7.962656207631538e-05, |
| "loss": 1.3209, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.7020785219399538, |
| "grad_norm": 2.3382633110913718, |
| "learning_rate": 7.960613710018733e-05, |
| "loss": 1.334, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.7073573078192016, |
| "grad_norm": 2.1467199346064763, |
| "learning_rate": 7.958517109089657e-05, |
| "loss": 1.3034, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.7126360936984494, |
| "grad_norm": 1.624615452394344, |
| "learning_rate": 7.956366433484585e-05, |
| "loss": 1.297, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.7179148795776972, |
| "grad_norm": 1.920698615496753, |
| "learning_rate": 7.954161712582469e-05, |
| "loss": 1.3061, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.7231936654569449, |
| "grad_norm": 1.3787767200786702, |
| "learning_rate": 7.95190297650054e-05, |
| "loss": 1.3038, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.7284724513361927, |
| "grad_norm": 1.9764036923036765, |
| "learning_rate": 7.949590256093892e-05, |
| "loss": 1.3072, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.7337512372154404, |
| "grad_norm": 1.27956585328178, |
| "learning_rate": 7.947223582955066e-05, |
| "loss": 1.3112, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.7390300230946882, |
| "grad_norm": 1.8770719697712916, |
| "learning_rate": 7.94480298941361e-05, |
| "loss": 1.2998, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.744308808973936, |
| "grad_norm": 1.36222195282238, |
| "learning_rate": 7.94232850853565e-05, |
| "loss": 1.3106, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.7495875948531837, |
| "grad_norm": 1.7131665811756942, |
| "learning_rate": 7.939800174123426e-05, |
| "loss": 1.2972, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.7548663807324315, |
| "grad_norm": 1.516526698406304, |
| "learning_rate": 7.937218020714838e-05, |
| "loss": 1.3063, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.7601451666116793, |
| "grad_norm": 1.559180702735073, |
| "learning_rate": 7.934582083582968e-05, |
| "loss": 1.2949, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.7654239524909271, |
| "grad_norm": 1.3965240338452205, |
| "learning_rate": 7.931892398735608e-05, |
| "loss": 1.2996, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.7707027383701749, |
| "grad_norm": 1.1876037429072739, |
| "learning_rate": 7.929149002914756e-05, |
| "loss": 1.2888, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.7759815242494227, |
| "grad_norm": 1.2523705350271135, |
| "learning_rate": 7.926351933596123e-05, |
| "loss": 1.3041, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.7812603101286704, |
| "grad_norm": 1.7160568167139532, |
| "learning_rate": 7.923501228988616e-05, |
| "loss": 1.2864, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.7865390960079182, |
| "grad_norm": 1.485339867730922, |
| "learning_rate": 7.920596928033819e-05, |
| "loss": 1.3044, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.791817881887166, |
| "grad_norm": 0.7690452839999546, |
| "learning_rate": 7.917639070405464e-05, |
| "loss": 1.2923, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.7970966677664137, |
| "grad_norm": 1.1067614665986554, |
| "learning_rate": 7.91462769650888e-05, |
| "loss": 1.3, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.8023754536456615, |
| "grad_norm": 1.9038151496223523, |
| "learning_rate": 7.911562847480446e-05, |
| "loss": 1.2881, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.8076542395249092, |
| "grad_norm": 1.328802107876604, |
| "learning_rate": 7.908444565187034e-05, |
| "loss": 1.2967, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.812933025404157, |
| "grad_norm": 0.8601798964930342, |
| "learning_rate": 7.905272892225426e-05, |
| "loss": 1.2922, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.8182118112834048, |
| "grad_norm": 1.2277724236844354, |
| "learning_rate": 7.902047871921748e-05, |
| "loss": 1.2904, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.8234905971626526, |
| "grad_norm": 1.7600047771215326, |
| "learning_rate": 7.898769548330857e-05, |
| "loss": 1.2952, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.8287693830419004, |
| "grad_norm": 1.147137205196615, |
| "learning_rate": 7.895437966235759e-05, |
| "loss": 1.3084, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.8340481689211481, |
| "grad_norm": 1.4328130237637848, |
| "learning_rate": 7.892053171146988e-05, |
| "loss": 1.3145, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.8393269548003959, |
| "grad_norm": 1.2401237696521015, |
| "learning_rate": 7.888615209301981e-05, |
| "loss": 1.2995, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.8446057406796437, |
| "grad_norm": 1.2705800837028438, |
| "learning_rate": 7.885124127664456e-05, |
| "loss": 1.2914, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.8498845265588915, |
| "grad_norm": 0.7892106895753584, |
| "learning_rate": 7.881579973923763e-05, |
| "loss": 1.2995, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.8551633124381393, |
| "grad_norm": 1.558474494813178, |
| "learning_rate": 7.877982796494235e-05, |
| "loss": 1.3144, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.860442098317387, |
| "grad_norm": 1.2556130173314157, |
| "learning_rate": 7.874332644514525e-05, |
| "loss": 1.2899, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.8657208841966347, |
| "grad_norm": 1.3752903536629737, |
| "learning_rate": 7.87062956784694e-05, |
| "loss": 1.2922, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.8709996700758825, |
| "grad_norm": 1.1533060282069754, |
| "learning_rate": 7.86687361707675e-05, |
| "loss": 1.3035, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.8762784559551303, |
| "grad_norm": 1.547240673853204, |
| "learning_rate": 7.86306484351151e-05, |
| "loss": 1.297, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.8815572418343781, |
| "grad_norm": 1.118967928263504, |
| "learning_rate": 7.859203299180347e-05, |
| "loss": 1.304, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.8868360277136259, |
| "grad_norm": 1.5660976179186918, |
| "learning_rate": 7.855289036833259e-05, |
| "loss": 1.2896, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.8921148135928736, |
| "grad_norm": 1.06197414137172, |
| "learning_rate": 7.851322109940383e-05, |
| "loss": 1.296, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.8973935994721214, |
| "grad_norm": 1.3075400468268668, |
| "learning_rate": 7.847302572691277e-05, |
| "loss": 1.2761, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.9026723853513692, |
| "grad_norm": 1.179457262497728, |
| "learning_rate": 7.843230479994173e-05, |
| "loss": 1.2824, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.907951171230617, |
| "grad_norm": 1.3418331018558847, |
| "learning_rate": 7.839105887475228e-05, |
| "loss": 1.2932, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.9132299571098648, |
| "grad_norm": 1.4832264655838705, |
| "learning_rate": 7.834928851477764e-05, |
| "loss": 1.2885, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.9185087429891126, |
| "grad_norm": 0.8499346587639997, |
| "learning_rate": 7.830699429061498e-05, |
| "loss": 1.2915, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.9237875288683602, |
| "grad_norm": 1.171937958836628, |
| "learning_rate": 7.826417678001763e-05, |
| "loss": 1.302, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.929066314747608, |
| "grad_norm": 1.290565505153455, |
| "learning_rate": 7.822083656788722e-05, |
| "loss": 1.276, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.9343451006268558, |
| "grad_norm": 1.4411711180611884, |
| "learning_rate": 7.817697424626562e-05, |
| "loss": 1.2962, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.9396238865061036, |
| "grad_norm": 0.9520805041627028, |
| "learning_rate": 7.813259041432689e-05, |
| "loss": 1.2736, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.9449026723853514, |
| "grad_norm": 1.3339621010189933, |
| "learning_rate": 7.808768567836913e-05, |
| "loss": 1.2915, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.9501814582645991, |
| "grad_norm": 1.3347433339166948, |
| "learning_rate": 7.804226065180615e-05, |
| "loss": 1.2872, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.9554602441438469, |
| "grad_norm": 0.7733485264737051, |
| "learning_rate": 7.79963159551591e-05, |
| "loss": 1.2963, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.9607390300230947, |
| "grad_norm": 1.0314173735628651, |
| "learning_rate": 7.794985221604798e-05, |
| "loss": 1.3129, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.9660178159023425, |
| "grad_norm": 1.1663661577084024, |
| "learning_rate": 7.790287006918311e-05, |
| "loss": 1.2886, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.9712966017815903, |
| "grad_norm": 1.5581398263647552, |
| "learning_rate": 7.785537015635646e-05, |
| "loss": 1.3064, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.976575387660838, |
| "grad_norm": 0.9722670744894553, |
| "learning_rate": 7.78073531264328e-05, |
| "loss": 1.2782, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.9818541735400858, |
| "grad_norm": 1.3125002501899055, |
| "learning_rate": 7.77588196353409e-05, |
| "loss": 1.2956, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.9871329594193335, |
| "grad_norm": 0.9504483070516759, |
| "learning_rate": 7.770977034606463e-05, |
| "loss": 1.2648, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.9924117452985813, |
| "grad_norm": 1.6171235581177341, |
| "learning_rate": 7.766020592863375e-05, |
| "loss": 1.2968, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.9976905311778291, |
| "grad_norm": 0.8278118436268492, |
| "learning_rate": 7.76101270601149e-05, |
| "loss": 1.2878, |
| "step": 189 |
| }, |
| { |
| "epoch": 1.0046189376443417, |
| "grad_norm": 2.5192799011368443, |
| "learning_rate": 7.755953442460228e-05, |
| "loss": 2.3905, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.0098977235235895, |
| "grad_norm": 1.2367786491086867, |
| "learning_rate": 7.75084287132083e-05, |
| "loss": 1.2562, |
| "step": 191 |
| }, |
| { |
| "epoch": 1.0151765094028373, |
| "grad_norm": 0.9651190908311478, |
| "learning_rate": 7.745681062405421e-05, |
| "loss": 1.2579, |
| "step": 192 |
| }, |
| { |
| "epoch": 1.0204552952820851, |
| "grad_norm": 1.3333896910793919, |
| "learning_rate": 7.740468086226046e-05, |
| "loss": 1.2751, |
| "step": 193 |
| }, |
| { |
| "epoch": 1.025734081161333, |
| "grad_norm": 1.0679022617096325, |
| "learning_rate": 7.735204013993714e-05, |
| "loss": 1.2645, |
| "step": 194 |
| }, |
| { |
| "epoch": 1.0310128670405807, |
| "grad_norm": 0.9950802797680122, |
| "learning_rate": 7.729888917617424e-05, |
| "loss": 1.247, |
| "step": 195 |
| }, |
| { |
| "epoch": 1.0362916529198285, |
| "grad_norm": 1.385909740174782, |
| "learning_rate": 7.724522869703182e-05, |
| "loss": 1.2696, |
| "step": 196 |
| }, |
| { |
| "epoch": 1.0415704387990763, |
| "grad_norm": 0.9972863560186646, |
| "learning_rate": 7.719105943553007e-05, |
| "loss": 1.2422, |
| "step": 197 |
| }, |
| { |
| "epoch": 1.046849224678324, |
| "grad_norm": 1.4770115211452153, |
| "learning_rate": 7.713638213163933e-05, |
| "loss": 1.2769, |
| "step": 198 |
| }, |
| { |
| "epoch": 1.0521280105575717, |
| "grad_norm": 0.9454690125193276, |
| "learning_rate": 7.708119753226999e-05, |
| "loss": 1.2483, |
| "step": 199 |
| }, |
| { |
| "epoch": 1.0574067964368195, |
| "grad_norm": 1.0793753061289406, |
| "learning_rate": 7.702550639126226e-05, |
| "loss": 1.2523, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.0626855823160672, |
| "grad_norm": 1.0252549645817886, |
| "learning_rate": 7.696930946937584e-05, |
| "loss": 1.2709, |
| "step": 201 |
| }, |
| { |
| "epoch": 1.067964368195315, |
| "grad_norm": 1.8767373189017829, |
| "learning_rate": 7.691260753427962e-05, |
| "loss": 1.257, |
| "step": 202 |
| }, |
| { |
| "epoch": 1.0732431540745628, |
| "grad_norm": 0.9835935975146896, |
| "learning_rate": 7.68554013605411e-05, |
| "loss": 1.2792, |
| "step": 203 |
| }, |
| { |
| "epoch": 1.0785219399538106, |
| "grad_norm": 1.7392219386374146, |
| "learning_rate": 7.679769172961588e-05, |
| "loss": 1.2813, |
| "step": 204 |
| }, |
| { |
| "epoch": 1.0838007258330584, |
| "grad_norm": 1.2615250940260356, |
| "learning_rate": 7.673947942983693e-05, |
| "loss": 1.2633, |
| "step": 205 |
| }, |
| { |
| "epoch": 1.0890795117123062, |
| "grad_norm": 1.2633538446470585, |
| "learning_rate": 7.668076525640386e-05, |
| "loss": 1.3028, |
| "step": 206 |
| }, |
| { |
| "epoch": 1.094358297591554, |
| "grad_norm": 0.9941464449801785, |
| "learning_rate": 7.662155001137206e-05, |
| "loss": 1.2603, |
| "step": 207 |
| }, |
| { |
| "epoch": 1.0996370834708018, |
| "grad_norm": 1.0879031561585994, |
| "learning_rate": 7.656183450364166e-05, |
| "loss": 1.2731, |
| "step": 208 |
| }, |
| { |
| "epoch": 1.1049158693500494, |
| "grad_norm": 0.8845819323205296, |
| "learning_rate": 7.650161954894666e-05, |
| "loss": 1.2779, |
| "step": 209 |
| }, |
| { |
| "epoch": 1.1101946552292972, |
| "grad_norm": 1.154096961423528, |
| "learning_rate": 7.644090596984355e-05, |
| "loss": 1.2631, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.115473441108545, |
| "grad_norm": 1.5088917906508794, |
| "learning_rate": 7.637969459570027e-05, |
| "loss": 1.2737, |
| "step": 211 |
| }, |
| { |
| "epoch": 1.1207522269877928, |
| "grad_norm": 0.748699753662145, |
| "learning_rate": 7.63179862626848e-05, |
| "loss": 1.2541, |
| "step": 212 |
| }, |
| { |
| "epoch": 1.1260310128670405, |
| "grad_norm": 1.2438464709370354, |
| "learning_rate": 7.625578181375373e-05, |
| "loss": 1.244, |
| "step": 213 |
| }, |
| { |
| "epoch": 1.1313097987462883, |
| "grad_norm": 1.1961589311155292, |
| "learning_rate": 7.619308209864079e-05, |
| "loss": 1.2596, |
| "step": 214 |
| }, |
| { |
| "epoch": 1.1365885846255361, |
| "grad_norm": 1.1828866928066657, |
| "learning_rate": 7.612988797384516e-05, |
| "loss": 1.2737, |
| "step": 215 |
| }, |
| { |
| "epoch": 1.141867370504784, |
| "grad_norm": 0.8880753274946044, |
| "learning_rate": 7.606620030261987e-05, |
| "loss": 1.2612, |
| "step": 216 |
| }, |
| { |
| "epoch": 1.1471461563840317, |
| "grad_norm": 0.9206877300452608, |
| "learning_rate": 7.600201995495993e-05, |
| "loss": 1.2499, |
| "step": 217 |
| }, |
| { |
| "epoch": 1.1524249422632795, |
| "grad_norm": 0.6583338875617647, |
| "learning_rate": 7.593734780759052e-05, |
| "loss": 1.2486, |
| "step": 218 |
| }, |
| { |
| "epoch": 1.1577037281425273, |
| "grad_norm": 0.6183246384902424, |
| "learning_rate": 7.587218474395492e-05, |
| "loss": 1.2497, |
| "step": 219 |
| }, |
| { |
| "epoch": 1.1629825140217749, |
| "grad_norm": 0.6843337221591282, |
| "learning_rate": 7.58065316542025e-05, |
| "loss": 1.2564, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.1682612999010227, |
| "grad_norm": 0.8921369062315051, |
| "learning_rate": 7.574038943517657e-05, |
| "loss": 1.2761, |
| "step": 221 |
| }, |
| { |
| "epoch": 1.1735400857802705, |
| "grad_norm": 0.9769312253319868, |
| "learning_rate": 7.567375899040212e-05, |
| "loss": 1.2651, |
| "step": 222 |
| }, |
| { |
| "epoch": 1.1788188716595183, |
| "grad_norm": 1.2636950297671263, |
| "learning_rate": 7.560664123007341e-05, |
| "loss": 1.2429, |
| "step": 223 |
| }, |
| { |
| "epoch": 1.184097657538766, |
| "grad_norm": 1.12325478849589, |
| "learning_rate": 7.55390370710417e-05, |
| "loss": 1.2499, |
| "step": 224 |
| }, |
| { |
| "epoch": 1.1893764434180139, |
| "grad_norm": 0.6688626443177845, |
| "learning_rate": 7.547094743680248e-05, |
| "loss": 1.2629, |
| "step": 225 |
| }, |
| { |
| "epoch": 1.1946552292972616, |
| "grad_norm": 0.520812499306808, |
| "learning_rate": 7.540237325748312e-05, |
| "loss": 1.2504, |
| "step": 226 |
| }, |
| { |
| "epoch": 1.1999340151765094, |
| "grad_norm": 0.5778902503931598, |
| "learning_rate": 7.533331546982999e-05, |
| "loss": 1.2405, |
| "step": 227 |
| }, |
| { |
| "epoch": 1.2052128010557572, |
| "grad_norm": 0.8253860085731144, |
| "learning_rate": 7.526377501719568e-05, |
| "loss": 1.2453, |
| "step": 228 |
| }, |
| { |
| "epoch": 1.210491586935005, |
| "grad_norm": 0.9450184881775752, |
| "learning_rate": 7.51937528495262e-05, |
| "loss": 1.2668, |
| "step": 229 |
| }, |
| { |
| "epoch": 1.2157703728142528, |
| "grad_norm": 0.8332968398615714, |
| "learning_rate": 7.512324992334792e-05, |
| "loss": 1.2492, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.2210491586935004, |
| "grad_norm": 0.9025930411004827, |
| "learning_rate": 7.505226720175455e-05, |
| "loss": 1.2535, |
| "step": 231 |
| }, |
| { |
| "epoch": 1.2263279445727482, |
| "grad_norm": 1.4269868470907296, |
| "learning_rate": 7.498080565439395e-05, |
| "loss": 1.2497, |
| "step": 232 |
| }, |
| { |
| "epoch": 1.231606730451996, |
| "grad_norm": 0.8001271623337567, |
| "learning_rate": 7.49088662574549e-05, |
| "loss": 1.2563, |
| "step": 233 |
| }, |
| { |
| "epoch": 1.2368855163312438, |
| "grad_norm": 0.959351713231221, |
| "learning_rate": 7.483644999365379e-05, |
| "loss": 1.2635, |
| "step": 234 |
| }, |
| { |
| "epoch": 1.2421643022104916, |
| "grad_norm": 0.9377746411915463, |
| "learning_rate": 7.476355785222114e-05, |
| "loss": 1.2653, |
| "step": 235 |
| }, |
| { |
| "epoch": 1.2474430880897394, |
| "grad_norm": 1.4913928935233076, |
| "learning_rate": 7.469019082888814e-05, |
| "loss": 1.2659, |
| "step": 236 |
| }, |
| { |
| "epoch": 1.2527218739689872, |
| "grad_norm": 0.7519497431119264, |
| "learning_rate": 7.461634992587303e-05, |
| "loss": 1.2653, |
| "step": 237 |
| }, |
| { |
| "epoch": 1.258000659848235, |
| "grad_norm": 0.9328702237386081, |
| "learning_rate": 7.45420361518674e-05, |
| "loss": 1.2585, |
| "step": 238 |
| }, |
| { |
| "epoch": 1.2632794457274827, |
| "grad_norm": 1.4443370753472227, |
| "learning_rate": 7.446725052202239e-05, |
| "loss": 1.2674, |
| "step": 239 |
| }, |
| { |
| "epoch": 1.2685582316067303, |
| "grad_norm": 0.6850260445092191, |
| "learning_rate": 7.43919940579349e-05, |
| "loss": 1.2468, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.2738370174859783, |
| "grad_norm": 1.12844194486329, |
| "learning_rate": 7.431626778763355e-05, |
| "loss": 1.2592, |
| "step": 241 |
| }, |
| { |
| "epoch": 1.279115803365226, |
| "grad_norm": 1.0108743560548688, |
| "learning_rate": 7.424007274556467e-05, |
| "loss": 1.2447, |
| "step": 242 |
| }, |
| { |
| "epoch": 1.2843945892444737, |
| "grad_norm": 0.8135944524134671, |
| "learning_rate": 7.416340997257819e-05, |
| "loss": 1.2465, |
| "step": 243 |
| }, |
| { |
| "epoch": 1.2896733751237215, |
| "grad_norm": 0.911884059778029, |
| "learning_rate": 7.408628051591336e-05, |
| "loss": 1.2481, |
| "step": 244 |
| }, |
| { |
| "epoch": 1.2949521610029693, |
| "grad_norm": 0.6823887927427031, |
| "learning_rate": 7.400868542918457e-05, |
| "loss": 1.2413, |
| "step": 245 |
| }, |
| { |
| "epoch": 1.300230946882217, |
| "grad_norm": 0.6456313903186492, |
| "learning_rate": 7.393062577236679e-05, |
| "loss": 1.255, |
| "step": 246 |
| }, |
| { |
| "epoch": 1.3055097327614649, |
| "grad_norm": 0.7655605961879589, |
| "learning_rate": 7.385210261178121e-05, |
| "loss": 1.2559, |
| "step": 247 |
| }, |
| { |
| "epoch": 1.3107885186407127, |
| "grad_norm": 1.2704761768677957, |
| "learning_rate": 7.377311702008061e-05, |
| "loss": 1.275, |
| "step": 248 |
| }, |
| { |
| "epoch": 1.3160673045199605, |
| "grad_norm": 0.7188540018618917, |
| "learning_rate": 7.369367007623477e-05, |
| "loss": 1.2622, |
| "step": 249 |
| }, |
| { |
| "epoch": 1.3213460903992083, |
| "grad_norm": 0.7077473160288391, |
| "learning_rate": 7.361376286551571e-05, |
| "loss": 1.26, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.3266248762784558, |
| "grad_norm": 0.6818378006965554, |
| "learning_rate": 7.353339647948279e-05, |
| "loss": 1.2636, |
| "step": 251 |
| }, |
| { |
| "epoch": 1.3319036621577038, |
| "grad_norm": 0.9321991492837668, |
| "learning_rate": 7.345257201596789e-05, |
| "loss": 1.2506, |
| "step": 252 |
| }, |
| { |
| "epoch": 1.3371824480369514, |
| "grad_norm": 3.3053933752949005, |
| "learning_rate": 7.337129057906042e-05, |
| "loss": 1.2786, |
| "step": 253 |
| }, |
| { |
| "epoch": 1.3424612339161992, |
| "grad_norm": 2.593380141147791, |
| "learning_rate": 7.328955327909212e-05, |
| "loss": 1.2541, |
| "step": 254 |
| }, |
| { |
| "epoch": 1.347740019795447, |
| "grad_norm": 0.6054358194419176, |
| "learning_rate": 7.320736123262203e-05, |
| "loss": 1.2582, |
| "step": 255 |
| }, |
| { |
| "epoch": 1.3530188056746948, |
| "grad_norm": 1.25329796192022, |
| "learning_rate": 7.312471556242118e-05, |
| "loss": 1.2556, |
| "step": 256 |
| }, |
| { |
| "epoch": 1.3582975915539426, |
| "grad_norm": 1.0368918572602928, |
| "learning_rate": 7.304161739745724e-05, |
| "loss": 1.2481, |
| "step": 257 |
| }, |
| { |
| "epoch": 1.3635763774331904, |
| "grad_norm": 1.3001573393775, |
| "learning_rate": 7.295806787287909e-05, |
| "loss": 1.2691, |
| "step": 258 |
| }, |
| { |
| "epoch": 1.3688551633124382, |
| "grad_norm": 18.476131898389347, |
| "learning_rate": 7.287406813000138e-05, |
| "loss": 1.2768, |
| "step": 259 |
| }, |
| { |
| "epoch": 1.374133949191686, |
| "grad_norm": 1.6192604738085483, |
| "learning_rate": 7.278961931628886e-05, |
| "loss": 1.2839, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.3794127350709338, |
| "grad_norm": 1.7679541160592658, |
| "learning_rate": 7.270472258534072e-05, |
| "loss": 1.2703, |
| "step": 261 |
| }, |
| { |
| "epoch": 1.3846915209501813, |
| "grad_norm": 1.8000057405797747, |
| "learning_rate": 7.261937909687494e-05, |
| "loss": 1.269, |
| "step": 262 |
| }, |
| { |
| "epoch": 1.3899703068294293, |
| "grad_norm": 1.4623812962670848, |
| "learning_rate": 7.253359001671224e-05, |
| "loss": 1.2548, |
| "step": 263 |
| }, |
| { |
| "epoch": 1.395249092708677, |
| "grad_norm": 0.9642896538975226, |
| "learning_rate": 7.244735651676035e-05, |
| "loss": 1.2513, |
| "step": 264 |
| }, |
| { |
| "epoch": 1.4005278785879247, |
| "grad_norm": 1.8215181671865868, |
| "learning_rate": 7.236067977499791e-05, |
| "loss": 1.2683, |
| "step": 265 |
| }, |
| { |
| "epoch": 1.4058066644671725, |
| "grad_norm": 1.8947839088124854, |
| "learning_rate": 7.227356097545835e-05, |
| "loss": 1.2688, |
| "step": 266 |
| }, |
| { |
| "epoch": 1.4110854503464203, |
| "grad_norm": 1.257578409586287, |
| "learning_rate": 7.218600130821385e-05, |
| "loss": 1.2656, |
| "step": 267 |
| }, |
| { |
| "epoch": 1.416364236225668, |
| "grad_norm": 1.3538243305929665, |
| "learning_rate": 7.209800196935888e-05, |
| "loss": 1.2623, |
| "step": 268 |
| }, |
| { |
| "epoch": 1.4216430221049159, |
| "grad_norm": 0.5092846767556427, |
| "learning_rate": 7.200956416099405e-05, |
| "loss": 1.2606, |
| "step": 269 |
| }, |
| { |
| "epoch": 1.4269218079841637, |
| "grad_norm": 1.3171858475761182, |
| "learning_rate": 7.192068909120959e-05, |
| "loss": 1.246, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.4322005938634115, |
| "grad_norm": 0.9278498418105572, |
| "learning_rate": 7.183137797406886e-05, |
| "loss": 1.262, |
| "step": 271 |
| }, |
| { |
| "epoch": 1.4374793797426593, |
| "grad_norm": 0.7095356686741614, |
| "learning_rate": 7.174163202959178e-05, |
| "loss": 1.265, |
| "step": 272 |
| }, |
| { |
| "epoch": 1.4427581656219068, |
| "grad_norm": 0.5432578683940454, |
| "learning_rate": 7.165145248373814e-05, |
| "loss": 1.2641, |
| "step": 273 |
| }, |
| { |
| "epoch": 1.4480369515011549, |
| "grad_norm": 0.535484849080536, |
| "learning_rate": 7.15608405683909e-05, |
| "loss": 1.2645, |
| "step": 274 |
| }, |
| { |
| "epoch": 1.4533157373804024, |
| "grad_norm": 3.517558722980717, |
| "learning_rate": 7.146979752133934e-05, |
| "loss": 1.3068, |
| "step": 275 |
| }, |
| { |
| "epoch": 1.4585945232596502, |
| "grad_norm": 1.0961722347293192, |
| "learning_rate": 7.137832458626209e-05, |
| "loss": 1.2759, |
| "step": 276 |
| }, |
| { |
| "epoch": 1.463873309138898, |
| "grad_norm": 1.4589790859163712, |
| "learning_rate": 7.128642301271026e-05, |
| "loss": 1.2604, |
| "step": 277 |
| }, |
| { |
| "epoch": 1.4691520950181458, |
| "grad_norm": 0.8271599352115797, |
| "learning_rate": 7.119409405609025e-05, |
| "loss": 1.2574, |
| "step": 278 |
| }, |
| { |
| "epoch": 1.4744308808973936, |
| "grad_norm": 1.6144482683602592, |
| "learning_rate": 7.110133897764672e-05, |
| "loss": 1.2567, |
| "step": 279 |
| }, |
| { |
| "epoch": 1.4797096667766414, |
| "grad_norm": 1.0778238677287892, |
| "learning_rate": 7.10081590444452e-05, |
| "loss": 1.2633, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.4849884526558892, |
| "grad_norm": 1.574048217009984, |
| "learning_rate": 7.091455552935499e-05, |
| "loss": 1.2721, |
| "step": 281 |
| }, |
| { |
| "epoch": 1.490267238535137, |
| "grad_norm": 1.4504862826022984, |
| "learning_rate": 7.082052971103158e-05, |
| "loss": 1.2527, |
| "step": 282 |
| }, |
| { |
| "epoch": 1.4955460244143848, |
| "grad_norm": 0.9050091986245387, |
| "learning_rate": 7.07260828738993e-05, |
| "loss": 1.2566, |
| "step": 283 |
| }, |
| { |
| "epoch": 1.5008248102936323, |
| "grad_norm": 1.3808682239204024, |
| "learning_rate": 7.063121630813374e-05, |
| "loss": 1.2662, |
| "step": 284 |
| }, |
| { |
| "epoch": 1.5061035961728804, |
| "grad_norm": 0.7200106206887804, |
| "learning_rate": 7.053593130964412e-05, |
| "loss": 1.2573, |
| "step": 285 |
| }, |
| { |
| "epoch": 1.511382382052128, |
| "grad_norm": 1.2302845526700892, |
| "learning_rate": 7.044022918005559e-05, |
| "loss": 1.2446, |
| "step": 286 |
| }, |
| { |
| "epoch": 1.5166611679313757, |
| "grad_norm": 0.8014756997371825, |
| "learning_rate": 7.034411122669142e-05, |
| "loss": 1.2665, |
| "step": 287 |
| }, |
| { |
| "epoch": 1.5219399538106235, |
| "grad_norm": 0.9094547926910241, |
| "learning_rate": 7.024757876255525e-05, |
| "loss": 1.2642, |
| "step": 288 |
| }, |
| { |
| "epoch": 1.5272187396898713, |
| "grad_norm": 0.7607544679621796, |
| "learning_rate": 7.015063310631299e-05, |
| "loss": 1.2547, |
| "step": 289 |
| }, |
| { |
| "epoch": 1.5324975255691191, |
| "grad_norm": 0.7181185070974516, |
| "learning_rate": 7.005327558227494e-05, |
| "loss": 1.2583, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.537776311448367, |
| "grad_norm": 0.5716191527660932, |
| "learning_rate": 6.995550752037766e-05, |
| "loss": 1.2634, |
| "step": 291 |
| }, |
| { |
| "epoch": 1.5430550973276147, |
| "grad_norm": 0.5794658962331969, |
| "learning_rate": 6.985733025616576e-05, |
| "loss": 1.2594, |
| "step": 292 |
| }, |
| { |
| "epoch": 1.5483338832068623, |
| "grad_norm": 0.5823905292282883, |
| "learning_rate": 6.975874513077374e-05, |
| "loss": 1.2478, |
| "step": 293 |
| }, |
| { |
| "epoch": 1.5536126690861103, |
| "grad_norm": 0.5780008289508327, |
| "learning_rate": 6.965975349090757e-05, |
| "loss": 1.2501, |
| "step": 294 |
| }, |
| { |
| "epoch": 1.5588914549653579, |
| "grad_norm": 0.37458218109688246, |
| "learning_rate": 6.956035668882637e-05, |
| "loss": 1.2515, |
| "step": 295 |
| }, |
| { |
| "epoch": 1.5641702408446059, |
| "grad_norm": 0.4630840089540465, |
| "learning_rate": 6.946055608232392e-05, |
| "loss": 1.2555, |
| "step": 296 |
| }, |
| { |
| "epoch": 1.5694490267238534, |
| "grad_norm": 0.46579508102759115, |
| "learning_rate": 6.936035303471008e-05, |
| "loss": 1.2302, |
| "step": 297 |
| }, |
| { |
| "epoch": 1.5747278126031012, |
| "grad_norm": 0.440147654088893, |
| "learning_rate": 6.925974891479222e-05, |
| "loss": 1.2397, |
| "step": 298 |
| }, |
| { |
| "epoch": 1.580006598482349, |
| "grad_norm": 0.3789595605856227, |
| "learning_rate": 6.915874509685646e-05, |
| "loss": 1.2367, |
| "step": 299 |
| }, |
| { |
| "epoch": 1.5852853843615968, |
| "grad_norm": 0.44066314676421686, |
| "learning_rate": 6.905734296064897e-05, |
| "loss": 1.2532, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.5905641702408446, |
| "grad_norm": 0.4786748650063442, |
| "learning_rate": 6.895554389135705e-05, |
| "loss": 1.2395, |
| "step": 301 |
| }, |
| { |
| "epoch": 1.5958429561200924, |
| "grad_norm": 0.3993789516492546, |
| "learning_rate": 6.885334927959022e-05, |
| "loss": 1.2475, |
| "step": 302 |
| }, |
| { |
| "epoch": 1.6011217419993402, |
| "grad_norm": 0.3734818160752607, |
| "learning_rate": 6.875076052136132e-05, |
| "loss": 1.2484, |
| "step": 303 |
| }, |
| { |
| "epoch": 1.6064005278785878, |
| "grad_norm": 0.3352937985165155, |
| "learning_rate": 6.864777901806728e-05, |
| "loss": 1.2369, |
| "step": 304 |
| }, |
| { |
| "epoch": 1.6116793137578358, |
| "grad_norm": 0.4671437568457166, |
| "learning_rate": 6.85444061764701e-05, |
| "loss": 1.2376, |
| "step": 305 |
| }, |
| { |
| "epoch": 1.6169580996370834, |
| "grad_norm": 0.3667230797477068, |
| "learning_rate": 6.844064340867759e-05, |
| "loss": 1.2487, |
| "step": 306 |
| }, |
| { |
| "epoch": 1.6222368855163314, |
| "grad_norm": 0.3770360726749058, |
| "learning_rate": 6.833649213212409e-05, |
| "loss": 1.2456, |
| "step": 307 |
| }, |
| { |
| "epoch": 1.627515671395579, |
| "grad_norm": 0.35396903252590395, |
| "learning_rate": 6.823195376955108e-05, |
| "loss": 1.2528, |
| "step": 308 |
| }, |
| { |
| "epoch": 1.6327944572748267, |
| "grad_norm": 0.29380579788484945, |
| "learning_rate": 6.812702974898779e-05, |
| "loss": 1.2441, |
| "step": 309 |
| }, |
| { |
| "epoch": 1.6380732431540745, |
| "grad_norm": 0.43788577617277263, |
| "learning_rate": 6.802172150373164e-05, |
| "loss": 1.239, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.6433520290333223, |
| "grad_norm": 0.32377479140969234, |
| "learning_rate": 6.791603047232871e-05, |
| "loss": 1.233, |
| "step": 311 |
| }, |
| { |
| "epoch": 1.6486308149125701, |
| "grad_norm": 1.7706277137744126, |
| "learning_rate": 6.780995809855405e-05, |
| "loss": 1.2474, |
| "step": 312 |
| }, |
| { |
| "epoch": 1.653909600791818, |
| "grad_norm": 8.656382792450561, |
| "learning_rate": 6.7703505831392e-05, |
| "loss": 1.265, |
| "step": 313 |
| }, |
| { |
| "epoch": 1.6591883866710657, |
| "grad_norm": 0.6523846362961528, |
| "learning_rate": 6.759667512501637e-05, |
| "loss": 1.2573, |
| "step": 314 |
| }, |
| { |
| "epoch": 1.6644671725503133, |
| "grad_norm": 0.5330875065870518, |
| "learning_rate": 6.748946743877052e-05, |
| "loss": 1.2488, |
| "step": 315 |
| }, |
| { |
| "epoch": 1.6697459584295613, |
| "grad_norm": 0.7219628521797609, |
| "learning_rate": 6.738188423714756e-05, |
| "loss": 1.2602, |
| "step": 316 |
| }, |
| { |
| "epoch": 1.6750247443088089, |
| "grad_norm": 0.5201448299460263, |
| "learning_rate": 6.727392698977021e-05, |
| "loss": 1.2396, |
| "step": 317 |
| }, |
| { |
| "epoch": 1.680303530188057, |
| "grad_norm": 0.44274195631429936, |
| "learning_rate": 6.716559717137084e-05, |
| "loss": 1.2406, |
| "step": 318 |
| }, |
| { |
| "epoch": 1.6855823160673045, |
| "grad_norm": 0.5798477856934268, |
| "learning_rate": 6.70568962617712e-05, |
| "loss": 1.2414, |
| "step": 319 |
| }, |
| { |
| "epoch": 1.6908611019465523, |
| "grad_norm": 0.7807750714971328, |
| "learning_rate": 6.69478257458623e-05, |
| "loss": 1.2413, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.6961398878258, |
| "grad_norm": 1.1559025640488658, |
| "learning_rate": 6.683838711358411e-05, |
| "loss": 1.2841, |
| "step": 321 |
| }, |
| { |
| "epoch": 1.7014186737050478, |
| "grad_norm": 0.5759215742814919, |
| "learning_rate": 6.672858185990516e-05, |
| "loss": 1.2406, |
| "step": 322 |
| }, |
| { |
| "epoch": 1.7066974595842956, |
| "grad_norm": 0.4726737719351046, |
| "learning_rate": 6.661841148480218e-05, |
| "loss": 1.2484, |
| "step": 323 |
| }, |
| { |
| "epoch": 1.7119762454635434, |
| "grad_norm": 0.5006895849458483, |
| "learning_rate": 6.650787749323959e-05, |
| "loss": 1.2559, |
| "step": 324 |
| }, |
| { |
| "epoch": 1.7172550313427912, |
| "grad_norm": 0.625628745360177, |
| "learning_rate": 6.639698139514892e-05, |
| "loss": 1.2747, |
| "step": 325 |
| }, |
| { |
| "epoch": 1.7225338172220388, |
| "grad_norm": 0.6550942178842886, |
| "learning_rate": 6.628572470540814e-05, |
| "loss": 1.242, |
| "step": 326 |
| }, |
| { |
| "epoch": 1.7278126031012868, |
| "grad_norm": 0.5870546197249171, |
| "learning_rate": 6.617410894382113e-05, |
| "loss": 1.2453, |
| "step": 327 |
| }, |
| { |
| "epoch": 1.7330913889805344, |
| "grad_norm": 0.41134148223364553, |
| "learning_rate": 6.606213563509675e-05, |
| "loss": 1.2478, |
| "step": 328 |
| }, |
| { |
| "epoch": 1.7383701748597824, |
| "grad_norm": 0.46444131340246914, |
| "learning_rate": 6.594980630882807e-05, |
| "loss": 1.2425, |
| "step": 329 |
| }, |
| { |
| "epoch": 1.74364896073903, |
| "grad_norm": 0.5911200501832204, |
| "learning_rate": 6.58371224994715e-05, |
| "loss": 1.2458, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.7489277466182778, |
| "grad_norm": 0.7085536258057434, |
| "learning_rate": 6.57240857463258e-05, |
| "loss": 1.244, |
| "step": 331 |
| }, |
| { |
| "epoch": 1.7542065324975256, |
| "grad_norm": 0.5992624201421093, |
| "learning_rate": 6.561069759351105e-05, |
| "loss": 1.2368, |
| "step": 332 |
| }, |
| { |
| "epoch": 1.7594853183767734, |
| "grad_norm": 0.3551137979944097, |
| "learning_rate": 6.54969595899476e-05, |
| "loss": 1.2416, |
| "step": 333 |
| }, |
| { |
| "epoch": 1.7647641042560211, |
| "grad_norm": 0.4082908080936244, |
| "learning_rate": 6.538287328933484e-05, |
| "loss": 1.253, |
| "step": 334 |
| }, |
| { |
| "epoch": 1.770042890135269, |
| "grad_norm": 0.5365388835965731, |
| "learning_rate": 6.526844025013004e-05, |
| "loss": 1.2254, |
| "step": 335 |
| }, |
| { |
| "epoch": 1.7753216760145167, |
| "grad_norm": 0.5024584986563364, |
| "learning_rate": 6.515366203552704e-05, |
| "loss": 1.2456, |
| "step": 336 |
| }, |
| { |
| "epoch": 1.7806004618937643, |
| "grad_norm": 0.44456650959553395, |
| "learning_rate": 6.503854021343487e-05, |
| "loss": 1.2196, |
| "step": 337 |
| }, |
| { |
| "epoch": 1.7858792477730123, |
| "grad_norm": 0.4783375231088745, |
| "learning_rate": 6.492307635645637e-05, |
| "loss": 1.2323, |
| "step": 338 |
| }, |
| { |
| "epoch": 1.79115803365226, |
| "grad_norm": 0.48814392623760583, |
| "learning_rate": 6.480727204186669e-05, |
| "loss": 1.2417, |
| "step": 339 |
| }, |
| { |
| "epoch": 1.796436819531508, |
| "grad_norm": 0.44118266288578545, |
| "learning_rate": 6.469112885159172e-05, |
| "loss": 1.2516, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.8017156054107555, |
| "grad_norm": 0.9971330600754439, |
| "learning_rate": 6.457464837218656e-05, |
| "loss": 1.2524, |
| "step": 341 |
| }, |
| { |
| "epoch": 1.8069943912900033, |
| "grad_norm": 0.3714817216276515, |
| "learning_rate": 6.445783219481375e-05, |
| "loss": 1.2477, |
| "step": 342 |
| }, |
| { |
| "epoch": 1.812273177169251, |
| "grad_norm": 0.8549026356260457, |
| "learning_rate": 6.434068191522158e-05, |
| "loss": 1.2591, |
| "step": 343 |
| }, |
| { |
| "epoch": 1.8175519630484989, |
| "grad_norm": 0.3340586702181202, |
| "learning_rate": 6.42231991337223e-05, |
| "loss": 1.2372, |
| "step": 344 |
| }, |
| { |
| "epoch": 1.8228307489277467, |
| "grad_norm": 0.43304400565838075, |
| "learning_rate": 6.410538545517026e-05, |
| "loss": 1.243, |
| "step": 345 |
| }, |
| { |
| "epoch": 1.8281095348069942, |
| "grad_norm": 0.45719292572074793, |
| "learning_rate": 6.398724248893995e-05, |
| "loss": 1.2504, |
| "step": 346 |
| }, |
| { |
| "epoch": 1.8333883206862422, |
| "grad_norm": 0.599990078135568, |
| "learning_rate": 6.386877184890404e-05, |
| "loss": 1.2438, |
| "step": 347 |
| }, |
| { |
| "epoch": 1.8386671065654898, |
| "grad_norm": 0.6906831942159901, |
| "learning_rate": 6.374997515341136e-05, |
| "loss": 1.2477, |
| "step": 348 |
| }, |
| { |
| "epoch": 1.8439458924447378, |
| "grad_norm": 0.8731330067427534, |
| "learning_rate": 6.363085402526477e-05, |
| "loss": 1.2674, |
| "step": 349 |
| }, |
| { |
| "epoch": 1.8492246783239854, |
| "grad_norm": 0.9022754372270435, |
| "learning_rate": 6.351141009169893e-05, |
| "loss": 1.382, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.8545034642032334, |
| "grad_norm": 1.2486459290282084, |
| "learning_rate": 6.33916449843582e-05, |
| "loss": 1.2545, |
| "step": 351 |
| }, |
| { |
| "epoch": 1.859782250082481, |
| "grad_norm": 1.6920822976080476, |
| "learning_rate": 6.327156033927426e-05, |
| "loss": 1.2658, |
| "step": 352 |
| }, |
| { |
| "epoch": 1.8650610359617288, |
| "grad_norm": 0.7097169693478338, |
| "learning_rate": 6.315115779684375e-05, |
| "loss": 1.236, |
| "step": 353 |
| }, |
| { |
| "epoch": 1.8703398218409766, |
| "grad_norm": 1.6978575452018114, |
| "learning_rate": 6.303043900180595e-05, |
| "loss": 1.2585, |
| "step": 354 |
| }, |
| { |
| "epoch": 1.8756186077202244, |
| "grad_norm": 0.7845252429794936, |
| "learning_rate": 6.290940560322022e-05, |
| "loss": 1.2517, |
| "step": 355 |
| }, |
| { |
| "epoch": 1.8808973935994722, |
| "grad_norm": 1.3607837437906547, |
| "learning_rate": 6.278805925444351e-05, |
| "loss": 1.256, |
| "step": 356 |
| }, |
| { |
| "epoch": 1.8861761794787197, |
| "grad_norm": 0.6792465095925219, |
| "learning_rate": 6.26664016131078e-05, |
| "loss": 1.2673, |
| "step": 357 |
| }, |
| { |
| "epoch": 1.8914549653579678, |
| "grad_norm": 1.1323653230234954, |
| "learning_rate": 6.25444343410974e-05, |
| "loss": 1.2587, |
| "step": 358 |
| }, |
| { |
| "epoch": 1.8967337512372153, |
| "grad_norm": 0.8336475113673848, |
| "learning_rate": 6.242215910452631e-05, |
| "loss": 1.2487, |
| "step": 359 |
| }, |
| { |
| "epoch": 1.9020125371164633, |
| "grad_norm": 1.0432671496997088, |
| "learning_rate": 6.229957757371542e-05, |
| "loss": 1.2685, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.907291322995711, |
| "grad_norm": 1.282931842988959, |
| "learning_rate": 6.217669142316969e-05, |
| "loss": 1.2437, |
| "step": 361 |
| }, |
| { |
| "epoch": 1.9125701088749587, |
| "grad_norm": 0.5494429533946689, |
| "learning_rate": 6.205350233155528e-05, |
| "loss": 1.2385, |
| "step": 362 |
| }, |
| { |
| "epoch": 1.9178488947542065, |
| "grad_norm": 0.8430421321010195, |
| "learning_rate": 6.193001198167666e-05, |
| "loss": 1.2516, |
| "step": 363 |
| }, |
| { |
| "epoch": 1.9231276806334543, |
| "grad_norm": 0.6400343107166205, |
| "learning_rate": 6.180622206045357e-05, |
| "loss": 1.2514, |
| "step": 364 |
| }, |
| { |
| "epoch": 1.928406466512702, |
| "grad_norm": 0.683698880506509, |
| "learning_rate": 6.168213425889798e-05, |
| "loss": 1.2298, |
| "step": 365 |
| }, |
| { |
| "epoch": 1.9336852523919499, |
| "grad_norm": 0.7501472371069945, |
| "learning_rate": 6.155775027209104e-05, |
| "loss": 1.2631, |
| "step": 366 |
| }, |
| { |
| "epoch": 1.9389640382711977, |
| "grad_norm": 0.5723685480165793, |
| "learning_rate": 6.143307179915987e-05, |
| "loss": 1.2524, |
| "step": 367 |
| }, |
| { |
| "epoch": 1.9442428241504452, |
| "grad_norm": 0.5386840918093146, |
| "learning_rate": 6.130810054325438e-05, |
| "loss": 1.2604, |
| "step": 368 |
| }, |
| { |
| "epoch": 1.9495216100296933, |
| "grad_norm": 0.6074943796692782, |
| "learning_rate": 6.118283821152396e-05, |
| "loss": 1.2331, |
| "step": 369 |
| }, |
| { |
| "epoch": 1.9548003959089408, |
| "grad_norm": 0.7838692807153693, |
| "learning_rate": 6.105728651509424e-05, |
| "loss": 1.2489, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.9600791817881889, |
| "grad_norm": 0.4905147139769546, |
| "learning_rate": 6.0931447169043645e-05, |
| "loss": 1.2187, |
| "step": 371 |
| }, |
| { |
| "epoch": 1.9653579676674364, |
| "grad_norm": 0.49373919780087727, |
| "learning_rate": 6.080532189238e-05, |
| "loss": 1.25, |
| "step": 372 |
| }, |
| { |
| "epoch": 1.9706367535466842, |
| "grad_norm": 0.5601632476322846, |
| "learning_rate": 6.067891240801702e-05, |
| "loss": 1.2433, |
| "step": 373 |
| }, |
| { |
| "epoch": 1.975915539425932, |
| "grad_norm": 0.5235707794626788, |
| "learning_rate": 6.0552220442750824e-05, |
| "loss": 1.2384, |
| "step": 374 |
| }, |
| { |
| "epoch": 1.9811943253051798, |
| "grad_norm": 0.5725817706676869, |
| "learning_rate": 6.042524772723628e-05, |
| "loss": 1.232, |
| "step": 375 |
| }, |
| { |
| "epoch": 1.9864731111844276, |
| "grad_norm": 0.3333019113840953, |
| "learning_rate": 6.0297995995963434e-05, |
| "loss": 1.2347, |
| "step": 376 |
| }, |
| { |
| "epoch": 1.9917518970636754, |
| "grad_norm": 0.4325220309561531, |
| "learning_rate": 6.017046698723374e-05, |
| "loss": 1.2351, |
| "step": 377 |
| }, |
| { |
| "epoch": 1.9970306829429232, |
| "grad_norm": 0.5371606229052233, |
| "learning_rate": 6.0042662443136396e-05, |
| "loss": 1.2433, |
| "step": 378 |
| }, |
| { |
| "epoch": 2.003959089409436, |
| "grad_norm": 0.9139703688362438, |
| "learning_rate": 5.991458410952449e-05, |
| "loss": 2.3033, |
| "step": 379 |
| }, |
| { |
| "epoch": 2.0092378752886835, |
| "grad_norm": 1.3521600980278237, |
| "learning_rate": 5.978623373599117e-05, |
| "loss": 1.2172, |
| "step": 380 |
| }, |
| { |
| "epoch": 2.0145166611679315, |
| "grad_norm": 0.7161418602043893, |
| "learning_rate": 5.965761307584571e-05, |
| "loss": 1.2157, |
| "step": 381 |
| }, |
| { |
| "epoch": 2.019795447047179, |
| "grad_norm": 0.7509184595632193, |
| "learning_rate": 5.9528723886089624e-05, |
| "loss": 1.1977, |
| "step": 382 |
| }, |
| { |
| "epoch": 2.025074232926427, |
| "grad_norm": 1.0599214341732555, |
| "learning_rate": 5.939956792739264e-05, |
| "loss": 1.2177, |
| "step": 383 |
| }, |
| { |
| "epoch": 2.0303530188056746, |
| "grad_norm": 0.8980168138332725, |
| "learning_rate": 5.9270146964068614e-05, |
| "loss": 1.2153, |
| "step": 384 |
| }, |
| { |
| "epoch": 2.0356318046849227, |
| "grad_norm": 0.7180380231092935, |
| "learning_rate": 5.9140462764051464e-05, |
| "loss": 1.2187, |
| "step": 385 |
| }, |
| { |
| "epoch": 2.0409105905641702, |
| "grad_norm": 0.6531462330254099, |
| "learning_rate": 5.901051709887101e-05, |
| "loss": 1.2251, |
| "step": 386 |
| }, |
| { |
| "epoch": 2.046189376443418, |
| "grad_norm": 0.7734045217497407, |
| "learning_rate": 5.888031174362878e-05, |
| "loss": 1.21, |
| "step": 387 |
| }, |
| { |
| "epoch": 2.051468162322666, |
| "grad_norm": 0.8550587940395448, |
| "learning_rate": 5.874984847697372e-05, |
| "loss": 1.2012, |
| "step": 388 |
| }, |
| { |
| "epoch": 2.0567469482019134, |
| "grad_norm": 1.2780965188327706, |
| "learning_rate": 5.8619129081077996e-05, |
| "loss": 1.2518, |
| "step": 389 |
| }, |
| { |
| "epoch": 2.0620257340811614, |
| "grad_norm": 0.8321341577982173, |
| "learning_rate": 5.848815534161254e-05, |
| "loss": 1.2093, |
| "step": 390 |
| }, |
| { |
| "epoch": 2.067304519960409, |
| "grad_norm": 0.9615664316853234, |
| "learning_rate": 5.83569290477227e-05, |
| "loss": 1.2072, |
| "step": 391 |
| }, |
| { |
| "epoch": 2.072583305839657, |
| "grad_norm": 0.5979739722697388, |
| "learning_rate": 5.822545199200383e-05, |
| "loss": 1.2336, |
| "step": 392 |
| }, |
| { |
| "epoch": 2.0778620917189046, |
| "grad_norm": 0.8164608679169012, |
| "learning_rate": 5.8093725970476755e-05, |
| "loss": 1.1964, |
| "step": 393 |
| }, |
| { |
| "epoch": 2.0831408775981526, |
| "grad_norm": 3.085844123659633, |
| "learning_rate": 5.796175278256328e-05, |
| "loss": 1.2152, |
| "step": 394 |
| }, |
| { |
| "epoch": 2.0884196634774, |
| "grad_norm": 2.7108585877216593, |
| "learning_rate": 5.782953423106154e-05, |
| "loss": 1.2449, |
| "step": 395 |
| }, |
| { |
| "epoch": 2.093698449356648, |
| "grad_norm": 0.8230346043328058, |
| "learning_rate": 5.769707212212147e-05, |
| "loss": 1.2061, |
| "step": 396 |
| }, |
| { |
| "epoch": 2.0989772352358957, |
| "grad_norm": 1.227853136572132, |
| "learning_rate": 5.756436826522005e-05, |
| "loss": 1.2376, |
| "step": 397 |
| }, |
| { |
| "epoch": 2.1042560211151433, |
| "grad_norm": 0.753658806600911, |
| "learning_rate": 5.743142447313664e-05, |
| "loss": 1.2157, |
| "step": 398 |
| }, |
| { |
| "epoch": 2.1095348069943913, |
| "grad_norm": 1.015129720892302, |
| "learning_rate": 5.729824256192816e-05, |
| "loss": 1.2119, |
| "step": 399 |
| }, |
| { |
| "epoch": 2.114813592873639, |
| "grad_norm": 0.5801446237487646, |
| "learning_rate": 5.716482435090436e-05, |
| "loss": 1.2186, |
| "step": 400 |
| }, |
| { |
| "epoch": 2.120092378752887, |
| "grad_norm": 1.017251986410329, |
| "learning_rate": 5.703117166260291e-05, |
| "loss": 1.2263, |
| "step": 401 |
| }, |
| { |
| "epoch": 2.1253711646321345, |
| "grad_norm": 0.4873010162044414, |
| "learning_rate": 5.68972863227645e-05, |
| "loss": 1.2196, |
| "step": 402 |
| }, |
| { |
| "epoch": 2.1306499505113825, |
| "grad_norm": 0.7310559896138776, |
| "learning_rate": 5.676317016030795e-05, |
| "loss": 1.2177, |
| "step": 403 |
| }, |
| { |
| "epoch": 2.13592873639063, |
| "grad_norm": 1.1699510054393627, |
| "learning_rate": 5.662882500730517e-05, |
| "loss": 1.3042, |
| "step": 404 |
| }, |
| { |
| "epoch": 2.141207522269878, |
| "grad_norm": 0.676699808456523, |
| "learning_rate": 5.6494252698956146e-05, |
| "loss": 1.2048, |
| "step": 405 |
| }, |
| { |
| "epoch": 2.1464863081491257, |
| "grad_norm": 0.6473663142899424, |
| "learning_rate": 5.6359455073563936e-05, |
| "loss": 1.2161, |
| "step": 406 |
| }, |
| { |
| "epoch": 2.1517650940283737, |
| "grad_norm": 0.4172949088763905, |
| "learning_rate": 5.6224433972509433e-05, |
| "loss": 1.2192, |
| "step": 407 |
| }, |
| { |
| "epoch": 2.1570438799076213, |
| "grad_norm": 1.1239469184334865, |
| "learning_rate": 5.608919124022636e-05, |
| "loss": 1.2329, |
| "step": 408 |
| }, |
| { |
| "epoch": 2.162322665786869, |
| "grad_norm": 0.4587267505463191, |
| "learning_rate": 5.595372872417593e-05, |
| "loss": 1.2217, |
| "step": 409 |
| }, |
| { |
| "epoch": 2.167601451666117, |
| "grad_norm": 5.412203166967258, |
| "learning_rate": 5.58180482748217e-05, |
| "loss": 1.2201, |
| "step": 410 |
| }, |
| { |
| "epoch": 2.1728802375453644, |
| "grad_norm": 18.213451676887374, |
| "learning_rate": 5.568215174560431e-05, |
| "loss": 1.351, |
| "step": 411 |
| }, |
| { |
| "epoch": 2.1781590234246124, |
| "grad_norm": 13.536878824327069, |
| "learning_rate": 5.554604099291604e-05, |
| "loss": 1.2383, |
| "step": 412 |
| }, |
| { |
| "epoch": 2.18343780930386, |
| "grad_norm": 6.282206839205327, |
| "learning_rate": 5.5409717876075605e-05, |
| "loss": 1.2588, |
| "step": 413 |
| }, |
| { |
| "epoch": 2.188716595183108, |
| "grad_norm": 1.2831210440571694, |
| "learning_rate": 5.527318425730268e-05, |
| "loss": 1.238, |
| "step": 414 |
| }, |
| { |
| "epoch": 2.1939953810623556, |
| "grad_norm": 3.739838095929456, |
| "learning_rate": 5.513644200169242e-05, |
| "loss": 1.229, |
| "step": 415 |
| }, |
| { |
| "epoch": 2.1992741669416036, |
| "grad_norm": 1.899326073319162, |
| "learning_rate": 5.499949297719006e-05, |
| "loss": 1.2544, |
| "step": 416 |
| }, |
| { |
| "epoch": 2.204552952820851, |
| "grad_norm": 4.148642778578833, |
| "learning_rate": 5.486233905456538e-05, |
| "loss": 1.2406, |
| "step": 417 |
| }, |
| { |
| "epoch": 2.2098317387000987, |
| "grad_norm": 14.647605474491863, |
| "learning_rate": 5.472498210738713e-05, |
| "loss": 1.2508, |
| "step": 418 |
| }, |
| { |
| "epoch": 2.2151105245793468, |
| "grad_norm": 2.4488321875685157, |
| "learning_rate": 5.458742401199741e-05, |
| "loss": 1.2625, |
| "step": 419 |
| }, |
| { |
| "epoch": 2.2203893104585943, |
| "grad_norm": 1.2822438195586405, |
| "learning_rate": 5.444966664748613e-05, |
| "loss": 1.2378, |
| "step": 420 |
| }, |
| { |
| "epoch": 2.2256680963378423, |
| "grad_norm": 1.203185114838534, |
| "learning_rate": 5.431171189566522e-05, |
| "loss": 1.238, |
| "step": 421 |
| }, |
| { |
| "epoch": 2.23094688221709, |
| "grad_norm": 1.3127305879869517, |
| "learning_rate": 5.417356164104306e-05, |
| "loss": 1.2513, |
| "step": 422 |
| }, |
| { |
| "epoch": 2.236225668096338, |
| "grad_norm": 0.8297161506665818, |
| "learning_rate": 5.40352177707986e-05, |
| "loss": 1.2286, |
| "step": 423 |
| }, |
| { |
| "epoch": 2.2415044539755855, |
| "grad_norm": 0.7963612920042281, |
| "learning_rate": 5.389668217475566e-05, |
| "loss": 1.2333, |
| "step": 424 |
| }, |
| { |
| "epoch": 2.2467832398548335, |
| "grad_norm": 0.8841598541409225, |
| "learning_rate": 5.3757956745357134e-05, |
| "loss": 1.2401, |
| "step": 425 |
| }, |
| { |
| "epoch": 2.252062025734081, |
| "grad_norm": 0.6425481585506017, |
| "learning_rate": 5.3619043377639055e-05, |
| "loss": 1.2298, |
| "step": 426 |
| }, |
| { |
| "epoch": 2.257340811613329, |
| "grad_norm": 1.4204595652997734, |
| "learning_rate": 5.347994396920479e-05, |
| "loss": 1.2209, |
| "step": 427 |
| }, |
| { |
| "epoch": 2.2626195974925767, |
| "grad_norm": 0.6873192257170075, |
| "learning_rate": 5.334066042019907e-05, |
| "loss": 1.2254, |
| "step": 428 |
| }, |
| { |
| "epoch": 2.2678983833718247, |
| "grad_norm": 1.0883491912415153, |
| "learning_rate": 5.320119463328207e-05, |
| "loss": 1.2169, |
| "step": 429 |
| }, |
| { |
| "epoch": 2.2731771692510723, |
| "grad_norm": 0.6756173808771652, |
| "learning_rate": 5.306154851360333e-05, |
| "loss": 1.2371, |
| "step": 430 |
| }, |
| { |
| "epoch": 2.27845595513032, |
| "grad_norm": 0.9877713663217423, |
| "learning_rate": 5.2921723968775896e-05, |
| "loss": 1.2081, |
| "step": 431 |
| }, |
| { |
| "epoch": 2.283734741009568, |
| "grad_norm": 0.8015364281608918, |
| "learning_rate": 5.2781722908850086e-05, |
| "loss": 1.2086, |
| "step": 432 |
| }, |
| { |
| "epoch": 2.2890135268888154, |
| "grad_norm": 0.6008408657002147, |
| "learning_rate": 5.264154724628751e-05, |
| "loss": 1.2218, |
| "step": 433 |
| }, |
| { |
| "epoch": 2.2942923127680634, |
| "grad_norm": 0.6734753765950555, |
| "learning_rate": 5.250119889593488e-05, |
| "loss": 1.2171, |
| "step": 434 |
| }, |
| { |
| "epoch": 2.299571098647311, |
| "grad_norm": 0.42516851079297513, |
| "learning_rate": 5.23606797749979e-05, |
| "loss": 1.2219, |
| "step": 435 |
| }, |
| { |
| "epoch": 2.304849884526559, |
| "grad_norm": 0.5063759909871318, |
| "learning_rate": 5.221999180301506e-05, |
| "loss": 1.2063, |
| "step": 436 |
| }, |
| { |
| "epoch": 2.3101286704058066, |
| "grad_norm": 0.4563996005720369, |
| "learning_rate": 5.2079136901831425e-05, |
| "loss": 1.2209, |
| "step": 437 |
| }, |
| { |
| "epoch": 2.3154074562850546, |
| "grad_norm": 0.3910351995919037, |
| "learning_rate": 5.1938116995572325e-05, |
| "loss": 1.2179, |
| "step": 438 |
| }, |
| { |
| "epoch": 2.320686242164302, |
| "grad_norm": 0.5608831097319059, |
| "learning_rate": 5.179693401061714e-05, |
| "loss": 1.2197, |
| "step": 439 |
| }, |
| { |
| "epoch": 2.3259650280435498, |
| "grad_norm": 0.4703055804548109, |
| "learning_rate": 5.1655589875572994e-05, |
| "loss": 1.2136, |
| "step": 440 |
| }, |
| { |
| "epoch": 2.331243813922798, |
| "grad_norm": 0.44399692780721745, |
| "learning_rate": 5.151408652124831e-05, |
| "loss": 1.2111, |
| "step": 441 |
| }, |
| { |
| "epoch": 2.3365225998020454, |
| "grad_norm": 0.4792323461061973, |
| "learning_rate": 5.1372425880626536e-05, |
| "loss": 1.2166, |
| "step": 442 |
| }, |
| { |
| "epoch": 2.3418013856812934, |
| "grad_norm": 0.38455094020768915, |
| "learning_rate": 5.1230609888839724e-05, |
| "loss": 1.2161, |
| "step": 443 |
| }, |
| { |
| "epoch": 2.347080171560541, |
| "grad_norm": 0.4279941829860823, |
| "learning_rate": 5.108864048314204e-05, |
| "loss": 1.2203, |
| "step": 444 |
| }, |
| { |
| "epoch": 2.352358957439789, |
| "grad_norm": 0.2777582548813507, |
| "learning_rate": 5.0946519602883326e-05, |
| "loss": 1.2144, |
| "step": 445 |
| }, |
| { |
| "epoch": 2.3576377433190365, |
| "grad_norm": 0.5492728533936109, |
| "learning_rate": 5.0804249189482664e-05, |
| "loss": 1.2233, |
| "step": 446 |
| }, |
| { |
| "epoch": 2.3629165291982845, |
| "grad_norm": 0.2985781488436604, |
| "learning_rate": 5.066183118640177e-05, |
| "loss": 1.2158, |
| "step": 447 |
| }, |
| { |
| "epoch": 2.368195315077532, |
| "grad_norm": 0.3207066404394407, |
| "learning_rate": 5.0519267539118506e-05, |
| "loss": 1.2113, |
| "step": 448 |
| }, |
| { |
| "epoch": 2.37347410095678, |
| "grad_norm": 0.2926582089437268, |
| "learning_rate": 5.037656019510028e-05, |
| "loss": 1.1993, |
| "step": 449 |
| }, |
| { |
| "epoch": 2.3787528868360277, |
| "grad_norm": 0.2800596258887348, |
| "learning_rate": 5.023371110377743e-05, |
| "loss": 1.2074, |
| "step": 450 |
| }, |
| { |
| "epoch": 2.3840316727152757, |
| "grad_norm": 0.3116276776485492, |
| "learning_rate": 5.009072221651662e-05, |
| "loss": 1.2239, |
| "step": 451 |
| }, |
| { |
| "epoch": 2.3893104585945233, |
| "grad_norm": 0.22694341264711498, |
| "learning_rate": 4.9947595486594206e-05, |
| "loss": 1.2124, |
| "step": 452 |
| }, |
| { |
| "epoch": 2.394589244473771, |
| "grad_norm": 0.2733742917735896, |
| "learning_rate": 4.9804332869169436e-05, |
| "loss": 1.2084, |
| "step": 453 |
| }, |
| { |
| "epoch": 2.399868030353019, |
| "grad_norm": 0.34240813380863155, |
| "learning_rate": 4.966093632125792e-05, |
| "loss": 1.2208, |
| "step": 454 |
| }, |
| { |
| "epoch": 2.4051468162322664, |
| "grad_norm": 0.3184112467462416, |
| "learning_rate": 4.951740780170475e-05, |
| "loss": 1.2177, |
| "step": 455 |
| }, |
| { |
| "epoch": 2.4104256021115145, |
| "grad_norm": 0.21605403579412932, |
| "learning_rate": 4.937374927115783e-05, |
| "loss": 1.2026, |
| "step": 456 |
| }, |
| { |
| "epoch": 2.415704387990762, |
| "grad_norm": 0.3270749368088314, |
| "learning_rate": 4.9229962692041e-05, |
| "loss": 1.2115, |
| "step": 457 |
| }, |
| { |
| "epoch": 2.42098317387001, |
| "grad_norm": 0.22532926778581241, |
| "learning_rate": 4.908605002852735e-05, |
| "loss": 1.2151, |
| "step": 458 |
| }, |
| { |
| "epoch": 2.4262619597492576, |
| "grad_norm": 0.8669157175290935, |
| "learning_rate": 4.89420132465123e-05, |
| "loss": 1.2216, |
| "step": 459 |
| }, |
| { |
| "epoch": 2.4315407456285056, |
| "grad_norm": 0.29610996332037504, |
| "learning_rate": 4.879785431358675e-05, |
| "loss": 1.2041, |
| "step": 460 |
| }, |
| { |
| "epoch": 2.436819531507753, |
| "grad_norm": 0.2681710573971505, |
| "learning_rate": 4.865357519901026e-05, |
| "loss": 1.2184, |
| "step": 461 |
| }, |
| { |
| "epoch": 2.442098317387001, |
| "grad_norm": 0.2717324203460164, |
| "learning_rate": 4.850917787368409e-05, |
| "loss": 1.2287, |
| "step": 462 |
| }, |
| { |
| "epoch": 2.447377103266249, |
| "grad_norm": 0.34204178133936813, |
| "learning_rate": 4.8364664310124305e-05, |
| "loss": 1.2228, |
| "step": 463 |
| }, |
| { |
| "epoch": 2.4526558891454964, |
| "grad_norm": 0.30685100069464205, |
| "learning_rate": 4.822003648243481e-05, |
| "loss": 1.2188, |
| "step": 464 |
| }, |
| { |
| "epoch": 2.4579346750247444, |
| "grad_norm": 0.2732221315582542, |
| "learning_rate": 4.807529636628041e-05, |
| "loss": 1.2078, |
| "step": 465 |
| }, |
| { |
| "epoch": 2.463213460903992, |
| "grad_norm": 0.2848276134012873, |
| "learning_rate": 4.7930445938859824e-05, |
| "loss": 1.2146, |
| "step": 466 |
| }, |
| { |
| "epoch": 2.46849224678324, |
| "grad_norm": 0.23099153058477162, |
| "learning_rate": 4.778548717887862e-05, |
| "loss": 1.2144, |
| "step": 467 |
| }, |
| { |
| "epoch": 2.4737710326624875, |
| "grad_norm": 0.24410298456014018, |
| "learning_rate": 4.764042206652225e-05, |
| "loss": 1.2096, |
| "step": 468 |
| }, |
| { |
| "epoch": 2.4790498185417356, |
| "grad_norm": 0.23453508567964462, |
| "learning_rate": 4.7495252583429e-05, |
| "loss": 1.2229, |
| "step": 469 |
| }, |
| { |
| "epoch": 2.484328604420983, |
| "grad_norm": 0.2274160056940384, |
| "learning_rate": 4.734998071266282e-05, |
| "loss": 1.1957, |
| "step": 470 |
| }, |
| { |
| "epoch": 2.4896073903002307, |
| "grad_norm": 0.7222158250734677, |
| "learning_rate": 4.720460843868639e-05, |
| "loss": 1.212, |
| "step": 471 |
| }, |
| { |
| "epoch": 2.4948861761794787, |
| "grad_norm": 0.23319039438700873, |
| "learning_rate": 4.705913774733389e-05, |
| "loss": 1.2174, |
| "step": 472 |
| }, |
| { |
| "epoch": 2.5001649620587267, |
| "grad_norm": 0.20581558881463416, |
| "learning_rate": 4.6913570625783925e-05, |
| "loss": 1.2127, |
| "step": 473 |
| }, |
| { |
| "epoch": 2.5054437479379743, |
| "grad_norm": 0.580190640636312, |
| "learning_rate": 4.676790906253238e-05, |
| "loss": 1.2175, |
| "step": 474 |
| }, |
| { |
| "epoch": 2.510722533817222, |
| "grad_norm": 0.35859386128357623, |
| "learning_rate": 4.66221550473652e-05, |
| "loss": 1.2208, |
| "step": 475 |
| }, |
| { |
| "epoch": 2.51600131969647, |
| "grad_norm": 0.21572591452681666, |
| "learning_rate": 4.647631057133133e-05, |
| "loss": 1.2139, |
| "step": 476 |
| }, |
| { |
| "epoch": 2.5212801055757175, |
| "grad_norm": 0.3184926568808877, |
| "learning_rate": 4.633037762671536e-05, |
| "loss": 1.2147, |
| "step": 477 |
| }, |
| { |
| "epoch": 2.5265588914549655, |
| "grad_norm": 0.23798191993579182, |
| "learning_rate": 4.618435820701045e-05, |
| "loss": 1.2218, |
| "step": 478 |
| }, |
| { |
| "epoch": 2.531837677334213, |
| "grad_norm": 0.19775491535362455, |
| "learning_rate": 4.603825430689101e-05, |
| "loss": 1.2149, |
| "step": 479 |
| }, |
| { |
| "epoch": 2.5371164632134606, |
| "grad_norm": 0.2527202262533402, |
| "learning_rate": 4.589206792218551e-05, |
| "loss": 1.2303, |
| "step": 480 |
| }, |
| { |
| "epoch": 2.5423952490927086, |
| "grad_norm": 0.22950883586170756, |
| "learning_rate": 4.574580104984914e-05, |
| "loss": 1.213, |
| "step": 481 |
| }, |
| { |
| "epoch": 2.5476740349719567, |
| "grad_norm": 0.24370406115364615, |
| "learning_rate": 4.5599455687936605e-05, |
| "loss": 1.2063, |
| "step": 482 |
| }, |
| { |
| "epoch": 2.5529528208512042, |
| "grad_norm": 0.26261007853169177, |
| "learning_rate": 4.54530338355748e-05, |
| "loss": 1.2023, |
| "step": 483 |
| }, |
| { |
| "epoch": 2.558231606730452, |
| "grad_norm": 0.2014430995217622, |
| "learning_rate": 4.530653749293554e-05, |
| "loss": 1.2181, |
| "step": 484 |
| }, |
| { |
| "epoch": 2.5635103926097, |
| "grad_norm": 0.23150145338987912, |
| "learning_rate": 4.515996866120814e-05, |
| "loss": 1.2001, |
| "step": 485 |
| }, |
| { |
| "epoch": 2.5687891784889474, |
| "grad_norm": 0.24436976922792347, |
| "learning_rate": 4.501332934257217e-05, |
| "loss": 1.1965, |
| "step": 486 |
| }, |
| { |
| "epoch": 2.5740679643681954, |
| "grad_norm": 0.20891310725843007, |
| "learning_rate": 4.48666215401701e-05, |
| "loss": 1.2095, |
| "step": 487 |
| }, |
| { |
| "epoch": 2.579346750247443, |
| "grad_norm": 0.580497655713609, |
| "learning_rate": 4.471984725807987e-05, |
| "loss": 1.2028, |
| "step": 488 |
| }, |
| { |
| "epoch": 2.584625536126691, |
| "grad_norm": 0.2458630083617766, |
| "learning_rate": 4.457300850128757e-05, |
| "loss": 1.2314, |
| "step": 489 |
| }, |
| { |
| "epoch": 2.5899043220059386, |
| "grad_norm": 0.20029462571056664, |
| "learning_rate": 4.442610727566003e-05, |
| "loss": 1.1999, |
| "step": 490 |
| }, |
| { |
| "epoch": 2.5951831078851866, |
| "grad_norm": 0.1824564421862542, |
| "learning_rate": 4.427914558791747e-05, |
| "loss": 1.2128, |
| "step": 491 |
| }, |
| { |
| "epoch": 2.600461893764434, |
| "grad_norm": 0.22834088575096842, |
| "learning_rate": 4.4132125445605974e-05, |
| "loss": 1.1976, |
| "step": 492 |
| }, |
| { |
| "epoch": 2.6057406796436817, |
| "grad_norm": 0.19803337517555744, |
| "learning_rate": 4.3985048857070163e-05, |
| "loss": 1.2236, |
| "step": 493 |
| }, |
| { |
| "epoch": 2.6110194655229297, |
| "grad_norm": 0.23458909248075385, |
| "learning_rate": 4.383791783142576e-05, |
| "loss": 1.204, |
| "step": 494 |
| }, |
| { |
| "epoch": 2.6162982514021778, |
| "grad_norm": 0.21121300683483385, |
| "learning_rate": 4.369073437853208e-05, |
| "loss": 1.2117, |
| "step": 495 |
| }, |
| { |
| "epoch": 2.6215770372814253, |
| "grad_norm": 0.20128926160161514, |
| "learning_rate": 4.3543500508964636e-05, |
| "loss": 1.2041, |
| "step": 496 |
| }, |
| { |
| "epoch": 2.626855823160673, |
| "grad_norm": 0.21654654847139376, |
| "learning_rate": 4.339621823398762e-05, |
| "loss": 1.2228, |
| "step": 497 |
| }, |
| { |
| "epoch": 2.632134609039921, |
| "grad_norm": 0.17324746161391114, |
| "learning_rate": 4.32488895655265e-05, |
| "loss": 1.2106, |
| "step": 498 |
| }, |
| { |
| "epoch": 2.6374133949191685, |
| "grad_norm": 0.21573620879427124, |
| "learning_rate": 4.3101516516140466e-05, |
| "loss": 1.1987, |
| "step": 499 |
| }, |
| { |
| "epoch": 2.6426921807984165, |
| "grad_norm": 0.2470219196100934, |
| "learning_rate": 4.295410109899496e-05, |
| "loss": 1.2103, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.647970966677664, |
| "grad_norm": 0.2350166861687524, |
| "learning_rate": 4.280664532783421e-05, |
| "loss": 1.2154, |
| "step": 501 |
| }, |
| { |
| "epoch": 2.6532497525569116, |
| "grad_norm": 0.20325354736823176, |
| "learning_rate": 4.265915121695368e-05, |
| "loss": 1.2146, |
| "step": 502 |
| }, |
| { |
| "epoch": 2.6585285384361597, |
| "grad_norm": 0.1651221750574765, |
| "learning_rate": 4.251162078117254e-05, |
| "loss": 1.2059, |
| "step": 503 |
| }, |
| { |
| "epoch": 2.6638073243154077, |
| "grad_norm": 0.2323971696245432, |
| "learning_rate": 4.236405603580622e-05, |
| "loss": 1.2124, |
| "step": 504 |
| }, |
| { |
| "epoch": 2.6690861101946552, |
| "grad_norm": 0.266884077772536, |
| "learning_rate": 4.22164589966388e-05, |
| "loss": 1.2068, |
| "step": 505 |
| }, |
| { |
| "epoch": 2.674364896073903, |
| "grad_norm": 0.17608948591217963, |
| "learning_rate": 4.206883167989551e-05, |
| "loss": 1.2183, |
| "step": 506 |
| }, |
| { |
| "epoch": 2.679643681953151, |
| "grad_norm": 0.24861673978443094, |
| "learning_rate": 4.1921176102215195e-05, |
| "loss": 1.2106, |
| "step": 507 |
| }, |
| { |
| "epoch": 2.6849224678323984, |
| "grad_norm": 0.20562099943439288, |
| "learning_rate": 4.1773494280622706e-05, |
| "loss": 1.2157, |
| "step": 508 |
| }, |
| { |
| "epoch": 2.6902012537116464, |
| "grad_norm": 0.1897513819316434, |
| "learning_rate": 4.1625788232501475e-05, |
| "loss": 1.2029, |
| "step": 509 |
| }, |
| { |
| "epoch": 2.695480039590894, |
| "grad_norm": 0.21765120587734574, |
| "learning_rate": 4.1478059975565806e-05, |
| "loss": 1.1957, |
| "step": 510 |
| }, |
| { |
| "epoch": 2.700758825470142, |
| "grad_norm": 0.17421718276141962, |
| "learning_rate": 4.13303115278334e-05, |
| "loss": 1.2126, |
| "step": 511 |
| }, |
| { |
| "epoch": 2.7060376113493896, |
| "grad_norm": 0.18994412403249641, |
| "learning_rate": 4.11825449075978e-05, |
| "loss": 1.2011, |
| "step": 512 |
| }, |
| { |
| "epoch": 2.7113163972286376, |
| "grad_norm": 0.213262900251563, |
| "learning_rate": 4.103476213340076e-05, |
| "loss": 1.1988, |
| "step": 513 |
| }, |
| { |
| "epoch": 2.716595183107885, |
| "grad_norm": 0.2193737588424257, |
| "learning_rate": 4.088696522400472e-05, |
| "loss": 1.1975, |
| "step": 514 |
| }, |
| { |
| "epoch": 2.7218739689871327, |
| "grad_norm": 0.18264279167008451, |
| "learning_rate": 4.07391561983652e-05, |
| "loss": 1.2135, |
| "step": 515 |
| }, |
| { |
| "epoch": 2.7271527548663808, |
| "grad_norm": 0.23335206923435833, |
| "learning_rate": 4.059133707560325e-05, |
| "loss": 1.2138, |
| "step": 516 |
| }, |
| { |
| "epoch": 2.7324315407456288, |
| "grad_norm": 0.25900257473153687, |
| "learning_rate": 4.04435098749778e-05, |
| "loss": 1.2107, |
| "step": 517 |
| }, |
| { |
| "epoch": 2.7377103266248763, |
| "grad_norm": 0.2103342998522433, |
| "learning_rate": 4.029567661585821e-05, |
| "loss": 1.206, |
| "step": 518 |
| }, |
| { |
| "epoch": 2.742989112504124, |
| "grad_norm": 0.17831077097654793, |
| "learning_rate": 4.014783931769652e-05, |
| "loss": 1.1986, |
| "step": 519 |
| }, |
| { |
| "epoch": 2.748267898383372, |
| "grad_norm": 0.20867309600721232, |
| "learning_rate": 4e-05, |
| "loss": 1.1943, |
| "step": 520 |
| }, |
| { |
| "epoch": 2.7535466842626195, |
| "grad_norm": 0.1537772415999436, |
| "learning_rate": 3.9852160682303486e-05, |
| "loss": 1.2091, |
| "step": 521 |
| }, |
| { |
| "epoch": 2.7588254701418675, |
| "grad_norm": 0.20801222179757986, |
| "learning_rate": 3.970432338414181e-05, |
| "loss": 1.2116, |
| "step": 522 |
| }, |
| { |
| "epoch": 2.764104256021115, |
| "grad_norm": 0.194411153494734, |
| "learning_rate": 3.955649012502221e-05, |
| "loss": 1.2015, |
| "step": 523 |
| }, |
| { |
| "epoch": 2.7693830419003627, |
| "grad_norm": 0.2605114580460575, |
| "learning_rate": 3.940866292439677e-05, |
| "loss": 1.1968, |
| "step": 524 |
| }, |
| { |
| "epoch": 2.7746618277796107, |
| "grad_norm": 0.1719623548499034, |
| "learning_rate": 3.926084380163481e-05, |
| "loss": 1.2121, |
| "step": 525 |
| }, |
| { |
| "epoch": 2.7799406136588587, |
| "grad_norm": 0.19539588740286956, |
| "learning_rate": 3.9113034775995285e-05, |
| "loss": 1.1957, |
| "step": 526 |
| }, |
| { |
| "epoch": 2.7852193995381063, |
| "grad_norm": 0.18647363107594456, |
| "learning_rate": 3.896523786659926e-05, |
| "loss": 1.2039, |
| "step": 527 |
| }, |
| { |
| "epoch": 2.790498185417354, |
| "grad_norm": 0.23992352538776165, |
| "learning_rate": 3.881745509240222e-05, |
| "loss": 1.2057, |
| "step": 528 |
| }, |
| { |
| "epoch": 2.795776971296602, |
| "grad_norm": 0.19235184099847877, |
| "learning_rate": 3.8669688472166604e-05, |
| "loss": 1.2053, |
| "step": 529 |
| }, |
| { |
| "epoch": 2.8010557571758494, |
| "grad_norm": 0.20150647808031436, |
| "learning_rate": 3.8521940024434214e-05, |
| "loss": 1.1979, |
| "step": 530 |
| }, |
| { |
| "epoch": 2.8063345430550974, |
| "grad_norm": 0.20818880270673554, |
| "learning_rate": 3.837421176749854e-05, |
| "loss": 1.1917, |
| "step": 531 |
| }, |
| { |
| "epoch": 2.811613328934345, |
| "grad_norm": 0.160286642176085, |
| "learning_rate": 3.822650571937729e-05, |
| "loss": 1.2062, |
| "step": 532 |
| }, |
| { |
| "epoch": 2.816892114813593, |
| "grad_norm": 0.2244956653582766, |
| "learning_rate": 3.807882389778483e-05, |
| "loss": 1.2149, |
| "step": 533 |
| }, |
| { |
| "epoch": 2.8221709006928406, |
| "grad_norm": 0.17765823360734992, |
| "learning_rate": 3.79311683201045e-05, |
| "loss": 1.2232, |
| "step": 534 |
| }, |
| { |
| "epoch": 2.8274496865720886, |
| "grad_norm": 0.2142393041003271, |
| "learning_rate": 3.7783541003361203e-05, |
| "loss": 1.2059, |
| "step": 535 |
| }, |
| { |
| "epoch": 2.832728472451336, |
| "grad_norm": 0.17229227174221823, |
| "learning_rate": 3.7635943964193786e-05, |
| "loss": 1.1881, |
| "step": 536 |
| }, |
| { |
| "epoch": 2.8380072583305838, |
| "grad_norm": 0.20443638338032005, |
| "learning_rate": 3.7488379218827466e-05, |
| "loss": 1.2006, |
| "step": 537 |
| }, |
| { |
| "epoch": 2.8432860442098318, |
| "grad_norm": 0.16509634715516444, |
| "learning_rate": 3.734084878304635e-05, |
| "loss": 1.2044, |
| "step": 538 |
| }, |
| { |
| "epoch": 2.8485648300890793, |
| "grad_norm": 0.19649714810146393, |
| "learning_rate": 3.7193354672165804e-05, |
| "loss": 1.1957, |
| "step": 539 |
| }, |
| { |
| "epoch": 2.8538436159683274, |
| "grad_norm": 0.18562259750029927, |
| "learning_rate": 3.7045898901005045e-05, |
| "loss": 1.2009, |
| "step": 540 |
| }, |
| { |
| "epoch": 2.859122401847575, |
| "grad_norm": 0.16283779745383173, |
| "learning_rate": 3.689848348385955e-05, |
| "loss": 1.2076, |
| "step": 541 |
| }, |
| { |
| "epoch": 2.864401187726823, |
| "grad_norm": 0.17467802811899544, |
| "learning_rate": 3.6751110434473504e-05, |
| "loss": 1.2054, |
| "step": 542 |
| }, |
| { |
| "epoch": 2.8696799736060705, |
| "grad_norm": 0.2124736329055944, |
| "learning_rate": 3.6603781766012374e-05, |
| "loss": 1.2064, |
| "step": 543 |
| }, |
| { |
| "epoch": 2.8749587594853185, |
| "grad_norm": 0.15324274860627765, |
| "learning_rate": 3.645649949103538e-05, |
| "loss": 1.2184, |
| "step": 544 |
| }, |
| { |
| "epoch": 2.880237545364566, |
| "grad_norm": 0.21566078265805713, |
| "learning_rate": 3.630926562146792e-05, |
| "loss": 1.2094, |
| "step": 545 |
| }, |
| { |
| "epoch": 2.8855163312438137, |
| "grad_norm": 0.18547268756739058, |
| "learning_rate": 3.616208216857424e-05, |
| "loss": 1.2151, |
| "step": 546 |
| }, |
| { |
| "epoch": 2.8907951171230617, |
| "grad_norm": 0.17370399818819313, |
| "learning_rate": 3.601495114292984e-05, |
| "loss": 1.2015, |
| "step": 547 |
| }, |
| { |
| "epoch": 2.8960739030023097, |
| "grad_norm": 0.1763097150756615, |
| "learning_rate": 3.586787455439403e-05, |
| "loss": 1.1834, |
| "step": 548 |
| }, |
| { |
| "epoch": 2.9013526888815573, |
| "grad_norm": 0.5161949943347345, |
| "learning_rate": 3.572085441208255e-05, |
| "loss": 1.2152, |
| "step": 549 |
| }, |
| { |
| "epoch": 2.906631474760805, |
| "grad_norm": 0.4758257977373414, |
| "learning_rate": 3.5573892724339974e-05, |
| "loss": 1.2213, |
| "step": 550 |
| }, |
| { |
| "epoch": 2.911910260640053, |
| "grad_norm": 0.20315764691238297, |
| "learning_rate": 3.542699149871245e-05, |
| "loss": 1.1998, |
| "step": 551 |
| }, |
| { |
| "epoch": 2.9171890465193004, |
| "grad_norm": 0.3185401826129705, |
| "learning_rate": 3.5280152741920146e-05, |
| "loss": 1.2057, |
| "step": 552 |
| }, |
| { |
| "epoch": 2.9224678323985485, |
| "grad_norm": 0.17732212942108816, |
| "learning_rate": 3.513337845982991e-05, |
| "loss": 1.2003, |
| "step": 553 |
| }, |
| { |
| "epoch": 2.927746618277796, |
| "grad_norm": 0.17655839868070522, |
| "learning_rate": 3.498667065742783e-05, |
| "loss": 1.2188, |
| "step": 554 |
| }, |
| { |
| "epoch": 2.9330254041570436, |
| "grad_norm": 0.17930609363824526, |
| "learning_rate": 3.484003133879188e-05, |
| "loss": 1.2178, |
| "step": 555 |
| }, |
| { |
| "epoch": 2.9383041900362916, |
| "grad_norm": 0.1771053499582412, |
| "learning_rate": 3.4693462507064475e-05, |
| "loss": 1.1851, |
| "step": 556 |
| }, |
| { |
| "epoch": 2.9435829759155396, |
| "grad_norm": 0.19233889091342063, |
| "learning_rate": 3.4546966164425196e-05, |
| "loss": 1.2282, |
| "step": 557 |
| }, |
| { |
| "epoch": 2.948861761794787, |
| "grad_norm": 0.2019800309492491, |
| "learning_rate": 3.440054431206341e-05, |
| "loss": 1.2272, |
| "step": 558 |
| }, |
| { |
| "epoch": 2.954140547674035, |
| "grad_norm": 0.18880582680263816, |
| "learning_rate": 3.4254198950150876e-05, |
| "loss": 1.2165, |
| "step": 559 |
| }, |
| { |
| "epoch": 2.959419333553283, |
| "grad_norm": 0.20209531295455146, |
| "learning_rate": 3.41079320778145e-05, |
| "loss": 1.1977, |
| "step": 560 |
| }, |
| { |
| "epoch": 2.9646981194325304, |
| "grad_norm": 0.1818502465064666, |
| "learning_rate": 3.3961745693108995e-05, |
| "loss": 1.1977, |
| "step": 561 |
| }, |
| { |
| "epoch": 2.9699769053117784, |
| "grad_norm": 0.20013609861537826, |
| "learning_rate": 3.3815641792989556e-05, |
| "loss": 1.2009, |
| "step": 562 |
| }, |
| { |
| "epoch": 2.975255691191026, |
| "grad_norm": 0.19474968068343523, |
| "learning_rate": 3.366962237328465e-05, |
| "loss": 1.1932, |
| "step": 563 |
| }, |
| { |
| "epoch": 2.980534477070274, |
| "grad_norm": 0.16811423192887717, |
| "learning_rate": 3.3523689428668686e-05, |
| "loss": 1.216, |
| "step": 564 |
| }, |
| { |
| "epoch": 2.9858132629495215, |
| "grad_norm": 0.20904143259971103, |
| "learning_rate": 3.33778449526348e-05, |
| "loss": 1.2044, |
| "step": 565 |
| }, |
| { |
| "epoch": 2.9910920488287696, |
| "grad_norm": 0.15694602056779824, |
| "learning_rate": 3.323209093746764e-05, |
| "loss": 1.2036, |
| "step": 566 |
| }, |
| { |
| "epoch": 2.996370834708017, |
| "grad_norm": 0.19731725001456019, |
| "learning_rate": 3.308642937421609e-05, |
| "loss": 1.2145, |
| "step": 567 |
| }, |
| { |
| "epoch": 3.00329924117453, |
| "grad_norm": 0.39640254269304404, |
| "learning_rate": 3.294086225266612e-05, |
| "loss": 2.2691, |
| "step": 568 |
| }, |
| { |
| "epoch": 3.0085780270537774, |
| "grad_norm": 0.35149949016057497, |
| "learning_rate": 3.279539156131362e-05, |
| "loss": 1.1908, |
| "step": 569 |
| }, |
| { |
| "epoch": 3.0138568129330254, |
| "grad_norm": 0.25800980108361316, |
| "learning_rate": 3.2650019287337184e-05, |
| "loss": 1.1863, |
| "step": 570 |
| }, |
| { |
| "epoch": 3.019135598812273, |
| "grad_norm": 0.29271578035617773, |
| "learning_rate": 3.250474741657101e-05, |
| "loss": 1.1733, |
| "step": 571 |
| }, |
| { |
| "epoch": 3.024414384691521, |
| "grad_norm": 0.2889155002761209, |
| "learning_rate": 3.235957793347776e-05, |
| "loss": 1.195, |
| "step": 572 |
| }, |
| { |
| "epoch": 3.0296931705707686, |
| "grad_norm": 0.2673002288345648, |
| "learning_rate": 3.221451282112139e-05, |
| "loss": 1.1857, |
| "step": 573 |
| }, |
| { |
| "epoch": 3.0349719564500166, |
| "grad_norm": 0.2427413851962326, |
| "learning_rate": 3.2069554061140196e-05, |
| "loss": 1.1815, |
| "step": 574 |
| }, |
| { |
| "epoch": 3.040250742329264, |
| "grad_norm": 0.27248962898507717, |
| "learning_rate": 3.19247036337196e-05, |
| "loss": 1.1735, |
| "step": 575 |
| }, |
| { |
| "epoch": 3.045529528208512, |
| "grad_norm": 0.28142619295211896, |
| "learning_rate": 3.177996351756521e-05, |
| "loss": 1.1672, |
| "step": 576 |
| }, |
| { |
| "epoch": 3.0508083140877598, |
| "grad_norm": 0.23654685125778924, |
| "learning_rate": 3.1635335689875716e-05, |
| "loss": 1.1947, |
| "step": 577 |
| }, |
| { |
| "epoch": 3.056087099967008, |
| "grad_norm": 0.3060272236601635, |
| "learning_rate": 3.149082212631592e-05, |
| "loss": 1.1669, |
| "step": 578 |
| }, |
| { |
| "epoch": 3.0613658858462554, |
| "grad_norm": 0.1990490966508174, |
| "learning_rate": 3.134642480098975e-05, |
| "loss": 1.1734, |
| "step": 579 |
| }, |
| { |
| "epoch": 3.066644671725503, |
| "grad_norm": 0.23725195603837385, |
| "learning_rate": 3.120214568641327e-05, |
| "loss": 1.1846, |
| "step": 580 |
| }, |
| { |
| "epoch": 3.071923457604751, |
| "grad_norm": 1.7515631467902577, |
| "learning_rate": 3.105798675348772e-05, |
| "loss": 1.2022, |
| "step": 581 |
| }, |
| { |
| "epoch": 3.0772022434839985, |
| "grad_norm": 0.23405760507735043, |
| "learning_rate": 3.0913949971472654e-05, |
| "loss": 1.1848, |
| "step": 582 |
| }, |
| { |
| "epoch": 3.0824810293632465, |
| "grad_norm": 0.2536321974582141, |
| "learning_rate": 3.0770037307959014e-05, |
| "loss": 1.1856, |
| "step": 583 |
| }, |
| { |
| "epoch": 3.087759815242494, |
| "grad_norm": 0.24687742855673658, |
| "learning_rate": 3.062625072884218e-05, |
| "loss": 1.182, |
| "step": 584 |
| }, |
| { |
| "epoch": 3.093038601121742, |
| "grad_norm": 0.26309550113541097, |
| "learning_rate": 3.048259219829526e-05, |
| "loss": 1.1717, |
| "step": 585 |
| }, |
| { |
| "epoch": 3.0983173870009897, |
| "grad_norm": 0.26335631307371027, |
| "learning_rate": 3.033906367874209e-05, |
| "loss": 1.175, |
| "step": 586 |
| }, |
| { |
| "epoch": 3.1035961728802377, |
| "grad_norm": 0.19703497978014536, |
| "learning_rate": 3.019566713083057e-05, |
| "loss": 1.1885, |
| "step": 587 |
| }, |
| { |
| "epoch": 3.1088749587594853, |
| "grad_norm": 0.5752260315715897, |
| "learning_rate": 3.0052404513405817e-05, |
| "loss": 1.205, |
| "step": 588 |
| }, |
| { |
| "epoch": 3.1141537446387333, |
| "grad_norm": 0.22038192362400044, |
| "learning_rate": 2.990927778348338e-05, |
| "loss": 1.1938, |
| "step": 589 |
| }, |
| { |
| "epoch": 3.119432530517981, |
| "grad_norm": 0.21601092261632623, |
| "learning_rate": 2.9766288896222577e-05, |
| "loss": 1.1754, |
| "step": 590 |
| }, |
| { |
| "epoch": 3.1247113163972284, |
| "grad_norm": 0.2237994485232262, |
| "learning_rate": 2.9623439804899738e-05, |
| "loss": 1.1753, |
| "step": 591 |
| }, |
| { |
| "epoch": 3.1299901022764764, |
| "grad_norm": 0.19731985592374132, |
| "learning_rate": 2.9480732460881504e-05, |
| "loss": 1.1695, |
| "step": 592 |
| }, |
| { |
| "epoch": 3.135268888155724, |
| "grad_norm": 0.49723859022777406, |
| "learning_rate": 2.9338168813598238e-05, |
| "loss": 1.2003, |
| "step": 593 |
| }, |
| { |
| "epoch": 3.140547674034972, |
| "grad_norm": 0.17077112936798963, |
| "learning_rate": 2.9195750810517353e-05, |
| "loss": 1.1538, |
| "step": 594 |
| }, |
| { |
| "epoch": 3.1458264599142196, |
| "grad_norm": 0.31811164602044134, |
| "learning_rate": 2.905348039711669e-05, |
| "loss": 1.2078, |
| "step": 595 |
| }, |
| { |
| "epoch": 3.1511052457934676, |
| "grad_norm": 0.18318775715519817, |
| "learning_rate": 2.891135951685799e-05, |
| "loss": 1.1747, |
| "step": 596 |
| }, |
| { |
| "epoch": 3.156384031672715, |
| "grad_norm": 0.19307423783474406, |
| "learning_rate": 2.8769390111160293e-05, |
| "loss": 1.1872, |
| "step": 597 |
| }, |
| { |
| "epoch": 3.161662817551963, |
| "grad_norm": 0.19217084719934985, |
| "learning_rate": 2.862757411937347e-05, |
| "loss": 1.1904, |
| "step": 598 |
| }, |
| { |
| "epoch": 3.166941603431211, |
| "grad_norm": 0.2033421815267501, |
| "learning_rate": 2.8485913478751706e-05, |
| "loss": 1.1795, |
| "step": 599 |
| }, |
| { |
| "epoch": 3.172220389310459, |
| "grad_norm": 0.1883185538491291, |
| "learning_rate": 2.834441012442702e-05, |
| "loss": 1.1803, |
| "step": 600 |
| }, |
| { |
| "epoch": 3.1774991751897064, |
| "grad_norm": 0.19166460775924493, |
| "learning_rate": 2.8203065989382853e-05, |
| "loss": 1.1751, |
| "step": 601 |
| }, |
| { |
| "epoch": 3.182777961068954, |
| "grad_norm": 0.6048917980927118, |
| "learning_rate": 2.8061883004427692e-05, |
| "loss": 1.1751, |
| "step": 602 |
| }, |
| { |
| "epoch": 3.188056746948202, |
| "grad_norm": 0.21052892689634514, |
| "learning_rate": 2.792086309816859e-05, |
| "loss": 1.197, |
| "step": 603 |
| }, |
| { |
| "epoch": 3.1933355328274495, |
| "grad_norm": 0.17247927939199434, |
| "learning_rate": 2.778000819698494e-05, |
| "loss": 1.1852, |
| "step": 604 |
| }, |
| { |
| "epoch": 3.1986143187066975, |
| "grad_norm": 0.21702246989530297, |
| "learning_rate": 2.7639320225002108e-05, |
| "loss": 1.1838, |
| "step": 605 |
| }, |
| { |
| "epoch": 3.203893104585945, |
| "grad_norm": 0.18519967189771228, |
| "learning_rate": 2.7498801104065127e-05, |
| "loss": 1.18, |
| "step": 606 |
| }, |
| { |
| "epoch": 3.209171890465193, |
| "grad_norm": 0.2180595120663347, |
| "learning_rate": 2.7358452753712506e-05, |
| "loss": 1.1763, |
| "step": 607 |
| }, |
| { |
| "epoch": 3.2144506763444407, |
| "grad_norm": 0.19356701655435976, |
| "learning_rate": 2.721827709114992e-05, |
| "loss": 1.1718, |
| "step": 608 |
| }, |
| { |
| "epoch": 3.2197294622236887, |
| "grad_norm": 0.2263137728095583, |
| "learning_rate": 2.707827603122411e-05, |
| "loss": 1.188, |
| "step": 609 |
| }, |
| { |
| "epoch": 3.2250082481029363, |
| "grad_norm": 0.1875965654825016, |
| "learning_rate": 2.6938451486396675e-05, |
| "loss": 1.1736, |
| "step": 610 |
| }, |
| { |
| "epoch": 3.230287033982184, |
| "grad_norm": 0.19885738913391388, |
| "learning_rate": 2.679880536671795e-05, |
| "loss": 1.1787, |
| "step": 611 |
| }, |
| { |
| "epoch": 3.235565819861432, |
| "grad_norm": 0.16754129647748367, |
| "learning_rate": 2.6659339579800928e-05, |
| "loss": 1.1866, |
| "step": 612 |
| }, |
| { |
| "epoch": 3.2408446057406795, |
| "grad_norm": 0.18478285151592688, |
| "learning_rate": 2.6520056030795225e-05, |
| "loss": 1.1925, |
| "step": 613 |
| }, |
| { |
| "epoch": 3.2461233916199275, |
| "grad_norm": 0.1741366266971174, |
| "learning_rate": 2.6380956622360955e-05, |
| "loss": 1.1761, |
| "step": 614 |
| }, |
| { |
| "epoch": 3.251402177499175, |
| "grad_norm": 0.205724397210717, |
| "learning_rate": 2.6242043254642876e-05, |
| "loss": 1.1781, |
| "step": 615 |
| }, |
| { |
| "epoch": 3.256680963378423, |
| "grad_norm": 0.1635965859364289, |
| "learning_rate": 2.6103317825244347e-05, |
| "loss": 1.1787, |
| "step": 616 |
| }, |
| { |
| "epoch": 3.2619597492576706, |
| "grad_norm": 0.18322293170555132, |
| "learning_rate": 2.596478222920141e-05, |
| "loss": 1.1707, |
| "step": 617 |
| }, |
| { |
| "epoch": 3.2672385351369186, |
| "grad_norm": 0.21024379892510617, |
| "learning_rate": 2.582643835895696e-05, |
| "loss": 1.1849, |
| "step": 618 |
| }, |
| { |
| "epoch": 3.272517321016166, |
| "grad_norm": 0.16828870708008303, |
| "learning_rate": 2.5688288104334787e-05, |
| "loss": 1.1757, |
| "step": 619 |
| }, |
| { |
| "epoch": 3.2777961068954142, |
| "grad_norm": 0.1846842052479709, |
| "learning_rate": 2.5550333352513885e-05, |
| "loss": 1.1879, |
| "step": 620 |
| }, |
| { |
| "epoch": 3.283074892774662, |
| "grad_norm": 0.19687585348678469, |
| "learning_rate": 2.54125759880026e-05, |
| "loss": 1.1816, |
| "step": 621 |
| }, |
| { |
| "epoch": 3.28835367865391, |
| "grad_norm": 0.1758961182832862, |
| "learning_rate": 2.5275017892612885e-05, |
| "loss": 1.1911, |
| "step": 622 |
| }, |
| { |
| "epoch": 3.2936324645331574, |
| "grad_norm": 0.18792489795821068, |
| "learning_rate": 2.5137660945434617e-05, |
| "loss": 1.188, |
| "step": 623 |
| }, |
| { |
| "epoch": 3.298911250412405, |
| "grad_norm": 0.16028174516582183, |
| "learning_rate": 2.500050702280995e-05, |
| "loss": 1.1861, |
| "step": 624 |
| }, |
| { |
| "epoch": 3.304190036291653, |
| "grad_norm": 0.25493998515543115, |
| "learning_rate": 2.4863557998307593e-05, |
| "loss": 1.2039, |
| "step": 625 |
| }, |
| { |
| "epoch": 3.3094688221709005, |
| "grad_norm": 0.1702317701841697, |
| "learning_rate": 2.4726815742697326e-05, |
| "loss": 1.1653, |
| "step": 626 |
| }, |
| { |
| "epoch": 3.3147476080501486, |
| "grad_norm": 0.1811300435479535, |
| "learning_rate": 2.4590282123924398e-05, |
| "loss": 1.1911, |
| "step": 627 |
| }, |
| { |
| "epoch": 3.320026393929396, |
| "grad_norm": 0.16020792712846268, |
| "learning_rate": 2.4453959007083968e-05, |
| "loss": 1.1786, |
| "step": 628 |
| }, |
| { |
| "epoch": 3.325305179808644, |
| "grad_norm": 0.17703804764351366, |
| "learning_rate": 2.4317848254395698e-05, |
| "loss": 1.1874, |
| "step": 629 |
| }, |
| { |
| "epoch": 3.3305839656878917, |
| "grad_norm": 0.17309284980916728, |
| "learning_rate": 2.4181951725178302e-05, |
| "loss": 1.1695, |
| "step": 630 |
| }, |
| { |
| "epoch": 3.3358627515671397, |
| "grad_norm": 0.1620501421501924, |
| "learning_rate": 2.4046271275824083e-05, |
| "loss": 1.2024, |
| "step": 631 |
| }, |
| { |
| "epoch": 3.3411415374463873, |
| "grad_norm": 0.28467017075140105, |
| "learning_rate": 2.3910808759773666e-05, |
| "loss": 1.173, |
| "step": 632 |
| }, |
| { |
| "epoch": 3.346420323325635, |
| "grad_norm": 0.16148158903613652, |
| "learning_rate": 2.3775566027490583e-05, |
| "loss": 1.1731, |
| "step": 633 |
| }, |
| { |
| "epoch": 3.351699109204883, |
| "grad_norm": 0.20024202970675797, |
| "learning_rate": 2.364054492643608e-05, |
| "loss": 1.1826, |
| "step": 634 |
| }, |
| { |
| "epoch": 3.3569778950841305, |
| "grad_norm": 0.15280584791983493, |
| "learning_rate": 2.3505747301043867e-05, |
| "loss": 1.1849, |
| "step": 635 |
| }, |
| { |
| "epoch": 3.3622566809633785, |
| "grad_norm": 0.20221936488778078, |
| "learning_rate": 2.3371174992694848e-05, |
| "loss": 1.1965, |
| "step": 636 |
| }, |
| { |
| "epoch": 3.367535466842626, |
| "grad_norm": 0.4791617718328374, |
| "learning_rate": 2.3236829839692065e-05, |
| "loss": 1.1957, |
| "step": 637 |
| }, |
| { |
| "epoch": 3.372814252721874, |
| "grad_norm": 0.18149628325520292, |
| "learning_rate": 2.310271367723551e-05, |
| "loss": 1.1731, |
| "step": 638 |
| }, |
| { |
| "epoch": 3.3780930386011216, |
| "grad_norm": 0.18733774960030386, |
| "learning_rate": 2.2968828337397095e-05, |
| "loss": 1.1705, |
| "step": 639 |
| }, |
| { |
| "epoch": 3.3833718244803697, |
| "grad_norm": 0.16505883561842968, |
| "learning_rate": 2.2835175649095645e-05, |
| "loss": 1.1827, |
| "step": 640 |
| }, |
| { |
| "epoch": 3.3886506103596172, |
| "grad_norm": 0.16387587478009516, |
| "learning_rate": 2.270175743807185e-05, |
| "loss": 1.1842, |
| "step": 641 |
| }, |
| { |
| "epoch": 3.3939293962388652, |
| "grad_norm": 0.6763913842218626, |
| "learning_rate": 2.2568575526863385e-05, |
| "loss": 1.1761, |
| "step": 642 |
| }, |
| { |
| "epoch": 3.399208182118113, |
| "grad_norm": 0.20445761019330047, |
| "learning_rate": 2.2435631734779974e-05, |
| "loss": 1.1868, |
| "step": 643 |
| }, |
| { |
| "epoch": 3.404486967997361, |
| "grad_norm": 0.15713802524158743, |
| "learning_rate": 2.2302927877878543e-05, |
| "loss": 1.193, |
| "step": 644 |
| }, |
| { |
| "epoch": 3.4097657538766084, |
| "grad_norm": 0.1626598876968337, |
| "learning_rate": 2.2170465768938473e-05, |
| "loss": 1.1629, |
| "step": 645 |
| }, |
| { |
| "epoch": 3.415044539755856, |
| "grad_norm": 0.15630065852416064, |
| "learning_rate": 2.203824721743674e-05, |
| "loss": 1.1835, |
| "step": 646 |
| }, |
| { |
| "epoch": 3.420323325635104, |
| "grad_norm": 0.16627965905634337, |
| "learning_rate": 2.1906274029523262e-05, |
| "loss": 1.1932, |
| "step": 647 |
| }, |
| { |
| "epoch": 3.4256021115143516, |
| "grad_norm": 0.3853688887851438, |
| "learning_rate": 2.177454800799618e-05, |
| "loss": 1.1788, |
| "step": 648 |
| }, |
| { |
| "epoch": 3.4308808973935996, |
| "grad_norm": 0.1710965211235484, |
| "learning_rate": 2.1643070952277314e-05, |
| "loss": 1.1837, |
| "step": 649 |
| }, |
| { |
| "epoch": 3.436159683272847, |
| "grad_norm": 0.16418437492334867, |
| "learning_rate": 2.1511844658387478e-05, |
| "loss": 1.1825, |
| "step": 650 |
| }, |
| { |
| "epoch": 3.441438469152095, |
| "grad_norm": 0.16951121532514787, |
| "learning_rate": 2.1380870918922004e-05, |
| "loss": 1.1799, |
| "step": 651 |
| }, |
| { |
| "epoch": 3.4467172550313427, |
| "grad_norm": 0.1693028978568346, |
| "learning_rate": 2.1250151523026295e-05, |
| "loss": 1.1651, |
| "step": 652 |
| }, |
| { |
| "epoch": 3.4519960409105908, |
| "grad_norm": 0.16242147126253437, |
| "learning_rate": 2.1119688256371233e-05, |
| "loss": 1.183, |
| "step": 653 |
| }, |
| { |
| "epoch": 3.4572748267898383, |
| "grad_norm": 0.29611972498303013, |
| "learning_rate": 2.0989482901128998e-05, |
| "loss": 1.1888, |
| "step": 654 |
| }, |
| { |
| "epoch": 3.462553612669086, |
| "grad_norm": 0.16236945816797332, |
| "learning_rate": 2.0859537235948543e-05, |
| "loss": 1.1779, |
| "step": 655 |
| }, |
| { |
| "epoch": 3.467832398548334, |
| "grad_norm": 0.1586147334948237, |
| "learning_rate": 2.0729853035931386e-05, |
| "loss": 1.1779, |
| "step": 656 |
| }, |
| { |
| "epoch": 3.4731111844275815, |
| "grad_norm": 0.18364011968994423, |
| "learning_rate": 2.060043207260738e-05, |
| "loss": 1.1814, |
| "step": 657 |
| }, |
| { |
| "epoch": 3.4783899703068295, |
| "grad_norm": 0.18650200564424077, |
| "learning_rate": 2.0471276113910383e-05, |
| "loss": 1.1796, |
| "step": 658 |
| }, |
| { |
| "epoch": 3.483668756186077, |
| "grad_norm": 0.19826495336352215, |
| "learning_rate": 2.0342386924154313e-05, |
| "loss": 1.1842, |
| "step": 659 |
| }, |
| { |
| "epoch": 3.488947542065325, |
| "grad_norm": 0.17236160815821988, |
| "learning_rate": 2.0213766264008857e-05, |
| "loss": 1.1809, |
| "step": 660 |
| }, |
| { |
| "epoch": 3.4942263279445727, |
| "grad_norm": 0.23601297199302493, |
| "learning_rate": 2.008541589047551e-05, |
| "loss": 1.1898, |
| "step": 661 |
| }, |
| { |
| "epoch": 3.4995051138238207, |
| "grad_norm": 0.17149615755966166, |
| "learning_rate": 1.9957337556863604e-05, |
| "loss": 1.1809, |
| "step": 662 |
| }, |
| { |
| "epoch": 3.5047838997030683, |
| "grad_norm": 0.20446720126414816, |
| "learning_rate": 1.9829533012766268e-05, |
| "loss": 1.1849, |
| "step": 663 |
| }, |
| { |
| "epoch": 3.510062685582316, |
| "grad_norm": 0.1883725541194937, |
| "learning_rate": 1.9702004004036583e-05, |
| "loss": 1.1724, |
| "step": 664 |
| }, |
| { |
| "epoch": 3.515341471461564, |
| "grad_norm": 0.1616721588144235, |
| "learning_rate": 1.957475227276373e-05, |
| "loss": 1.1779, |
| "step": 665 |
| }, |
| { |
| "epoch": 3.520620257340812, |
| "grad_norm": 0.16929488246463403, |
| "learning_rate": 1.9447779557249183e-05, |
| "loss": 1.1854, |
| "step": 666 |
| }, |
| { |
| "epoch": 3.5258990432200594, |
| "grad_norm": 0.16292016568868656, |
| "learning_rate": 1.9321087591982987e-05, |
| "loss": 1.1861, |
| "step": 667 |
| }, |
| { |
| "epoch": 3.531177829099307, |
| "grad_norm": 0.17403946253846359, |
| "learning_rate": 1.9194678107620013e-05, |
| "loss": 1.1805, |
| "step": 668 |
| }, |
| { |
| "epoch": 3.536456614978555, |
| "grad_norm": 0.1694106848076632, |
| "learning_rate": 1.906855283095637e-05, |
| "loss": 1.1856, |
| "step": 669 |
| }, |
| { |
| "epoch": 3.5417354008578026, |
| "grad_norm": 0.1820904220829124, |
| "learning_rate": 1.8942713484905762e-05, |
| "loss": 1.1727, |
| "step": 670 |
| }, |
| { |
| "epoch": 3.5470141867370506, |
| "grad_norm": 0.16909152503020708, |
| "learning_rate": 1.8817161788476052e-05, |
| "loss": 1.1884, |
| "step": 671 |
| }, |
| { |
| "epoch": 3.552292972616298, |
| "grad_norm": 0.15357737056906967, |
| "learning_rate": 1.869189945674564e-05, |
| "loss": 1.1894, |
| "step": 672 |
| }, |
| { |
| "epoch": 3.557571758495546, |
| "grad_norm": 0.15273867858520462, |
| "learning_rate": 1.8566928200840128e-05, |
| "loss": 1.1723, |
| "step": 673 |
| }, |
| { |
| "epoch": 3.5628505443747938, |
| "grad_norm": 0.30895988367256033, |
| "learning_rate": 1.8442249727908973e-05, |
| "loss": 1.1982, |
| "step": 674 |
| }, |
| { |
| "epoch": 3.5681293302540418, |
| "grad_norm": 0.14637005365137004, |
| "learning_rate": 1.8317865741102025e-05, |
| "loss": 1.1769, |
| "step": 675 |
| }, |
| { |
| "epoch": 3.5734081161332893, |
| "grad_norm": 0.15688995507200096, |
| "learning_rate": 1.819377793954646e-05, |
| "loss": 1.1877, |
| "step": 676 |
| }, |
| { |
| "epoch": 3.578686902012537, |
| "grad_norm": 0.15514659519283094, |
| "learning_rate": 1.806998801832335e-05, |
| "loss": 1.1745, |
| "step": 677 |
| }, |
| { |
| "epoch": 3.583965687891785, |
| "grad_norm": 0.15960023980743218, |
| "learning_rate": 1.7946497668444717e-05, |
| "loss": 1.1918, |
| "step": 678 |
| }, |
| { |
| "epoch": 3.5892444737710325, |
| "grad_norm": 0.16074105620670257, |
| "learning_rate": 1.7823308576830326e-05, |
| "loss": 1.1827, |
| "step": 679 |
| }, |
| { |
| "epoch": 3.5945232596502805, |
| "grad_norm": 0.17341000855364072, |
| "learning_rate": 1.770042242628458e-05, |
| "loss": 1.1928, |
| "step": 680 |
| }, |
| { |
| "epoch": 3.599802045529528, |
| "grad_norm": 0.19117031283641508, |
| "learning_rate": 1.7577840895473687e-05, |
| "loss": 1.1847, |
| "step": 681 |
| }, |
| { |
| "epoch": 3.605080831408776, |
| "grad_norm": 0.14631791720740261, |
| "learning_rate": 1.7455565658902603e-05, |
| "loss": 1.1919, |
| "step": 682 |
| }, |
| { |
| "epoch": 3.6103596172880237, |
| "grad_norm": 0.19178297155978408, |
| "learning_rate": 1.733359838689222e-05, |
| "loss": 1.1764, |
| "step": 683 |
| }, |
| { |
| "epoch": 3.6156384031672717, |
| "grad_norm": 0.1736295590236363, |
| "learning_rate": 1.7211940745556496e-05, |
| "loss": 1.1768, |
| "step": 684 |
| }, |
| { |
| "epoch": 3.6209171890465193, |
| "grad_norm": 0.1841012098697114, |
| "learning_rate": 1.7090594396779793e-05, |
| "loss": 1.1762, |
| "step": 685 |
| }, |
| { |
| "epoch": 3.626195974925767, |
| "grad_norm": 0.17136791598501644, |
| "learning_rate": 1.6969560998194062e-05, |
| "loss": 1.1852, |
| "step": 686 |
| }, |
| { |
| "epoch": 3.631474760805015, |
| "grad_norm": 0.17325004058389215, |
| "learning_rate": 1.6848842203156267e-05, |
| "loss": 1.1794, |
| "step": 687 |
| }, |
| { |
| "epoch": 3.636753546684263, |
| "grad_norm": 0.19438978043428873, |
| "learning_rate": 1.6728439660725768e-05, |
| "loss": 1.1836, |
| "step": 688 |
| }, |
| { |
| "epoch": 3.6420323325635104, |
| "grad_norm": 0.15890915637153022, |
| "learning_rate": 1.6608355015641807e-05, |
| "loss": 1.1777, |
| "step": 689 |
| }, |
| { |
| "epoch": 3.647311118442758, |
| "grad_norm": 0.23571007165732896, |
| "learning_rate": 1.648858990830108e-05, |
| "loss": 1.1803, |
| "step": 690 |
| }, |
| { |
| "epoch": 3.652589904322006, |
| "grad_norm": 0.18886233467603014, |
| "learning_rate": 1.636914597473525e-05, |
| "loss": 1.1921, |
| "step": 691 |
| }, |
| { |
| "epoch": 3.6578686902012536, |
| "grad_norm": 0.15020457052587974, |
| "learning_rate": 1.6250024846588632e-05, |
| "loss": 1.1768, |
| "step": 692 |
| }, |
| { |
| "epoch": 3.6631474760805016, |
| "grad_norm": 0.19794031747921945, |
| "learning_rate": 1.6131228151095976e-05, |
| "loss": 1.1975, |
| "step": 693 |
| }, |
| { |
| "epoch": 3.668426261959749, |
| "grad_norm": 0.17700601859911672, |
| "learning_rate": 1.6012757511060062e-05, |
| "loss": 1.1746, |
| "step": 694 |
| }, |
| { |
| "epoch": 3.6737050478389968, |
| "grad_norm": 0.15666164343925118, |
| "learning_rate": 1.589461454482975e-05, |
| "loss": 1.1679, |
| "step": 695 |
| }, |
| { |
| "epoch": 3.678983833718245, |
| "grad_norm": 0.1799147716866367, |
| "learning_rate": 1.577680086627771e-05, |
| "loss": 1.1829, |
| "step": 696 |
| }, |
| { |
| "epoch": 3.684262619597493, |
| "grad_norm": 0.15622510538635312, |
| "learning_rate": 1.5659318084778427e-05, |
| "loss": 1.1712, |
| "step": 697 |
| }, |
| { |
| "epoch": 3.6895414054767404, |
| "grad_norm": 0.1369739991909485, |
| "learning_rate": 1.5542167805186262e-05, |
| "loss": 1.1855, |
| "step": 698 |
| }, |
| { |
| "epoch": 3.694820191355988, |
| "grad_norm": 0.16493354365350582, |
| "learning_rate": 1.542535162781345e-05, |
| "loss": 1.1756, |
| "step": 699 |
| }, |
| { |
| "epoch": 3.700098977235236, |
| "grad_norm": 0.16258714311932046, |
| "learning_rate": 1.530887114840829e-05, |
| "loss": 1.1836, |
| "step": 700 |
| }, |
| { |
| "epoch": 3.7053777631144835, |
| "grad_norm": 0.14824689112607964, |
| "learning_rate": 1.5192727958133336e-05, |
| "loss": 1.1858, |
| "step": 701 |
| }, |
| { |
| "epoch": 3.7106565489937315, |
| "grad_norm": 0.17024560483432752, |
| "learning_rate": 1.5076923643543637e-05, |
| "loss": 1.191, |
| "step": 702 |
| }, |
| { |
| "epoch": 3.715935334872979, |
| "grad_norm": 0.2937433238239512, |
| "learning_rate": 1.4961459786565136e-05, |
| "loss": 1.2079, |
| "step": 703 |
| }, |
| { |
| "epoch": 3.721214120752227, |
| "grad_norm": 0.14683060389697267, |
| "learning_rate": 1.4846337964472973e-05, |
| "loss": 1.1549, |
| "step": 704 |
| }, |
| { |
| "epoch": 3.7264929066314747, |
| "grad_norm": 0.1736075539679898, |
| "learning_rate": 1.4731559749869973e-05, |
| "loss": 1.1772, |
| "step": 705 |
| }, |
| { |
| "epoch": 3.7317716925107227, |
| "grad_norm": 0.15781844683554935, |
| "learning_rate": 1.4617126710665166e-05, |
| "loss": 1.1967, |
| "step": 706 |
| }, |
| { |
| "epoch": 3.7370504783899703, |
| "grad_norm": 0.18024181822406263, |
| "learning_rate": 1.4503040410052412e-05, |
| "loss": 1.1763, |
| "step": 707 |
| }, |
| { |
| "epoch": 3.742329264269218, |
| "grad_norm": 0.18126802231132821, |
| "learning_rate": 1.438930240648896e-05, |
| "loss": 1.1799, |
| "step": 708 |
| }, |
| { |
| "epoch": 3.747608050148466, |
| "grad_norm": 1.030705083825917, |
| "learning_rate": 1.4275914253674206e-05, |
| "loss": 1.1889, |
| "step": 709 |
| }, |
| { |
| "epoch": 3.752886836027714, |
| "grad_norm": 0.16620977435780646, |
| "learning_rate": 1.4162877500528516e-05, |
| "loss": 1.1748, |
| "step": 710 |
| }, |
| { |
| "epoch": 3.7581656219069615, |
| "grad_norm": 0.2798778927760511, |
| "learning_rate": 1.4050193691171931e-05, |
| "loss": 1.1849, |
| "step": 711 |
| }, |
| { |
| "epoch": 3.763444407786209, |
| "grad_norm": 0.14293138521061613, |
| "learning_rate": 1.3937864364903253e-05, |
| "loss": 1.1765, |
| "step": 712 |
| }, |
| { |
| "epoch": 3.768723193665457, |
| "grad_norm": 0.16417271654966648, |
| "learning_rate": 1.3825891056178874e-05, |
| "loss": 1.1722, |
| "step": 713 |
| }, |
| { |
| "epoch": 3.7740019795447046, |
| "grad_norm": 0.1311111754910553, |
| "learning_rate": 1.3714275294591852e-05, |
| "loss": 1.1888, |
| "step": 714 |
| }, |
| { |
| "epoch": 3.7792807654239526, |
| "grad_norm": 0.14306901078476733, |
| "learning_rate": 1.3603018604851106e-05, |
| "loss": 1.1645, |
| "step": 715 |
| }, |
| { |
| "epoch": 3.7845595513032, |
| "grad_norm": 0.15620863897436546, |
| "learning_rate": 1.349212250676041e-05, |
| "loss": 1.1753, |
| "step": 716 |
| }, |
| { |
| "epoch": 3.789838337182448, |
| "grad_norm": 0.14152193009692798, |
| "learning_rate": 1.3381588515197818e-05, |
| "loss": 1.1826, |
| "step": 717 |
| }, |
| { |
| "epoch": 3.795117123061696, |
| "grad_norm": 0.19538067948218324, |
| "learning_rate": 1.3271418140094854e-05, |
| "loss": 1.1796, |
| "step": 718 |
| }, |
| { |
| "epoch": 3.800395908940944, |
| "grad_norm": 0.1442409305088691, |
| "learning_rate": 1.3161612886415904e-05, |
| "loss": 1.1869, |
| "step": 719 |
| }, |
| { |
| "epoch": 3.8056746948201914, |
| "grad_norm": 0.13512286814960517, |
| "learning_rate": 1.3052174254137713e-05, |
| "loss": 1.1915, |
| "step": 720 |
| }, |
| { |
| "epoch": 3.810953480699439, |
| "grad_norm": 0.14311317112625468, |
| "learning_rate": 1.2943103738228815e-05, |
| "loss": 1.1924, |
| "step": 721 |
| }, |
| { |
| "epoch": 3.816232266578687, |
| "grad_norm": 0.1307906860768968, |
| "learning_rate": 1.2834402828629177e-05, |
| "loss": 1.1834, |
| "step": 722 |
| }, |
| { |
| "epoch": 3.8215110524579345, |
| "grad_norm": 0.13454071601034975, |
| "learning_rate": 1.2726073010229798e-05, |
| "loss": 1.1678, |
| "step": 723 |
| }, |
| { |
| "epoch": 3.8267898383371826, |
| "grad_norm": 0.14555974766872148, |
| "learning_rate": 1.2618115762852451e-05, |
| "loss": 1.1943, |
| "step": 724 |
| }, |
| { |
| "epoch": 3.83206862421643, |
| "grad_norm": 0.18463072722535032, |
| "learning_rate": 1.2510532561229493e-05, |
| "loss": 1.1805, |
| "step": 725 |
| }, |
| { |
| "epoch": 3.837347410095678, |
| "grad_norm": 0.17705683325025137, |
| "learning_rate": 1.2403324874983653e-05, |
| "loss": 1.1795, |
| "step": 726 |
| }, |
| { |
| "epoch": 3.8426261959749257, |
| "grad_norm": 0.14977271526880837, |
| "learning_rate": 1.2296494168608004e-05, |
| "loss": 1.1861, |
| "step": 727 |
| }, |
| { |
| "epoch": 3.8479049818541737, |
| "grad_norm": 0.17767233652373313, |
| "learning_rate": 1.2190041901445948e-05, |
| "loss": 1.1804, |
| "step": 728 |
| }, |
| { |
| "epoch": 3.8531837677334213, |
| "grad_norm": 0.14698313922467643, |
| "learning_rate": 1.2083969527671294e-05, |
| "loss": 1.1863, |
| "step": 729 |
| }, |
| { |
| "epoch": 3.858462553612669, |
| "grad_norm": 0.15375271230113852, |
| "learning_rate": 1.1978278496268362e-05, |
| "loss": 1.1936, |
| "step": 730 |
| }, |
| { |
| "epoch": 3.863741339491917, |
| "grad_norm": 0.15069392504511053, |
| "learning_rate": 1.1872970251012204e-05, |
| "loss": 1.1794, |
| "step": 731 |
| }, |
| { |
| "epoch": 3.869020125371165, |
| "grad_norm": 0.15663922512201614, |
| "learning_rate": 1.1768046230448924e-05, |
| "loss": 1.1761, |
| "step": 732 |
| }, |
| { |
| "epoch": 3.8742989112504125, |
| "grad_norm": 0.1398727690424049, |
| "learning_rate": 1.1663507867875911e-05, |
| "loss": 1.1925, |
| "step": 733 |
| }, |
| { |
| "epoch": 3.87957769712966, |
| "grad_norm": 0.129891178182677, |
| "learning_rate": 1.1559356591322426e-05, |
| "loss": 1.1689, |
| "step": 734 |
| }, |
| { |
| "epoch": 3.884856483008908, |
| "grad_norm": 0.16126609799676608, |
| "learning_rate": 1.145559382352991e-05, |
| "loss": 1.1663, |
| "step": 735 |
| }, |
| { |
| "epoch": 3.8901352688881556, |
| "grad_norm": 0.72353456639229, |
| "learning_rate": 1.1352220981932738e-05, |
| "loss": 1.1898, |
| "step": 736 |
| }, |
| { |
| "epoch": 3.8954140547674037, |
| "grad_norm": 0.15606017767964075, |
| "learning_rate": 1.12492394786387e-05, |
| "loss": 1.1899, |
| "step": 737 |
| }, |
| { |
| "epoch": 3.9006928406466512, |
| "grad_norm": 0.1416594211440069, |
| "learning_rate": 1.1146650720409781e-05, |
| "loss": 1.1853, |
| "step": 738 |
| }, |
| { |
| "epoch": 3.905971626525899, |
| "grad_norm": 0.13915107172987345, |
| "learning_rate": 1.1044456108642967e-05, |
| "loss": 1.1917, |
| "step": 739 |
| }, |
| { |
| "epoch": 3.911250412405147, |
| "grad_norm": 0.12781574255133024, |
| "learning_rate": 1.0942657039351042e-05, |
| "loss": 1.1725, |
| "step": 740 |
| }, |
| { |
| "epoch": 3.916529198284395, |
| "grad_norm": 0.14766652542576716, |
| "learning_rate": 1.0841254903143547e-05, |
| "loss": 1.1967, |
| "step": 741 |
| }, |
| { |
| "epoch": 3.9218079841636424, |
| "grad_norm": 0.26120047464098534, |
| "learning_rate": 1.0740251085207785e-05, |
| "loss": 1.1785, |
| "step": 742 |
| }, |
| { |
| "epoch": 3.92708677004289, |
| "grad_norm": 0.14335573482650907, |
| "learning_rate": 1.0639646965289927e-05, |
| "loss": 1.1817, |
| "step": 743 |
| }, |
| { |
| "epoch": 3.932365555922138, |
| "grad_norm": 0.14772540528742029, |
| "learning_rate": 1.0539443917676092e-05, |
| "loss": 1.1754, |
| "step": 744 |
| }, |
| { |
| "epoch": 3.9376443418013856, |
| "grad_norm": 0.1424170141242611, |
| "learning_rate": 1.0439643311173642e-05, |
| "loss": 1.173, |
| "step": 745 |
| }, |
| { |
| "epoch": 3.9429231276806336, |
| "grad_norm": 0.7072515085588025, |
| "learning_rate": 1.0340246509092448e-05, |
| "loss": 1.21, |
| "step": 746 |
| }, |
| { |
| "epoch": 3.948201913559881, |
| "grad_norm": 0.18752526913379627, |
| "learning_rate": 1.024125486922627e-05, |
| "loss": 1.1796, |
| "step": 747 |
| }, |
| { |
| "epoch": 3.953480699439129, |
| "grad_norm": 0.14084279259329482, |
| "learning_rate": 1.0142669743834243e-05, |
| "loss": 1.1747, |
| "step": 748 |
| }, |
| { |
| "epoch": 3.9587594853183767, |
| "grad_norm": 0.18116846121283872, |
| "learning_rate": 1.0044492479622359e-05, |
| "loss": 1.1626, |
| "step": 749 |
| }, |
| { |
| "epoch": 3.9640382711976248, |
| "grad_norm": 0.1362638764334788, |
| "learning_rate": 9.946724417725067e-06, |
| "loss": 1.1958, |
| "step": 750 |
| }, |
| { |
| "epoch": 3.9693170570768723, |
| "grad_norm": 0.14138256699607013, |
| "learning_rate": 9.849366893687034e-06, |
| "loss": 1.1644, |
| "step": 751 |
| }, |
| { |
| "epoch": 3.97459584295612, |
| "grad_norm": 0.12221201901085602, |
| "learning_rate": 9.752421237444768e-06, |
| "loss": 1.174, |
| "step": 752 |
| }, |
| { |
| "epoch": 3.979874628835368, |
| "grad_norm": 0.1420340161710947, |
| "learning_rate": 9.655888773308586e-06, |
| "loss": 1.1703, |
| "step": 753 |
| }, |
| { |
| "epoch": 3.9851534147146155, |
| "grad_norm": 0.13068888106532828, |
| "learning_rate": 9.559770819944428e-06, |
| "loss": 1.1772, |
| "step": 754 |
| }, |
| { |
| "epoch": 3.9904322005938635, |
| "grad_norm": 0.15702714311609547, |
| "learning_rate": 9.464068690355881e-06, |
| "loss": 1.1776, |
| "step": 755 |
| }, |
| { |
| "epoch": 3.995710986473111, |
| "grad_norm": 0.1322693957523336, |
| "learning_rate": 9.368783691866272e-06, |
| "loss": 1.1811, |
| "step": 756 |
| }, |
| { |
| "epoch": 4.002639392939624, |
| "grad_norm": 0.32862922272344564, |
| "learning_rate": 9.273917126100706e-06, |
| "loss": 2.2105, |
| "step": 757 |
| }, |
| { |
| "epoch": 4.007918178818872, |
| "grad_norm": 0.15972455045077913, |
| "learning_rate": 9.179470288968435e-06, |
| "loss": 1.1521, |
| "step": 758 |
| }, |
| { |
| "epoch": 4.013196964698119, |
| "grad_norm": 0.16085099316633536, |
| "learning_rate": 9.085444470645033e-06, |
| "loss": 1.1649, |
| "step": 759 |
| }, |
| { |
| "epoch": 4.018475750577367, |
| "grad_norm": 0.15307676096371256, |
| "learning_rate": 8.991840955554805e-06, |
| "loss": 1.1606, |
| "step": 760 |
| }, |
| { |
| "epoch": 4.023754536456615, |
| "grad_norm": 0.1584835111545146, |
| "learning_rate": 8.898661022353301e-06, |
| "loss": 1.179, |
| "step": 761 |
| }, |
| { |
| "epoch": 4.029033322335863, |
| "grad_norm": 0.1490011216344127, |
| "learning_rate": 8.805905943909754e-06, |
| "loss": 1.1734, |
| "step": 762 |
| }, |
| { |
| "epoch": 4.0343121082151105, |
| "grad_norm": 0.171667967945809, |
| "learning_rate": 8.713576987289753e-06, |
| "loss": 1.149, |
| "step": 763 |
| }, |
| { |
| "epoch": 4.039590894094358, |
| "grad_norm": 0.14542174260694177, |
| "learning_rate": 8.621675413737911e-06, |
| "loss": 1.1773, |
| "step": 764 |
| }, |
| { |
| "epoch": 4.044869679973606, |
| "grad_norm": 0.14302481831921374, |
| "learning_rate": 8.530202478660672e-06, |
| "loss": 1.1694, |
| "step": 765 |
| }, |
| { |
| "epoch": 4.050148465852854, |
| "grad_norm": 0.15754932582955972, |
| "learning_rate": 8.439159431609108e-06, |
| "loss": 1.1604, |
| "step": 766 |
| }, |
| { |
| "epoch": 4.055427251732102, |
| "grad_norm": 0.1428839940000341, |
| "learning_rate": 8.34854751626188e-06, |
| "loss": 1.1603, |
| "step": 767 |
| }, |
| { |
| "epoch": 4.060706037611349, |
| "grad_norm": 0.145295522617084, |
| "learning_rate": 8.258367970408248e-06, |
| "loss": 1.1655, |
| "step": 768 |
| }, |
| { |
| "epoch": 4.065984823490597, |
| "grad_norm": 0.6586015632358717, |
| "learning_rate": 8.168622025931152e-06, |
| "loss": 1.1775, |
| "step": 769 |
| }, |
| { |
| "epoch": 4.071263609369845, |
| "grad_norm": 0.13235823309712424, |
| "learning_rate": 8.07931090879042e-06, |
| "loss": 1.1821, |
| "step": 770 |
| }, |
| { |
| "epoch": 4.076542395249093, |
| "grad_norm": 0.15766183056424551, |
| "learning_rate": 7.990435839005961e-06, |
| "loss": 1.1501, |
| "step": 771 |
| }, |
| { |
| "epoch": 4.0818211811283405, |
| "grad_norm": 0.16128467997458518, |
| "learning_rate": 7.901998030641125e-06, |
| "loss": 1.1647, |
| "step": 772 |
| }, |
| { |
| "epoch": 4.087099967007588, |
| "grad_norm": 0.175684873680538, |
| "learning_rate": 7.813998691786172e-06, |
| "loss": 1.1526, |
| "step": 773 |
| }, |
| { |
| "epoch": 4.092378752886836, |
| "grad_norm": 0.1483548124292395, |
| "learning_rate": 7.726439024541647e-06, |
| "loss": 1.1565, |
| "step": 774 |
| }, |
| { |
| "epoch": 4.097657538766084, |
| "grad_norm": 0.14495321888236465, |
| "learning_rate": 7.639320225002106e-06, |
| "loss": 1.1671, |
| "step": 775 |
| }, |
| { |
| "epoch": 4.102936324645332, |
| "grad_norm": 0.15968741317488708, |
| "learning_rate": 7.552643483239661e-06, |
| "loss": 1.1731, |
| "step": 776 |
| }, |
| { |
| "epoch": 4.108215110524579, |
| "grad_norm": 0.1711293844095317, |
| "learning_rate": 7.466409983287763e-06, |
| "loss": 1.1862, |
| "step": 777 |
| }, |
| { |
| "epoch": 4.113493896403827, |
| "grad_norm": 0.15163396329916032, |
| "learning_rate": 7.380620903125071e-06, |
| "loss": 1.1497, |
| "step": 778 |
| }, |
| { |
| "epoch": 4.118772682283075, |
| "grad_norm": 0.137743428037284, |
| "learning_rate": 7.295277414659279e-06, |
| "loss": 1.1513, |
| "step": 779 |
| }, |
| { |
| "epoch": 4.124051468162323, |
| "grad_norm": 0.1703498274940864, |
| "learning_rate": 7.21038068371116e-06, |
| "loss": 1.1868, |
| "step": 780 |
| }, |
| { |
| "epoch": 4.12933025404157, |
| "grad_norm": 0.14516466392073746, |
| "learning_rate": 7.125931869998637e-06, |
| "loss": 1.1722, |
| "step": 781 |
| }, |
| { |
| "epoch": 4.134609039920818, |
| "grad_norm": 0.8612828120069205, |
| "learning_rate": 7.041932127120916e-06, |
| "loss": 1.1966, |
| "step": 782 |
| }, |
| { |
| "epoch": 4.139887825800066, |
| "grad_norm": 0.16416849793012017, |
| "learning_rate": 6.958382602542775e-06, |
| "loss": 1.1575, |
| "step": 783 |
| }, |
| { |
| "epoch": 4.145166611679314, |
| "grad_norm": 0.13648389237144803, |
| "learning_rate": 6.875284437578829e-06, |
| "loss": 1.1652, |
| "step": 784 |
| }, |
| { |
| "epoch": 4.150445397558562, |
| "grad_norm": 0.12915478715404072, |
| "learning_rate": 6.792638767377981e-06, |
| "loss": 1.1623, |
| "step": 785 |
| }, |
| { |
| "epoch": 4.155724183437809, |
| "grad_norm": 0.1271714835707355, |
| "learning_rate": 6.710446720907886e-06, |
| "loss": 1.1556, |
| "step": 786 |
| }, |
| { |
| "epoch": 4.161002969317057, |
| "grad_norm": 0.1374695700589436, |
| "learning_rate": 6.6287094209396005e-06, |
| "loss": 1.1703, |
| "step": 787 |
| }, |
| { |
| "epoch": 4.166281755196305, |
| "grad_norm": 0.13643108166742873, |
| "learning_rate": 6.547427984032104e-06, |
| "loss": 1.175, |
| "step": 788 |
| }, |
| { |
| "epoch": 4.171560541075553, |
| "grad_norm": 0.12993636667214994, |
| "learning_rate": 6.466603520517205e-06, |
| "loss": 1.186, |
| "step": 789 |
| }, |
| { |
| "epoch": 4.1768393269548, |
| "grad_norm": 0.1241730758098539, |
| "learning_rate": 6.386237134484296e-06, |
| "loss": 1.1608, |
| "step": 790 |
| }, |
| { |
| "epoch": 4.182118112834048, |
| "grad_norm": 0.12139659159425875, |
| "learning_rate": 6.306329923765222e-06, |
| "loss": 1.1656, |
| "step": 791 |
| }, |
| { |
| "epoch": 4.187396898713296, |
| "grad_norm": 0.138372121781133, |
| "learning_rate": 6.226882979919407e-06, |
| "loss": 1.1464, |
| "step": 792 |
| }, |
| { |
| "epoch": 4.192675684592544, |
| "grad_norm": 0.12939648961836608, |
| "learning_rate": 6.147897388218811e-06, |
| "loss": 1.159, |
| "step": 793 |
| }, |
| { |
| "epoch": 4.1979544704717915, |
| "grad_norm": 0.1230027109136608, |
| "learning_rate": 6.0693742276332245e-06, |
| "loss": 1.1541, |
| "step": 794 |
| }, |
| { |
| "epoch": 4.203233256351039, |
| "grad_norm": 0.14916519840606735, |
| "learning_rate": 5.991314570815441e-06, |
| "loss": 1.1772, |
| "step": 795 |
| }, |
| { |
| "epoch": 4.208512042230287, |
| "grad_norm": 0.1292233287870313, |
| "learning_rate": 5.913719484086638e-06, |
| "loss": 1.1546, |
| "step": 796 |
| }, |
| { |
| "epoch": 4.213790828109535, |
| "grad_norm": 0.12382165284530203, |
| "learning_rate": 5.836590027421829e-06, |
| "loss": 1.1461, |
| "step": 797 |
| }, |
| { |
| "epoch": 4.219069613988783, |
| "grad_norm": 0.12426017833944712, |
| "learning_rate": 5.759927254435345e-06, |
| "loss": 1.1575, |
| "step": 798 |
| }, |
| { |
| "epoch": 4.22434839986803, |
| "grad_norm": 0.12511292326231407, |
| "learning_rate": 5.683732212366466e-06, |
| "loss": 1.1667, |
| "step": 799 |
| }, |
| { |
| "epoch": 4.229627185747278, |
| "grad_norm": 0.1225763097437516, |
| "learning_rate": 5.608005942065102e-06, |
| "loss": 1.1627, |
| "step": 800 |
| }, |
| { |
| "epoch": 4.234905971626526, |
| "grad_norm": 0.12366047962888504, |
| "learning_rate": 5.532749477977613e-06, |
| "loss": 1.1704, |
| "step": 801 |
| }, |
| { |
| "epoch": 4.240184757505774, |
| "grad_norm": 0.11826441744257116, |
| "learning_rate": 5.45796384813261e-06, |
| "loss": 1.1778, |
| "step": 802 |
| }, |
| { |
| "epoch": 4.245463543385021, |
| "grad_norm": 0.12376814311025498, |
| "learning_rate": 5.383650074126973e-06, |
| "loss": 1.1523, |
| "step": 803 |
| }, |
| { |
| "epoch": 4.250742329264269, |
| "grad_norm": 0.12958820147684294, |
| "learning_rate": 5.309809171111866e-06, |
| "loss": 1.1479, |
| "step": 804 |
| }, |
| { |
| "epoch": 4.2560211151435166, |
| "grad_norm": 0.13474325077163982, |
| "learning_rate": 5.236442147778866e-06, |
| "loss": 1.1672, |
| "step": 805 |
| }, |
| { |
| "epoch": 4.261299901022765, |
| "grad_norm": 0.11930301746988618, |
| "learning_rate": 5.163550006346225e-06, |
| "loss": 1.1766, |
| "step": 806 |
| }, |
| { |
| "epoch": 4.266578686902013, |
| "grad_norm": 0.13565747630020547, |
| "learning_rate": 5.0911337425451115e-06, |
| "loss": 1.1669, |
| "step": 807 |
| }, |
| { |
| "epoch": 4.27185747278126, |
| "grad_norm": 0.14084681033309193, |
| "learning_rate": 5.0191943456060574e-06, |
| "loss": 1.1572, |
| "step": 808 |
| }, |
| { |
| "epoch": 4.277136258660508, |
| "grad_norm": 0.11838154220172313, |
| "learning_rate": 4.947732798245466e-06, |
| "loss": 1.1527, |
| "step": 809 |
| }, |
| { |
| "epoch": 4.282415044539756, |
| "grad_norm": 0.11759251011433089, |
| "learning_rate": 4.8767500766520834e-06, |
| "loss": 1.1658, |
| "step": 810 |
| }, |
| { |
| "epoch": 4.287693830419004, |
| "grad_norm": 0.1083263135518414, |
| "learning_rate": 4.806247150473811e-06, |
| "loss": 1.1576, |
| "step": 811 |
| }, |
| { |
| "epoch": 4.292972616298251, |
| "grad_norm": 0.12075133264162158, |
| "learning_rate": 4.736224982804331e-06, |
| "loss": 1.1649, |
| "step": 812 |
| }, |
| { |
| "epoch": 4.298251402177499, |
| "grad_norm": 0.12348266353073907, |
| "learning_rate": 4.66668453017002e-06, |
| "loss": 1.1523, |
| "step": 813 |
| }, |
| { |
| "epoch": 4.303530188056747, |
| "grad_norm": 0.1259621809705678, |
| "learning_rate": 4.597626742516892e-06, |
| "loss": 1.1555, |
| "step": 814 |
| }, |
| { |
| "epoch": 4.308808973935995, |
| "grad_norm": 0.1162939281963056, |
| "learning_rate": 4.529052563197524e-06, |
| "loss": 1.1657, |
| "step": 815 |
| }, |
| { |
| "epoch": 4.3140877598152425, |
| "grad_norm": 0.12082024531451342, |
| "learning_rate": 4.460962928958323e-06, |
| "loss": 1.1697, |
| "step": 816 |
| }, |
| { |
| "epoch": 4.31936654569449, |
| "grad_norm": 0.1143945972992921, |
| "learning_rate": 4.393358769926592e-06, |
| "loss": 1.1664, |
| "step": 817 |
| }, |
| { |
| "epoch": 4.324645331573738, |
| "grad_norm": 0.1184039155349698, |
| "learning_rate": 4.326241009597891e-06, |
| "loss": 1.1607, |
| "step": 818 |
| }, |
| { |
| "epoch": 4.329924117452986, |
| "grad_norm": 0.11885449737609986, |
| "learning_rate": 4.259610564823433e-06, |
| "loss": 1.1711, |
| "step": 819 |
| }, |
| { |
| "epoch": 4.335202903332234, |
| "grad_norm": 0.2993015113097636, |
| "learning_rate": 4.193468345797511e-06, |
| "loss": 1.1736, |
| "step": 820 |
| }, |
| { |
| "epoch": 4.340481689211481, |
| "grad_norm": 0.11015741307567276, |
| "learning_rate": 4.127815256045091e-06, |
| "loss": 1.1809, |
| "step": 821 |
| }, |
| { |
| "epoch": 4.345760475090729, |
| "grad_norm": 0.11568779303705128, |
| "learning_rate": 4.06265219240948e-06, |
| "loss": 1.1686, |
| "step": 822 |
| }, |
| { |
| "epoch": 4.351039260969977, |
| "grad_norm": 0.12406719177604637, |
| "learning_rate": 3.997980045040062e-06, |
| "loss": 1.1626, |
| "step": 823 |
| }, |
| { |
| "epoch": 4.356318046849225, |
| "grad_norm": 0.11523454788820603, |
| "learning_rate": 3.933799697380134e-06, |
| "loss": 1.1691, |
| "step": 824 |
| }, |
| { |
| "epoch": 4.361596832728472, |
| "grad_norm": 0.11048268846604357, |
| "learning_rate": 3.870112026154847e-06, |
| "loss": 1.169, |
| "step": 825 |
| }, |
| { |
| "epoch": 4.36687561860772, |
| "grad_norm": 0.11346757591013902, |
| "learning_rate": 3.806917901359226e-06, |
| "loss": 1.1694, |
| "step": 826 |
| }, |
| { |
| "epoch": 4.3721544044869685, |
| "grad_norm": 0.13471289129675465, |
| "learning_rate": 3.7442181862462666e-06, |
| "loss": 1.1543, |
| "step": 827 |
| }, |
| { |
| "epoch": 4.377433190366216, |
| "grad_norm": 0.299919959697272, |
| "learning_rate": 3.6820137373152087e-06, |
| "loss": 1.1736, |
| "step": 828 |
| }, |
| { |
| "epoch": 4.382711976245464, |
| "grad_norm": 0.11380219584729494, |
| "learning_rate": 3.620305404299744e-06, |
| "loss": 1.1778, |
| "step": 829 |
| }, |
| { |
| "epoch": 4.387990762124711, |
| "grad_norm": 0.11818765407385347, |
| "learning_rate": 3.5590940301564623e-06, |
| "loss": 1.1567, |
| "step": 830 |
| }, |
| { |
| "epoch": 4.393269548003959, |
| "grad_norm": 0.11642391366774718, |
| "learning_rate": 3.498380451053365e-06, |
| "loss": 1.1599, |
| "step": 831 |
| }, |
| { |
| "epoch": 4.398548333883207, |
| "grad_norm": 0.11490246362522913, |
| "learning_rate": 3.4381654963583413e-06, |
| "loss": 1.168, |
| "step": 832 |
| }, |
| { |
| "epoch": 4.403827119762455, |
| "grad_norm": 0.13726613621953002, |
| "learning_rate": 3.3784499886279565e-06, |
| "loss": 1.1477, |
| "step": 833 |
| }, |
| { |
| "epoch": 4.409105905641702, |
| "grad_norm": 0.11259159480179101, |
| "learning_rate": 3.3192347435961493e-06, |
| "loss": 1.1704, |
| "step": 834 |
| }, |
| { |
| "epoch": 4.41438469152095, |
| "grad_norm": 0.11867399217960718, |
| "learning_rate": 3.2605205701630795e-06, |
| "loss": 1.1642, |
| "step": 835 |
| }, |
| { |
| "epoch": 4.4196634774001975, |
| "grad_norm": 0.11376592541343825, |
| "learning_rate": 3.202308270384138e-06, |
| "loss": 1.1593, |
| "step": 836 |
| }, |
| { |
| "epoch": 4.424942263279446, |
| "grad_norm": 0.11774644117599645, |
| "learning_rate": 3.144598639458911e-06, |
| "loss": 1.1549, |
| "step": 837 |
| }, |
| { |
| "epoch": 4.4302210491586935, |
| "grad_norm": 0.11501940477051867, |
| "learning_rate": 3.0873924657203934e-06, |
| "loss": 1.1724, |
| "step": 838 |
| }, |
| { |
| "epoch": 4.435499835037941, |
| "grad_norm": 0.12802614276387342, |
| "learning_rate": 3.0306905306241695e-06, |
| "loss": 1.1875, |
| "step": 839 |
| }, |
| { |
| "epoch": 4.440778620917189, |
| "grad_norm": 0.11853661247701457, |
| "learning_rate": 2.974493608737752e-06, |
| "loss": 1.1564, |
| "step": 840 |
| }, |
| { |
| "epoch": 4.446057406796437, |
| "grad_norm": 0.11905841171224281, |
| "learning_rate": 2.9188024677300065e-06, |
| "loss": 1.1693, |
| "step": 841 |
| }, |
| { |
| "epoch": 4.451336192675685, |
| "grad_norm": 0.10964650477920777, |
| "learning_rate": 2.863617868360673e-06, |
| "loss": 1.1605, |
| "step": 842 |
| }, |
| { |
| "epoch": 4.456614978554932, |
| "grad_norm": 0.1295527964301152, |
| "learning_rate": 2.8089405644699463e-06, |
| "loss": 1.1702, |
| "step": 843 |
| }, |
| { |
| "epoch": 4.46189376443418, |
| "grad_norm": 0.12355557618863641, |
| "learning_rate": 2.754771302968191e-06, |
| "loss": 1.1712, |
| "step": 844 |
| }, |
| { |
| "epoch": 4.467172550313428, |
| "grad_norm": 0.11165463039337062, |
| "learning_rate": 2.7011108238257723e-06, |
| "loss": 1.1838, |
| "step": 845 |
| }, |
| { |
| "epoch": 4.472451336192676, |
| "grad_norm": 0.11075858795536124, |
| "learning_rate": 2.647959860062872e-06, |
| "loss": 1.1611, |
| "step": 846 |
| }, |
| { |
| "epoch": 4.4777301220719234, |
| "grad_norm": 0.12178769651136817, |
| "learning_rate": 2.5953191377395557e-06, |
| "loss": 1.164, |
| "step": 847 |
| }, |
| { |
| "epoch": 4.483008907951171, |
| "grad_norm": 0.13608459292258282, |
| "learning_rate": 2.5431893759458027e-06, |
| "loss": 1.1749, |
| "step": 848 |
| }, |
| { |
| "epoch": 4.488287693830419, |
| "grad_norm": 0.11221373624505879, |
| "learning_rate": 2.491571286791703e-06, |
| "loss": 1.1545, |
| "step": 849 |
| }, |
| { |
| "epoch": 4.493566479709667, |
| "grad_norm": 0.10508342975727109, |
| "learning_rate": 2.4404655753977437e-06, |
| "loss": 1.1671, |
| "step": 850 |
| }, |
| { |
| "epoch": 4.498845265588915, |
| "grad_norm": 0.10995666327973941, |
| "learning_rate": 2.3898729398851164e-06, |
| "loss": 1.1576, |
| "step": 851 |
| }, |
| { |
| "epoch": 4.504124051468162, |
| "grad_norm": 0.1035869276890292, |
| "learning_rate": 2.3397940713662683e-06, |
| "loss": 1.1744, |
| "step": 852 |
| }, |
| { |
| "epoch": 4.50940283734741, |
| "grad_norm": 0.11927558398750628, |
| "learning_rate": 2.2902296539353895e-06, |
| "loss": 1.1457, |
| "step": 853 |
| }, |
| { |
| "epoch": 4.514681623226658, |
| "grad_norm": 0.11724384351038623, |
| "learning_rate": 2.2411803646591057e-06, |
| "loss": 1.1793, |
| "step": 854 |
| }, |
| { |
| "epoch": 4.519960409105906, |
| "grad_norm": 0.10784941076305816, |
| "learning_rate": 2.192646873567221e-06, |
| "loss": 1.1585, |
| "step": 855 |
| }, |
| { |
| "epoch": 4.525239194985153, |
| "grad_norm": 0.10037816732827384, |
| "learning_rate": 2.1446298436435508e-06, |
| "loss": 1.151, |
| "step": 856 |
| }, |
| { |
| "epoch": 4.530517980864401, |
| "grad_norm": 0.11365281735149528, |
| "learning_rate": 2.097129930816895e-06, |
| "loss": 1.1722, |
| "step": 857 |
| }, |
| { |
| "epoch": 4.535796766743649, |
| "grad_norm": 0.11231091970978371, |
| "learning_rate": 2.0501477839520323e-06, |
| "loss": 1.1668, |
| "step": 858 |
| }, |
| { |
| "epoch": 4.541075552622897, |
| "grad_norm": 0.10707625359180917, |
| "learning_rate": 2.0036840448409166e-06, |
| "loss": 1.1656, |
| "step": 859 |
| }, |
| { |
| "epoch": 4.5463543385021445, |
| "grad_norm": 0.103343369882393, |
| "learning_rate": 1.957739348193859e-06, |
| "loss": 1.159, |
| "step": 860 |
| }, |
| { |
| "epoch": 4.551633124381392, |
| "grad_norm": 0.10643619236223816, |
| "learning_rate": 1.912314321630877e-06, |
| "loss": 1.1653, |
| "step": 861 |
| }, |
| { |
| "epoch": 4.55691191026064, |
| "grad_norm": 0.11321998012840938, |
| "learning_rate": 1.867409585673121e-06, |
| "loss": 1.1683, |
| "step": 862 |
| }, |
| { |
| "epoch": 4.562190696139888, |
| "grad_norm": 0.11702443692773487, |
| "learning_rate": 1.823025753734391e-06, |
| "loss": 1.1491, |
| "step": 863 |
| }, |
| { |
| "epoch": 4.567469482019136, |
| "grad_norm": 0.11336390689648039, |
| "learning_rate": 1.779163432112787e-06, |
| "loss": 1.1499, |
| "step": 864 |
| }, |
| { |
| "epoch": 4.572748267898383, |
| "grad_norm": 0.1079563192946037, |
| "learning_rate": 1.735823219982371e-06, |
| "loss": 1.1492, |
| "step": 865 |
| }, |
| { |
| "epoch": 4.578027053777631, |
| "grad_norm": 0.11346949871890852, |
| "learning_rate": 1.693005709385025e-06, |
| "loss": 1.1675, |
| "step": 866 |
| }, |
| { |
| "epoch": 4.583305839656878, |
| "grad_norm": 0.10606397010769701, |
| "learning_rate": 1.6507114852223694e-06, |
| "loss": 1.1638, |
| "step": 867 |
| }, |
| { |
| "epoch": 4.588584625536127, |
| "grad_norm": 0.11224848857280989, |
| "learning_rate": 1.608941125247725e-06, |
| "loss": 1.17, |
| "step": 868 |
| }, |
| { |
| "epoch": 4.5938634114153745, |
| "grad_norm": 0.11525328958527273, |
| "learning_rate": 1.5676952000582746e-06, |
| "loss": 1.1689, |
| "step": 869 |
| }, |
| { |
| "epoch": 4.599142197294622, |
| "grad_norm": 0.10941397776251549, |
| "learning_rate": 1.5269742730872384e-06, |
| "loss": 1.1552, |
| "step": 870 |
| }, |
| { |
| "epoch": 4.6044209831738705, |
| "grad_norm": 0.1053956618070631, |
| "learning_rate": 1.4867789005961818e-06, |
| "loss": 1.1509, |
| "step": 871 |
| }, |
| { |
| "epoch": 4.609699769053118, |
| "grad_norm": 0.10586295213178988, |
| "learning_rate": 1.4471096316674272e-06, |
| "loss": 1.1679, |
| "step": 872 |
| }, |
| { |
| "epoch": 4.614978554932366, |
| "grad_norm": 0.11592453657256088, |
| "learning_rate": 1.4079670081965246e-06, |
| "loss": 1.1731, |
| "step": 873 |
| }, |
| { |
| "epoch": 4.620257340811613, |
| "grad_norm": 0.09907380207012419, |
| "learning_rate": 1.3693515648849042e-06, |
| "loss": 1.1687, |
| "step": 874 |
| }, |
| { |
| "epoch": 4.625536126690861, |
| "grad_norm": 0.12561558309571264, |
| "learning_rate": 1.3312638292325032e-06, |
| "loss": 1.1549, |
| "step": 875 |
| }, |
| { |
| "epoch": 4.630814912570109, |
| "grad_norm": 0.10690353972188854, |
| "learning_rate": 1.2937043215306156e-06, |
| "loss": 1.1537, |
| "step": 876 |
| }, |
| { |
| "epoch": 4.636093698449357, |
| "grad_norm": 0.10264572047936735, |
| "learning_rate": 1.256673554854757e-06, |
| "loss": 1.1574, |
| "step": 877 |
| }, |
| { |
| "epoch": 4.641372484328604, |
| "grad_norm": 0.12384527063181164, |
| "learning_rate": 1.2201720350576608e-06, |
| "loss": 1.1717, |
| "step": 878 |
| }, |
| { |
| "epoch": 4.646651270207852, |
| "grad_norm": 0.12292766986287774, |
| "learning_rate": 1.1842002607623804e-06, |
| "loss": 1.1351, |
| "step": 879 |
| }, |
| { |
| "epoch": 4.6519300560870995, |
| "grad_norm": 0.09960668601841101, |
| "learning_rate": 1.1487587233554432e-06, |
| "loss": 1.1635, |
| "step": 880 |
| }, |
| { |
| "epoch": 4.657208841966348, |
| "grad_norm": 0.10157889253610197, |
| "learning_rate": 1.1138479069801967e-06, |
| "loss": 1.1694, |
| "step": 881 |
| }, |
| { |
| "epoch": 4.662487627845596, |
| "grad_norm": 0.10654619175522417, |
| "learning_rate": 1.0794682885301344e-06, |
| "loss": 1.1577, |
| "step": 882 |
| }, |
| { |
| "epoch": 4.667766413724843, |
| "grad_norm": 0.11376564016272218, |
| "learning_rate": 1.0456203376424169e-06, |
| "loss": 1.1711, |
| "step": 883 |
| }, |
| { |
| "epoch": 4.673045199604091, |
| "grad_norm": 0.09940409901353127, |
| "learning_rate": 1.0123045166914403e-06, |
| "loss": 1.1718, |
| "step": 884 |
| }, |
| { |
| "epoch": 4.678323985483339, |
| "grad_norm": 0.10209569506859624, |
| "learning_rate": 9.79521280782536e-07, |
| "loss": 1.1646, |
| "step": 885 |
| }, |
| { |
| "epoch": 4.683602771362587, |
| "grad_norm": 0.09649947128746558, |
| "learning_rate": 9.472710777457395e-07, |
| "loss": 1.1662, |
| "step": 886 |
| }, |
| { |
| "epoch": 4.688881557241834, |
| "grad_norm": 0.10006450320603427, |
| "learning_rate": 9.155543481296747e-07, |
| "loss": 1.1615, |
| "step": 887 |
| }, |
| { |
| "epoch": 4.694160343121082, |
| "grad_norm": 0.10739407381371646, |
| "learning_rate": 8.843715251955464e-07, |
| "loss": 1.1644, |
| "step": 888 |
| }, |
| { |
| "epoch": 4.69943912900033, |
| "grad_norm": 0.10063647783868902, |
| "learning_rate": 8.537230349112158e-07, |
| "loss": 1.1716, |
| "step": 889 |
| }, |
| { |
| "epoch": 4.704717914879578, |
| "grad_norm": 0.10085797467259582, |
| "learning_rate": 8.236092959453646e-07, |
| "loss": 1.1613, |
| "step": 890 |
| }, |
| { |
| "epoch": 4.7099967007588255, |
| "grad_norm": 1.7837683707358607, |
| "learning_rate": 7.940307196618113e-07, |
| "loss": 1.1772, |
| "step": 891 |
| }, |
| { |
| "epoch": 4.715275486638073, |
| "grad_norm": 0.10090146467799602, |
| "learning_rate": 7.64987710113858e-07, |
| "loss": 1.1473, |
| "step": 892 |
| }, |
| { |
| "epoch": 4.720554272517321, |
| "grad_norm": 0.10306276432506471, |
| "learning_rate": 7.364806640387878e-07, |
| "loss": 1.167, |
| "step": 893 |
| }, |
| { |
| "epoch": 4.725833058396569, |
| "grad_norm": 0.10423139318723618, |
| "learning_rate": 7.085099708524557e-07, |
| "loss": 1.1724, |
| "step": 894 |
| }, |
| { |
| "epoch": 4.731111844275817, |
| "grad_norm": 0.10587170611709364, |
| "learning_rate": 6.810760126439287e-07, |
| "loss": 1.1635, |
| "step": 895 |
| }, |
| { |
| "epoch": 4.736390630155064, |
| "grad_norm": 0.10713450024011799, |
| "learning_rate": 6.54179164170321e-07, |
| "loss": 1.1799, |
| "step": 896 |
| }, |
| { |
| "epoch": 4.741669416034312, |
| "grad_norm": 0.1033918776840394, |
| "learning_rate": 6.278197928516294e-07, |
| "loss": 1.1574, |
| "step": 897 |
| }, |
| { |
| "epoch": 4.74694820191356, |
| "grad_norm": 0.10277089392474535, |
| "learning_rate": 6.019982587657413e-07, |
| "loss": 1.1625, |
| "step": 898 |
| }, |
| { |
| "epoch": 4.752226987792808, |
| "grad_norm": 0.09433906599785452, |
| "learning_rate": 5.767149146435014e-07, |
| "loss": 1.1728, |
| "step": 899 |
| }, |
| { |
| "epoch": 4.757505773672055, |
| "grad_norm": 0.10104242957996779, |
| "learning_rate": 5.519701058638971e-07, |
| "loss": 1.1532, |
| "step": 900 |
| }, |
| { |
| "epoch": 4.762784559551303, |
| "grad_norm": 0.10949650094260902, |
| "learning_rate": 5.277641704493519e-07, |
| "loss": 1.1686, |
| "step": 901 |
| }, |
| { |
| "epoch": 4.768063345430551, |
| "grad_norm": 0.10199310812595654, |
| "learning_rate": 5.040974390610753e-07, |
| "loss": 1.1635, |
| "step": 902 |
| }, |
| { |
| "epoch": 4.773342131309799, |
| "grad_norm": 0.10007983394238783, |
| "learning_rate": 4.809702349946044e-07, |
| "loss": 1.1673, |
| "step": 903 |
| }, |
| { |
| "epoch": 4.778620917189047, |
| "grad_norm": 0.11016988989364565, |
| "learning_rate": 4.583828741753138e-07, |
| "loss": 1.1543, |
| "step": 904 |
| }, |
| { |
| "epoch": 4.783899703068294, |
| "grad_norm": 0.10118174124280127, |
| "learning_rate": 4.363356651541617e-07, |
| "loss": 1.1625, |
| "step": 905 |
| }, |
| { |
| "epoch": 4.789178488947542, |
| "grad_norm": 8.168639065238155, |
| "learning_rate": 4.148289091034441e-07, |
| "loss": 1.1561, |
| "step": 906 |
| }, |
| { |
| "epoch": 4.79445727482679, |
| "grad_norm": 0.13568083810035608, |
| "learning_rate": 3.9386289981267813e-07, |
| "loss": 1.1541, |
| "step": 907 |
| }, |
| { |
| "epoch": 4.799736060706038, |
| "grad_norm": 0.09556938886884957, |
| "learning_rate": 3.734379236846231e-07, |
| "loss": 1.1589, |
| "step": 908 |
| }, |
| { |
| "epoch": 4.805014846585285, |
| "grad_norm": 0.10242778222635852, |
| "learning_rate": 3.5355425973131017e-07, |
| "loss": 1.1748, |
| "step": 909 |
| }, |
| { |
| "epoch": 4.810293632464533, |
| "grad_norm": 0.09666773860438002, |
| "learning_rate": 3.3421217957029017e-07, |
| "loss": 1.1598, |
| "step": 910 |
| }, |
| { |
| "epoch": 4.8155724183437805, |
| "grad_norm": 0.09978976902696876, |
| "learning_rate": 3.154119474208894e-07, |
| "loss": 1.1752, |
| "step": 911 |
| }, |
| { |
| "epoch": 4.820851204223029, |
| "grad_norm": 0.09710058280846443, |
| "learning_rate": 2.971538201005997e-07, |
| "loss": 1.1718, |
| "step": 912 |
| }, |
| { |
| "epoch": 4.8261299901022765, |
| "grad_norm": 0.10166616238664858, |
| "learning_rate": 2.7943804702159185e-07, |
| "loss": 1.1674, |
| "step": 913 |
| }, |
| { |
| "epoch": 4.831408775981524, |
| "grad_norm": 0.12196992584814861, |
| "learning_rate": 2.6226487018728317e-07, |
| "loss": 1.1652, |
| "step": 914 |
| }, |
| { |
| "epoch": 4.836687561860772, |
| "grad_norm": 0.10117286603737477, |
| "learning_rate": 2.4563452418905565e-07, |
| "loss": 1.1708, |
| "step": 915 |
| }, |
| { |
| "epoch": 4.84196634774002, |
| "grad_norm": 0.10639803241716381, |
| "learning_rate": 2.2954723620302267e-07, |
| "loss": 1.1717, |
| "step": 916 |
| }, |
| { |
| "epoch": 4.847245133619268, |
| "grad_norm": 0.10365034376993183, |
| "learning_rate": 2.140032259869651e-07, |
| "loss": 1.1586, |
| "step": 917 |
| }, |
| { |
| "epoch": 4.852523919498515, |
| "grad_norm": 0.09739690810779615, |
| "learning_rate": 1.9900270587728477e-07, |
| "loss": 1.1449, |
| "step": 918 |
| }, |
| { |
| "epoch": 4.857802705377763, |
| "grad_norm": 0.10136332633557722, |
| "learning_rate": 1.8454588078613555e-07, |
| "loss": 1.1616, |
| "step": 919 |
| }, |
| { |
| "epoch": 4.863081491257011, |
| "grad_norm": 0.10227583958211155, |
| "learning_rate": 1.706329481986213e-07, |
| "loss": 1.1617, |
| "step": 920 |
| }, |
| { |
| "epoch": 4.868360277136259, |
| "grad_norm": 0.1038745119866778, |
| "learning_rate": 1.5726409817007348e-07, |
| "loss": 1.1701, |
| "step": 921 |
| }, |
| { |
| "epoch": 4.873639063015506, |
| "grad_norm": 0.10774776382247592, |
| "learning_rate": 1.444395133234888e-07, |
| "loss": 1.1665, |
| "step": 922 |
| }, |
| { |
| "epoch": 4.878917848894754, |
| "grad_norm": 0.09792018389942055, |
| "learning_rate": 1.321593688470113e-07, |
| "loss": 1.1809, |
| "step": 923 |
| }, |
| { |
| "epoch": 4.884196634774002, |
| "grad_norm": 0.10169455342635682, |
| "learning_rate": 1.2042383249154743e-07, |
| "loss": 1.1703, |
| "step": 924 |
| }, |
| { |
| "epoch": 4.88947542065325, |
| "grad_norm": 0.0970982445176629, |
| "learning_rate": 1.0923306456847915e-07, |
| "loss": 1.1724, |
| "step": 925 |
| }, |
| { |
| "epoch": 4.894754206532498, |
| "grad_norm": 0.09775306631079801, |
| "learning_rate": 9.858721794745229e-08, |
| "loss": 1.1597, |
| "step": 926 |
| }, |
| { |
| "epoch": 4.900032992411745, |
| "grad_norm": 0.09765431840575976, |
| "learning_rate": 8.848643805432045e-08, |
| "loss": 1.162, |
| "step": 927 |
| }, |
| { |
| "epoch": 4.905311778290993, |
| "grad_norm": 0.09631829632067684, |
| "learning_rate": 7.893086286913764e-08, |
| "loss": 1.1572, |
| "step": 928 |
| }, |
| { |
| "epoch": 4.910590564170241, |
| "grad_norm": 0.0941345070066087, |
| "learning_rate": 6.992062292427548e-08, |
| "loss": 1.1515, |
| "step": 929 |
| }, |
| { |
| "epoch": 4.915869350049489, |
| "grad_norm": 0.09830810649690867, |
| "learning_rate": 6.145584130264226e-08, |
| "loss": 1.1534, |
| "step": 930 |
| }, |
| { |
| "epoch": 4.921148135928736, |
| "grad_norm": 0.3185034298194423, |
| "learning_rate": 5.353663363600437e-08, |
| "loss": 1.1921, |
| "step": 931 |
| }, |
| { |
| "epoch": 4.926426921807984, |
| "grad_norm": 0.10174557358686656, |
| "learning_rate": 4.6163108103405295e-08, |
| "loss": 1.1714, |
| "step": 932 |
| }, |
| { |
| "epoch": 4.931705707687232, |
| "grad_norm": 0.0955458770336446, |
| "learning_rate": 3.933536542968686e-08, |
| "loss": 1.1645, |
| "step": 933 |
| }, |
| { |
| "epoch": 4.93698449356648, |
| "grad_norm": 0.09414779046743281, |
| "learning_rate": 3.305349888410803e-08, |
| "loss": 1.1724, |
| "step": 934 |
| }, |
| { |
| "epoch": 4.9422632794457275, |
| "grad_norm": 0.10775863469508211, |
| "learning_rate": 2.731759427908376e-08, |
| "loss": 1.1738, |
| "step": 935 |
| }, |
| { |
| "epoch": 4.947542065324975, |
| "grad_norm": 0.10257711289400813, |
| "learning_rate": 2.2127729968999257e-08, |
| "loss": 1.16, |
| "step": 936 |
| }, |
| { |
| "epoch": 4.952820851204223, |
| "grad_norm": 0.11208184061076697, |
| "learning_rate": 1.74839768491486e-08, |
| "loss": 1.1694, |
| "step": 937 |
| }, |
| { |
| "epoch": 4.958099637083471, |
| "grad_norm": 0.12886345058516566, |
| "learning_rate": 1.3386398354762187e-08, |
| "loss": 1.1722, |
| "step": 938 |
| }, |
| { |
| "epoch": 4.963378422962719, |
| "grad_norm": 0.10028905949188494, |
| "learning_rate": 9.835050460140772e-09, |
| "loss": 1.1624, |
| "step": 939 |
| }, |
| { |
| "epoch": 4.968657208841966, |
| "grad_norm": 0.10904642824875087, |
| "learning_rate": 6.829981677891618e-09, |
| "loss": 1.1632, |
| "step": 940 |
| }, |
| { |
| "epoch": 4.973935994721214, |
| "grad_norm": 0.10094212240836907, |
| "learning_rate": 4.3712330582668105e-09, |
| "loss": 1.1605, |
| "step": 941 |
| }, |
| { |
| "epoch": 4.979214780600461, |
| "grad_norm": 0.11163546063862742, |
| "learning_rate": 2.458838188599266e-09, |
| "loss": 1.1583, |
| "step": 942 |
| }, |
| { |
| "epoch": 4.98449356647971, |
| "grad_norm": 0.09739350820326295, |
| "learning_rate": 1.0928231928497568e-09, |
| "loss": 1.1585, |
| "step": 943 |
| }, |
| { |
| "epoch": 4.989772352358957, |
| "grad_norm": 0.09419504954849381, |
| "learning_rate": 2.7320673123831796e-10, |
| "loss": 1.1736, |
| "step": 944 |
| }, |
| { |
| "epoch": 4.995051138238205, |
| "grad_norm": 0.12697849655419563, |
| "learning_rate": 0.0, |
| "loss": 1.1708, |
| "step": 945 |
| }, |
| { |
| "epoch": 4.995051138238205, |
| "step": 945, |
| "total_flos": 2.4317663849553592e+19, |
| "train_loss": 1.2392805136070049, |
| "train_runtime": 143310.648, |
| "train_samples_per_second": 3.383, |
| "train_steps_per_second": 0.007 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 945, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.4317663849553592e+19, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|
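| A minimal sketch of how one might inspect the log above outside the Trainer: the Python snippet below assumes the JSON is saved as "trainer_state.json" next to the script (an illustrative filename, not part of the state file itself), reads "log_history", and plots loss and learning rate against step. The output filename is likewise an assumption. |
|  |
| import json |
| import matplotlib.pyplot as plt |
|  |
| # Load the state dict dumped by transformers.Trainer (filename assumed). |
| with open("trainer_state.json") as f: |
|     state = json.load(f) |
|  |
| # Keep only the per-step entries that carry a loss value; the final |
| # summary entry (train_loss, train_runtime, ...) is skipped this way. |
| history = [e for e in state["log_history"] if "loss" in e and "step" in e] |
| steps = [e["step"] for e in history] |
| losses = [e["loss"] for e in history] |
| lrs = [e.get("learning_rate") for e in history] |
|  |
| # Loss on the left axis, learning-rate schedule on a twin right axis. |
| fig, ax1 = plt.subplots() |
| ax1.plot(steps, losses, color="tab:blue") |
| ax1.set_xlabel("step") |
| ax1.set_ylabel("train loss") |
| ax2 = ax1.twinx() |
| ax2.plot(steps, lrs, color="tab:orange") |
| ax2.set_ylabel("learning rate") |
| fig.tight_layout() |
| fig.savefig("loss_lr_vs_step.png")  # illustrative output path |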