| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 4.0, | |
| "eval_steps": 1.0, | |
| "global_step": 984, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.004073319755600814, | |
| "grad_norm": 34.300819396972656, | |
| "learning_rate": 8.130081300813008e-09, | |
| "loss": 1.59619802236557, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.008146639511201629, | |
| "grad_norm": 30.720197677612305, | |
| "learning_rate": 1.6260162601626016e-08, | |
| "loss": 1.468272864818573, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.012219959266802444, | |
| "grad_norm": 30.16754722595215, | |
| "learning_rate": 2.4390243902439023e-08, | |
| "loss": 1.3843095302581787, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.016293279022403257, | |
| "grad_norm": 38.58047103881836, | |
| "learning_rate": 3.252032520325203e-08, | |
| "loss": 1.7031245231628418, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.020366598778004074, | |
| "grad_norm": 30.89760971069336, | |
| "learning_rate": 4.065040650406504e-08, | |
| "loss": 1.4844104647636414, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.024439918533604887, | |
| "grad_norm": 34.434993743896484, | |
| "learning_rate": 4.878048780487805e-08, | |
| "loss": 1.574910283088684, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.028513238289205704, | |
| "grad_norm": 32.540470123291016, | |
| "learning_rate": 5.6910569105691055e-08, | |
| "loss": 1.4606674909591675, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.032586558044806514, | |
| "grad_norm": 36.41299819946289, | |
| "learning_rate": 6.504065040650406e-08, | |
| "loss": 1.553576111793518, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.03665987780040733, | |
| "grad_norm": 34.50511932373047, | |
| "learning_rate": 7.317073170731706e-08, | |
| "loss": 1.3344553709030151, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.04073319755600815, | |
| "grad_norm": 27.898704528808594, | |
| "learning_rate": 8.130081300813008e-08, | |
| "loss": 1.3406395316123962, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.04480651731160896, | |
| "grad_norm": 29.29271125793457, | |
| "learning_rate": 8.943089430894309e-08, | |
| "loss": 1.4415303468704224, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.048879837067209775, | |
| "grad_norm": 28.2354736328125, | |
| "learning_rate": 9.75609756097561e-08, | |
| "loss": 1.2696096301078796, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.05295315682281059, | |
| "grad_norm": 35.44163131713867, | |
| "learning_rate": 1.0569105691056911e-07, | |
| "loss": 1.598312497138977, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.05702647657841141, | |
| "grad_norm": 26.94402313232422, | |
| "learning_rate": 1.1382113821138211e-07, | |
| "loss": 1.3497812747955322, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.06109979633401222, | |
| "grad_norm": 37.78248977661133, | |
| "learning_rate": 1.219512195121951e-07, | |
| "loss": 1.5689660906791687, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.06517311608961303, | |
| "grad_norm": 31.73078155517578, | |
| "learning_rate": 1.3008130081300813e-07, | |
| "loss": 1.525648295879364, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.06924643584521385, | |
| "grad_norm": 27.77250862121582, | |
| "learning_rate": 1.3821138211382114e-07, | |
| "loss": 1.304672360420227, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.07331975560081466, | |
| "grad_norm": 28.092498779296875, | |
| "learning_rate": 1.4634146341463413e-07, | |
| "loss": 1.346445381641388, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.07739307535641547, | |
| "grad_norm": 30.995866775512695, | |
| "learning_rate": 1.5447154471544717e-07, | |
| "loss": 1.447025179862976, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.0814663951120163, | |
| "grad_norm": 28.858421325683594, | |
| "learning_rate": 1.6260162601626016e-07, | |
| "loss": 1.3801668882369995, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.0855397148676171, | |
| "grad_norm": 31.91228485107422, | |
| "learning_rate": 1.7073170731707317e-07, | |
| "loss": 1.4577875137329102, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.08961303462321792, | |
| "grad_norm": 31.215259552001953, | |
| "learning_rate": 1.7886178861788619e-07, | |
| "loss": 1.4091373682022095, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.09368635437881874, | |
| "grad_norm": 30.24734115600586, | |
| "learning_rate": 1.8699186991869917e-07, | |
| "loss": 1.4649581909179688, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.09775967413441955, | |
| "grad_norm": 31.560291290283203, | |
| "learning_rate": 1.951219512195122e-07, | |
| "loss": 1.5308585166931152, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.10183299389002037, | |
| "grad_norm": 27.27391242980957, | |
| "learning_rate": 2.032520325203252e-07, | |
| "loss": 1.5144553780555725, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.10590631364562118, | |
| "grad_norm": 29.813785552978516, | |
| "learning_rate": 2.1138211382113822e-07, | |
| "loss": 1.519466757774353, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.109979633401222, | |
| "grad_norm": 24.201751708984375, | |
| "learning_rate": 2.195121951219512e-07, | |
| "loss": 1.3116011023521423, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.11405295315682282, | |
| "grad_norm": 27.95865249633789, | |
| "learning_rate": 2.2764227642276422e-07, | |
| "loss": 1.4637184143066406, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.11812627291242363, | |
| "grad_norm": 26.65915870666504, | |
| "learning_rate": 2.3577235772357723e-07, | |
| "loss": 1.4885194301605225, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.12219959266802444, | |
| "grad_norm": 27.386289596557617, | |
| "learning_rate": 2.439024390243902e-07, | |
| "loss": 1.3836334347724915, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.12627291242362526, | |
| "grad_norm": 25.87419319152832, | |
| "learning_rate": 2.520325203252032e-07, | |
| "loss": 1.3642336130142212, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.13034623217922606, | |
| "grad_norm": 26.620105743408203, | |
| "learning_rate": 2.6016260162601625e-07, | |
| "loss": 1.3461121916770935, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.13441955193482688, | |
| "grad_norm": 22.665058135986328, | |
| "learning_rate": 2.682926829268293e-07, | |
| "loss": 1.2577590942382812, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.1384928716904277, | |
| "grad_norm": 23.679920196533203, | |
| "learning_rate": 2.764227642276423e-07, | |
| "loss": 1.2572017908096313, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.1425661914460285, | |
| "grad_norm": 25.136371612548828, | |
| "learning_rate": 2.8455284552845527e-07, | |
| "loss": 1.2670851349830627, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.14663951120162932, | |
| "grad_norm": 21.567337036132812, | |
| "learning_rate": 2.9268292682926825e-07, | |
| "loss": 1.242683231830597, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.15071283095723015, | |
| "grad_norm": 20.61647605895996, | |
| "learning_rate": 3.008130081300813e-07, | |
| "loss": 1.279579222202301, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.15478615071283094, | |
| "grad_norm": 20.656513214111328, | |
| "learning_rate": 3.0894308943089434e-07, | |
| "loss": 1.2040475606918335, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.15885947046843177, | |
| "grad_norm": 22.86530876159668, | |
| "learning_rate": 3.170731707317073e-07, | |
| "loss": 1.2522715330123901, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.1629327902240326, | |
| "grad_norm": 20.22757911682129, | |
| "learning_rate": 3.252032520325203e-07, | |
| "loss": 1.2012774348258972, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.1670061099796334, | |
| "grad_norm": 23.09739875793457, | |
| "learning_rate": 3.333333333333333e-07, | |
| "loss": 1.2088268399238586, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.1710794297352342, | |
| "grad_norm": 22.845685958862305, | |
| "learning_rate": 3.4146341463414634e-07, | |
| "loss": 1.0982880592346191, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.17515274949083504, | |
| "grad_norm": 19.80814552307129, | |
| "learning_rate": 3.4959349593495933e-07, | |
| "loss": 1.1271469593048096, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.17922606924643583, | |
| "grad_norm": 20.553686141967773, | |
| "learning_rate": 3.5772357723577237e-07, | |
| "loss": 1.0008204579353333, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.18329938900203666, | |
| "grad_norm": 16.66282844543457, | |
| "learning_rate": 3.6585365853658536e-07, | |
| "loss": 0.9251897931098938, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.18737270875763748, | |
| "grad_norm": 15.797308921813965, | |
| "learning_rate": 3.7398373983739835e-07, | |
| "loss": 1.0191328525543213, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.19144602851323828, | |
| "grad_norm": 13.579208374023438, | |
| "learning_rate": 3.821138211382114e-07, | |
| "loss": 0.774791806936264, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.1955193482688391, | |
| "grad_norm": 14.556002616882324, | |
| "learning_rate": 3.902439024390244e-07, | |
| "loss": 1.0026790797710419, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.19959266802443992, | |
| "grad_norm": 14.489509582519531, | |
| "learning_rate": 3.9837398373983736e-07, | |
| "loss": 0.9430837631225586, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.20366598778004075, | |
| "grad_norm": 12.495223999023438, | |
| "learning_rate": 4.065040650406504e-07, | |
| "loss": 0.8999880254268646, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.20773930753564154, | |
| "grad_norm": 11.441575050354004, | |
| "learning_rate": 4.146341463414634e-07, | |
| "loss": 0.8320233225822449, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.21181262729124237, | |
| "grad_norm": 10.894216537475586, | |
| "learning_rate": 4.2276422764227643e-07, | |
| "loss": 0.8139239549636841, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.2158859470468432, | |
| "grad_norm": 10.404220581054688, | |
| "learning_rate": 4.308943089430894e-07, | |
| "loss": 0.8323288261890411, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.219959266802444, | |
| "grad_norm": 10.463072776794434, | |
| "learning_rate": 4.390243902439024e-07, | |
| "loss": 0.882573276758194, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.2240325865580448, | |
| "grad_norm": 10.669075012207031, | |
| "learning_rate": 4.471544715447154e-07, | |
| "loss": 0.749780923128128, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.22810590631364563, | |
| "grad_norm": 10.453638076782227, | |
| "learning_rate": 4.5528455284552844e-07, | |
| "loss": 0.7727148830890656, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.23217922606924643, | |
| "grad_norm": 11.427080154418945, | |
| "learning_rate": 4.634146341463415e-07, | |
| "loss": 0.8585084676742554, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.23625254582484725, | |
| "grad_norm": 8.558117866516113, | |
| "learning_rate": 4.7154471544715447e-07, | |
| "loss": 0.7314337491989136, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.24032586558044808, | |
| "grad_norm": 9.031648635864258, | |
| "learning_rate": 4.796747967479675e-07, | |
| "loss": 0.701579749584198, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.24439918533604887, | |
| "grad_norm": 8.817708969116211, | |
| "learning_rate": 4.878048780487804e-07, | |
| "loss": 0.7815204560756683, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.2484725050916497, | |
| "grad_norm": 8.00804615020752, | |
| "learning_rate": 4.959349593495934e-07, | |
| "loss": 0.655106246471405, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.2525458248472505, | |
| "grad_norm": 6.538842678070068, | |
| "learning_rate": 5.040650406504064e-07, | |
| "loss": 0.6697916388511658, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.25661914460285135, | |
| "grad_norm": 7.5446553230285645, | |
| "learning_rate": 5.121951219512195e-07, | |
| "loss": 0.7426944077014923, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.2606924643584521, | |
| "grad_norm": 6.402474403381348, | |
| "learning_rate": 5.203252032520325e-07, | |
| "loss": 0.6401277780532837, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.26476578411405294, | |
| "grad_norm": 7.257569313049316, | |
| "learning_rate": 5.284552845528455e-07, | |
| "loss": 0.6731106042861938, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.26883910386965376, | |
| "grad_norm": 6.263636589050293, | |
| "learning_rate": 5.365853658536586e-07, | |
| "loss": 0.5806022882461548, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.2729124236252546, | |
| "grad_norm": 5.273800849914551, | |
| "learning_rate": 5.447154471544715e-07, | |
| "loss": 0.5338439792394638, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.2769857433808554, | |
| "grad_norm": 5.2786149978637695, | |
| "learning_rate": 5.528455284552846e-07, | |
| "loss": 0.5390533208847046, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.28105906313645623, | |
| "grad_norm": 4.901702404022217, | |
| "learning_rate": 5.609756097560975e-07, | |
| "loss": 0.5899032056331635, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.285132382892057, | |
| "grad_norm": 4.853933811187744, | |
| "learning_rate": 5.691056910569105e-07, | |
| "loss": 0.5600310862064362, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.2892057026476578, | |
| "grad_norm": 4.680273532867432, | |
| "learning_rate": 5.772357723577236e-07, | |
| "loss": 0.5319355428218842, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.29327902240325865, | |
| "grad_norm": 3.7406885623931885, | |
| "learning_rate": 5.853658536585365e-07, | |
| "loss": 0.508156955242157, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.2973523421588595, | |
| "grad_norm": 4.389779567718506, | |
| "learning_rate": 5.934959349593496e-07, | |
| "loss": 0.49855048954486847, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.3014256619144603, | |
| "grad_norm": 4.23866081237793, | |
| "learning_rate": 6.016260162601626e-07, | |
| "loss": 0.5242476612329483, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.3054989816700611, | |
| "grad_norm": 4.1824951171875, | |
| "learning_rate": 6.097560975609756e-07, | |
| "loss": 0.532037615776062, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.3095723014256619, | |
| "grad_norm": 3.7223150730133057, | |
| "learning_rate": 6.178861788617887e-07, | |
| "loss": 0.46959882974624634, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.3136456211812627, | |
| "grad_norm": 3.545388698577881, | |
| "learning_rate": 6.260162601626016e-07, | |
| "loss": 0.4825982600450516, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.31771894093686354, | |
| "grad_norm": 3.6351099014282227, | |
| "learning_rate": 6.341463414634146e-07, | |
| "loss": 0.5095209777355194, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.32179226069246436, | |
| "grad_norm": 3.243072271347046, | |
| "learning_rate": 6.422764227642276e-07, | |
| "loss": 0.4842926263809204, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.3258655804480652, | |
| "grad_norm": 3.5646300315856934, | |
| "learning_rate": 6.504065040650406e-07, | |
| "loss": 0.4908552020788193, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.329938900203666, | |
| "grad_norm": 3.5380759239196777, | |
| "learning_rate": 6.585365853658536e-07, | |
| "loss": 0.4536065459251404, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.3340122199592668, | |
| "grad_norm": 3.128525495529175, | |
| "learning_rate": 6.666666666666666e-07, | |
| "loss": 0.47657161951065063, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.3380855397148676, | |
| "grad_norm": 3.3621485233306885, | |
| "learning_rate": 6.747967479674797e-07, | |
| "loss": 0.43791596591472626, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.3421588594704684, | |
| "grad_norm": 3.39066219329834, | |
| "learning_rate": 6.829268292682927e-07, | |
| "loss": 0.42947711050510406, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.34623217922606925, | |
| "grad_norm": 3.7795698642730713, | |
| "learning_rate": 6.910569105691057e-07, | |
| "loss": 0.4219910502433777, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.35030549898167007, | |
| "grad_norm": 3.633206367492676, | |
| "learning_rate": 6.991869918699187e-07, | |
| "loss": 0.4253977984189987, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.3543788187372709, | |
| "grad_norm": 3.6160175800323486, | |
| "learning_rate": 7.073170731707316e-07, | |
| "loss": 0.449339896440506, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.35845213849287166, | |
| "grad_norm": 3.30557918548584, | |
| "learning_rate": 7.154471544715447e-07, | |
| "loss": 0.45001736283302307, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.3625254582484725, | |
| "grad_norm": 3.1727640628814697, | |
| "learning_rate": 7.235772357723577e-07, | |
| "loss": 0.4165496975183487, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.3665987780040733, | |
| "grad_norm": 3.073976516723633, | |
| "learning_rate": 7.317073170731707e-07, | |
| "loss": 0.4443822205066681, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.37067209775967414, | |
| "grad_norm": 3.129105567932129, | |
| "learning_rate": 7.398373983739837e-07, | |
| "loss": 0.4265598952770233, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.37474541751527496, | |
| "grad_norm": 3.1485190391540527, | |
| "learning_rate": 7.479674796747967e-07, | |
| "loss": 0.3882734924554825, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.3788187372708758, | |
| "grad_norm": 3.1610565185546875, | |
| "learning_rate": 7.560975609756097e-07, | |
| "loss": 0.37010858952999115, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.38289205702647655, | |
| "grad_norm": 3.039264440536499, | |
| "learning_rate": 7.642276422764228e-07, | |
| "loss": 0.400989294052124, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.3869653767820774, | |
| "grad_norm": 2.9321980476379395, | |
| "learning_rate": 7.723577235772358e-07, | |
| "loss": 0.3771343380212784, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.3910386965376782, | |
| "grad_norm": 2.807072162628174, | |
| "learning_rate": 7.804878048780488e-07, | |
| "loss": 0.4001482129096985, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.395112016293279, | |
| "grad_norm": 2.8286941051483154, | |
| "learning_rate": 7.886178861788617e-07, | |
| "loss": 0.4234430640935898, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.39918533604887985, | |
| "grad_norm": 2.9245986938476562, | |
| "learning_rate": 7.967479674796747e-07, | |
| "loss": 0.3854667395353317, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.40325865580448067, | |
| "grad_norm": 3.015875816345215, | |
| "learning_rate": 8.048780487804878e-07, | |
| "loss": 0.38027653098106384, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.4073319755600815, | |
| "grad_norm": 2.907216787338257, | |
| "learning_rate": 8.130081300813008e-07, | |
| "loss": 0.34937676787376404, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.41140529531568226, | |
| "grad_norm": 3.131850004196167, | |
| "learning_rate": 8.211382113821138e-07, | |
| "loss": 0.4414845108985901, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.4154786150712831, | |
| "grad_norm": 2.9019775390625, | |
| "learning_rate": 8.292682926829268e-07, | |
| "loss": 0.3990558981895447, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.4195519348268839, | |
| "grad_norm": 2.9362523555755615, | |
| "learning_rate": 8.373983739837398e-07, | |
| "loss": 0.41413092613220215, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.42362525458248473, | |
| "grad_norm": 3.0895473957061768, | |
| "learning_rate": 8.455284552845529e-07, | |
| "loss": 0.3904542028903961, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.42769857433808556, | |
| "grad_norm": 2.9235992431640625, | |
| "learning_rate": 8.536585365853657e-07, | |
| "loss": 0.3995140939950943, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.4317718940936864, | |
| "grad_norm": 2.919102668762207, | |
| "learning_rate": 8.617886178861788e-07, | |
| "loss": 0.32857778668403625, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.43584521384928715, | |
| "grad_norm": 2.831698417663574, | |
| "learning_rate": 8.699186991869918e-07, | |
| "loss": 0.3507983237504959, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.439918533604888, | |
| "grad_norm": 2.952693223953247, | |
| "learning_rate": 8.780487804878048e-07, | |
| "loss": 0.37046514451503754, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.4439918533604888, | |
| "grad_norm": 3.315002679824829, | |
| "learning_rate": 8.861788617886179e-07, | |
| "loss": 0.391086682677269, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.4480651731160896, | |
| "grad_norm": 2.7241294384002686, | |
| "learning_rate": 8.943089430894308e-07, | |
| "loss": 0.3864188492298126, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.45213849287169044, | |
| "grad_norm": 2.782064199447632, | |
| "learning_rate": 9.024390243902439e-07, | |
| "loss": 0.38219109177589417, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.45621181262729127, | |
| "grad_norm": 4.001572132110596, | |
| "learning_rate": 9.105691056910569e-07, | |
| "loss": 0.3784598410129547, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.46028513238289204, | |
| "grad_norm": 2.607434034347534, | |
| "learning_rate": 9.186991869918699e-07, | |
| "loss": 0.3763512521982193, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.46435845213849286, | |
| "grad_norm": 2.97188138961792, | |
| "learning_rate": 9.26829268292683e-07, | |
| "loss": 0.36788034439086914, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.4684317718940937, | |
| "grad_norm": 2.9631524085998535, | |
| "learning_rate": 9.349593495934958e-07, | |
| "loss": 0.3696867823600769, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.4725050916496945, | |
| "grad_norm": 2.5895049571990967, | |
| "learning_rate": 9.430894308943089e-07, | |
| "loss": 0.3349902927875519, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.47657841140529533, | |
| "grad_norm": 2.600832462310791, | |
| "learning_rate": 9.512195121951218e-07, | |
| "loss": 0.34966227412223816, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.48065173116089616, | |
| "grad_norm": 3.0639443397521973, | |
| "learning_rate": 9.59349593495935e-07, | |
| "loss": 0.38310858607292175, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.4847250509164969, | |
| "grad_norm": 2.6944706439971924, | |
| "learning_rate": 9.67479674796748e-07, | |
| "loss": 0.3360476493835449, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.48879837067209775, | |
| "grad_norm": 2.8398237228393555, | |
| "learning_rate": 9.756097560975609e-07, | |
| "loss": 0.39176714420318604, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.49287169042769857, | |
| "grad_norm": 2.8028745651245117, | |
| "learning_rate": 9.83739837398374e-07, | |
| "loss": 0.37909021973609924, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.4969450101832994, | |
| "grad_norm": 2.6169185638427734, | |
| "learning_rate": 9.918699186991869e-07, | |
| "loss": 0.37069061398506165, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.5010183299389002, | |
| "grad_norm": 2.572046995162964, | |
| "learning_rate": 1e-06, | |
| "loss": 0.3428824096918106, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.505091649694501, | |
| "grad_norm": 2.7804417610168457, | |
| "learning_rate": 9.999979682219186e-07, | |
| "loss": 0.3680119812488556, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.5091649694501018, | |
| "grad_norm": 2.5910799503326416, | |
| "learning_rate": 9.999918729041868e-07, | |
| "loss": 0.33467385172843933, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.5132382892057027, | |
| "grad_norm": 2.8417587280273438, | |
| "learning_rate": 9.999817140963419e-07, | |
| "loss": 0.35100705921649933, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.5173116089613035, | |
| "grad_norm": 2.905728340148926, | |
| "learning_rate": 9.999674918809457e-07, | |
| "loss": 0.32811686396598816, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.5213849287169042, | |
| "grad_norm": 2.5878095626831055, | |
| "learning_rate": 9.99949206373584e-07, | |
| "loss": 0.32490645349025726, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.5254582484725051, | |
| "grad_norm": 2.9762229919433594, | |
| "learning_rate": 9.999268577228648e-07, | |
| "loss": 0.3934018760919571, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.5295315682281059, | |
| "grad_norm": 2.792989492416382, | |
| "learning_rate": 9.99900446110418e-07, | |
| "loss": 0.3315049111843109, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.5336048879837068, | |
| "grad_norm": 2.6891062259674072, | |
| "learning_rate": 9.998699717508945e-07, | |
| "loss": 0.3097301423549652, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.5376782077393075, | |
| "grad_norm": 2.92191481590271, | |
| "learning_rate": 9.99835434891962e-07, | |
| "loss": 0.34749817848205566, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.5417515274949084, | |
| "grad_norm": 2.980543851852417, | |
| "learning_rate": 9.99796835814306e-07, | |
| "loss": 0.3367327153682709, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.5458248472505092, | |
| "grad_norm": 2.50433611869812, | |
| "learning_rate": 9.99754174831625e-07, | |
| "loss": 0.3090934008359909, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.5498981670061099, | |
| "grad_norm": 2.869647979736328, | |
| "learning_rate": 9.9970745229063e-07, | |
| "loss": 0.35603591799736023, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.5539714867617108, | |
| "grad_norm": 2.6435837745666504, | |
| "learning_rate": 9.9965666857104e-07, | |
| "loss": 0.3288918733596802, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.5580448065173116, | |
| "grad_norm": 2.7970142364501953, | |
| "learning_rate": 9.996018240855806e-07, | |
| "loss": 0.3878723680973053, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.5621181262729125, | |
| "grad_norm": 2.593043327331543, | |
| "learning_rate": 9.995429192799788e-07, | |
| "loss": 0.3534126281738281, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.5661914460285132, | |
| "grad_norm": 2.8867013454437256, | |
| "learning_rate": 9.994799546329602e-07, | |
| "loss": 0.38061630725860596, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.570264765784114, | |
| "grad_norm": 2.589017152786255, | |
| "learning_rate": 9.994129306562458e-07, | |
| "loss": 0.37725748121738434, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.5743380855397149, | |
| "grad_norm": 2.369696617126465, | |
| "learning_rate": 9.993418478945472e-07, | |
| "loss": 0.32034583389759064, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.5784114052953157, | |
| "grad_norm": 2.6410069465637207, | |
| "learning_rate": 9.992667069255618e-07, | |
| "loss": 0.36017628014087677, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.5824847250509165, | |
| "grad_norm": 2.597259283065796, | |
| "learning_rate": 9.991875083599688e-07, | |
| "loss": 0.32577911019325256, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.5865580448065173, | |
| "grad_norm": 2.761859655380249, | |
| "learning_rate": 9.991042528414237e-07, | |
| "loss": 0.33353830873966217, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.5906313645621182, | |
| "grad_norm": 2.7634713649749756, | |
| "learning_rate": 9.990169410465536e-07, | |
| "loss": 0.33604632318019867, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.594704684317719, | |
| "grad_norm": 2.820897340774536, | |
| "learning_rate": 9.98925573684951e-07, | |
| "loss": 0.3069554716348648, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.5987780040733197, | |
| "grad_norm": 2.856700897216797, | |
| "learning_rate": 9.98830151499169e-07, | |
| "loss": 0.33896636962890625, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.6028513238289206, | |
| "grad_norm": 2.9203782081604004, | |
| "learning_rate": 9.987306752647142e-07, | |
| "loss": 0.35070909559726715, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.6069246435845214, | |
| "grad_norm": 2.679352283477783, | |
| "learning_rate": 9.986271457900414e-07, | |
| "loss": 0.3325359970331192, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.6109979633401222, | |
| "grad_norm": 2.4953606128692627, | |
| "learning_rate": 9.98519563916546e-07, | |
| "loss": 0.32330869138240814, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.615071283095723, | |
| "grad_norm": 2.618744134902954, | |
| "learning_rate": 9.98407930518558e-07, | |
| "loss": 0.33912393450737, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.6191446028513238, | |
| "grad_norm": 2.6512296199798584, | |
| "learning_rate": 9.982922465033348e-07, | |
| "loss": 0.3045920431613922, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.6232179226069247, | |
| "grad_norm": 2.7606050968170166, | |
| "learning_rate": 9.981725128110532e-07, | |
| "loss": 0.32916732132434845, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.6272912423625254, | |
| "grad_norm": 2.95037841796875, | |
| "learning_rate": 9.980487304148024e-07, | |
| "loss": 0.36757831275463104, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.6313645621181263, | |
| "grad_norm": 2.890489339828491, | |
| "learning_rate": 9.97920900320576e-07, | |
| "loss": 0.36117151379585266, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.6354378818737271, | |
| "grad_norm": 2.7488858699798584, | |
| "learning_rate": 9.97789023567263e-07, | |
| "loss": 0.35026322305202484, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.639511201629328, | |
| "grad_norm": 2.5479671955108643, | |
| "learning_rate": 9.976531012266413e-07, | |
| "loss": 0.308156818151474, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.6435845213849287, | |
| "grad_norm": 2.717344045639038, | |
| "learning_rate": 9.975131344033664e-07, | |
| "loss": 0.29827529191970825, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.6476578411405295, | |
| "grad_norm": 2.569551467895508, | |
| "learning_rate": 9.973691242349648e-07, | |
| "loss": 0.3232528269290924, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.6517311608961304, | |
| "grad_norm": 3.0013420581817627, | |
| "learning_rate": 9.972210718918233e-07, | |
| "loss": 0.3270832598209381, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.6558044806517311, | |
| "grad_norm": 2.7339162826538086, | |
| "learning_rate": 9.970689785771798e-07, | |
| "loss": 0.3668155074119568, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.659877800407332, | |
| "grad_norm": 2.6689724922180176, | |
| "learning_rate": 9.969128455271137e-07, | |
| "loss": 0.32853490114212036, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.6639511201629328, | |
| "grad_norm": 3.042081117630005, | |
| "learning_rate": 9.967526740105358e-07, | |
| "loss": 0.3487651199102402, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.6680244399185336, | |
| "grad_norm": 2.4641284942626953, | |
| "learning_rate": 9.965884653291783e-07, | |
| "loss": 0.35704147815704346, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.6720977596741344, | |
| "grad_norm": 2.6836225986480713, | |
| "learning_rate": 9.964202208175833e-07, | |
| "loss": 0.33587950468063354, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.6761710794297352, | |
| "grad_norm": 2.2905988693237305, | |
| "learning_rate": 9.962479418430932e-07, | |
| "loss": 0.3061918318271637, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.6802443991853361, | |
| "grad_norm": 2.4772934913635254, | |
| "learning_rate": 9.960716298058381e-07, | |
| "loss": 0.2896444499492645, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.6843177189409368, | |
| "grad_norm": 2.6987321376800537, | |
| "learning_rate": 9.958912861387258e-07, | |
| "loss": 0.3374595195055008, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.6883910386965377, | |
| "grad_norm": 2.6165449619293213, | |
| "learning_rate": 9.9570691230743e-07, | |
| "loss": 0.33027225732803345, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.6924643584521385, | |
| "grad_norm": 3.1326680183410645, | |
| "learning_rate": 9.955185098103771e-07, | |
| "loss": 0.3138381540775299, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.6965376782077393, | |
| "grad_norm": 2.5313732624053955, | |
| "learning_rate": 9.953260801787356e-07, | |
| "loss": 0.31824737787246704, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.7006109979633401, | |
| "grad_norm": 2.529325008392334, | |
| "learning_rate": 9.951296249764025e-07, | |
| "loss": 0.298155277967453, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.7046843177189409, | |
| "grad_norm": 2.6821744441986084, | |
| "learning_rate": 9.949291457999916e-07, | |
| "loss": 0.33296874165534973, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.7087576374745418, | |
| "grad_norm": 2.588157892227173, | |
| "learning_rate": 9.947246442788193e-07, | |
| "loss": 0.31226691603660583, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.7128309572301426, | |
| "grad_norm": 2.7822420597076416, | |
| "learning_rate": 9.945161220748927e-07, | |
| "loss": 0.322743222117424, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.7169042769857433, | |
| "grad_norm": 2.379702091217041, | |
| "learning_rate": 9.943035808828953e-07, | |
| "loss": 0.3056500107049942, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.7209775967413442, | |
| "grad_norm": 2.4450721740722656, | |
| "learning_rate": 9.94087022430173e-07, | |
| "loss": 0.3037564754486084, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.725050916496945, | |
| "grad_norm": 2.5885887145996094, | |
| "learning_rate": 9.938664484767205e-07, | |
| "loss": 0.327587828040123, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.7291242362525459, | |
| "grad_norm": 2.613290309906006, | |
| "learning_rate": 9.936418608151675e-07, | |
| "loss": 0.33323927223682404, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.7331975560081466, | |
| "grad_norm": 2.6541707515716553, | |
| "learning_rate": 9.93413261270763e-07, | |
| "loss": 0.3316569924354553, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.7372708757637475, | |
| "grad_norm": 2.646383047103882, | |
| "learning_rate": 9.931806517013612e-07, | |
| "loss": 0.35486292839050293, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.7413441955193483, | |
| "grad_norm": 2.5270328521728516, | |
| "learning_rate": 9.92944033997406e-07, | |
| "loss": 0.3157142102718353, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.745417515274949, | |
| "grad_norm": 2.5851869583129883, | |
| "learning_rate": 9.927034100819163e-07, | |
| "loss": 0.3013855814933777, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.7494908350305499, | |
| "grad_norm": 2.75219988822937, | |
| "learning_rate": 9.924587819104695e-07, | |
| "loss": 0.3420049250125885, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.7535641547861507, | |
| "grad_norm": 2.436596632003784, | |
| "learning_rate": 9.922101514711865e-07, | |
| "loss": 0.3062688261270523, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.7576374745417516, | |
| "grad_norm": 2.9479236602783203, | |
| "learning_rate": 9.919575207847145e-07, | |
| "loss": 0.31793762743473053, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.7617107942973523, | |
| "grad_norm": 2.5482208728790283, | |
| "learning_rate": 9.917008919042116e-07, | |
| "loss": 0.3306496888399124, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.7657841140529531, | |
| "grad_norm": 2.609839677810669, | |
| "learning_rate": 9.914402669153295e-07, | |
| "loss": 0.29324449598789215, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.769857433808554, | |
| "grad_norm": 2.5740039348602295, | |
| "learning_rate": 9.91175647936197e-07, | |
| "loss": 0.3193310797214508, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.7739307535641547, | |
| "grad_norm": 2.3878629207611084, | |
| "learning_rate": 9.909070371174019e-07, | |
| "loss": 0.3040658235549927, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.7780040733197556, | |
| "grad_norm": 2.755152463912964, | |
| "learning_rate": 9.906344366419746e-07, | |
| "loss": 0.33930477499961853, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.7820773930753564, | |
| "grad_norm": 2.58367657661438, | |
| "learning_rate": 9.9035784872537e-07, | |
| "loss": 0.3244568109512329, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.7861507128309573, | |
| "grad_norm": 2.350712537765503, | |
| "learning_rate": 9.90077275615449e-07, | |
| "loss": 0.2779058516025543, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.790224032586558, | |
| "grad_norm": 2.7418465614318848, | |
| "learning_rate": 9.897927195924608e-07, | |
| "loss": 0.32641272246837616, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.7942973523421588, | |
| "grad_norm": 2.516510009765625, | |
| "learning_rate": 9.895041829690238e-07, | |
| "loss": 0.3083319664001465, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.7983706720977597, | |
| "grad_norm": 2.7772316932678223, | |
| "learning_rate": 9.892116680901084e-07, | |
| "loss": 0.30357757210731506, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.8024439918533605, | |
| "grad_norm": 2.5389041900634766, | |
| "learning_rate": 9.88915177333015e-07, | |
| "loss": 0.30694054067134857, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.8065173116089613, | |
| "grad_norm": 2.7129383087158203, | |
| "learning_rate": 9.886147131073579e-07, | |
| "loss": 0.3402569591999054, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.8105906313645621, | |
| "grad_norm": 2.654186248779297, | |
| "learning_rate": 9.883102778550434e-07, | |
| "loss": 0.3343619704246521, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.814663951120163, | |
| "grad_norm": 2.380168914794922, | |
| "learning_rate": 9.880018740502508e-07, | |
| "loss": 0.3020651191473007, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.8187372708757638, | |
| "grad_norm": 2.771951198577881, | |
| "learning_rate": 9.876895041994127e-07, | |
| "loss": 0.30565840005874634, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.8228105906313645, | |
| "grad_norm": 2.4966540336608887, | |
| "learning_rate": 9.873731708411939e-07, | |
| "loss": 0.3085058331489563, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.8268839103869654, | |
| "grad_norm": 2.5919551849365234, | |
| "learning_rate": 9.870528765464711e-07, | |
| "loss": 0.34540820121765137, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.8309572301425662, | |
| "grad_norm": 3.0668885707855225, | |
| "learning_rate": 9.867286239183122e-07, | |
| "loss": 0.3307037353515625, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.835030549898167, | |
| "grad_norm": 2.4281554222106934, | |
| "learning_rate": 9.864004155919544e-07, | |
| "loss": 0.28929875791072845, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.8391038696537678, | |
| "grad_norm": 2.5561623573303223, | |
| "learning_rate": 9.860682542347838e-07, | |
| "loss": 0.3272414803504944, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.8431771894093686, | |
| "grad_norm": 2.824591636657715, | |
| "learning_rate": 9.85732142546313e-07, | |
| "loss": 0.3192295432090759, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.8472505091649695, | |
| "grad_norm": 2.643718719482422, | |
| "learning_rate": 9.853920832581597e-07, | |
| "loss": 0.31284041702747345, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.8513238289205702, | |
| "grad_norm": 2.6777195930480957, | |
| "learning_rate": 9.850480791340236e-07, | |
| "loss": 0.3136574327945709, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.8553971486761711, | |
| "grad_norm": 2.5229766368865967, | |
| "learning_rate": 9.847001329696652e-07, | |
| "loss": 0.3047819435596466, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.8594704684317719, | |
| "grad_norm": 2.659447431564331, | |
| "learning_rate": 9.843482475928818e-07, | |
| "loss": 0.3642407953739166, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.8635437881873728, | |
| "grad_norm": 2.697049379348755, | |
| "learning_rate": 9.839924258634853e-07, | |
| "loss": 0.3134022653102875, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.8676171079429735, | |
| "grad_norm": 2.629868745803833, | |
| "learning_rate": 9.83632670673279e-07, | |
| "loss": 0.306331992149353, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.8716904276985743, | |
| "grad_norm": 2.4997003078460693, | |
| "learning_rate": 9.832689849460339e-07, | |
| "loss": 0.3142865002155304, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.8757637474541752, | |
| "grad_norm": 2.826869010925293, | |
| "learning_rate": 9.829013716374647e-07, | |
| "loss": 0.2904099076986313, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.879837067209776, | |
| "grad_norm": 2.6697499752044678, | |
| "learning_rate": 9.825298337352058e-07, | |
| "loss": 0.29838354885578156, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.8839103869653768, | |
| "grad_norm": 2.5330023765563965, | |
| "learning_rate": 9.821543742587876e-07, | |
| "loss": 0.3052047789096832, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.8879837067209776, | |
| "grad_norm": 2.806683301925659, | |
| "learning_rate": 9.817749962596114e-07, | |
| "loss": 0.3121778964996338, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.8920570264765784, | |
| "grad_norm": 2.718122720718384, | |
| "learning_rate": 9.81391702820925e-07, | |
| "loss": 0.32955022156238556, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.8961303462321792, | |
| "grad_norm": 2.346466541290283, | |
| "learning_rate": 9.81004497057797e-07, | |
| "loss": 0.291049063205719, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.90020366598778, | |
| "grad_norm": 2.4048361778259277, | |
| "learning_rate": 9.806133821170924e-07, | |
| "loss": 0.30249159038066864, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.9042769857433809, | |
| "grad_norm": 2.681546688079834, | |
| "learning_rate": 9.80218361177446e-07, | |
| "loss": 0.362154021859169, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.9083503054989817, | |
| "grad_norm": 2.792266368865967, | |
| "learning_rate": 9.798194374492375e-07, | |
| "loss": 0.28344525396823883, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.9124236252545825, | |
| "grad_norm": 2.507050037384033, | |
| "learning_rate": 9.794166141745646e-07, | |
| "loss": 0.2935172915458679, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.9164969450101833, | |
| "grad_norm": 2.7160379886627197, | |
| "learning_rate": 9.790098946272177e-07, | |
| "loss": 0.3005199581384659, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.9205702647657841, | |
| "grad_norm": 2.666494131088257, | |
| "learning_rate": 9.785992821126518e-07, | |
| "loss": 0.30710943043231964, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.924643584521385, | |
| "grad_norm": 2.699313163757324, | |
| "learning_rate": 9.781847799679615e-07, | |
| "loss": 0.3164513558149338, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.9287169042769857, | |
| "grad_norm": 2.49406099319458, | |
| "learning_rate": 9.777663915618517e-07, | |
| "loss": 0.3061770647764206, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.9327902240325866, | |
| "grad_norm": 2.552093029022217, | |
| "learning_rate": 9.773441202946121e-07, | |
| "loss": 0.2973909080028534, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.9368635437881874, | |
| "grad_norm": 2.5773231983184814, | |
| "learning_rate": 9.76917969598089e-07, | |
| "loss": 0.31120532751083374, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.9409368635437881, | |
| "grad_norm": 2.653515100479126, | |
| "learning_rate": 9.76487942935657e-07, | |
| "loss": 0.3365926146507263, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.945010183299389, | |
| "grad_norm": 2.670433282852173, | |
| "learning_rate": 9.760540438021907e-07, | |
| "loss": 0.3196941614151001, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.9490835030549898, | |
| "grad_norm": 2.892035961151123, | |
| "learning_rate": 9.756162757240373e-07, | |
| "loss": 0.33982205390930176, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.9531568228105907, | |
| "grad_norm": 2.5157856941223145, | |
| "learning_rate": 9.751746422589872e-07, | |
| "loss": 0.2537951096892357, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.9572301425661914, | |
| "grad_norm": 2.6808388233184814, | |
| "learning_rate": 9.747291469962452e-07, | |
| "loss": 0.2846526652574539, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.9613034623217923, | |
| "grad_norm": 2.451559066772461, | |
| "learning_rate": 9.742797935564011e-07, | |
| "loss": 0.29611095786094666, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.9653767820773931, | |
| "grad_norm": 2.7313358783721924, | |
| "learning_rate": 9.738265855914012e-07, | |
| "loss": 0.3275996297597885, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.9694501018329938, | |
| "grad_norm": 2.5593299865722656, | |
| "learning_rate": 9.733695267845171e-07, | |
| "loss": 0.2993656247854233, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.9735234215885947, | |
| "grad_norm": 2.6013288497924805, | |
| "learning_rate": 9.729086208503173e-07, | |
| "loss": 0.31615155935287476, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.9775967413441955, | |
| "grad_norm": 2.5403575897216797, | |
| "learning_rate": 9.72443871534636e-07, | |
| "loss": 0.2843424677848816, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.9816700610997964, | |
| "grad_norm": 2.4495410919189453, | |
| "learning_rate": 9.719752826145432e-07, | |
| "loss": 0.2987358868122101, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.9857433808553971, | |
| "grad_norm": 2.719775676727295, | |
| "learning_rate": 9.715028578983136e-07, | |
| "loss": 0.34320636093616486, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.9898167006109979, | |
| "grad_norm": 2.7152929306030273, | |
| "learning_rate": 9.71026601225396e-07, | |
| "loss": 0.2937510758638382, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.9938900203665988, | |
| "grad_norm": 2.4305663108825684, | |
| "learning_rate": 9.705465164663817e-07, | |
| "loss": 0.29807206988334656, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.9979633401221996, | |
| "grad_norm": 2.322704792022705, | |
| "learning_rate": 9.700626075229738e-07, | |
| "loss": 0.3189048618078232, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 2.322704792022705, | |
| "learning_rate": 9.695748783279544e-07, | |
| "loss": 0.3195984363555908, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 1.0040733197556009, | |
| "grad_norm": 2.505608081817627, | |
| "learning_rate": 9.690833328451532e-07, | |
| "loss": 0.24639207869768143, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 1.0081466395112015, | |
| "grad_norm": 2.5328471660614014, | |
| "learning_rate": 9.68587975069416e-07, | |
| "loss": 0.3204229325056076, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 1.0122199592668024, | |
| "grad_norm": 2.462449073791504, | |
| "learning_rate": 9.680888090265713e-07, | |
| "loss": 0.29177573323249817, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 1.0162932790224033, | |
| "grad_norm": 2.4300286769866943, | |
| "learning_rate": 9.67585838773397e-07, | |
| "loss": 0.2859930992126465, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.0203665987780042, | |
| "grad_norm": 2.4073538780212402, | |
| "learning_rate": 9.67079068397589e-07, | |
| "loss": 0.253083311021328, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 1.0244399185336048, | |
| "grad_norm": 2.4259159564971924, | |
| "learning_rate": 9.66568502017727e-07, | |
| "loss": 0.27018117904663086, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 1.0285132382892057, | |
| "grad_norm": 2.490452766418457, | |
| "learning_rate": 9.660541437832416e-07, | |
| "loss": 0.270308181643486, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 1.0325865580448066, | |
| "grad_norm": 2.2988967895507812, | |
| "learning_rate": 9.655359978743798e-07, | |
| "loss": 0.2865062654018402, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 1.0366598778004072, | |
| "grad_norm": 2.3249478340148926, | |
| "learning_rate": 9.650140685021716e-07, | |
| "loss": 0.30297039449214935, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 1.0407331975560081, | |
| "grad_norm": 2.530956506729126, | |
| "learning_rate": 9.644883599083957e-07, | |
| "loss": 0.2875078618526459, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 1.044806517311609, | |
| "grad_norm": 2.462820053100586, | |
| "learning_rate": 9.639588763655448e-07, | |
| "loss": 0.3308262676000595, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 1.0488798370672097, | |
| "grad_norm": 2.733877658843994, | |
| "learning_rate": 9.634256221767912e-07, | |
| "loss": 0.3172256797552109, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 1.0529531568228105, | |
| "grad_norm": 2.5849740505218506, | |
| "learning_rate": 9.628886016759515e-07, | |
| "loss": 0.24582470953464508, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 1.0570264765784114, | |
| "grad_norm": 2.495885133743286, | |
| "learning_rate": 9.623478192274517e-07, | |
| "loss": 0.29177717864513397, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 1.0610997963340123, | |
| "grad_norm": 2.7560508251190186, | |
| "learning_rate": 9.618032792262914e-07, | |
| "loss": 0.2618248611688614, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 1.065173116089613, | |
| "grad_norm": 2.5949246883392334, | |
| "learning_rate": 9.612549860980087e-07, | |
| "loss": 0.29752814769744873, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 1.0692464358452138, | |
| "grad_norm": 2.5723235607147217, | |
| "learning_rate": 9.60702944298643e-07, | |
| "loss": 0.283334344625473, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 1.0733197556008147, | |
| "grad_norm": 2.5463922023773193, | |
| "learning_rate": 9.601471583147002e-07, | |
| "loss": 0.29830360412597656, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 1.0773930753564154, | |
| "grad_norm": 2.4415295124053955, | |
| "learning_rate": 9.595876326631153e-07, | |
| "loss": 0.278356209397316, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 1.0814663951120163, | |
| "grad_norm": 2.4167826175689697, | |
| "learning_rate": 9.590243718912164e-07, | |
| "loss": 0.2482159435749054, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 1.0855397148676171, | |
| "grad_norm": 2.4946138858795166, | |
| "learning_rate": 9.584573805766867e-07, | |
| "loss": 0.2892283797264099, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 1.089613034623218, | |
| "grad_norm": 2.585139036178589, | |
| "learning_rate": 9.578866633275286e-07, | |
| "loss": 0.28597134351730347, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 1.0936863543788187, | |
| "grad_norm": 2.5749940872192383, | |
| "learning_rate": 9.573122247820253e-07, | |
| "loss": 0.24586065858602524, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 1.0977596741344195, | |
| "grad_norm": 2.6099436283111572, | |
| "learning_rate": 9.567340696087032e-07, | |
| "loss": 0.2968917638063431, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 1.1018329938900204, | |
| "grad_norm": 2.5843560695648193, | |
| "learning_rate": 9.561522025062946e-07, | |
| "loss": 0.3214084059000015, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 1.105906313645621, | |
| "grad_norm": 2.7599520683288574, | |
| "learning_rate": 9.555666282036984e-07, | |
| "loss": 0.2851483225822449, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 1.109979633401222, | |
| "grad_norm": 2.340637683868408, | |
| "learning_rate": 9.549773514599428e-07, | |
| "loss": 0.2864815294742584, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 1.1140529531568228, | |
| "grad_norm": 2.682030200958252, | |
| "learning_rate": 9.543843770641463e-07, | |
| "loss": 0.2536205053329468, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 1.1181262729124237, | |
| "grad_norm": 2.3765580654144287, | |
| "learning_rate": 9.537877098354784e-07, | |
| "loss": 0.24970313906669617, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 1.1221995926680244, | |
| "grad_norm": 2.5046305656433105, | |
| "learning_rate": 9.531873546231208e-07, | |
| "loss": 0.2925817370414734, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 1.1262729124236253, | |
| "grad_norm": 2.487609386444092, | |
| "learning_rate": 9.525833163062273e-07, | |
| "loss": 0.28858567774295807, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 1.1303462321792261, | |
| "grad_norm": 2.5617923736572266, | |
| "learning_rate": 9.519755997938856e-07, | |
| "loss": 0.29943670332431793, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 1.1344195519348268, | |
| "grad_norm": 2.5144989490509033, | |
| "learning_rate": 9.51364210025076e-07, | |
| "loss": 0.26054797321558, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 1.1384928716904277, | |
| "grad_norm": 2.393367290496826, | |
| "learning_rate": 9.507491519686315e-07, | |
| "loss": 0.2766056954860687, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 1.1425661914460286, | |
| "grad_norm": 2.5914998054504395, | |
| "learning_rate": 9.501304306231983e-07, | |
| "loss": 0.2934035658836365, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 1.1466395112016294, | |
| "grad_norm": 2.6555488109588623, | |
| "learning_rate": 9.495080510171942e-07, | |
| "loss": 0.27582375705242157, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 1.15071283095723, | |
| "grad_norm": 2.4485878944396973, | |
| "learning_rate": 9.488820182087682e-07, | |
| "loss": 0.27037250250577927, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 1.154786150712831, | |
| "grad_norm": 2.5796926021575928, | |
| "learning_rate": 9.482523372857592e-07, | |
| "loss": 0.25297392159700394, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 1.1588594704684319, | |
| "grad_norm": 2.4900271892547607, | |
| "learning_rate": 9.476190133656548e-07, | |
| "loss": 0.2928699851036072, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 1.1629327902240325, | |
| "grad_norm": 2.378876209259033, | |
| "learning_rate": 9.469820515955497e-07, | |
| "loss": 0.3021889925003052, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 1.1670061099796334, | |
| "grad_norm": 2.377063751220703, | |
| "learning_rate": 9.463414571521036e-07, | |
| "loss": 0.26095758378505707, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 1.1710794297352343, | |
| "grad_norm": 2.347393274307251, | |
| "learning_rate": 9.456972352414997e-07, | |
| "loss": 0.24494879692792892, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 1.175152749490835, | |
| "grad_norm": 2.3959014415740967, | |
| "learning_rate": 9.450493910994017e-07, | |
| "loss": 0.28228186070919037, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 1.1792260692464358, | |
| "grad_norm": 2.5725162029266357, | |
| "learning_rate": 9.443979299909117e-07, | |
| "loss": 0.3038964122533798, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 1.1832993890020367, | |
| "grad_norm": 3.050133228302002, | |
| "learning_rate": 9.437428572105274e-07, | |
| "loss": 0.28559453785419464, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 1.1873727087576376, | |
| "grad_norm": 2.4305083751678467, | |
| "learning_rate": 9.430841780820988e-07, | |
| "loss": 0.26872922480106354, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 1.1914460285132382, | |
| "grad_norm": 2.3637914657592773, | |
| "learning_rate": 9.424218979587852e-07, | |
| "loss": 0.2783532440662384, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 1.195519348268839, | |
| "grad_norm": 2.4774560928344727, | |
| "learning_rate": 9.417560222230114e-07, | |
| "loss": 0.2842411994934082, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 1.19959266802444, | |
| "grad_norm": 2.5469460487365723, | |
| "learning_rate": 9.410865562864245e-07, | |
| "loss": 0.2584770694375038, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 1.2036659877800409, | |
| "grad_norm": 2.3821113109588623, | |
| "learning_rate": 9.404135055898495e-07, | |
| "loss": 0.29300089180469513, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 1.2077393075356415, | |
| "grad_norm": 2.7043542861938477, | |
| "learning_rate": 9.397368756032444e-07, | |
| "loss": 0.2970714569091797, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 1.2118126272912424, | |
| "grad_norm": 2.353179693222046, | |
| "learning_rate": 9.390566718256578e-07, | |
| "loss": 0.28464287519454956, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 1.2158859470468433, | |
| "grad_norm": 2.615365743637085, | |
| "learning_rate": 9.383728997851819e-07, | |
| "loss": 0.2843187004327774, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 1.219959266802444, | |
| "grad_norm": 2.526761054992676, | |
| "learning_rate": 9.376855650389089e-07, | |
| "loss": 0.29214321076869965, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.2240325865580448, | |
| "grad_norm": 2.416611909866333, | |
| "learning_rate": 9.369946731728854e-07, | |
| "loss": 0.2746543139219284, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 1.2281059063136457, | |
| "grad_norm": 2.6475841999053955, | |
| "learning_rate": 9.363002298020673e-07, | |
| "loss": 0.32700057327747345, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 1.2321792260692463, | |
| "grad_norm": 2.7061877250671387, | |
| "learning_rate": 9.356022405702739e-07, | |
| "loss": 0.26850761473178864, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 1.2362525458248472, | |
| "grad_norm": 2.578963279724121, | |
| "learning_rate": 9.349007111501419e-07, | |
| "loss": 0.2758927643299103, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 1.240325865580448, | |
| "grad_norm": 2.476339340209961, | |
| "learning_rate": 9.341956472430801e-07, | |
| "loss": 0.3046765774488449, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 1.2443991853360488, | |
| "grad_norm": 2.594074249267578, | |
| "learning_rate": 9.334870545792217e-07, | |
| "loss": 0.30949874222278595, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 1.2484725050916496, | |
| "grad_norm": 2.311086416244507, | |
| "learning_rate": 9.32774938917379e-07, | |
| "loss": 0.27652885019779205, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 1.2525458248472505, | |
| "grad_norm": 2.5869719982147217, | |
| "learning_rate": 9.320593060449958e-07, | |
| "loss": 0.2891390472650528, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 1.2566191446028514, | |
| "grad_norm": 2.7591726779937744, | |
| "learning_rate": 9.313401617781012e-07, | |
| "loss": 0.2366793006658554, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 1.260692464358452, | |
| "grad_norm": 2.394120693206787, | |
| "learning_rate": 9.306175119612612e-07, | |
| "loss": 0.2750696837902069, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 1.264765784114053, | |
| "grad_norm": 2.542830228805542, | |
| "learning_rate": 9.29891362467532e-07, | |
| "loss": 0.29537804424762726, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 1.2688391038696538, | |
| "grad_norm": 2.63010573387146, | |
| "learning_rate": 9.291617191984123e-07, | |
| "loss": 0.28230538964271545, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 1.2729124236252547, | |
| "grad_norm": 2.4835424423217773, | |
| "learning_rate": 9.284285880837946e-07, | |
| "loss": 0.2597920596599579, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 1.2769857433808554, | |
| "grad_norm": 2.4826974868774414, | |
| "learning_rate": 9.276919750819181e-07, | |
| "loss": 0.28388310968875885, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 1.2810590631364562, | |
| "grad_norm": 2.495941400527954, | |
| "learning_rate": 9.269518861793193e-07, | |
| "loss": 0.2510681226849556, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 1.2851323828920571, | |
| "grad_norm": 2.3886849880218506, | |
| "learning_rate": 9.262083273907837e-07, | |
| "loss": 0.28582368791103363, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 1.2892057026476578, | |
| "grad_norm": 2.443054437637329, | |
| "learning_rate": 9.254613047592974e-07, | |
| "loss": 0.2807822525501251, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 1.2932790224032586, | |
| "grad_norm": 2.2685866355895996, | |
| "learning_rate": 9.247108243559971e-07, | |
| "loss": 0.2636859193444252, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 1.2973523421588595, | |
| "grad_norm": 2.4459731578826904, | |
| "learning_rate": 9.239568922801212e-07, | |
| "loss": 0.28120698034763336, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 1.3014256619144602, | |
| "grad_norm": 2.4667344093322754, | |
| "learning_rate": 9.231995146589605e-07, | |
| "loss": 0.27484674006700516, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 1.305498981670061, | |
| "grad_norm": 2.432385206222534, | |
| "learning_rate": 9.22438697647808e-07, | |
| "loss": 0.3200181722640991, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 1.309572301425662, | |
| "grad_norm": 2.594827175140381, | |
| "learning_rate": 9.21674447429909e-07, | |
| "loss": 0.26146167516708374, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 1.3136456211812626, | |
| "grad_norm": 2.6781699657440186, | |
| "learning_rate": 9.209067702164108e-07, | |
| "loss": 0.30198925733566284, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 1.3177189409368635, | |
| "grad_norm": 2.382455348968506, | |
| "learning_rate": 9.201356722463124e-07, | |
| "loss": 0.2656544893980026, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 1.3217922606924644, | |
| "grad_norm": 2.3920562267303467, | |
| "learning_rate": 9.193611597864137e-07, | |
| "loss": 0.30542662739753723, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 1.3258655804480652, | |
| "grad_norm": 2.5210139751434326, | |
| "learning_rate": 9.185832391312642e-07, | |
| "loss": 0.33505627512931824, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 1.3299389002036661, | |
| "grad_norm": 2.530137777328491, | |
| "learning_rate": 9.178019166031128e-07, | |
| "loss": 0.28073740005493164, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 1.3340122199592668, | |
| "grad_norm": 2.6757373809814453, | |
| "learning_rate": 9.170171985518551e-07, | |
| "loss": 0.2617020010948181, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 1.3380855397148677, | |
| "grad_norm": 2.737130880355835, | |
| "learning_rate": 9.162290913549831e-07, | |
| "loss": 0.31688614189624786, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 1.3421588594704685, | |
| "grad_norm": 2.4106035232543945, | |
| "learning_rate": 9.154376014175325e-07, | |
| "loss": 0.25045838952064514, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 1.3462321792260692, | |
| "grad_norm": 2.5006155967712402, | |
| "learning_rate": 9.146427351720307e-07, | |
| "loss": 0.2350526750087738, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 1.35030549898167, | |
| "grad_norm": 2.443948268890381, | |
| "learning_rate": 9.138444990784453e-07, | |
| "loss": 0.3193846642971039, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 1.354378818737271, | |
| "grad_norm": 2.5271644592285156, | |
| "learning_rate": 9.130428996241304e-07, | |
| "loss": 0.2833334505558014, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 1.3584521384928716, | |
| "grad_norm": 2.3211746215820312, | |
| "learning_rate": 9.122379433237748e-07, | |
| "loss": 0.27067267149686813, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 1.3625254582484725, | |
| "grad_norm": 2.5455574989318848, | |
| "learning_rate": 9.11429636719349e-07, | |
| "loss": 0.3177233636379242, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 1.3665987780040734, | |
| "grad_norm": 2.9286653995513916, | |
| "learning_rate": 9.106179863800513e-07, | |
| "loss": 0.30554522573947906, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 1.370672097759674, | |
| "grad_norm": 2.452594757080078, | |
| "learning_rate": 9.098029989022557e-07, | |
| "loss": 0.29210953414440155, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 1.374745417515275, | |
| "grad_norm": 2.5022428035736084, | |
| "learning_rate": 9.089846809094563e-07, | |
| "loss": 0.25522106885910034, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 1.3788187372708758, | |
| "grad_norm": 2.6464457511901855, | |
| "learning_rate": 9.081630390522157e-07, | |
| "loss": 0.287350669503212, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 1.3828920570264764, | |
| "grad_norm": 2.3021206855773926, | |
| "learning_rate": 9.073380800081096e-07, | |
| "loss": 0.2737245708703995, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 1.3869653767820773, | |
| "grad_norm": 2.5468103885650635, | |
| "learning_rate": 9.065098104816726e-07, | |
| "loss": 0.26481032371520996, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 1.3910386965376782, | |
| "grad_norm": 2.3847126960754395, | |
| "learning_rate": 9.056782372043445e-07, | |
| "loss": 0.270496666431427, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 1.395112016293279, | |
| "grad_norm": 2.586550235748291, | |
| "learning_rate": 9.048433669344144e-07, | |
| "loss": 0.32853636145591736, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 1.39918533604888, | |
| "grad_norm": 2.5621798038482666, | |
| "learning_rate": 9.04005206456967e-07, | |
| "loss": 0.2707676887512207, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 1.4032586558044806, | |
| "grad_norm": 2.405062198638916, | |
| "learning_rate": 9.031637625838264e-07, | |
| "loss": 0.285464346408844, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 1.4073319755600815, | |
| "grad_norm": 2.382589817047119, | |
| "learning_rate": 9.023190421535016e-07, | |
| "loss": 0.2704601585865021, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 1.4114052953156824, | |
| "grad_norm": 2.2916622161865234, | |
| "learning_rate": 9.014710520311306e-07, | |
| "loss": 0.2510330229997635, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 1.415478615071283, | |
| "grad_norm": 2.3999521732330322, | |
| "learning_rate": 9.006197991084241e-07, | |
| "loss": 0.2853076159954071, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 1.419551934826884, | |
| "grad_norm": 2.6907191276550293, | |
| "learning_rate": 8.997652903036104e-07, | |
| "loss": 0.27804453670978546, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 1.4236252545824848, | |
| "grad_norm": 2.3953359127044678, | |
| "learning_rate": 8.989075325613784e-07, | |
| "loss": 0.24225353449583054, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.4276985743380854, | |
| "grad_norm": 2.5458950996398926, | |
| "learning_rate": 8.980465328528218e-07, | |
| "loss": 0.29082323610782623, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 1.4317718940936863, | |
| "grad_norm": 2.570434093475342, | |
| "learning_rate": 8.971822981753818e-07, | |
| "loss": 0.2637802064418793, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 1.4358452138492872, | |
| "grad_norm": 2.608654260635376, | |
| "learning_rate": 8.963148355527908e-07, | |
| "loss": 0.267157182097435, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 1.4399185336048879, | |
| "grad_norm": 2.505756139755249, | |
| "learning_rate": 8.954441520350149e-07, | |
| "loss": 0.31443025171756744, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 1.4439918533604887, | |
| "grad_norm": 2.8767051696777344, | |
| "learning_rate": 8.945702546981968e-07, | |
| "loss": 0.28173527121543884, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 1.4480651731160896, | |
| "grad_norm": 2.5839850902557373, | |
| "learning_rate": 8.936931506445984e-07, | |
| "loss": 0.2735389843583107, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 1.4521384928716905, | |
| "grad_norm": 2.380516529083252, | |
| "learning_rate": 8.928128470025429e-07, | |
| "loss": 0.2868722528219223, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 1.4562118126272914, | |
| "grad_norm": 2.373098134994507, | |
| "learning_rate": 8.919293509263567e-07, | |
| "loss": 0.266360841691494, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 1.460285132382892, | |
| "grad_norm": 2.440216541290283, | |
| "learning_rate": 8.910426695963118e-07, | |
| "loss": 0.26618482172489166, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 1.464358452138493, | |
| "grad_norm": 2.374094247817993, | |
| "learning_rate": 8.901528102185669e-07, | |
| "loss": 0.27816291898489, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.4684317718940938, | |
| "grad_norm": 2.3657944202423096, | |
| "learning_rate": 8.892597800251093e-07, | |
| "loss": 0.25230376422405243, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 1.4725050916496945, | |
| "grad_norm": 2.3009097576141357, | |
| "learning_rate": 8.883635862736956e-07, | |
| "loss": 0.26984117925167084, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 1.4765784114052953, | |
| "grad_norm": 2.9458165168762207, | |
| "learning_rate": 8.874642362477929e-07, | |
| "loss": 0.29643698036670685, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 1.4806517311608962, | |
| "grad_norm": 2.8554880619049072, | |
| "learning_rate": 8.865617372565199e-07, | |
| "loss": 0.256381556391716, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 1.4847250509164969, | |
| "grad_norm": 2.5279104709625244, | |
| "learning_rate": 8.856560966345876e-07, | |
| "loss": 0.24138055741786957, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 1.4887983706720977, | |
| "grad_norm": 2.4175782203674316, | |
| "learning_rate": 8.847473217422388e-07, | |
| "loss": 0.27801232039928436, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 1.4928716904276986, | |
| "grad_norm": 2.7168712615966797, | |
| "learning_rate": 8.838354199651891e-07, | |
| "loss": 0.28772565722465515, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 1.4969450101832993, | |
| "grad_norm": 2.5258262157440186, | |
| "learning_rate": 8.829203987145669e-07, | |
| "loss": 0.2848212271928787, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 1.5010183299389002, | |
| "grad_norm": 2.424790859222412, | |
| "learning_rate": 8.820022654268525e-07, | |
| "loss": 0.2653035372495651, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 1.505091649694501, | |
| "grad_norm": 2.416537284851074, | |
| "learning_rate": 8.810810275638182e-07, | |
| "loss": 0.27215129137039185, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.5091649694501017, | |
| "grad_norm": 2.5008385181427, | |
| "learning_rate": 8.801566926124677e-07, | |
| "loss": 0.2523413300514221, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 1.5132382892057028, | |
| "grad_norm": 2.5779659748077393, | |
| "learning_rate": 8.79229268084975e-07, | |
| "loss": 0.2584775537252426, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 1.5173116089613035, | |
| "grad_norm": 2.631009340286255, | |
| "learning_rate": 8.782987615186234e-07, | |
| "loss": 0.27790168672800064, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 1.5213849287169041, | |
| "grad_norm": 2.4295825958251953, | |
| "learning_rate": 8.773651804757443e-07, | |
| "loss": 0.26932528614997864, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 1.5254582484725052, | |
| "grad_norm": 2.6135988235473633, | |
| "learning_rate": 8.764285325436555e-07, | |
| "loss": 0.28256936371326447, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 1.5295315682281059, | |
| "grad_norm": 2.5016775131225586, | |
| "learning_rate": 8.754888253346002e-07, | |
| "loss": 0.28559644520282745, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 1.5336048879837068, | |
| "grad_norm": 2.5780680179595947, | |
| "learning_rate": 8.745460664856844e-07, | |
| "loss": 0.2705298289656639, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 1.5376782077393076, | |
| "grad_norm": 2.7102303504943848, | |
| "learning_rate": 8.736002636588151e-07, | |
| "loss": 0.27591292560100555, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 1.5417515274949083, | |
| "grad_norm": 2.520955801010132, | |
| "learning_rate": 8.72651424540638e-07, | |
| "loss": 0.2621312141418457, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 1.5458248472505092, | |
| "grad_norm": 3.2938437461853027, | |
| "learning_rate": 8.716995568424754e-07, | |
| "loss": 0.2692745327949524, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.54989816700611, | |
| "grad_norm": 2.4736831188201904, | |
| "learning_rate": 8.70744668300263e-07, | |
| "loss": 0.24611817300319672, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 1.5539714867617107, | |
| "grad_norm": 2.3731565475463867, | |
| "learning_rate": 8.697867666744871e-07, | |
| "loss": 0.2787970006465912, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 1.5580448065173116, | |
| "grad_norm": 2.5973782539367676, | |
| "learning_rate": 8.688258597501219e-07, | |
| "loss": 0.3056941330432892, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 1.5621181262729125, | |
| "grad_norm": 2.490133285522461, | |
| "learning_rate": 8.678619553365658e-07, | |
| "loss": 0.28115689754486084, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 1.5661914460285131, | |
| "grad_norm": 2.7315447330474854, | |
| "learning_rate": 8.668950612675783e-07, | |
| "loss": 0.2873499393463135, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 1.570264765784114, | |
| "grad_norm": 2.518087387084961, | |
| "learning_rate": 8.659251854012161e-07, | |
| "loss": 0.28173641860485077, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 1.5743380855397149, | |
| "grad_norm": 2.214815378189087, | |
| "learning_rate": 8.649523356197695e-07, | |
| "loss": 0.2376307100057602, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 1.5784114052953155, | |
| "grad_norm": 2.2534103393554688, | |
| "learning_rate": 8.639765198296977e-07, | |
| "loss": 0.2530301362276077, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 1.5824847250509166, | |
| "grad_norm": 2.5998353958129883, | |
| "learning_rate": 8.629977459615654e-07, | |
| "loss": 0.26012372225522995, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 1.5865580448065173, | |
| "grad_norm": 2.4811596870422363, | |
| "learning_rate": 8.620160219699777e-07, | |
| "loss": 0.26469268649816513, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.5906313645621182, | |
| "grad_norm": 2.4212377071380615, | |
| "learning_rate": 8.610313558335156e-07, | |
| "loss": 0.2749471068382263, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 1.594704684317719, | |
| "grad_norm": 2.3209803104400635, | |
| "learning_rate": 8.600437555546716e-07, | |
| "loss": 0.2666809409856796, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 1.5987780040733197, | |
| "grad_norm": 2.6138010025024414, | |
| "learning_rate": 8.59053229159784e-07, | |
| "loss": 0.2631463557481766, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 1.6028513238289206, | |
| "grad_norm": 2.4465487003326416, | |
| "learning_rate": 8.580597846989721e-07, | |
| "loss": 0.2840096950531006, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 1.6069246435845215, | |
| "grad_norm": 2.781353235244751, | |
| "learning_rate": 8.570634302460706e-07, | |
| "loss": 0.26131805777549744, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 1.6109979633401221, | |
| "grad_norm": 2.4190244674682617, | |
| "learning_rate": 8.560641738985641e-07, | |
| "loss": 0.28362704813480377, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 1.615071283095723, | |
| "grad_norm": 2.461918354034424, | |
| "learning_rate": 8.550620237775213e-07, | |
| "loss": 0.2682417184114456, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 1.6191446028513239, | |
| "grad_norm": 2.4705612659454346, | |
| "learning_rate": 8.540569880275286e-07, | |
| "loss": 0.2832501232624054, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 1.6232179226069245, | |
| "grad_norm": 2.401193380355835, | |
| "learning_rate": 8.530490748166244e-07, | |
| "loss": 0.2732377126812935, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 1.6272912423625254, | |
| "grad_norm": 2.7205650806427, | |
| "learning_rate": 8.520382923362328e-07, | |
| "loss": 0.2597970813512802, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.6313645621181263, | |
| "grad_norm": 2.6480019092559814, | |
| "learning_rate": 8.510246488010964e-07, | |
| "loss": 0.2697395384311676, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 1.635437881873727, | |
| "grad_norm": 2.5444276332855225, | |
| "learning_rate": 8.5000815244921e-07, | |
| "loss": 0.26779498159885406, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 1.639511201629328, | |
| "grad_norm": 2.3247053623199463, | |
| "learning_rate": 8.489888115417537e-07, | |
| "loss": 0.2565397247672081, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 1.6435845213849287, | |
| "grad_norm": 2.8525049686431885, | |
| "learning_rate": 8.479666343630256e-07, | |
| "loss": 0.29496023058891296, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 1.6476578411405294, | |
| "grad_norm": 2.5987389087677, | |
| "learning_rate": 8.469416292203746e-07, | |
| "loss": 0.24137818068265915, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 1.6517311608961305, | |
| "grad_norm": 2.4682445526123047, | |
| "learning_rate": 8.459138044441323e-07, | |
| "loss": 0.3068127781152725, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 1.6558044806517311, | |
| "grad_norm": 2.43457293510437, | |
| "learning_rate": 8.448831683875464e-07, | |
| "loss": 0.2586989253759384, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 1.659877800407332, | |
| "grad_norm": 2.446392297744751, | |
| "learning_rate": 8.438497294267116e-07, | |
| "loss": 0.2845039367675781, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 1.663951120162933, | |
| "grad_norm": 2.4405159950256348, | |
| "learning_rate": 8.428134959605027e-07, | |
| "loss": 0.2668181359767914, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 1.6680244399185336, | |
| "grad_norm": 2.398472309112549, | |
| "learning_rate": 8.417744764105053e-07, | |
| "loss": 0.294752761721611, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.6720977596741344, | |
| "grad_norm": 2.485675096511841, | |
| "learning_rate": 8.407326792209481e-07, | |
| "loss": 0.27970798313617706, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 1.6761710794297353, | |
| "grad_norm": 2.326519250869751, | |
| "learning_rate": 8.396881128586338e-07, | |
| "loss": 0.27439168095588684, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 1.680244399185336, | |
| "grad_norm": 2.5076746940612793, | |
| "learning_rate": 8.386407858128706e-07, | |
| "loss": 0.24190914630889893, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 1.6843177189409368, | |
| "grad_norm": 2.418656349182129, | |
| "learning_rate": 8.375907065954028e-07, | |
| "loss": 0.302260160446167, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 1.6883910386965377, | |
| "grad_norm": 2.431206226348877, | |
| "learning_rate": 8.365378837403427e-07, | |
| "loss": 0.24484054744243622, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 1.6924643584521384, | |
| "grad_norm": 2.1974987983703613, | |
| "learning_rate": 8.354823258040995e-07, | |
| "loss": 0.23830442130565643, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 1.6965376782077393, | |
| "grad_norm": 2.531301498413086, | |
| "learning_rate": 8.344240413653111e-07, | |
| "loss": 0.23077362775802612, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 1.7006109979633401, | |
| "grad_norm": 2.3643598556518555, | |
| "learning_rate": 8.333630390247741e-07, | |
| "loss": 0.2691231817007065, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 1.7046843177189408, | |
| "grad_norm": 2.429917335510254, | |
| "learning_rate": 8.322993274053738e-07, | |
| "loss": 0.26382092386484146, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 1.708757637474542, | |
| "grad_norm": 2.3184382915496826, | |
| "learning_rate": 8.312329151520139e-07, | |
| "loss": 0.24146142601966858, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.7128309572301426, | |
| "grad_norm": 2.3974194526672363, | |
| "learning_rate": 8.301638109315465e-07, | |
| "loss": 0.28932487964630127, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 1.7169042769857432, | |
| "grad_norm": 2.445939064025879, | |
| "learning_rate": 8.290920234327019e-07, | |
| "loss": 0.2816842496395111, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 1.7209775967413443, | |
| "grad_norm": 2.517059087753296, | |
| "learning_rate": 8.280175613660175e-07, | |
| "loss": 0.28957797586917877, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 1.725050916496945, | |
| "grad_norm": 2.7085039615631104, | |
| "learning_rate": 8.269404334637666e-07, | |
| "loss": 0.2929697036743164, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 1.7291242362525459, | |
| "grad_norm": 2.3027985095977783, | |
| "learning_rate": 8.258606484798896e-07, | |
| "loss": 0.2602064907550812, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 1.7331975560081467, | |
| "grad_norm": 2.4261462688446045, | |
| "learning_rate": 8.247782151899195e-07, | |
| "loss": 0.2826506048440933, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 1.7372708757637474, | |
| "grad_norm": 2.6706831455230713, | |
| "learning_rate": 8.236931423909138e-07, | |
| "loss": 0.25307597219944, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 1.7413441955193483, | |
| "grad_norm": 2.6151740550994873, | |
| "learning_rate": 8.226054389013808e-07, | |
| "loss": 0.31587836146354675, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 1.7454175152749491, | |
| "grad_norm": 2.3904056549072266, | |
| "learning_rate": 8.215151135612088e-07, | |
| "loss": 0.2650100588798523, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 1.7494908350305498, | |
| "grad_norm": 2.48195219039917, | |
| "learning_rate": 8.204221752315948e-07, | |
| "loss": 0.24214383959770203, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.7535641547861507, | |
| "grad_norm": 2.28658390045166, | |
| "learning_rate": 8.193266327949708e-07, | |
| "loss": 0.26451194286346436, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 1.7576374745417516, | |
| "grad_norm": 2.365696430206299, | |
| "learning_rate": 8.182284951549335e-07, | |
| "loss": 0.26112615317106247, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 1.7617107942973522, | |
| "grad_norm": 2.467344284057617, | |
| "learning_rate": 8.171277712361708e-07, | |
| "loss": 0.2644128352403641, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 1.765784114052953, | |
| "grad_norm": 2.6279022693634033, | |
| "learning_rate": 8.160244699843899e-07, | |
| "loss": 0.26811327785253525, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 1.769857433808554, | |
| "grad_norm": 2.196678638458252, | |
| "learning_rate": 8.149186003662437e-07, | |
| "loss": 0.2764963060617447, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 1.7739307535641546, | |
| "grad_norm": 2.582368850708008, | |
| "learning_rate": 8.138101713692587e-07, | |
| "loss": 0.24663043022155762, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 1.7780040733197557, | |
| "grad_norm": 2.4632701873779297, | |
| "learning_rate": 8.12699192001762e-07, | |
| "loss": 0.24030117690563202, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 1.7820773930753564, | |
| "grad_norm": 2.549862861633301, | |
| "learning_rate": 8.115856712928077e-07, | |
| "loss": 0.2775857746601105, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 1.7861507128309573, | |
| "grad_norm": 2.460521936416626, | |
| "learning_rate": 8.104696182921039e-07, | |
| "loss": 0.24370034784078598, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 1.7902240325865582, | |
| "grad_norm": 2.489588975906372, | |
| "learning_rate": 8.093510420699386e-07, | |
| "loss": 0.2672967165708542, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.7942973523421588, | |
| "grad_norm": 2.442493438720703, | |
| "learning_rate": 8.082299517171061e-07, | |
| "loss": 0.2774495333433151, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 1.7983706720977597, | |
| "grad_norm": 2.6533353328704834, | |
| "learning_rate": 8.071063563448339e-07, | |
| "loss": 0.27777694165706635, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 1.8024439918533606, | |
| "grad_norm": 2.546030282974243, | |
| "learning_rate": 8.059802650847077e-07, | |
| "loss": 0.2475699484348297, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 1.8065173116089612, | |
| "grad_norm": 2.603074789047241, | |
| "learning_rate": 8.048516870885977e-07, | |
| "loss": 0.2610132396221161, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 1.810590631364562, | |
| "grad_norm": 2.5384109020233154, | |
| "learning_rate": 8.037206315285841e-07, | |
| "loss": 0.26541996002197266, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 1.814663951120163, | |
| "grad_norm": 2.5843305587768555, | |
| "learning_rate": 8.025871075968826e-07, | |
| "loss": 0.26443643867969513, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 1.8187372708757636, | |
| "grad_norm": 2.428213119506836, | |
| "learning_rate": 8.014511245057691e-07, | |
| "loss": 0.27873335778713226, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 1.8228105906313645, | |
| "grad_norm": 2.622931480407715, | |
| "learning_rate": 8.003126914875063e-07, | |
| "loss": 0.2784544825553894, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 1.8268839103869654, | |
| "grad_norm": 2.4025309085845947, | |
| "learning_rate": 7.991718177942666e-07, | |
| "loss": 0.22745566070079803, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 1.830957230142566, | |
| "grad_norm": 2.426877498626709, | |
| "learning_rate": 7.980285126980591e-07, | |
| "loss": 0.2900083065032959, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.8350305498981672, | |
| "grad_norm": 2.506085157394409, | |
| "learning_rate": 7.968827854906528e-07, | |
| "loss": 0.3019551932811737, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 1.8391038696537678, | |
| "grad_norm": 2.428899049758911, | |
| "learning_rate": 7.95734645483501e-07, | |
| "loss": 0.2602303624153137, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 1.8431771894093685, | |
| "grad_norm": 2.7357733249664307, | |
| "learning_rate": 7.945841020076669e-07, | |
| "loss": 0.2674318701028824, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 1.8472505091649696, | |
| "grad_norm": 2.492051124572754, | |
| "learning_rate": 7.934311644137463e-07, | |
| "loss": 0.29544439911842346, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 1.8513238289205702, | |
| "grad_norm": 2.751485586166382, | |
| "learning_rate": 7.922758420717928e-07, | |
| "loss": 0.2730901688337326, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 1.8553971486761711, | |
| "grad_norm": 2.469752550125122, | |
| "learning_rate": 7.911181443712407e-07, | |
| "loss": 0.28135350346565247, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 1.859470468431772, | |
| "grad_norm": 2.5486271381378174, | |
| "learning_rate": 7.89958080720829e-07, | |
| "loss": 0.2647327482700348, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 1.8635437881873727, | |
| "grad_norm": 2.328063726425171, | |
| "learning_rate": 7.887956605485258e-07, | |
| "loss": 0.29177480936050415, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 1.8676171079429735, | |
| "grad_norm": 2.3431217670440674, | |
| "learning_rate": 7.876308933014502e-07, | |
| "loss": 0.2461807057261467, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 1.8716904276985744, | |
| "grad_norm": 2.346773862838745, | |
| "learning_rate": 7.864637884457961e-07, | |
| "loss": 0.2737877368927002, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.875763747454175, | |
| "grad_norm": 2.6286261081695557, | |
| "learning_rate": 7.852943554667561e-07, | |
| "loss": 0.29079559445381165, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 1.879837067209776, | |
| "grad_norm": 2.3207788467407227, | |
| "learning_rate": 7.841226038684434e-07, | |
| "loss": 0.261492520570755, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 1.8839103869653768, | |
| "grad_norm": 2.4097938537597656, | |
| "learning_rate": 7.829485431738148e-07, | |
| "loss": 0.2779378667473793, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 1.8879837067209775, | |
| "grad_norm": 2.580665349960327, | |
| "learning_rate": 7.817721829245935e-07, | |
| "loss": 0.26885151863098145, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 1.8920570264765784, | |
| "grad_norm": 2.5190136432647705, | |
| "learning_rate": 7.805935326811912e-07, | |
| "loss": 0.2348337173461914, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 1.8961303462321792, | |
| "grad_norm": 2.6823647022247314, | |
| "learning_rate": 7.794126020226309e-07, | |
| "loss": 0.31435824930667877, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 1.90020366598778, | |
| "grad_norm": 2.678208827972412, | |
| "learning_rate": 7.782294005464686e-07, | |
| "loss": 0.2894388735294342, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 1.904276985743381, | |
| "grad_norm": 2.519721269607544, | |
| "learning_rate": 7.770439378687161e-07, | |
| "loss": 0.3156396448612213, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 1.9083503054989817, | |
| "grad_norm": 2.6619021892547607, | |
| "learning_rate": 7.758562236237614e-07, | |
| "loss": 0.2675836831331253, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 1.9124236252545825, | |
| "grad_norm": 2.2732584476470947, | |
| "learning_rate": 7.746662674642912e-07, | |
| "loss": 0.21649424731731415, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.9164969450101834, | |
| "grad_norm": 2.302424669265747, | |
| "learning_rate": 7.734740790612136e-07, | |
| "loss": 0.2436254546046257, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 1.920570264765784, | |
| "grad_norm": 2.3057138919830322, | |
| "learning_rate": 7.722796681035769e-07, | |
| "loss": 0.25994570553302765, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 1.924643584521385, | |
| "grad_norm": 2.327444076538086, | |
| "learning_rate": 7.710830442984937e-07, | |
| "loss": 0.2572202906012535, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 1.9287169042769858, | |
| "grad_norm": 2.3709921836853027, | |
| "learning_rate": 7.698842173710599e-07, | |
| "loss": 0.2600872740149498, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 1.9327902240325865, | |
| "grad_norm": 2.463008165359497, | |
| "learning_rate": 7.686831970642766e-07, | |
| "loss": 0.2489926964044571, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 1.9368635437881874, | |
| "grad_norm": 2.4627251625061035, | |
| "learning_rate": 7.674799931389708e-07, | |
| "loss": 0.2903194725513458, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 1.9409368635437882, | |
| "grad_norm": 2.9676828384399414, | |
| "learning_rate": 7.662746153737156e-07, | |
| "loss": 0.2674560844898224, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 1.945010183299389, | |
| "grad_norm": 2.448298692703247, | |
| "learning_rate": 7.65067073564752e-07, | |
| "loss": 0.3171275407075882, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 1.9490835030549898, | |
| "grad_norm": 2.524946451187134, | |
| "learning_rate": 7.638573775259077e-07, | |
| "loss": 0.2637060284614563, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 1.9531568228105907, | |
| "grad_norm": 2.4001801013946533, | |
| "learning_rate": 7.62645537088518e-07, | |
| "loss": 0.23155802488327026, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.9572301425661913, | |
| "grad_norm": 2.287005662918091, | |
| "learning_rate": 7.614315621013469e-07, | |
| "loss": 0.27207742631435394, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 1.9613034623217924, | |
| "grad_norm": 2.453338623046875, | |
| "learning_rate": 7.60215462430505e-07, | |
| "loss": 0.2895518094301224, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 1.965376782077393, | |
| "grad_norm": 2.5567643642425537, | |
| "learning_rate": 7.58997247959371e-07, | |
| "loss": 0.2574731409549713, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 1.9694501018329937, | |
| "grad_norm": 2.365522623062134, | |
| "learning_rate": 7.577769285885108e-07, | |
| "loss": 0.22622792422771454, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 1.9735234215885948, | |
| "grad_norm": 2.2915074825286865, | |
| "learning_rate": 7.56554514235597e-07, | |
| "loss": 0.26031681150197983, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 1.9775967413441955, | |
| "grad_norm": 2.651094436645508, | |
| "learning_rate": 7.553300148353284e-07, | |
| "loss": 0.2904250845313072, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 1.9816700610997964, | |
| "grad_norm": 2.416658639907837, | |
| "learning_rate": 7.541034403393489e-07, | |
| "loss": 0.2949088215827942, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 1.9857433808553973, | |
| "grad_norm": 2.4140050411224365, | |
| "learning_rate": 7.528748007161676e-07, | |
| "loss": 0.2634105682373047, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 1.989816700610998, | |
| "grad_norm": 2.684359550476074, | |
| "learning_rate": 7.516441059510764e-07, | |
| "loss": 0.27155014872550964, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 1.9938900203665988, | |
| "grad_norm": 2.4172725677490234, | |
| "learning_rate": 7.5041136604607e-07, | |
| "loss": 0.25667136907577515, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.9979633401221997, | |
| "grad_norm": 2.3175907135009766, | |
| "learning_rate": 7.491765910197643e-07, | |
| "loss": 0.2724708914756775, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 2.5458946228027344, | |
| "learning_rate": 7.479397909073143e-07, | |
| "loss": 0.2475520372390747, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 2.0040733197556007, | |
| "grad_norm": 2.372309446334839, | |
| "learning_rate": 7.467009757603336e-07, | |
| "loss": 0.25162914395332336, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 2.0081466395112018, | |
| "grad_norm": 2.3240795135498047, | |
| "learning_rate": 7.454601556468121e-07, | |
| "loss": 0.24550767987966537, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 2.0122199592668024, | |
| "grad_norm": 2.3425838947296143, | |
| "learning_rate": 7.442173406510341e-07, | |
| "loss": 0.21918359398841858, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 2.016293279022403, | |
| "grad_norm": 2.2755539417266846, | |
| "learning_rate": 7.429725408734968e-07, | |
| "loss": 0.24949443340301514, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 2.020366598778004, | |
| "grad_norm": 2.490926742553711, | |
| "learning_rate": 7.417257664308276e-07, | |
| "loss": 0.2353430986404419, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 2.024439918533605, | |
| "grad_norm": 2.390965461730957, | |
| "learning_rate": 7.404770274557028e-07, | |
| "loss": 0.25412893295288086, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 2.0285132382892055, | |
| "grad_norm": 2.11749005317688, | |
| "learning_rate": 7.392263340967641e-07, | |
| "loss": 0.21416524052619934, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 2.0325865580448066, | |
| "grad_norm": 2.317534923553467, | |
| "learning_rate": 7.379736965185368e-07, | |
| "loss": 0.24323680251836777, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.0366598778004072, | |
| "grad_norm": 2.29274845123291, | |
| "learning_rate": 7.367191249013472e-07, | |
| "loss": 0.2301274538040161, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 2.0407331975560083, | |
| "grad_norm": 2.2650434970855713, | |
| "learning_rate": 7.354626294412402e-07, | |
| "loss": 0.1820373833179474, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 2.044806517311609, | |
| "grad_norm": 2.6822898387908936, | |
| "learning_rate": 7.342042203498951e-07, | |
| "loss": 0.24766084551811218, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 2.0488798370672097, | |
| "grad_norm": 2.583789348602295, | |
| "learning_rate": 7.329439078545438e-07, | |
| "loss": 0.24215728789567947, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 2.0529531568228108, | |
| "grad_norm": 2.484274387359619, | |
| "learning_rate": 7.316817021978883e-07, | |
| "loss": 0.23763683438301086, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 2.0570264765784114, | |
| "grad_norm": 2.4976799488067627, | |
| "learning_rate": 7.304176136380149e-07, | |
| "loss": 0.2570403814315796, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 2.061099796334012, | |
| "grad_norm": 2.323218584060669, | |
| "learning_rate": 7.291516524483136e-07, | |
| "loss": 0.21409639716148376, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 2.065173116089613, | |
| "grad_norm": 2.562451124191284, | |
| "learning_rate": 7.278838289173933e-07, | |
| "loss": 0.2541456073522568, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 2.069246435845214, | |
| "grad_norm": 2.538037061691284, | |
| "learning_rate": 7.266141533489983e-07, | |
| "loss": 0.2855856567621231, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 2.0733197556008145, | |
| "grad_norm": 2.348076343536377, | |
| "learning_rate": 7.253426360619242e-07, | |
| "loss": 0.2544526904821396, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 2.0773930753564156, | |
| "grad_norm": 2.2017970085144043, | |
| "learning_rate": 7.240692873899351e-07, | |
| "loss": 0.21077334135770798, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 2.0814663951120163, | |
| "grad_norm": 2.3848230838775635, | |
| "learning_rate": 7.227941176816787e-07, | |
| "loss": 0.2178598716855049, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 2.085539714867617, | |
| "grad_norm": 2.4316163063049316, | |
| "learning_rate": 7.215171373006024e-07, | |
| "loss": 0.21304114907979965, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 2.089613034623218, | |
| "grad_norm": 2.342197895050049, | |
| "learning_rate": 7.202383566248692e-07, | |
| "loss": 0.2186000794172287, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 2.0936863543788187, | |
| "grad_norm": 2.3286263942718506, | |
| "learning_rate": 7.189577860472731e-07, | |
| "loss": 0.25711505115032196, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 2.0977596741344193, | |
| "grad_norm": 2.196580410003662, | |
| "learning_rate": 7.176754359751555e-07, | |
| "loss": 0.20909912884235382, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 2.1018329938900204, | |
| "grad_norm": 2.197080373764038, | |
| "learning_rate": 7.163913168303191e-07, | |
| "loss": 0.23562318086624146, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 2.105906313645621, | |
| "grad_norm": 2.5636260509490967, | |
| "learning_rate": 7.151054390489444e-07, | |
| "loss": 0.25697334110736847, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 2.109979633401222, | |
| "grad_norm": 2.6412978172302246, | |
| "learning_rate": 7.138178130815047e-07, | |
| "loss": 0.24819861352443695, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 2.114052953156823, | |
| "grad_norm": 2.4628520011901855, | |
| "learning_rate": 7.125284493926809e-07, | |
| "loss": 0.20018430054187775, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 2.1181262729124235, | |
| "grad_norm": 2.6035919189453125, | |
| "learning_rate": 7.112373584612763e-07, | |
| "loss": 0.2615906372666359, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 2.1221995926680246, | |
| "grad_norm": 2.499178409576416, | |
| "learning_rate": 7.099445507801323e-07, | |
| "loss": 0.25786396861076355, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 2.1262729124236253, | |
| "grad_norm": 2.356799364089966, | |
| "learning_rate": 7.086500368560419e-07, | |
| "loss": 0.24771813303232193, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 2.130346232179226, | |
| "grad_norm": 2.3499882221221924, | |
| "learning_rate": 7.073538272096651e-07, | |
| "loss": 0.2333463951945305, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 2.134419551934827, | |
| "grad_norm": 2.328732490539551, | |
| "learning_rate": 7.060559323754435e-07, | |
| "loss": 0.20822811126708984, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 2.1384928716904277, | |
| "grad_norm": 2.24107027053833, | |
| "learning_rate": 7.047563629015141e-07, | |
| "loss": 0.2279072254896164, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 2.1425661914460283, | |
| "grad_norm": 2.3738133907318115, | |
| "learning_rate": 7.03455129349624e-07, | |
| "loss": 0.23648229241371155, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 2.1466395112016294, | |
| "grad_norm": 2.2748498916625977, | |
| "learning_rate": 7.021522422950443e-07, | |
| "loss": 0.21738301217556, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 2.15071283095723, | |
| "grad_norm": 2.4731128215789795, | |
| "learning_rate": 7.008477123264847e-07, | |
| "loss": 0.23567791283130646, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 2.1547861507128308, | |
| "grad_norm": 2.5964107513427734, | |
| "learning_rate": 6.995415500460067e-07, | |
| "loss": 0.25774678587913513, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 2.158859470468432, | |
| "grad_norm": 2.7168128490448, | |
| "learning_rate": 6.982337660689377e-07, | |
| "loss": 0.25715047866106033, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 2.1629327902240325, | |
| "grad_norm": 2.4718027114868164, | |
| "learning_rate": 6.96924371023785e-07, | |
| "loss": 0.21807514131069183, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 2.167006109979633, | |
| "grad_norm": 2.3053858280181885, | |
| "learning_rate": 6.956133755521496e-07, | |
| "loss": 0.22069019079208374, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 2.1710794297352343, | |
| "grad_norm": 2.3815200328826904, | |
| "learning_rate": 6.943007903086387e-07, | |
| "loss": 0.2234855741262436, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 2.175152749490835, | |
| "grad_norm": 2.331794261932373, | |
| "learning_rate": 6.929866259607797e-07, | |
| "loss": 0.2392452359199524, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 2.179226069246436, | |
| "grad_norm": 2.5421111583709717, | |
| "learning_rate": 6.916708931889344e-07, | |
| "loss": 0.2579016238451004, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 2.1832993890020367, | |
| "grad_norm": 2.235800266265869, | |
| "learning_rate": 6.903536026862104e-07, | |
| "loss": 0.21762673556804657, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 2.1873727087576373, | |
| "grad_norm": 2.3409221172332764, | |
| "learning_rate": 6.890347651583759e-07, | |
| "loss": 0.21134832501411438, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 2.1914460285132384, | |
| "grad_norm": 2.29571795463562, | |
| "learning_rate": 6.877143913237713e-07, | |
| "loss": 0.22032570838928223, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 2.195519348268839, | |
| "grad_norm": 2.3287696838378906, | |
| "learning_rate": 6.863924919132236e-07, | |
| "loss": 0.22613044828176498, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 2.1995926680244398, | |
| "grad_norm": 2.2647016048431396, | |
| "learning_rate": 6.850690776699573e-07, | |
| "loss": 0.20803789049386978, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 2.203665987780041, | |
| "grad_norm": 2.5180113315582275, | |
| "learning_rate": 6.837441593495086e-07, | |
| "loss": 0.25074785202741623, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 2.2077393075356415, | |
| "grad_norm": 2.4430267810821533, | |
| "learning_rate": 6.824177477196377e-07, | |
| "loss": 0.23671245574951172, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 2.211812627291242, | |
| "grad_norm": 2.4262306690216064, | |
| "learning_rate": 6.810898535602411e-07, | |
| "loss": 0.23878173530101776, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 2.2158859470468433, | |
| "grad_norm": 2.2304294109344482, | |
| "learning_rate": 6.797604876632632e-07, | |
| "loss": 0.2229127734899521, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 2.219959266802444, | |
| "grad_norm": 2.443239450454712, | |
| "learning_rate": 6.784296608326107e-07, | |
| "loss": 0.22968536615371704, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 2.224032586558045, | |
| "grad_norm": 2.5297110080718994, | |
| "learning_rate": 6.770973838840622e-07, | |
| "loss": 0.2511328458786011, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 2.2281059063136457, | |
| "grad_norm": 2.648350238800049, | |
| "learning_rate": 6.757636676451823e-07, | |
| "loss": 0.2674560844898224, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 2.2321792260692463, | |
| "grad_norm": 2.1822292804718018, | |
| "learning_rate": 6.744285229552327e-07, | |
| "loss": 0.19531694799661636, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 2.2362525458248474, | |
| "grad_norm": 2.4335954189300537, | |
| "learning_rate": 6.730919606650841e-07, | |
| "loss": 0.23324161767959595, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 2.240325865580448, | |
| "grad_norm": 2.5245561599731445, | |
| "learning_rate": 6.717539916371288e-07, | |
| "loss": 0.22070679813623428, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 2.2443991853360488, | |
| "grad_norm": 2.4031879901885986, | |
| "learning_rate": 6.704146267451908e-07, | |
| "loss": 0.20751216262578964, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 2.24847250509165, | |
| "grad_norm": 2.4731009006500244, | |
| "learning_rate": 6.690738768744394e-07, | |
| "loss": 0.23825813084840775, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 2.2525458248472505, | |
| "grad_norm": 2.62092924118042, | |
| "learning_rate": 6.677317529212993e-07, | |
| "loss": 0.23570290952920914, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 2.256619144602851, | |
| "grad_norm": 2.4778685569763184, | |
| "learning_rate": 6.663882657933626e-07, | |
| "loss": 0.22070936858654022, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 2.2606924643584523, | |
| "grad_norm": 2.3577542304992676, | |
| "learning_rate": 6.650434264093e-07, | |
| "loss": 0.2068188190460205, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 2.264765784114053, | |
| "grad_norm": 2.421339988708496, | |
| "learning_rate": 6.636972456987725e-07, | |
| "loss": 0.25278185307979584, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 2.2688391038696536, | |
| "grad_norm": 2.4010040760040283, | |
| "learning_rate": 6.623497346023417e-07, | |
| "loss": 0.24000639468431473, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 2.2729124236252547, | |
| "grad_norm": 2.54424786567688, | |
| "learning_rate": 6.610009040713818e-07, | |
| "loss": 0.27193646132946014, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 2.2769857433808554, | |
| "grad_norm": 2.5175139904022217, | |
| "learning_rate": 6.596507650679899e-07, | |
| "loss": 0.23332953453063965, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 2.281059063136456, | |
| "grad_norm": 2.4466259479522705, | |
| "learning_rate": 6.582993285648976e-07, | |
| "loss": 0.2428213581442833, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 2.285132382892057, | |
| "grad_norm": 2.4644064903259277, | |
| "learning_rate": 6.569466055453807e-07, | |
| "loss": 0.21874462068080902, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 2.2892057026476578, | |
| "grad_norm": 2.250108480453491, | |
| "learning_rate": 6.555926070031716e-07, | |
| "loss": 0.20208529382944107, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 2.293279022403259, | |
| "grad_norm": 2.5665433406829834, | |
| "learning_rate": 6.542373439423683e-07, | |
| "loss": 0.22964468598365784, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 2.2973523421588595, | |
| "grad_norm": 2.6082041263580322, | |
| "learning_rate": 6.528808273773459e-07, | |
| "loss": 0.24122490733861923, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 2.30142566191446, | |
| "grad_norm": 2.4170379638671875, | |
| "learning_rate": 6.515230683326669e-07, | |
| "loss": 0.21298009902238846, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 2.3054989816700613, | |
| "grad_norm": 2.651233196258545, | |
| "learning_rate": 6.501640778429917e-07, | |
| "loss": 0.24708368629217148, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 2.309572301425662, | |
| "grad_norm": 2.462790012359619, | |
| "learning_rate": 6.488038669529886e-07, | |
| "loss": 0.2247847020626068, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 2.3136456211812626, | |
| "grad_norm": 2.2574470043182373, | |
| "learning_rate": 6.474424467172442e-07, | |
| "loss": 0.21436495333909988, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 2.3177189409368637, | |
| "grad_norm": 2.4516212940216064, | |
| "learning_rate": 6.460798282001738e-07, | |
| "loss": 0.2516315132379532, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 2.3217922606924644, | |
| "grad_norm": 2.514031410217285, | |
| "learning_rate": 6.447160224759311e-07, | |
| "loss": 0.2250988855957985, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 2.325865580448065, | |
| "grad_norm": 2.3617303371429443, | |
| "learning_rate": 6.433510406283185e-07, | |
| "loss": 0.23347290605306625, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 2.329938900203666, | |
| "grad_norm": 2.392488479614258, | |
| "learning_rate": 6.419848937506964e-07, | |
| "loss": 0.25712814927101135, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 2.3340122199592668, | |
| "grad_norm": 2.4351253509521484, | |
| "learning_rate": 6.406175929458944e-07, | |
| "loss": 0.23134000599384308, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 2.3380855397148674, | |
| "grad_norm": 2.351980447769165, | |
| "learning_rate": 6.392491493261198e-07, | |
| "loss": 0.24626444280147552, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 2.3421588594704685, | |
| "grad_norm": 2.2502481937408447, | |
| "learning_rate": 6.37879574012867e-07, | |
| "loss": 0.2008371651172638, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 2.346232179226069, | |
| "grad_norm": 2.413408041000366, | |
| "learning_rate": 6.36508878136829e-07, | |
| "loss": 0.24356064200401306, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 2.35030549898167, | |
| "grad_norm": 2.358431577682495, | |
| "learning_rate": 6.351370728378049e-07, | |
| "loss": 0.25313031673431396, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 2.354378818737271, | |
| "grad_norm": 2.580531358718872, | |
| "learning_rate": 6.337641692646106e-07, | |
| "loss": 0.2431691735982895, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 2.3584521384928716, | |
| "grad_norm": 2.4596192836761475, | |
| "learning_rate": 6.323901785749871e-07, | |
| "loss": 0.2600405141711235, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 2.3625254582484727, | |
| "grad_norm": 2.41133975982666, | |
| "learning_rate": 6.310151119355118e-07, | |
| "loss": 0.1907319650053978, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 2.3665987780040734, | |
| "grad_norm": 2.5591516494750977, | |
| "learning_rate": 6.296389805215054e-07, | |
| "loss": 0.2532680034637451, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 2.370672097759674, | |
| "grad_norm": 2.362185478210449, | |
| "learning_rate": 6.282617955169425e-07, | |
| "loss": 0.2242288738489151, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 2.374745417515275, | |
| "grad_norm": 2.438898801803589, | |
| "learning_rate": 6.268835681143602e-07, | |
| "loss": 0.2321534976363182, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 2.378818737270876, | |
| "grad_norm": 2.066927433013916, | |
| "learning_rate": 6.255043095147678e-07, | |
| "loss": 0.2276027500629425, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 2.3828920570264764, | |
| "grad_norm": 2.347784996032715, | |
| "learning_rate": 6.241240309275545e-07, | |
| "loss": 0.2264810875058174, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 2.3869653767820775, | |
| "grad_norm": 2.444033145904541, | |
| "learning_rate": 6.227427435703995e-07, | |
| "loss": 0.2255028337240219, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 2.391038696537678, | |
| "grad_norm": 2.2944633960723877, | |
| "learning_rate": 6.213604586691805e-07, | |
| "loss": 0.2329559102654457, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 2.395112016293279, | |
| "grad_norm": 2.346848964691162, | |
| "learning_rate": 6.199771874578819e-07, | |
| "loss": 0.22230961173772812, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 2.39918533604888, | |
| "grad_norm": 2.4055089950561523, | |
| "learning_rate": 6.185929411785042e-07, | |
| "loss": 0.22255368530750275, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 2.4032586558044806, | |
| "grad_norm": 2.5555362701416016, | |
| "learning_rate": 6.172077310809724e-07, | |
| "loss": 0.25746987015008926, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 2.4073319755600817, | |
| "grad_norm": 2.5292749404907227, | |
| "learning_rate": 6.15821568423045e-07, | |
| "loss": 0.1872299611568451, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 2.4114052953156824, | |
| "grad_norm": 2.6285977363586426, | |
| "learning_rate": 6.144344644702211e-07, | |
| "loss": 0.25099916756153107, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 2.415478615071283, | |
| "grad_norm": 2.830077886581421, | |
| "learning_rate": 6.130464304956504e-07, | |
| "loss": 0.26932021975517273, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 2.4195519348268837, | |
| "grad_norm": 2.4076106548309326, | |
| "learning_rate": 6.116574777800412e-07, | |
| "loss": 0.25139734894037247, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 2.423625254582485, | |
| "grad_norm": 2.45658540725708, | |
| "learning_rate": 6.102676176115681e-07, | |
| "loss": 0.2526377737522125, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 2.4276985743380854, | |
| "grad_norm": 2.455595016479492, | |
| "learning_rate": 6.088768612857807e-07, | |
| "loss": 0.20270772278308868, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 2.4317718940936865, | |
| "grad_norm": 2.5158026218414307, | |
| "learning_rate": 6.074852201055121e-07, | |
| "loss": 0.2484816238284111, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 2.435845213849287, | |
| "grad_norm": 2.8085286617279053, | |
| "learning_rate": 6.060927053807863e-07, | |
| "loss": 0.2813955247402191, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 2.439918533604888, | |
| "grad_norm": 2.644648313522339, | |
| "learning_rate": 6.046993284287267e-07, | |
| "loss": 0.2188793271780014, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 2.443991853360489, | |
| "grad_norm": 2.362947702407837, | |
| "learning_rate": 6.033051005734647e-07, | |
| "loss": 0.2174539864063263, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 2.4480651731160896, | |
| "grad_norm": 2.469346284866333, | |
| "learning_rate": 6.019100331460466e-07, | |
| "loss": 0.24562832713127136, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 2.4521384928716903, | |
| "grad_norm": 2.338634729385376, | |
| "learning_rate": 6.005141374843419e-07, | |
| "loss": 0.23371918499469757, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 2.4562118126272914, | |
| "grad_norm": 2.3929550647735596, | |
| "learning_rate": 5.991174249329514e-07, | |
| "loss": 0.23879797011613846, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 2.460285132382892, | |
| "grad_norm": 2.257073163986206, | |
| "learning_rate": 5.977199068431153e-07, | |
| "loss": 0.22032149881124496, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 2.4643584521384927, | |
| "grad_norm": 2.408695936203003, | |
| "learning_rate": 5.963215945726198e-07, | |
| "loss": 0.2490229532122612, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 2.468431771894094, | |
| "grad_norm": 2.3911116123199463, | |
| "learning_rate": 5.949224994857057e-07, | |
| "loss": 0.23558590561151505, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 2.4725050916496945, | |
| "grad_norm": 2.351417303085327, | |
| "learning_rate": 5.93522632952976e-07, | |
| "loss": 0.22598809003829956, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 2.4765784114052956, | |
| "grad_norm": 2.258190870285034, | |
| "learning_rate": 5.921220063513034e-07, | |
| "loss": 0.17976737767457962, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 2.480651731160896, | |
| "grad_norm": 2.431304454803467, | |
| "learning_rate": 5.907206310637375e-07, | |
| "loss": 0.209254652261734, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 2.484725050916497, | |
| "grad_norm": 2.3682570457458496, | |
| "learning_rate": 5.893185184794128e-07, | |
| "loss": 0.23237604647874832, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 2.4887983706720975, | |
| "grad_norm": 2.473200798034668, | |
| "learning_rate": 5.879156799934554e-07, | |
| "loss": 0.21567458659410477, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 2.4928716904276986, | |
| "grad_norm": 2.312518358230591, | |
| "learning_rate": 5.865121270068916e-07, | |
| "loss": 0.20658506453037262, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 2.4969450101832993, | |
| "grad_norm": 2.5240719318389893, | |
| "learning_rate": 5.851078709265541e-07, | |
| "loss": 0.2315971404314041, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 2.5010183299389004, | |
| "grad_norm": 2.444183349609375, | |
| "learning_rate": 5.837029231649898e-07, | |
| "loss": 0.248056061565876, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 2.505091649694501, | |
| "grad_norm": 2.928251028060913, | |
| "learning_rate": 5.82297295140367e-07, | |
| "loss": 0.190412700176239, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 2.5091649694501017, | |
| "grad_norm": 2.3546931743621826, | |
| "learning_rate": 5.808909982763825e-07, | |
| "loss": 0.2290133684873581, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 2.513238289205703, | |
| "grad_norm": 2.4045937061309814, | |
| "learning_rate": 5.794840440021686e-07, | |
| "loss": 0.20809811353683472, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 2.5173116089613035, | |
| "grad_norm": 2.304481029510498, | |
| "learning_rate": 5.780764437522012e-07, | |
| "loss": 0.21659423410892487, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 2.521384928716904, | |
| "grad_norm": 2.5403997898101807, | |
| "learning_rate": 5.766682089662054e-07, | |
| "loss": 0.24090662598609924, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 2.525458248472505, | |
| "grad_norm": 2.365463972091675, | |
| "learning_rate": 5.752593510890635e-07, | |
| "loss": 0.22003071755170822, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 2.529531568228106, | |
| "grad_norm": 2.4152722358703613, | |
| "learning_rate": 5.738498815707219e-07, | |
| "loss": 0.22138181328773499, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 2.5336048879837065, | |
| "grad_norm": 2.3184590339660645, | |
| "learning_rate": 5.724398118660973e-07, | |
| "loss": 0.22900409996509552, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 2.5376782077393076, | |
| "grad_norm": 2.4730820655822754, | |
| "learning_rate": 5.710291534349849e-07, | |
| "loss": 0.2507135570049286, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 2.5417515274949083, | |
| "grad_norm": 2.771104097366333, | |
| "learning_rate": 5.696179177419642e-07, | |
| "loss": 0.2613483816385269, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 2.5458248472505094, | |
| "grad_norm": 2.334442377090454, | |
| "learning_rate": 5.682061162563061e-07, | |
| "loss": 0.19708166271448135, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 2.54989816700611, | |
| "grad_norm": 2.361771821975708, | |
| "learning_rate": 5.667937604518798e-07, | |
| "loss": 0.2150387316942215, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 2.5539714867617107, | |
| "grad_norm": 2.5752477645874023, | |
| "learning_rate": 5.653808618070597e-07, | |
| "loss": 0.25150199234485626, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 2.5580448065173114, | |
| "grad_norm": 2.685345411300659, | |
| "learning_rate": 5.639674318046317e-07, | |
| "loss": 0.22753620892763138, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 2.5621181262729125, | |
| "grad_norm": 2.3737359046936035, | |
| "learning_rate": 5.625534819317004e-07, | |
| "loss": 0.2346680760383606, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 2.566191446028513, | |
| "grad_norm": 2.5365428924560547, | |
| "learning_rate": 5.61139023679595e-07, | |
| "loss": 0.27340593934059143, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 2.5702647657841142, | |
| "grad_norm": 2.5359578132629395, | |
| "learning_rate": 5.597240685437765e-07, | |
| "loss": 0.21642443537712097, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 2.574338085539715, | |
| "grad_norm": 2.651430130004883, | |
| "learning_rate": 5.583086280237446e-07, | |
| "loss": 0.2263542339205742, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 2.5784114052953155, | |
| "grad_norm": 2.7270631790161133, | |
| "learning_rate": 5.568927136229432e-07, | |
| "loss": 0.2491724044084549, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 2.5824847250509166, | |
| "grad_norm": 2.3526268005371094, | |
| "learning_rate": 5.554763368486674e-07, | |
| "loss": 0.21600161492824554, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 2.5865580448065173, | |
| "grad_norm": 2.307650327682495, | |
| "learning_rate": 5.540595092119708e-07, | |
| "loss": 0.19877354055643082, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 2.5906313645621184, | |
| "grad_norm": 2.3778960704803467, | |
| "learning_rate": 5.526422422275707e-07, | |
| "loss": 0.2133459597826004, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 2.594704684317719, | |
| "grad_norm": 2.5685219764709473, | |
| "learning_rate": 5.512245474137546e-07, | |
| "loss": 0.2637478709220886, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 2.5987780040733197, | |
| "grad_norm": 2.3442726135253906, | |
| "learning_rate": 5.498064362922882e-07, | |
| "loss": 0.2268994301557541, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 2.6028513238289204, | |
| "grad_norm": 2.780430793762207, | |
| "learning_rate": 5.483879203883194e-07, | |
| "loss": 0.24991512298583984, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 2.6069246435845215, | |
| "grad_norm": 2.5000061988830566, | |
| "learning_rate": 5.469690112302863e-07, | |
| "loss": 0.2421465590596199, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 2.610997963340122, | |
| "grad_norm": 2.5583932399749756, | |
| "learning_rate": 5.455497203498231e-07, | |
| "loss": 0.22834140062332153, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 2.6150712830957232, | |
| "grad_norm": 2.4214282035827637, | |
| "learning_rate": 5.441300592816662e-07, | |
| "loss": 0.24269834905862808, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 2.619144602851324, | |
| "grad_norm": 2.352479934692383, | |
| "learning_rate": 5.427100395635607e-07, | |
| "loss": 0.23605701327323914, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 2.6232179226069245, | |
| "grad_norm": 2.3162059783935547, | |
| "learning_rate": 5.412896727361662e-07, | |
| "loss": 0.2196669727563858, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 2.627291242362525, | |
| "grad_norm": 2.438443183898926, | |
| "learning_rate": 5.398689703429634e-07, | |
| "loss": 0.23577384650707245, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 2.6313645621181263, | |
| "grad_norm": 2.6072158813476562, | |
| "learning_rate": 5.384479439301605e-07, | |
| "loss": 0.22815095633268356, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 2.635437881873727, | |
| "grad_norm": 2.495229959487915, | |
| "learning_rate": 5.370266050465983e-07, | |
| "loss": 0.21826496720314026, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 2.639511201629328, | |
| "grad_norm": 2.4141361713409424, | |
| "learning_rate": 5.356049652436579e-07, | |
| "loss": 0.2289327010512352, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 2.6435845213849287, | |
| "grad_norm": 2.461887836456299, | |
| "learning_rate": 5.341830360751658e-07, | |
| "loss": 0.24295459687709808, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 2.6476578411405294, | |
| "grad_norm": 2.4014673233032227, | |
| "learning_rate": 5.327608290972998e-07, | |
| "loss": 0.23736917972564697, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 2.6517311608961305, | |
| "grad_norm": 2.3798553943634033, | |
| "learning_rate": 5.313383558684957e-07, | |
| "loss": 0.21394247561693192, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 2.655804480651731, | |
| "grad_norm": 2.433729648590088, | |
| "learning_rate": 5.299156279493535e-07, | |
| "loss": 0.22641988098621368, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 2.6598778004073322, | |
| "grad_norm": 2.801403284072876, | |
| "learning_rate": 5.284926569025428e-07, | |
| "loss": 0.19061069190502167, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 2.663951120162933, | |
| "grad_norm": 2.345667839050293, | |
| "learning_rate": 5.270694542927088e-07, | |
| "loss": 0.2233218401670456, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 2.6680244399185336, | |
| "grad_norm": 2.7157375812530518, | |
| "learning_rate": 5.256460316863791e-07, | |
| "loss": 0.268241822719574, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 2.672097759674134, | |
| "grad_norm": 2.566452980041504, | |
| "learning_rate": 5.242224006518694e-07, | |
| "loss": 0.23765669763088226, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 2.6761710794297353, | |
| "grad_norm": 2.395622491836548, | |
| "learning_rate": 5.227985727591888e-07, | |
| "loss": 0.2502652183175087, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 2.680244399185336, | |
| "grad_norm": 2.3431246280670166, | |
| "learning_rate": 5.213745595799462e-07, | |
| "loss": 0.2402183562517166, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 2.684317718940937, | |
| "grad_norm": 2.374241352081299, | |
| "learning_rate": 5.199503726872573e-07, | |
| "loss": 0.23906593769788742, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 2.6883910386965377, | |
| "grad_norm": 2.4988749027252197, | |
| "learning_rate": 5.185260236556484e-07, | |
| "loss": 0.23993954807519913, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 2.6924643584521384, | |
| "grad_norm": 2.4878809452056885, | |
| "learning_rate": 5.171015240609644e-07, | |
| "loss": 0.23122139275074005, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 2.696537678207739, | |
| "grad_norm": 2.6239712238311768, | |
| "learning_rate": 5.156768854802734e-07, | |
| "loss": 0.2619614750146866, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 2.70061099796334, | |
| "grad_norm": 3.4436697959899902, | |
| "learning_rate": 5.142521194917733e-07, | |
| "loss": 0.24487808346748352, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 2.704684317718941, | |
| "grad_norm": 2.537623643875122, | |
| "learning_rate": 5.128272376746971e-07, | |
| "loss": 0.2131681889295578, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 2.708757637474542, | |
| "grad_norm": 2.5551114082336426, | |
| "learning_rate": 5.114022516092194e-07, | |
| "loss": 0.2128780037164688, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 2.7128309572301426, | |
| "grad_norm": 2.3989691734313965, | |
| "learning_rate": 5.099771728763623e-07, | |
| "loss": 0.24740803986787796, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 2.716904276985743, | |
| "grad_norm": 2.2155869007110596, | |
| "learning_rate": 5.085520130579005e-07, | |
| "loss": 0.19293303787708282, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 2.7209775967413443, | |
| "grad_norm": 2.773521661758423, | |
| "learning_rate": 5.07126783736268e-07, | |
| "loss": 0.24105177074670792, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 2.725050916496945, | |
| "grad_norm": 2.3562657833099365, | |
| "learning_rate": 5.057014964944634e-07, | |
| "loss": 0.2287985384464264, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 2.729124236252546, | |
| "grad_norm": 2.4878883361816406, | |
| "learning_rate": 5.042761629159566e-07, | |
| "loss": 0.2295224368572235, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 2.7331975560081467, | |
| "grad_norm": 2.9576029777526855, | |
| "learning_rate": 5.028507945845932e-07, | |
| "loss": 0.24781977385282516, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 2.7372708757637474, | |
| "grad_norm": 2.7853589057922363, | |
| "learning_rate": 5.014254030845021e-07, | |
| "loss": 0.2594607025384903, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 2.741344195519348, | |
| "grad_norm": 2.313760280609131, | |
| "learning_rate": 5e-07, | |
| "loss": 0.2149473801255226, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 2.745417515274949, | |
| "grad_norm": 2.6287894248962402, | |
| "learning_rate": 4.98574596915498e-07, | |
| "loss": 0.2292870730161667, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 2.74949083503055, | |
| "grad_norm": 2.3970835208892822, | |
| "learning_rate": 4.971492054154068e-07, | |
| "loss": 0.24747398495674133, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 2.753564154786151, | |
| "grad_norm": 2.5344555377960205, | |
| "learning_rate": 4.957238370840436e-07, | |
| "loss": 0.24396724253892899, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 2.7576374745417516, | |
| "grad_norm": 2.3144986629486084, | |
| "learning_rate": 4.942985035055366e-07, | |
| "loss": 0.2170693725347519, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 2.7617107942973522, | |
| "grad_norm": 2.3190531730651855, | |
| "learning_rate": 4.928732162637321e-07, | |
| "loss": 0.22908472269773483, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 2.765784114052953, | |
| "grad_norm": 2.263834238052368, | |
| "learning_rate": 4.914479869420994e-07, | |
| "loss": 0.22652582079172134, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 2.769857433808554, | |
| "grad_norm": 2.4412286281585693, | |
| "learning_rate": 4.900228271236377e-07, | |
| "loss": 0.22030826658010483, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 2.7739307535641546, | |
| "grad_norm": 2.443682909011841, | |
| "learning_rate": 4.885977483907804e-07, | |
| "loss": 0.22983287274837494, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 2.7780040733197557, | |
| "grad_norm": 2.451335906982422, | |
| "learning_rate": 4.871727623253028e-07, | |
| "loss": 0.22219268232584, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 2.7820773930753564, | |
| "grad_norm": 2.3448593616485596, | |
| "learning_rate": 4.857478805082267e-07, | |
| "loss": 0.2025885134935379, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 2.786150712830957, | |
| "grad_norm": 2.2007482051849365, | |
| "learning_rate": 4.843231145197266e-07, | |
| "loss": 0.2085644006729126, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 2.790224032586558, | |
| "grad_norm": 2.331252336502075, | |
| "learning_rate": 4.828984759390356e-07, | |
| "loss": 0.20427662134170532, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 2.794297352342159, | |
| "grad_norm": 2.243265151977539, | |
| "learning_rate": 4.814739763443515e-07, | |
| "loss": 0.2013532519340515, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 2.79837067209776, | |
| "grad_norm": 2.581249475479126, | |
| "learning_rate": 4.800496273127429e-07, | |
| "loss": 0.2316228449344635, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 2.8024439918533606, | |
| "grad_norm": 2.5763373374938965, | |
| "learning_rate": 4.786254404200538e-07, | |
| "loss": 0.2278730794787407, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 2.8065173116089612, | |
| "grad_norm": 2.4369475841522217, | |
| "learning_rate": 4.772014272408114e-07, | |
| "loss": 0.22459274530410767, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 2.810590631364562, | |
| "grad_norm": 2.458749294281006, | |
| "learning_rate": 4.757775993481306e-07, | |
| "loss": 0.21142005175352097, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 2.814663951120163, | |
| "grad_norm": 2.3797214031219482, | |
| "learning_rate": 4.743539683136209e-07, | |
| "loss": 0.23049592226743698, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 2.8187372708757636, | |
| "grad_norm": 2.5822553634643555, | |
| "learning_rate": 4.7293054570729126e-07, | |
| "loss": 0.2521442621946335, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 2.8228105906313647, | |
| "grad_norm": 2.489661931991577, | |
| "learning_rate": 4.715073430974573e-07, | |
| "loss": 0.24500177055597305, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 2.8268839103869654, | |
| "grad_norm": 2.592890977859497, | |
| "learning_rate": 4.7008437205064634e-07, | |
| "loss": 0.2384454905986786, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 2.830957230142566, | |
| "grad_norm": 2.5368080139160156, | |
| "learning_rate": 4.686616441315043e-07, | |
| "loss": 0.25886720418930054, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 2.835030549898167, | |
| "grad_norm": 2.4768271446228027, | |
| "learning_rate": 4.672391709027002e-07, | |
| "loss": 0.2593514025211334, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 2.839103869653768, | |
| "grad_norm": 2.3749191761016846, | |
| "learning_rate": 4.658169639248342e-07, | |
| "loss": 0.19730783253908157, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 2.8431771894093685, | |
| "grad_norm": 2.4880032539367676, | |
| "learning_rate": 4.643950347563421e-07, | |
| "loss": 0.22945521771907806, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 2.8472505091649696, | |
| "grad_norm": 2.3967182636260986, | |
| "learning_rate": 4.6297339495340165e-07, | |
| "loss": 0.24117758870124817, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 2.8513238289205702, | |
| "grad_norm": 2.144585609436035, | |
| "learning_rate": 4.615520560698397e-07, | |
| "loss": 0.181608684360981, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 2.855397148676171, | |
| "grad_norm": 2.489408493041992, | |
| "learning_rate": 4.601310296570366e-07, | |
| "loss": 0.27073855698108673, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 2.859470468431772, | |
| "grad_norm": 2.4170804023742676, | |
| "learning_rate": 4.5871032726383385e-07, | |
| "loss": 0.20901280641555786, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 2.8635437881873727, | |
| "grad_norm": 2.4658868312835693, | |
| "learning_rate": 4.572899604364392e-07, | |
| "loss": 0.22699516266584396, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 2.8676171079429738, | |
| "grad_norm": 2.32704758644104, | |
| "learning_rate": 4.5586994071833377e-07, | |
| "loss": 0.2194635197520256, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 2.8716904276985744, | |
| "grad_norm": 2.540724277496338, | |
| "learning_rate": 4.5445027965017683e-07, | |
| "loss": 0.23322076350450516, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 2.875763747454175, | |
| "grad_norm": 2.4795303344726562, | |
| "learning_rate": 4.5303098876971373e-07, | |
| "loss": 0.22777557373046875, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 2.8798370672097757, | |
| "grad_norm": 2.5126326084136963, | |
| "learning_rate": 4.516120796116806e-07, | |
| "loss": 0.2254505679011345, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 2.883910386965377, | |
| "grad_norm": 2.6765761375427246, | |
| "learning_rate": 4.5019356370771185e-07, | |
| "loss": 0.23741194605827332, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 2.8879837067209775, | |
| "grad_norm": 2.261955976486206, | |
| "learning_rate": 4.487754525862453e-07, | |
| "loss": 0.22391848266124725, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 2.8920570264765786, | |
| "grad_norm": 2.5763208866119385, | |
| "learning_rate": 4.473577577724293e-07, | |
| "loss": 0.2411726713180542, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 2.8961303462321792, | |
| "grad_norm": 2.489358425140381, | |
| "learning_rate": 4.459404907880292e-07, | |
| "loss": 0.25273367017507553, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 2.90020366598778, | |
| "grad_norm": 2.3726134300231934, | |
| "learning_rate": 4.4452366315133256e-07, | |
| "loss": 0.20239847898483276, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 2.904276985743381, | |
| "grad_norm": 2.5708673000335693, | |
| "learning_rate": 4.43107286377057e-07, | |
| "loss": 0.23958415538072586, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 2.9083503054989817, | |
| "grad_norm": 2.319916248321533, | |
| "learning_rate": 4.4169137197625537e-07, | |
| "loss": 0.2106548771262169, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 2.9124236252545828, | |
| "grad_norm": 2.5788509845733643, | |
| "learning_rate": 4.4027593145622357e-07, | |
| "loss": 0.2554449290037155, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 2.9164969450101834, | |
| "grad_norm": 2.4445812702178955, | |
| "learning_rate": 4.388609763204051e-07, | |
| "loss": 0.22007200866937637, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 2.920570264765784, | |
| "grad_norm": 2.534118175506592, | |
| "learning_rate": 4.3744651806829967e-07, | |
| "loss": 0.24423322826623917, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 2.9246435845213847, | |
| "grad_norm": 2.4413700103759766, | |
| "learning_rate": 4.3603256819536817e-07, | |
| "loss": 0.22403902560472488, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 2.928716904276986, | |
| "grad_norm": 2.397890567779541, | |
| "learning_rate": 4.3461913819294035e-07, | |
| "loss": 0.2157697230577469, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 2.9327902240325865, | |
| "grad_norm": 2.469019651412964, | |
| "learning_rate": 4.332062395481203e-07, | |
| "loss": 0.22792110592126846, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 2.9368635437881876, | |
| "grad_norm": 2.254711866378784, | |
| "learning_rate": 4.3179388374369396e-07, | |
| "loss": 0.22459496557712555, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 2.9409368635437882, | |
| "grad_norm": 2.2734055519104004, | |
| "learning_rate": 4.3038208225803594e-07, | |
| "loss": 0.22033336013555527, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 2.945010183299389, | |
| "grad_norm": 2.565237283706665, | |
| "learning_rate": 4.289708465650151e-07, | |
| "loss": 0.24250654131174088, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 2.9490835030549896, | |
| "grad_norm": 2.408484935760498, | |
| "learning_rate": 4.275601881339027e-07, | |
| "loss": 0.22946030646562576, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 2.9531568228105907, | |
| "grad_norm": 2.5278210639953613, | |
| "learning_rate": 4.261501184292782e-07, | |
| "loss": 0.22008834779262543, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 2.9572301425661913, | |
| "grad_norm": 2.2692785263061523, | |
| "learning_rate": 4.2474064891093655e-07, | |
| "loss": 0.22004914283752441, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 2.9613034623217924, | |
| "grad_norm": 2.8039700984954834, | |
| "learning_rate": 4.2333179103379445e-07, | |
| "loss": 0.22936520725488663, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 2.965376782077393, | |
| "grad_norm": 2.5510387420654297, | |
| "learning_rate": 4.2192355624779884e-07, | |
| "loss": 0.2276434227824211, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 2.9694501018329937, | |
| "grad_norm": 2.512366771697998, | |
| "learning_rate": 4.205159559978313e-07, | |
| "loss": 0.23083247244358063, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 2.973523421588595, | |
| "grad_norm": 2.492103338241577, | |
| "learning_rate": 4.1910900172361763e-07, | |
| "loss": 0.2253756895661354, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 2.9775967413441955, | |
| "grad_norm": 2.4701900482177734, | |
| "learning_rate": 4.1770270485963294e-07, | |
| "loss": 0.2475346326828003, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 2.9816700610997966, | |
| "grad_norm": 2.8806581497192383, | |
| "learning_rate": 4.162970768350102e-07, | |
| "loss": 0.27113020420074463, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 2.9857433808553973, | |
| "grad_norm": 2.374631643295288, | |
| "learning_rate": 4.148921290734459e-07, | |
| "loss": 0.22385699301958084, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 2.989816700610998, | |
| "grad_norm": 2.5273098945617676, | |
| "learning_rate": 4.134878729931083e-07, | |
| "loss": 0.2260419949889183, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 2.9938900203665986, | |
| "grad_norm": 2.4065239429473877, | |
| "learning_rate": 4.120843200065447e-07, | |
| "loss": 0.2051657810807228, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 2.9979633401221997, | |
| "grad_norm": 2.419027328491211, | |
| "learning_rate": 4.106814815205873e-07, | |
| "loss": 0.22403547167778015, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 2.419027328491211, | |
| "learning_rate": 4.092793689362625e-07, | |
| "loss": 0.2715803384780884, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 3.0040733197556007, | |
| "grad_norm": 2.3535571098327637, | |
| "learning_rate": 4.078779936486965e-07, | |
| "loss": 0.21160051226615906, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 3.0081466395112018, | |
| "grad_norm": 2.379765272140503, | |
| "learning_rate": 4.06477367047024e-07, | |
| "loss": 0.22423982620239258, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 3.0122199592668024, | |
| "grad_norm": 2.3597190380096436, | |
| "learning_rate": 4.050775005142943e-07, | |
| "loss": 0.22356468439102173, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 3.016293279022403, | |
| "grad_norm": 2.3479771614074707, | |
| "learning_rate": 4.036784054273803e-07, | |
| "loss": 0.1949443370103836, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 3.020366598778004, | |
| "grad_norm": 2.1719963550567627, | |
| "learning_rate": 4.0228009315688463e-07, | |
| "loss": 0.16644436120986938, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 3.024439918533605, | |
| "grad_norm": 2.2947304248809814, | |
| "learning_rate": 4.0088257506704853e-07, | |
| "loss": 0.21931639313697815, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 3.0285132382892055, | |
| "grad_norm": 2.383701801300049, | |
| "learning_rate": 3.994858625156582e-07, | |
| "loss": 0.2001979798078537, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 3.0325865580448066, | |
| "grad_norm": 2.237454891204834, | |
| "learning_rate": 3.9808996685395344e-07, | |
| "loss": 0.20605531334877014, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 3.0366598778004072, | |
| "grad_norm": 2.3682029247283936, | |
| "learning_rate": 3.966948994265354e-07, | |
| "loss": 0.21250516921281815, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 3.0407331975560083, | |
| "grad_norm": 2.4874134063720703, | |
| "learning_rate": 3.953006715712733e-07, | |
| "loss": 0.21023060381412506, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 3.044806517311609, | |
| "grad_norm": 2.21878719329834, | |
| "learning_rate": 3.939072946192139e-07, | |
| "loss": 0.22278980910778046, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 3.0488798370672097, | |
| "grad_norm": 2.3506107330322266, | |
| "learning_rate": 3.9251477989448795e-07, | |
| "loss": 0.2010231390595436, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 3.0529531568228108, | |
| "grad_norm": 2.3747055530548096, | |
| "learning_rate": 3.9112313871421937e-07, | |
| "loss": 0.18845809996128082, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 3.0570264765784114, | |
| "grad_norm": 2.1489953994750977, | |
| "learning_rate": 3.897323823884318e-07, | |
| "loss": 0.19643110036849976, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 3.061099796334012, | |
| "grad_norm": 2.3836452960968018, | |
| "learning_rate": 3.8834252221995877e-07, | |
| "loss": 0.22072193771600723, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 3.065173116089613, | |
| "grad_norm": 2.418147563934326, | |
| "learning_rate": 3.8695356950434945e-07, | |
| "loss": 0.2128564417362213, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 3.069246435845214, | |
| "grad_norm": 2.341552734375, | |
| "learning_rate": 3.855655355297789e-07, | |
| "loss": 0.21925050020217896, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 3.0733197556008145, | |
| "grad_norm": 2.7095723152160645, | |
| "learning_rate": 3.8417843157695497e-07, | |
| "loss": 0.19232793152332306, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 3.0773930753564156, | |
| "grad_norm": 2.275331497192383, | |
| "learning_rate": 3.827922689190275e-07, | |
| "loss": 0.203746996819973, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 3.0814663951120163, | |
| "grad_norm": 2.3823604583740234, | |
| "learning_rate": 3.8140705882149585e-07, | |
| "loss": 0.20427367091178894, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 3.085539714867617, | |
| "grad_norm": 3.103059768676758, | |
| "learning_rate": 3.8002281254211815e-07, | |
| "loss": 0.22299692034721375, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 3.089613034623218, | |
| "grad_norm": 2.3718128204345703, | |
| "learning_rate": 3.7863954133081966e-07, | |
| "loss": 0.18740925192832947, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 3.0936863543788187, | |
| "grad_norm": 2.494307518005371, | |
| "learning_rate": 3.772572564296004e-07, | |
| "loss": 0.18933183699846268, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 3.0977596741344193, | |
| "grad_norm": 2.2291083335876465, | |
| "learning_rate": 3.7587596907244545e-07, | |
| "loss": 0.20075885951519012, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 3.1018329938900204, | |
| "grad_norm": 2.42327618598938, | |
| "learning_rate": 3.744956904852321e-07, | |
| "loss": 0.20842499285936356, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 3.105906313645621, | |
| "grad_norm": 2.4771366119384766, | |
| "learning_rate": 3.7311643188563967e-07, | |
| "loss": 0.20921579748392105, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 3.109979633401222, | |
| "grad_norm": 2.459636688232422, | |
| "learning_rate": 3.717382044830575e-07, | |
| "loss": 0.20464809238910675, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 3.114052953156823, | |
| "grad_norm": 2.4493255615234375, | |
| "learning_rate": 3.7036101947849456e-07, | |
| "loss": 0.19754133373498917, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 3.1181262729124235, | |
| "grad_norm": 2.426888942718506, | |
| "learning_rate": 3.6898488806448807e-07, | |
| "loss": 0.22133717685937881, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 3.1221995926680246, | |
| "grad_norm": 2.7352089881896973, | |
| "learning_rate": 3.6760982142501284e-07, | |
| "loss": 0.19161057472229004, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 3.1262729124236253, | |
| "grad_norm": 2.271554946899414, | |
| "learning_rate": 3.6623583073538965e-07, | |
| "loss": 0.1958870366215706, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 3.130346232179226, | |
| "grad_norm": 2.6821932792663574, | |
| "learning_rate": 3.6486292716219514e-07, | |
| "loss": 0.20984943211078644, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 3.134419551934827, | |
| "grad_norm": 2.6320438385009766, | |
| "learning_rate": 3.634911218631711e-07, | |
| "loss": 0.20410407334566116, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 3.1384928716904277, | |
| "grad_norm": 2.2215492725372314, | |
| "learning_rate": 3.6212042598713296e-07, | |
| "loss": 0.21011168509721756, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 3.1425661914460283, | |
| "grad_norm": 2.485713005065918, | |
| "learning_rate": 3.607508506738803e-07, | |
| "loss": 0.20080970227718353, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 3.1466395112016294, | |
| "grad_norm": 2.4904892444610596, | |
| "learning_rate": 3.5938240705410537e-07, | |
| "loss": 0.21116189658641815, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 3.15071283095723, | |
| "grad_norm": 2.4921395778656006, | |
| "learning_rate": 3.580151062493036e-07, | |
| "loss": 0.1990169882774353, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 3.1547861507128308, | |
| "grad_norm": 2.323054790496826, | |
| "learning_rate": 3.566489593716816e-07, | |
| "loss": 0.19865593314170837, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 3.158859470468432, | |
| "grad_norm": 2.533299684524536, | |
| "learning_rate": 3.5528397752406894e-07, | |
| "loss": 0.19558026641607285, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 3.1629327902240325, | |
| "grad_norm": 2.2772974967956543, | |
| "learning_rate": 3.5392017179982613e-07, | |
| "loss": 0.18929005414247513, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 3.167006109979633, | |
| "grad_norm": 2.433457374572754, | |
| "learning_rate": 3.5255755328275584e-07, | |
| "loss": 0.2181885540485382, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 3.1710794297352343, | |
| "grad_norm": 2.350560426712036, | |
| "learning_rate": 3.511961330470115e-07, | |
| "loss": 0.19552721083164215, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 3.175152749490835, | |
| "grad_norm": 2.4112911224365234, | |
| "learning_rate": 3.498359221570083e-07, | |
| "loss": 0.20244888216257095, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 3.179226069246436, | |
| "grad_norm": 2.4253125190734863, | |
| "learning_rate": 3.484769316673331e-07, | |
| "loss": 0.22686784714460373, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 3.1832993890020367, | |
| "grad_norm": 2.535693645477295, | |
| "learning_rate": 3.471191726226541e-07, | |
| "loss": 0.22888771444559097, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 3.1873727087576373, | |
| "grad_norm": 2.3272151947021484, | |
| "learning_rate": 3.4576265605763185e-07, | |
| "loss": 0.20377343893051147, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 3.1914460285132384, | |
| "grad_norm": 2.6908118724823, | |
| "learning_rate": 3.444073929968284e-07, | |
| "loss": 0.20916848629713058, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 3.195519348268839, | |
| "grad_norm": 2.3921995162963867, | |
| "learning_rate": 3.4305339445461923e-07, | |
| "loss": 0.1810828298330307, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 3.1995926680244398, | |
| "grad_norm": 2.4223461151123047, | |
| "learning_rate": 3.417006714351024e-07, | |
| "loss": 0.2161688134074211, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 3.203665987780041, | |
| "grad_norm": 2.4449656009674072, | |
| "learning_rate": 3.4034923493201007e-07, | |
| "loss": 0.18497437238693237, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 3.2077393075356415, | |
| "grad_norm": 2.400811195373535, | |
| "learning_rate": 3.3899909592861816e-07, | |
| "loss": 0.1974419429898262, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 3.211812627291242, | |
| "grad_norm": 2.381777763366699, | |
| "learning_rate": 3.3765026539765827e-07, | |
| "loss": 0.18730898946523666, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 3.2158859470468433, | |
| "grad_norm": 2.3965096473693848, | |
| "learning_rate": 3.3630275430122747e-07, | |
| "loss": 0.17667900025844574, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 3.219959266802444, | |
| "grad_norm": 2.2706997394561768, | |
| "learning_rate": 3.349565735907e-07, | |
| "loss": 0.21489760279655457, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 3.224032586558045, | |
| "grad_norm": 2.5842676162719727, | |
| "learning_rate": 3.336117342066375e-07, | |
| "loss": 0.21414875984191895, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 3.2281059063136457, | |
| "grad_norm": 2.563056707382202, | |
| "learning_rate": 3.3226824707870073e-07, | |
| "loss": 0.22138936817646027, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 3.2321792260692463, | |
| "grad_norm": 2.35392689704895, | |
| "learning_rate": 3.3092612312556075e-07, | |
| "loss": 0.20381484925746918, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 3.2362525458248474, | |
| "grad_norm": 2.532590389251709, | |
| "learning_rate": 3.2958537325480924e-07, | |
| "loss": 0.21896713972091675, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 3.240325865580448, | |
| "grad_norm": 2.476938247680664, | |
| "learning_rate": 3.282460083628713e-07, | |
| "loss": 0.21506690233945847, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 3.2443991853360488, | |
| "grad_norm": 2.544861078262329, | |
| "learning_rate": 3.2690803933491576e-07, | |
| "loss": 0.22808198630809784, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 3.24847250509165, | |
| "grad_norm": 2.4569203853607178, | |
| "learning_rate": 3.255714770447674e-07, | |
| "loss": 0.2176097184419632, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 3.2525458248472505, | |
| "grad_norm": 2.4378373622894287, | |
| "learning_rate": 3.242363323548177e-07, | |
| "loss": 0.19017792493104935, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 3.256619144602851, | |
| "grad_norm": 2.4247865676879883, | |
| "learning_rate": 3.229026161159378e-07, | |
| "loss": 0.2059084177017212, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 3.2606924643584523, | |
| "grad_norm": 2.5162672996520996, | |
| "learning_rate": 3.215703391673893e-07, | |
| "loss": 0.19650442153215408, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 3.264765784114053, | |
| "grad_norm": 2.570923328399658, | |
| "learning_rate": 3.202395123367367e-07, | |
| "loss": 0.2457878440618515, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 3.2688391038696536, | |
| "grad_norm": 2.5794918537139893, | |
| "learning_rate": 3.189101464397591e-07, | |
| "loss": 0.1937229335308075, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 3.2729124236252547, | |
| "grad_norm": 2.4221835136413574, | |
| "learning_rate": 3.1758225228036227e-07, | |
| "loss": 0.17538277059793472, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 3.2769857433808554, | |
| "grad_norm": 2.418198347091675, | |
| "learning_rate": 3.1625584065049155e-07, | |
| "loss": 0.20809274911880493, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 3.281059063136456, | |
| "grad_norm": 2.56083607673645, | |
| "learning_rate": 3.1493092233004277e-07, | |
| "loss": 0.2350323647260666, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 3.285132382892057, | |
| "grad_norm": 2.516634702682495, | |
| "learning_rate": 3.136075080867765e-07, | |
| "loss": 0.1945355385541916, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 3.2892057026476578, | |
| "grad_norm": 2.5624663829803467, | |
| "learning_rate": 3.1228560867622854e-07, | |
| "loss": 0.21247724443674088, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 3.293279022403259, | |
| "grad_norm": 2.6713180541992188, | |
| "learning_rate": 3.1096523484162407e-07, | |
| "loss": 0.20905288308858871, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 3.2973523421588595, | |
| "grad_norm": 2.5594656467437744, | |
| "learning_rate": 3.0964639731378947e-07, | |
| "loss": 0.21280484646558762, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 3.30142566191446, | |
| "grad_norm": 2.5005033016204834, | |
| "learning_rate": 3.0832910681106565e-07, | |
| "loss": 0.1858508661389351, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 3.3054989816700613, | |
| "grad_norm": 2.5059120655059814, | |
| "learning_rate": 3.070133740392202e-07, | |
| "loss": 0.17269013077020645, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 3.309572301425662, | |
| "grad_norm": 2.484297752380371, | |
| "learning_rate": 3.0569920969136135e-07, | |
| "loss": 0.21136271953582764, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 3.3136456211812626, | |
| "grad_norm": 2.5234310626983643, | |
| "learning_rate": 3.043866244478505e-07, | |
| "loss": 0.18130285292863846, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 3.3177189409368637, | |
| "grad_norm": 2.338294267654419, | |
| "learning_rate": 3.0307562897621485e-07, | |
| "loss": 0.20136955380439758, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 3.3217922606924644, | |
| "grad_norm": 2.490870714187622, | |
| "learning_rate": 3.0176623393106235e-07, | |
| "loss": 0.18799114972352982, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 3.325865580448065, | |
| "grad_norm": 2.756178140640259, | |
| "learning_rate": 3.0045844995399327e-07, | |
| "loss": 0.17684923857450485, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 3.329938900203666, | |
| "grad_norm": 2.6265006065368652, | |
| "learning_rate": 2.9915228767351535e-07, | |
| "loss": 0.20372115820646286, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 3.3340122199592668, | |
| "grad_norm": 2.3552069664001465, | |
| "learning_rate": 2.978477577049556e-07, | |
| "loss": 0.17303332686424255, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 3.3380855397148674, | |
| "grad_norm": 2.3262510299682617, | |
| "learning_rate": 2.965448706503761e-07, | |
| "loss": 0.22534170746803284, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 3.3421588594704685, | |
| "grad_norm": 2.619785785675049, | |
| "learning_rate": 2.952436370984859e-07, | |
| "loss": 0.2306053191423416, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 3.346232179226069, | |
| "grad_norm": 2.4102282524108887, | |
| "learning_rate": 2.939440676245566e-07, | |
| "loss": 0.2103247046470642, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 3.35030549898167, | |
| "grad_norm": 2.646646738052368, | |
| "learning_rate": 2.926461727903349e-07, | |
| "loss": 0.21324314177036285, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 3.354378818737271, | |
| "grad_norm": 2.454810619354248, | |
| "learning_rate": 2.9134996314395817e-07, | |
| "loss": 0.1971464827656746, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 3.3584521384928716, | |
| "grad_norm": 2.4926371574401855, | |
| "learning_rate": 2.900554492198677e-07, | |
| "loss": 0.19652055203914642, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 3.3625254582484727, | |
| "grad_norm": 2.251152992248535, | |
| "learning_rate": 2.887626415387237e-07, | |
| "loss": 0.22214417904615402, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 3.3665987780040734, | |
| "grad_norm": 2.5524468421936035, | |
| "learning_rate": 2.8747155060731937e-07, | |
| "loss": 0.21327239274978638, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 3.370672097759674, | |
| "grad_norm": 2.429053783416748, | |
| "learning_rate": 2.8618218691849545e-07, | |
| "loss": 0.2205718532204628, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 3.374745417515275, | |
| "grad_norm": 2.5766923427581787, | |
| "learning_rate": 2.8489456095105566e-07, | |
| "loss": 0.23655060678720474, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 3.378818737270876, | |
| "grad_norm": 2.752376079559326, | |
| "learning_rate": 2.836086831696809e-07, | |
| "loss": 0.24048178642988205, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 3.3828920570264764, | |
| "grad_norm": 2.4999852180480957, | |
| "learning_rate": 2.8232456402484463e-07, | |
| "loss": 0.16257788240909576, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 3.3869653767820775, | |
| "grad_norm": 2.4269254207611084, | |
| "learning_rate": 2.8104221395272674e-07, | |
| "loss": 0.2149578034877777, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 3.391038696537678, | |
| "grad_norm": 2.354278326034546, | |
| "learning_rate": 2.797616433751309e-07, | |
| "loss": 0.19306989759206772, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 3.395112016293279, | |
| "grad_norm": 2.495457649230957, | |
| "learning_rate": 2.784828626993976e-07, | |
| "loss": 0.16760513186454773, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 3.39918533604888, | |
| "grad_norm": 2.299818754196167, | |
| "learning_rate": 2.772058823183212e-07, | |
| "loss": 0.1885610893368721, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 3.4032586558044806, | |
| "grad_norm": 2.441559076309204, | |
| "learning_rate": 2.7593071261006473e-07, | |
| "loss": 0.19247783720493317, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 3.4073319755600817, | |
| "grad_norm": 2.5072267055511475, | |
| "learning_rate": 2.746573639380758e-07, | |
| "loss": 0.16321063041687012, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 3.4114052953156824, | |
| "grad_norm": 2.3393125534057617, | |
| "learning_rate": 2.7338584665100195e-07, | |
| "loss": 0.1672486811876297, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 3.415478615071283, | |
| "grad_norm": 2.5106165409088135, | |
| "learning_rate": 2.7211617108260674e-07, | |
| "loss": 0.20449652522802353, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 3.4195519348268837, | |
| "grad_norm": 2.5324196815490723, | |
| "learning_rate": 2.708483475516865e-07, | |
| "loss": 0.2249668836593628, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 3.423625254582485, | |
| "grad_norm": 2.581486225128174, | |
| "learning_rate": 2.695823863619853e-07, | |
| "loss": 0.2402847856283188, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 3.4276985743380854, | |
| "grad_norm": 2.535104274749756, | |
| "learning_rate": 2.683182978021118e-07, | |
| "loss": 0.1992955058813095, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 3.4317718940936865, | |
| "grad_norm": 2.2774689197540283, | |
| "learning_rate": 2.6705609214545585e-07, | |
| "loss": 0.19953173398971558, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 3.435845213849287, | |
| "grad_norm": 2.4968085289001465, | |
| "learning_rate": 2.65795779650105e-07, | |
| "loss": 0.19426950812339783, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 3.439918533604888, | |
| "grad_norm": 2.459174156188965, | |
| "learning_rate": 2.6453737055875974e-07, | |
| "loss": 0.1926077976822853, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 3.443991853360489, | |
| "grad_norm": 2.254603624343872, | |
| "learning_rate": 2.632808750986527e-07, | |
| "loss": 0.1818878874182701, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 3.4480651731160896, | |
| "grad_norm": 2.502915382385254, | |
| "learning_rate": 2.620263034814632e-07, | |
| "loss": 0.17121271044015884, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 3.4521384928716903, | |
| "grad_norm": 2.478640079498291, | |
| "learning_rate": 2.6077366590323605e-07, | |
| "loss": 0.17577876895666122, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 3.4562118126272914, | |
| "grad_norm": 2.799896478652954, | |
| "learning_rate": 2.5952297254429725e-07, | |
| "loss": 0.2371537759900093, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 3.460285132382892, | |
| "grad_norm": 2.4503631591796875, | |
| "learning_rate": 2.582742335691722e-07, | |
| "loss": 0.18308546394109726, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 3.4643584521384927, | |
| "grad_norm": 2.511253595352173, | |
| "learning_rate": 2.5702745912650327e-07, | |
| "loss": 0.1863907426595688, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 3.468431771894094, | |
| "grad_norm": 2.441244602203369, | |
| "learning_rate": 2.5578265934896586e-07, | |
| "loss": 0.17125633358955383, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 3.4725050916496945, | |
| "grad_norm": 2.5986480712890625, | |
| "learning_rate": 2.54539844353188e-07, | |
| "loss": 0.20211002230644226, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 3.4765784114052956, | |
| "grad_norm": 2.5657708644866943, | |
| "learning_rate": 2.5329902423966636e-07, | |
| "loss": 0.18166958540678024, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 3.480651731160896, | |
| "grad_norm": 2.759941577911377, | |
| "learning_rate": 2.5206020909268575e-07, | |
| "loss": 0.2045290172100067, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 3.484725050916497, | |
| "grad_norm": 2.3406291007995605, | |
| "learning_rate": 2.508234089802356e-07, | |
| "loss": 0.20272044837474823, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 3.4887983706720975, | |
| "grad_norm": 2.5458176136016846, | |
| "learning_rate": 2.4958863395392985e-07, | |
| "loss": 0.20205383747816086, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 3.4928716904276986, | |
| "grad_norm": 2.6369478702545166, | |
| "learning_rate": 2.483558940489235e-07, | |
| "loss": 0.21245616674423218, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 3.4969450101832993, | |
| "grad_norm": 2.407942533493042, | |
| "learning_rate": 2.4712519928383245e-07, | |
| "loss": 0.20060960948467255, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 3.5010183299389004, | |
| "grad_norm": 2.491145133972168, | |
| "learning_rate": 2.45896559660651e-07, | |
| "loss": 0.2001042366027832, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 3.505091649694501, | |
| "grad_norm": 2.3934686183929443, | |
| "learning_rate": 2.4466998516467176e-07, | |
| "loss": 0.20040663331747055, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 3.5091649694501017, | |
| "grad_norm": 2.350095510482788, | |
| "learning_rate": 2.4344548576440293e-07, | |
| "loss": 0.1889752671122551, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 3.513238289205703, | |
| "grad_norm": 2.4073328971862793, | |
| "learning_rate": 2.4222307141148906e-07, | |
| "loss": 0.18654479831457138, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 3.5173116089613035, | |
| "grad_norm": 2.4485344886779785, | |
| "learning_rate": 2.4100275204062897e-07, | |
| "loss": 0.21063948422670364, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 3.521384928716904, | |
| "grad_norm": 2.4200923442840576, | |
| "learning_rate": 2.397845375694949e-07, | |
| "loss": 0.1860312521457672, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 3.525458248472505, | |
| "grad_norm": 2.775789260864258, | |
| "learning_rate": 2.3856843789865303e-07, | |
| "loss": 0.2309775874018669, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 3.529531568228106, | |
| "grad_norm": 2.7096245288848877, | |
| "learning_rate": 2.3735446291148176e-07, | |
| "loss": 0.2163269817829132, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 3.5336048879837065, | |
| "grad_norm": 2.4550371170043945, | |
| "learning_rate": 2.361426224740924e-07, | |
| "loss": 0.1779681146144867, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 3.5376782077393076, | |
| "grad_norm": 2.2727365493774414, | |
| "learning_rate": 2.3493292643524799e-07, | |
| "loss": 0.19602014124393463, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 3.5417515274949083, | |
| "grad_norm": 2.445028066635132, | |
| "learning_rate": 2.3372538462628422e-07, | |
| "loss": 0.19011924415826797, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 3.5458248472505094, | |
| "grad_norm": 2.537092447280884, | |
| "learning_rate": 2.3252000686102912e-07, | |
| "loss": 0.22465527802705765, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 3.54989816700611, | |
| "grad_norm": 2.5350615978240967, | |
| "learning_rate": 2.3131680293572336e-07, | |
| "loss": 0.2048494815826416, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 3.5539714867617107, | |
| "grad_norm": 2.435441255569458, | |
| "learning_rate": 2.3011578262894015e-07, | |
| "loss": 0.22786639630794525, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 3.5580448065173114, | |
| "grad_norm": 2.409935235977173, | |
| "learning_rate": 2.2891695570150631e-07, | |
| "loss": 0.1882152482867241, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 3.5621181262729125, | |
| "grad_norm": 2.6188433170318604, | |
| "learning_rate": 2.2772033189642321e-07, | |
| "loss": 0.21629157662391663, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 3.566191446028513, | |
| "grad_norm": 2.4537856578826904, | |
| "learning_rate": 2.2652592093878665e-07, | |
| "loss": 0.19376155734062195, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 3.5702647657841142, | |
| "grad_norm": 2.438338279724121, | |
| "learning_rate": 2.2533373253570875e-07, | |
| "loss": 0.22396929562091827, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 3.574338085539715, | |
| "grad_norm": 2.4425530433654785, | |
| "learning_rate": 2.2414377637623865e-07, | |
| "loss": 0.1896074339747429, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 3.5784114052953155, | |
| "grad_norm": 2.46877384185791, | |
| "learning_rate": 2.2295606213128387e-07, | |
| "loss": 0.1916242465376854, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 3.5824847250509166, | |
| "grad_norm": 2.4224820137023926, | |
| "learning_rate": 2.2177059945353115e-07, | |
| "loss": 0.23046725243330002, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 3.5865580448065173, | |
| "grad_norm": 2.678439140319824, | |
| "learning_rate": 2.2058739797736914e-07, | |
| "loss": 0.1764805093407631, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 3.5906313645621184, | |
| "grad_norm": 2.5862419605255127, | |
| "learning_rate": 2.1940646731880885e-07, | |
| "loss": 0.1832810789346695, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 3.594704684317719, | |
| "grad_norm": 2.3499772548675537, | |
| "learning_rate": 2.1822781707540667e-07, | |
| "loss": 0.19466058164834976, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 3.5987780040733197, | |
| "grad_norm": 2.416409492492676, | |
| "learning_rate": 2.1705145682618502e-07, | |
| "loss": 0.20160751044750214, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 3.6028513238289204, | |
| "grad_norm": 2.7396693229675293, | |
| "learning_rate": 2.1587739613155653e-07, | |
| "loss": 0.2220790833234787, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 3.6069246435845215, | |
| "grad_norm": 2.438356876373291, | |
| "learning_rate": 2.1470564453324392e-07, | |
| "loss": 0.1909223347902298, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 3.610997963340122, | |
| "grad_norm": 2.637033224105835, | |
| "learning_rate": 2.1353621155420393e-07, | |
| "loss": 0.19567319750785828, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 3.6150712830957232, | |
| "grad_norm": 2.6245222091674805, | |
| "learning_rate": 2.1236910669855006e-07, | |
| "loss": 0.19575632363557816, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 3.619144602851324, | |
| "grad_norm": 2.6102139949798584, | |
| "learning_rate": 2.112043394514742e-07, | |
| "loss": 0.19621288776397705, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 3.6232179226069245, | |
| "grad_norm": 2.5597336292266846, | |
| "learning_rate": 2.100419192791708e-07, | |
| "loss": 0.17981623113155365, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 3.627291242362525, | |
| "grad_norm": 2.345719575881958, | |
| "learning_rate": 2.088818556287592e-07, | |
| "loss": 0.18147233873605728, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 3.6313645621181263, | |
| "grad_norm": 2.4701764583587646, | |
| "learning_rate": 2.0772415792820713e-07, | |
| "loss": 0.20388461649417877, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 3.635437881873727, | |
| "grad_norm": 2.6218173503875732, | |
| "learning_rate": 2.0656883558625348e-07, | |
| "loss": 0.20968149602413177, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 3.639511201629328, | |
| "grad_norm": 2.433162212371826, | |
| "learning_rate": 2.054158979923331e-07, | |
| "loss": 0.23324060440063477, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 3.6435845213849287, | |
| "grad_norm": 2.5269041061401367, | |
| "learning_rate": 2.042653545164989e-07, | |
| "loss": 0.22314583510160446, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 3.6476578411405294, | |
| "grad_norm": 2.772693634033203, | |
| "learning_rate": 2.0311721450934732e-07, | |
| "loss": 0.17379353195428848, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 3.6517311608961305, | |
| "grad_norm": 2.4417383670806885, | |
| "learning_rate": 2.0197148730194085e-07, | |
| "loss": 0.196005217730999, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 3.655804480651731, | |
| "grad_norm": 2.5393450260162354, | |
| "learning_rate": 2.0082818220573332e-07, | |
| "loss": 0.21743719279766083, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 3.6598778004073322, | |
| "grad_norm": 2.476929187774658, | |
| "learning_rate": 1.9968730851249388e-07, | |
| "loss": 0.20515238493680954, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 3.663951120162933, | |
| "grad_norm": 2.441044569015503, | |
| "learning_rate": 1.9854887549423082e-07, | |
| "loss": 0.1906992271542549, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 3.6680244399185336, | |
| "grad_norm": 2.5373618602752686, | |
| "learning_rate": 1.9741289240311754e-07, | |
| "loss": 0.20473621785640717, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 3.672097759674134, | |
| "grad_norm": 2.405190944671631, | |
| "learning_rate": 1.962793684714158e-07, | |
| "loss": 0.21750831604003906, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 3.6761710794297353, | |
| "grad_norm": 2.40073299407959, | |
| "learning_rate": 1.9514831291140228e-07, | |
| "loss": 0.19307416677474976, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 3.680244399185336, | |
| "grad_norm": 2.7207555770874023, | |
| "learning_rate": 1.940197349152923e-07, | |
| "loss": 0.2060309201478958, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 3.684317718940937, | |
| "grad_norm": 2.4056124687194824, | |
| "learning_rate": 1.9289364365516607e-07, | |
| "loss": 0.1817646324634552, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 3.6883910386965377, | |
| "grad_norm": 2.5248098373413086, | |
| "learning_rate": 1.9177004828289383e-07, | |
| "loss": 0.20411433279514313, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 3.6924643584521384, | |
| "grad_norm": 2.365913152694702, | |
| "learning_rate": 1.9064895793006153e-07, | |
| "loss": 0.1928766593337059, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 3.696537678207739, | |
| "grad_norm": 2.5754306316375732, | |
| "learning_rate": 1.8953038170789615e-07, | |
| "loss": 0.21009906381368637, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 3.70061099796334, | |
| "grad_norm": 2.641087055206299, | |
| "learning_rate": 1.8841432870719226e-07, | |
| "loss": 0.21582353860139847, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 3.704684317718941, | |
| "grad_norm": 2.567742347717285, | |
| "learning_rate": 1.8730080799823815e-07, | |
| "loss": 0.19261349737644196, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 3.708757637474542, | |
| "grad_norm": 2.521796703338623, | |
| "learning_rate": 1.861898286307413e-07, | |
| "loss": 0.20253480970859528, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 3.7128309572301426, | |
| "grad_norm": 2.504960298538208, | |
| "learning_rate": 1.8508139963375646e-07, | |
| "loss": 0.196384996175766, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 3.716904276985743, | |
| "grad_norm": 2.448503017425537, | |
| "learning_rate": 1.8397553001561012e-07, | |
| "loss": 0.22694706171751022, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 3.7209775967413443, | |
| "grad_norm": 2.448093891143799, | |
| "learning_rate": 1.8287222876382912e-07, | |
| "loss": 0.18733669072389603, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 3.725050916496945, | |
| "grad_norm": 2.4246034622192383, | |
| "learning_rate": 1.8177150484506642e-07, | |
| "loss": 0.19822125136852264, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 3.729124236252546, | |
| "grad_norm": 2.5895848274230957, | |
| "learning_rate": 1.806733672050293e-07, | |
| "loss": 0.18903044611215591, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 3.7331975560081467, | |
| "grad_norm": 2.5084774494171143, | |
| "learning_rate": 1.7957782476840528e-07, | |
| "loss": 0.2244538515806198, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 3.7372708757637474, | |
| "grad_norm": 2.436156749725342, | |
| "learning_rate": 1.78484886438791e-07, | |
| "loss": 0.19061604887247086, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 3.741344195519348, | |
| "grad_norm": 2.3762974739074707, | |
| "learning_rate": 1.7739456109861912e-07, | |
| "loss": 0.18180037289857864, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 3.745417515274949, | |
| "grad_norm": 2.5922470092773438, | |
| "learning_rate": 1.763068576090862e-07, | |
| "loss": 0.21426790952682495, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 3.74949083503055, | |
| "grad_norm": 2.582359790802002, | |
| "learning_rate": 1.7522178481008054e-07, | |
| "loss": 0.20259950309991837, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 3.753564154786151, | |
| "grad_norm": 2.3691251277923584, | |
| "learning_rate": 1.7413935152011055e-07, | |
| "loss": 0.21478895843029022, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 3.7576374745417516, | |
| "grad_norm": 2.511850595474243, | |
| "learning_rate": 1.7305956653623343e-07, | |
| "loss": 0.20169981569051743, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 3.7617107942973522, | |
| "grad_norm": 2.342623472213745, | |
| "learning_rate": 1.719824386339827e-07, | |
| "loss": 0.21613454818725586, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 3.765784114052953, | |
| "grad_norm": 2.58245587348938, | |
| "learning_rate": 1.7090797656729804e-07, | |
| "loss": 0.1654214784502983, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 3.769857433808554, | |
| "grad_norm": 2.3268702030181885, | |
| "learning_rate": 1.6983618906845332e-07, | |
| "loss": 0.18952950835227966, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 3.7739307535641546, | |
| "grad_norm": 2.42199444770813, | |
| "learning_rate": 1.6876708484798608e-07, | |
| "loss": 0.16338826343417168, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 3.7780040733197557, | |
| "grad_norm": 2.2870736122131348, | |
| "learning_rate": 1.677006725946261e-07, | |
| "loss": 0.18167713284492493, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 3.7820773930753564, | |
| "grad_norm": 2.5234158039093018, | |
| "learning_rate": 1.6663696097522585e-07, | |
| "loss": 0.1719643920660019, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 3.786150712830957, | |
| "grad_norm": 2.5721402168273926, | |
| "learning_rate": 1.6557595863468886e-07, | |
| "loss": 0.1708434671163559, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 3.790224032586558, | |
| "grad_norm": 2.392375946044922, | |
| "learning_rate": 1.6451767419590062e-07, | |
| "loss": 0.20495689660310745, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 3.794297352342159, | |
| "grad_norm": 2.815039873123169, | |
| "learning_rate": 1.6346211625965732e-07, | |
| "loss": 0.1895192414522171, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 3.79837067209776, | |
| "grad_norm": 2.6176204681396484, | |
| "learning_rate": 1.6240929340459703e-07, | |
| "loss": 0.19631709158420563, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 3.8024439918533606, | |
| "grad_norm": 2.4265332221984863, | |
| "learning_rate": 1.6135921418712955e-07, | |
| "loss": 0.19473101943731308, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 3.8065173116089612, | |
| "grad_norm": 2.5783004760742188, | |
| "learning_rate": 1.6031188714136623e-07, | |
| "loss": 0.2178090512752533, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 3.810590631364562, | |
| "grad_norm": 2.534191846847534, | |
| "learning_rate": 1.5926732077905203e-07, | |
| "loss": 0.16174981743097305, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 3.814663951120163, | |
| "grad_norm": 2.6376049518585205, | |
| "learning_rate": 1.582255235894947e-07, | |
| "loss": 0.22103732079267502, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 3.8187372708757636, | |
| "grad_norm": 2.646101474761963, | |
| "learning_rate": 1.571865040394973e-07, | |
| "loss": 0.21458610147237778, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 3.8228105906313647, | |
| "grad_norm": 2.335294246673584, | |
| "learning_rate": 1.561502705732883e-07, | |
| "loss": 0.21040001511573792, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 3.8268839103869654, | |
| "grad_norm": 2.527137279510498, | |
| "learning_rate": 1.5511683161245365e-07, | |
| "loss": 0.19486284255981445, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 3.830957230142566, | |
| "grad_norm": 2.3123862743377686, | |
| "learning_rate": 1.540861955558676e-07, | |
| "loss": 0.18999101221561432, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 3.835030549898167, | |
| "grad_norm": 2.6126627922058105, | |
| "learning_rate": 1.5305837077962542e-07, | |
| "loss": 0.20924139767885208, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 3.839103869653768, | |
| "grad_norm": 2.4893648624420166, | |
| "learning_rate": 1.5203336563697444e-07, | |
| "loss": 0.18669888377189636, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 3.8431771894093685, | |
| "grad_norm": 2.406766414642334, | |
| "learning_rate": 1.5101118845824628e-07, | |
| "loss": 0.18617846816778183, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 3.8472505091649696, | |
| "grad_norm": 2.2685155868530273, | |
| "learning_rate": 1.4999184755079004e-07, | |
| "loss": 0.1924063339829445, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 3.8513238289205702, | |
| "grad_norm": 2.443790912628174, | |
| "learning_rate": 1.4897535119890364e-07, | |
| "loss": 0.1853998601436615, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 3.855397148676171, | |
| "grad_norm": 2.320080041885376, | |
| "learning_rate": 1.4796170766376727e-07, | |
| "loss": 0.21173150092363358, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 3.859470468431772, | |
| "grad_norm": 2.5018677711486816, | |
| "learning_rate": 1.4695092518337554e-07, | |
| "loss": 0.1776503399014473, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 3.8635437881873727, | |
| "grad_norm": 2.493777275085449, | |
| "learning_rate": 1.459430119724715e-07, | |
| "loss": 0.17813850194215775, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 3.8676171079429738, | |
| "grad_norm": 2.4366796016693115, | |
| "learning_rate": 1.4493797622247867e-07, | |
| "loss": 0.19571475684642792, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 3.8716904276985744, | |
| "grad_norm": 2.559058427810669, | |
| "learning_rate": 1.439358261014359e-07, | |
| "loss": 0.19421598315238953, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 3.875763747454175, | |
| "grad_norm": 2.4444687366485596, | |
| "learning_rate": 1.4293656975392937e-07, | |
| "loss": 0.21629701554775238, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 3.8798370672097757, | |
| "grad_norm": 2.388728380203247, | |
| "learning_rate": 1.4194021530102783e-07, | |
| "loss": 0.19527916610240936, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 3.883910386965377, | |
| "grad_norm": 2.286078929901123, | |
| "learning_rate": 1.4094677084021588e-07, | |
| "loss": 0.1616881936788559, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 3.8879837067209775, | |
| "grad_norm": 2.5002880096435547, | |
| "learning_rate": 1.3995624444532844e-07, | |
| "loss": 0.19413011521100998, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 3.8920570264765786, | |
| "grad_norm": 2.6207797527313232, | |
| "learning_rate": 1.3896864416648452e-07, | |
| "loss": 0.15424808859825134, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 3.8961303462321792, | |
| "grad_norm": 2.5780515670776367, | |
| "learning_rate": 1.3798397803002237e-07, | |
| "loss": 0.20987361669540405, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 3.90020366598778, | |
| "grad_norm": 2.439293622970581, | |
| "learning_rate": 1.370022540384347e-07, | |
| "loss": 0.19203339517116547, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 3.904276985743381, | |
| "grad_norm": 2.810973644256592, | |
| "learning_rate": 1.360234801703023e-07, | |
| "loss": 0.25363121181726456, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 3.9083503054989817, | |
| "grad_norm": 2.6023387908935547, | |
| "learning_rate": 1.3504766438023042e-07, | |
| "loss": 0.21091164648532867, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 3.9124236252545828, | |
| "grad_norm": 2.3644959926605225, | |
| "learning_rate": 1.3407481459878366e-07, | |
| "loss": 0.21235870569944382, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 3.9164969450101834, | |
| "grad_norm": 2.3764562606811523, | |
| "learning_rate": 1.3310493873242167e-07, | |
| "loss": 0.1808951571583748, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 3.920570264765784, | |
| "grad_norm": 2.3367507457733154, | |
| "learning_rate": 1.321380446634342e-07, | |
| "loss": 0.2204703390598297, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 3.9246435845213847, | |
| "grad_norm": 2.595667600631714, | |
| "learning_rate": 1.3117414024987823e-07, | |
| "loss": 0.19948850572109222, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 3.928716904276986, | |
| "grad_norm": 2.6602208614349365, | |
| "learning_rate": 1.3021323332551294e-07, | |
| "loss": 0.22697525471448898, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 3.9327902240325865, | |
| "grad_norm": 2.483463764190674, | |
| "learning_rate": 1.2925533169973695e-07, | |
| "loss": 0.17191919684410095, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 3.9368635437881876, | |
| "grad_norm": 2.4962165355682373, | |
| "learning_rate": 1.283004431575246e-07, | |
| "loss": 0.20009542256593704, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 3.9409368635437882, | |
| "grad_norm": 2.5772528648376465, | |
| "learning_rate": 1.273485754593619e-07, | |
| "loss": 0.2056511491537094, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 3.945010183299389, | |
| "grad_norm": 2.4633169174194336, | |
| "learning_rate": 1.26399736341185e-07, | |
| "loss": 0.19378910213708878, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 3.9490835030549896, | |
| "grad_norm": 2.5064918994903564, | |
| "learning_rate": 1.254539335143156e-07, | |
| "loss": 0.2224956750869751, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 3.9531568228105907, | |
| "grad_norm": 2.723224401473999, | |
| "learning_rate": 1.2451117466539985e-07, | |
| "loss": 0.20264600962400436, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 3.9572301425661913, | |
| "grad_norm": 2.501723289489746, | |
| "learning_rate": 1.235714674563445e-07, | |
| "loss": 0.17863625288009644, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 3.9613034623217924, | |
| "grad_norm": 2.344696521759033, | |
| "learning_rate": 1.226348195242557e-07, | |
| "loss": 0.23713428527116776, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 3.965376782077393, | |
| "grad_norm": 2.421300172805786, | |
| "learning_rate": 1.2170123848137648e-07, | |
| "loss": 0.20281652361154556, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 3.9694501018329937, | |
| "grad_norm": 2.5802454948425293, | |
| "learning_rate": 1.2077073191502496e-07, | |
| "loss": 0.21466045081615448, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 3.973523421588595, | |
| "grad_norm": 2.3439295291900635, | |
| "learning_rate": 1.1984330738753218e-07, | |
| "loss": 0.1579430103302002, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 3.9775967413441955, | |
| "grad_norm": 2.491952896118164, | |
| "learning_rate": 1.1891897243618183e-07, | |
| "loss": 0.1682949811220169, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 3.9816700610997966, | |
| "grad_norm": 2.3951187133789062, | |
| "learning_rate": 1.1799773457314766e-07, | |
| "loss": 0.20190123468637466, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 3.9857433808553973, | |
| "grad_norm": 2.4365179538726807, | |
| "learning_rate": 1.1707960128543314e-07, | |
| "loss": 0.18969932198524475, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 3.989816700610998, | |
| "grad_norm": 2.2501721382141113, | |
| "learning_rate": 1.1616458003481084e-07, | |
| "loss": 0.16813133656978607, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 3.9938900203665986, | |
| "grad_norm": 2.550407886505127, | |
| "learning_rate": 1.1525267825776114e-07, | |
| "loss": 0.19014200568199158, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 3.9979633401221997, | |
| "grad_norm": 2.3324475288391113, | |
| "learning_rate": 1.1434390336541238e-07, | |
| "loss": 0.21611415594816208, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 2.8509156703948975, | |
| "learning_rate": 1.1343826274347995e-07, | |
| "loss": 0.2155466079711914, | |
| "step": 984 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 1225, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 100.0, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |