| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 1.0, | |
| "global_step": 246, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.004073319755600814, | |
| "grad_norm": 34.300819396972656, | |
| "learning_rate": 8.130081300813008e-09, | |
| "loss": 1.59619802236557, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.008146639511201629, | |
| "grad_norm": 30.720197677612305, | |
| "learning_rate": 1.6260162601626016e-08, | |
| "loss": 1.468272864818573, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.012219959266802444, | |
| "grad_norm": 30.16754722595215, | |
| "learning_rate": 2.4390243902439023e-08, | |
| "loss": 1.3843095302581787, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.016293279022403257, | |
| "grad_norm": 38.58047103881836, | |
| "learning_rate": 3.252032520325203e-08, | |
| "loss": 1.7031245231628418, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.020366598778004074, | |
| "grad_norm": 30.89760971069336, | |
| "learning_rate": 4.065040650406504e-08, | |
| "loss": 1.4844104647636414, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.024439918533604887, | |
| "grad_norm": 34.434993743896484, | |
| "learning_rate": 4.878048780487805e-08, | |
| "loss": 1.574910283088684, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.028513238289205704, | |
| "grad_norm": 32.540470123291016, | |
| "learning_rate": 5.6910569105691055e-08, | |
| "loss": 1.4606674909591675, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.032586558044806514, | |
| "grad_norm": 36.41299819946289, | |
| "learning_rate": 6.504065040650406e-08, | |
| "loss": 1.553576111793518, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.03665987780040733, | |
| "grad_norm": 34.50511932373047, | |
| "learning_rate": 7.317073170731706e-08, | |
| "loss": 1.3344553709030151, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.04073319755600815, | |
| "grad_norm": 27.898704528808594, | |
| "learning_rate": 8.130081300813008e-08, | |
| "loss": 1.3406395316123962, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.04480651731160896, | |
| "grad_norm": 29.29271125793457, | |
| "learning_rate": 8.943089430894309e-08, | |
| "loss": 1.4415303468704224, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.048879837067209775, | |
| "grad_norm": 28.2354736328125, | |
| "learning_rate": 9.75609756097561e-08, | |
| "loss": 1.2696096301078796, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.05295315682281059, | |
| "grad_norm": 35.44163131713867, | |
| "learning_rate": 1.0569105691056911e-07, | |
| "loss": 1.598312497138977, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.05702647657841141, | |
| "grad_norm": 26.94402313232422, | |
| "learning_rate": 1.1382113821138211e-07, | |
| "loss": 1.3497812747955322, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.06109979633401222, | |
| "grad_norm": 37.78248977661133, | |
| "learning_rate": 1.219512195121951e-07, | |
| "loss": 1.5689660906791687, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.06517311608961303, | |
| "grad_norm": 31.73078155517578, | |
| "learning_rate": 1.3008130081300813e-07, | |
| "loss": 1.525648295879364, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.06924643584521385, | |
| "grad_norm": 27.77250862121582, | |
| "learning_rate": 1.3821138211382114e-07, | |
| "loss": 1.304672360420227, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.07331975560081466, | |
| "grad_norm": 28.092498779296875, | |
| "learning_rate": 1.4634146341463413e-07, | |
| "loss": 1.346445381641388, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.07739307535641547, | |
| "grad_norm": 30.995866775512695, | |
| "learning_rate": 1.5447154471544717e-07, | |
| "loss": 1.447025179862976, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.0814663951120163, | |
| "grad_norm": 28.858421325683594, | |
| "learning_rate": 1.6260162601626016e-07, | |
| "loss": 1.3801668882369995, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.0855397148676171, | |
| "grad_norm": 31.91228485107422, | |
| "learning_rate": 1.7073170731707317e-07, | |
| "loss": 1.4577875137329102, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.08961303462321792, | |
| "grad_norm": 31.215259552001953, | |
| "learning_rate": 1.7886178861788619e-07, | |
| "loss": 1.4091373682022095, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.09368635437881874, | |
| "grad_norm": 30.24734115600586, | |
| "learning_rate": 1.8699186991869917e-07, | |
| "loss": 1.4649581909179688, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.09775967413441955, | |
| "grad_norm": 31.560291290283203, | |
| "learning_rate": 1.951219512195122e-07, | |
| "loss": 1.5308585166931152, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.10183299389002037, | |
| "grad_norm": 27.27391242980957, | |
| "learning_rate": 2.032520325203252e-07, | |
| "loss": 1.5144553780555725, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.10590631364562118, | |
| "grad_norm": 29.813785552978516, | |
| "learning_rate": 2.1138211382113822e-07, | |
| "loss": 1.519466757774353, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.109979633401222, | |
| "grad_norm": 24.201751708984375, | |
| "learning_rate": 2.195121951219512e-07, | |
| "loss": 1.3116011023521423, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.11405295315682282, | |
| "grad_norm": 27.95865249633789, | |
| "learning_rate": 2.2764227642276422e-07, | |
| "loss": 1.4637184143066406, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.11812627291242363, | |
| "grad_norm": 26.65915870666504, | |
| "learning_rate": 2.3577235772357723e-07, | |
| "loss": 1.4885194301605225, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.12219959266802444, | |
| "grad_norm": 27.386289596557617, | |
| "learning_rate": 2.439024390243902e-07, | |
| "loss": 1.3836334347724915, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.12627291242362526, | |
| "grad_norm": 25.87419319152832, | |
| "learning_rate": 2.520325203252032e-07, | |
| "loss": 1.3642336130142212, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.13034623217922606, | |
| "grad_norm": 26.620105743408203, | |
| "learning_rate": 2.6016260162601625e-07, | |
| "loss": 1.3461121916770935, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.13441955193482688, | |
| "grad_norm": 22.665058135986328, | |
| "learning_rate": 2.682926829268293e-07, | |
| "loss": 1.2577590942382812, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.1384928716904277, | |
| "grad_norm": 23.679920196533203, | |
| "learning_rate": 2.764227642276423e-07, | |
| "loss": 1.2572017908096313, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.1425661914460285, | |
| "grad_norm": 25.136371612548828, | |
| "learning_rate": 2.8455284552845527e-07, | |
| "loss": 1.2670851349830627, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.14663951120162932, | |
| "grad_norm": 21.567337036132812, | |
| "learning_rate": 2.9268292682926825e-07, | |
| "loss": 1.242683231830597, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.15071283095723015, | |
| "grad_norm": 20.61647605895996, | |
| "learning_rate": 3.008130081300813e-07, | |
| "loss": 1.279579222202301, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.15478615071283094, | |
| "grad_norm": 20.656513214111328, | |
| "learning_rate": 3.0894308943089434e-07, | |
| "loss": 1.2040475606918335, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.15885947046843177, | |
| "grad_norm": 22.86530876159668, | |
| "learning_rate": 3.170731707317073e-07, | |
| "loss": 1.2522715330123901, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.1629327902240326, | |
| "grad_norm": 20.22757911682129, | |
| "learning_rate": 3.252032520325203e-07, | |
| "loss": 1.2012774348258972, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.1670061099796334, | |
| "grad_norm": 23.09739875793457, | |
| "learning_rate": 3.333333333333333e-07, | |
| "loss": 1.2088268399238586, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.1710794297352342, | |
| "grad_norm": 22.845685958862305, | |
| "learning_rate": 3.4146341463414634e-07, | |
| "loss": 1.0982880592346191, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.17515274949083504, | |
| "grad_norm": 19.80814552307129, | |
| "learning_rate": 3.4959349593495933e-07, | |
| "loss": 1.1271469593048096, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.17922606924643583, | |
| "grad_norm": 20.553686141967773, | |
| "learning_rate": 3.5772357723577237e-07, | |
| "loss": 1.0008204579353333, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.18329938900203666, | |
| "grad_norm": 16.66282844543457, | |
| "learning_rate": 3.6585365853658536e-07, | |
| "loss": 0.9251897931098938, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.18737270875763748, | |
| "grad_norm": 15.797308921813965, | |
| "learning_rate": 3.7398373983739835e-07, | |
| "loss": 1.0191328525543213, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.19144602851323828, | |
| "grad_norm": 13.579208374023438, | |
| "learning_rate": 3.821138211382114e-07, | |
| "loss": 0.774791806936264, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.1955193482688391, | |
| "grad_norm": 14.556002616882324, | |
| "learning_rate": 3.902439024390244e-07, | |
| "loss": 1.0026790797710419, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.19959266802443992, | |
| "grad_norm": 14.489509582519531, | |
| "learning_rate": 3.9837398373983736e-07, | |
| "loss": 0.9430837631225586, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.20366598778004075, | |
| "grad_norm": 12.495223999023438, | |
| "learning_rate": 4.065040650406504e-07, | |
| "loss": 0.8999880254268646, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.20773930753564154, | |
| "grad_norm": 11.441575050354004, | |
| "learning_rate": 4.146341463414634e-07, | |
| "loss": 0.8320233225822449, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.21181262729124237, | |
| "grad_norm": 10.894216537475586, | |
| "learning_rate": 4.2276422764227643e-07, | |
| "loss": 0.8139239549636841, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.2158859470468432, | |
| "grad_norm": 10.404220581054688, | |
| "learning_rate": 4.308943089430894e-07, | |
| "loss": 0.8323288261890411, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.219959266802444, | |
| "grad_norm": 10.463072776794434, | |
| "learning_rate": 4.390243902439024e-07, | |
| "loss": 0.882573276758194, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.2240325865580448, | |
| "grad_norm": 10.669075012207031, | |
| "learning_rate": 4.471544715447154e-07, | |
| "loss": 0.749780923128128, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.22810590631364563, | |
| "grad_norm": 10.453638076782227, | |
| "learning_rate": 4.5528455284552844e-07, | |
| "loss": 0.7727148830890656, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.23217922606924643, | |
| "grad_norm": 11.427080154418945, | |
| "learning_rate": 4.634146341463415e-07, | |
| "loss": 0.8585084676742554, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.23625254582484725, | |
| "grad_norm": 8.558117866516113, | |
| "learning_rate": 4.7154471544715447e-07, | |
| "loss": 0.7314337491989136, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.24032586558044808, | |
| "grad_norm": 9.031648635864258, | |
| "learning_rate": 4.796747967479675e-07, | |
| "loss": 0.701579749584198, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.24439918533604887, | |
| "grad_norm": 8.817708969116211, | |
| "learning_rate": 4.878048780487804e-07, | |
| "loss": 0.7815204560756683, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.2484725050916497, | |
| "grad_norm": 8.00804615020752, | |
| "learning_rate": 4.959349593495934e-07, | |
| "loss": 0.655106246471405, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.2525458248472505, | |
| "grad_norm": 6.538842678070068, | |
| "learning_rate": 5.040650406504064e-07, | |
| "loss": 0.6697916388511658, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.25661914460285135, | |
| "grad_norm": 7.5446553230285645, | |
| "learning_rate": 5.121951219512195e-07, | |
| "loss": 0.7426944077014923, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.2606924643584521, | |
| "grad_norm": 6.402474403381348, | |
| "learning_rate": 5.203252032520325e-07, | |
| "loss": 0.6401277780532837, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.26476578411405294, | |
| "grad_norm": 7.257569313049316, | |
| "learning_rate": 5.284552845528455e-07, | |
| "loss": 0.6731106042861938, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.26883910386965376, | |
| "grad_norm": 6.263636589050293, | |
| "learning_rate": 5.365853658536586e-07, | |
| "loss": 0.5806022882461548, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.2729124236252546, | |
| "grad_norm": 5.273800849914551, | |
| "learning_rate": 5.447154471544715e-07, | |
| "loss": 0.5338439792394638, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.2769857433808554, | |
| "grad_norm": 5.2786149978637695, | |
| "learning_rate": 5.528455284552846e-07, | |
| "loss": 0.5390533208847046, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.28105906313645623, | |
| "grad_norm": 4.901702404022217, | |
| "learning_rate": 5.609756097560975e-07, | |
| "loss": 0.5899032056331635, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.285132382892057, | |
| "grad_norm": 4.853933811187744, | |
| "learning_rate": 5.691056910569105e-07, | |
| "loss": 0.5600310862064362, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.2892057026476578, | |
| "grad_norm": 4.680273532867432, | |
| "learning_rate": 5.772357723577236e-07, | |
| "loss": 0.5319355428218842, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.29327902240325865, | |
| "grad_norm": 3.7406885623931885, | |
| "learning_rate": 5.853658536585365e-07, | |
| "loss": 0.508156955242157, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.2973523421588595, | |
| "grad_norm": 4.389779567718506, | |
| "learning_rate": 5.934959349593496e-07, | |
| "loss": 0.49855048954486847, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.3014256619144603, | |
| "grad_norm": 4.23866081237793, | |
| "learning_rate": 6.016260162601626e-07, | |
| "loss": 0.5242476612329483, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.3054989816700611, | |
| "grad_norm": 4.1824951171875, | |
| "learning_rate": 6.097560975609756e-07, | |
| "loss": 0.532037615776062, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.3095723014256619, | |
| "grad_norm": 3.7223150730133057, | |
| "learning_rate": 6.178861788617887e-07, | |
| "loss": 0.46959882974624634, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.3136456211812627, | |
| "grad_norm": 3.545388698577881, | |
| "learning_rate": 6.260162601626016e-07, | |
| "loss": 0.4825982600450516, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.31771894093686354, | |
| "grad_norm": 3.6351099014282227, | |
| "learning_rate": 6.341463414634146e-07, | |
| "loss": 0.5095209777355194, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.32179226069246436, | |
| "grad_norm": 3.243072271347046, | |
| "learning_rate": 6.422764227642276e-07, | |
| "loss": 0.4842926263809204, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.3258655804480652, | |
| "grad_norm": 3.5646300315856934, | |
| "learning_rate": 6.504065040650406e-07, | |
| "loss": 0.4908552020788193, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.329938900203666, | |
| "grad_norm": 3.5380759239196777, | |
| "learning_rate": 6.585365853658536e-07, | |
| "loss": 0.4536065459251404, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.3340122199592668, | |
| "grad_norm": 3.128525495529175, | |
| "learning_rate": 6.666666666666666e-07, | |
| "loss": 0.47657161951065063, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.3380855397148676, | |
| "grad_norm": 3.3621485233306885, | |
| "learning_rate": 6.747967479674797e-07, | |
| "loss": 0.43791596591472626, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.3421588594704684, | |
| "grad_norm": 3.39066219329834, | |
| "learning_rate": 6.829268292682927e-07, | |
| "loss": 0.42947711050510406, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.34623217922606925, | |
| "grad_norm": 3.7795698642730713, | |
| "learning_rate": 6.910569105691057e-07, | |
| "loss": 0.4219910502433777, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.35030549898167007, | |
| "grad_norm": 3.633206367492676, | |
| "learning_rate": 6.991869918699187e-07, | |
| "loss": 0.4253977984189987, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.3543788187372709, | |
| "grad_norm": 3.6160175800323486, | |
| "learning_rate": 7.073170731707316e-07, | |
| "loss": 0.449339896440506, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.35845213849287166, | |
| "grad_norm": 3.30557918548584, | |
| "learning_rate": 7.154471544715447e-07, | |
| "loss": 0.45001736283302307, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.3625254582484725, | |
| "grad_norm": 3.1727640628814697, | |
| "learning_rate": 7.235772357723577e-07, | |
| "loss": 0.4165496975183487, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.3665987780040733, | |
| "grad_norm": 3.073976516723633, | |
| "learning_rate": 7.317073170731707e-07, | |
| "loss": 0.4443822205066681, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.37067209775967414, | |
| "grad_norm": 3.129105567932129, | |
| "learning_rate": 7.398373983739837e-07, | |
| "loss": 0.4265598952770233, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.37474541751527496, | |
| "grad_norm": 3.1485190391540527, | |
| "learning_rate": 7.479674796747967e-07, | |
| "loss": 0.3882734924554825, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.3788187372708758, | |
| "grad_norm": 3.1610565185546875, | |
| "learning_rate": 7.560975609756097e-07, | |
| "loss": 0.37010858952999115, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.38289205702647655, | |
| "grad_norm": 3.039264440536499, | |
| "learning_rate": 7.642276422764228e-07, | |
| "loss": 0.400989294052124, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.3869653767820774, | |
| "grad_norm": 2.9321980476379395, | |
| "learning_rate": 7.723577235772358e-07, | |
| "loss": 0.3771343380212784, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.3910386965376782, | |
| "grad_norm": 2.807072162628174, | |
| "learning_rate": 7.804878048780488e-07, | |
| "loss": 0.4001482129096985, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.395112016293279, | |
| "grad_norm": 2.8286941051483154, | |
| "learning_rate": 7.886178861788617e-07, | |
| "loss": 0.4234430640935898, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.39918533604887985, | |
| "grad_norm": 2.9245986938476562, | |
| "learning_rate": 7.967479674796747e-07, | |
| "loss": 0.3854667395353317, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.40325865580448067, | |
| "grad_norm": 3.015875816345215, | |
| "learning_rate": 8.048780487804878e-07, | |
| "loss": 0.38027653098106384, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.4073319755600815, | |
| "grad_norm": 2.907216787338257, | |
| "learning_rate": 8.130081300813008e-07, | |
| "loss": 0.34937676787376404, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.41140529531568226, | |
| "grad_norm": 3.131850004196167, | |
| "learning_rate": 8.211382113821138e-07, | |
| "loss": 0.4414845108985901, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.4154786150712831, | |
| "grad_norm": 2.9019775390625, | |
| "learning_rate": 8.292682926829268e-07, | |
| "loss": 0.3990558981895447, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.4195519348268839, | |
| "grad_norm": 2.9362523555755615, | |
| "learning_rate": 8.373983739837398e-07, | |
| "loss": 0.41413092613220215, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.42362525458248473, | |
| "grad_norm": 3.0895473957061768, | |
| "learning_rate": 8.455284552845529e-07, | |
| "loss": 0.3904542028903961, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.42769857433808556, | |
| "grad_norm": 2.9235992431640625, | |
| "learning_rate": 8.536585365853657e-07, | |
| "loss": 0.3995140939950943, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.4317718940936864, | |
| "grad_norm": 2.919102668762207, | |
| "learning_rate": 8.617886178861788e-07, | |
| "loss": 0.32857778668403625, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.43584521384928715, | |
| "grad_norm": 2.831698417663574, | |
| "learning_rate": 8.699186991869918e-07, | |
| "loss": 0.3507983237504959, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.439918533604888, | |
| "grad_norm": 2.952693223953247, | |
| "learning_rate": 8.780487804878048e-07, | |
| "loss": 0.37046514451503754, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.4439918533604888, | |
| "grad_norm": 3.315002679824829, | |
| "learning_rate": 8.861788617886179e-07, | |
| "loss": 0.391086682677269, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.4480651731160896, | |
| "grad_norm": 2.7241294384002686, | |
| "learning_rate": 8.943089430894308e-07, | |
| "loss": 0.3864188492298126, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.45213849287169044, | |
| "grad_norm": 2.782064199447632, | |
| "learning_rate": 9.024390243902439e-07, | |
| "loss": 0.38219109177589417, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.45621181262729127, | |
| "grad_norm": 4.001572132110596, | |
| "learning_rate": 9.105691056910569e-07, | |
| "loss": 0.3784598410129547, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.46028513238289204, | |
| "grad_norm": 2.607434034347534, | |
| "learning_rate": 9.186991869918699e-07, | |
| "loss": 0.3763512521982193, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.46435845213849286, | |
| "grad_norm": 2.97188138961792, | |
| "learning_rate": 9.26829268292683e-07, | |
| "loss": 0.36788034439086914, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.4684317718940937, | |
| "grad_norm": 2.9631524085998535, | |
| "learning_rate": 9.349593495934958e-07, | |
| "loss": 0.3696867823600769, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.4725050916496945, | |
| "grad_norm": 2.5895049571990967, | |
| "learning_rate": 9.430894308943089e-07, | |
| "loss": 0.3349902927875519, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.47657841140529533, | |
| "grad_norm": 2.600832462310791, | |
| "learning_rate": 9.512195121951218e-07, | |
| "loss": 0.34966227412223816, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.48065173116089616, | |
| "grad_norm": 3.0639443397521973, | |
| "learning_rate": 9.59349593495935e-07, | |
| "loss": 0.38310858607292175, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.4847250509164969, | |
| "grad_norm": 2.6944706439971924, | |
| "learning_rate": 9.67479674796748e-07, | |
| "loss": 0.3360476493835449, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.48879837067209775, | |
| "grad_norm": 2.8398237228393555, | |
| "learning_rate": 9.756097560975609e-07, | |
| "loss": 0.39176714420318604, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.49287169042769857, | |
| "grad_norm": 2.8028745651245117, | |
| "learning_rate": 9.83739837398374e-07, | |
| "loss": 0.37909021973609924, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.4969450101832994, | |
| "grad_norm": 2.6169185638427734, | |
| "learning_rate": 9.918699186991869e-07, | |
| "loss": 0.37069061398506165, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.5010183299389002, | |
| "grad_norm": 2.572046995162964, | |
| "learning_rate": 1e-06, | |
| "loss": 0.3428824096918106, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.505091649694501, | |
| "grad_norm": 2.7804417610168457, | |
| "learning_rate": 9.999979682219186e-07, | |
| "loss": 0.3680119812488556, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.5091649694501018, | |
| "grad_norm": 2.5910799503326416, | |
| "learning_rate": 9.999918729041868e-07, | |
| "loss": 0.33467385172843933, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.5132382892057027, | |
| "grad_norm": 2.8417587280273438, | |
| "learning_rate": 9.999817140963419e-07, | |
| "loss": 0.35100705921649933, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.5173116089613035, | |
| "grad_norm": 2.905728340148926, | |
| "learning_rate": 9.999674918809457e-07, | |
| "loss": 0.32811686396598816, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.5213849287169042, | |
| "grad_norm": 2.5878095626831055, | |
| "learning_rate": 9.99949206373584e-07, | |
| "loss": 0.32490645349025726, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.5254582484725051, | |
| "grad_norm": 2.9762229919433594, | |
| "learning_rate": 9.999268577228648e-07, | |
| "loss": 0.3934018760919571, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.5295315682281059, | |
| "grad_norm": 2.792989492416382, | |
| "learning_rate": 9.99900446110418e-07, | |
| "loss": 0.3315049111843109, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.5336048879837068, | |
| "grad_norm": 2.6891062259674072, | |
| "learning_rate": 9.998699717508945e-07, | |
| "loss": 0.3097301423549652, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.5376782077393075, | |
| "grad_norm": 2.92191481590271, | |
| "learning_rate": 9.99835434891962e-07, | |
| "loss": 0.34749817848205566, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.5417515274949084, | |
| "grad_norm": 2.980543851852417, | |
| "learning_rate": 9.99796835814306e-07, | |
| "loss": 0.3367327153682709, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.5458248472505092, | |
| "grad_norm": 2.50433611869812, | |
| "learning_rate": 9.99754174831625e-07, | |
| "loss": 0.3090934008359909, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.5498981670061099, | |
| "grad_norm": 2.869647979736328, | |
| "learning_rate": 9.9970745229063e-07, | |
| "loss": 0.35603591799736023, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.5539714867617108, | |
| "grad_norm": 2.6435837745666504, | |
| "learning_rate": 9.9965666857104e-07, | |
| "loss": 0.3288918733596802, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.5580448065173116, | |
| "grad_norm": 2.7970142364501953, | |
| "learning_rate": 9.996018240855806e-07, | |
| "loss": 0.3878723680973053, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.5621181262729125, | |
| "grad_norm": 2.593043327331543, | |
| "learning_rate": 9.995429192799788e-07, | |
| "loss": 0.3534126281738281, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.5661914460285132, | |
| "grad_norm": 2.8867013454437256, | |
| "learning_rate": 9.994799546329602e-07, | |
| "loss": 0.38061630725860596, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.570264765784114, | |
| "grad_norm": 2.589017152786255, | |
| "learning_rate": 9.994129306562458e-07, | |
| "loss": 0.37725748121738434, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.5743380855397149, | |
| "grad_norm": 2.369696617126465, | |
| "learning_rate": 9.993418478945472e-07, | |
| "loss": 0.32034583389759064, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.5784114052953157, | |
| "grad_norm": 2.6410069465637207, | |
| "learning_rate": 9.992667069255618e-07, | |
| "loss": 0.36017628014087677, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.5824847250509165, | |
| "grad_norm": 2.597259283065796, | |
| "learning_rate": 9.991875083599688e-07, | |
| "loss": 0.32577911019325256, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.5865580448065173, | |
| "grad_norm": 2.761859655380249, | |
| "learning_rate": 9.991042528414237e-07, | |
| "loss": 0.33353830873966217, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.5906313645621182, | |
| "grad_norm": 2.7634713649749756, | |
| "learning_rate": 9.990169410465536e-07, | |
| "loss": 0.33604632318019867, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.594704684317719, | |
| "grad_norm": 2.820897340774536, | |
| "learning_rate": 9.98925573684951e-07, | |
| "loss": 0.3069554716348648, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.5987780040733197, | |
| "grad_norm": 2.856700897216797, | |
| "learning_rate": 9.98830151499169e-07, | |
| "loss": 0.33896636962890625, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.6028513238289206, | |
| "grad_norm": 2.9203782081604004, | |
| "learning_rate": 9.987306752647142e-07, | |
| "loss": 0.35070909559726715, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.6069246435845214, | |
| "grad_norm": 2.679352283477783, | |
| "learning_rate": 9.986271457900414e-07, | |
| "loss": 0.3325359970331192, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.6109979633401222, | |
| "grad_norm": 2.4953606128692627, | |
| "learning_rate": 9.98519563916546e-07, | |
| "loss": 0.32330869138240814, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.615071283095723, | |
| "grad_norm": 2.618744134902954, | |
| "learning_rate": 9.98407930518558e-07, | |
| "loss": 0.33912393450737, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.6191446028513238, | |
| "grad_norm": 2.6512296199798584, | |
| "learning_rate": 9.982922465033348e-07, | |
| "loss": 0.3045920431613922, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.6232179226069247, | |
| "grad_norm": 2.7606050968170166, | |
| "learning_rate": 9.981725128110532e-07, | |
| "loss": 0.32916732132434845, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.6272912423625254, | |
| "grad_norm": 2.95037841796875, | |
| "learning_rate": 9.980487304148024e-07, | |
| "loss": 0.36757831275463104, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.6313645621181263, | |
| "grad_norm": 2.890489339828491, | |
| "learning_rate": 9.97920900320576e-07, | |
| "loss": 0.36117151379585266, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.6354378818737271, | |
| "grad_norm": 2.7488858699798584, | |
| "learning_rate": 9.97789023567263e-07, | |
| "loss": 0.35026322305202484, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.639511201629328, | |
| "grad_norm": 2.5479671955108643, | |
| "learning_rate": 9.976531012266413e-07, | |
| "loss": 0.308156818151474, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.6435845213849287, | |
| "grad_norm": 2.717344045639038, | |
| "learning_rate": 9.975131344033664e-07, | |
| "loss": 0.29827529191970825, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.6476578411405295, | |
| "grad_norm": 2.569551467895508, | |
| "learning_rate": 9.973691242349648e-07, | |
| "loss": 0.3232528269290924, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.6517311608961304, | |
| "grad_norm": 3.0013420581817627, | |
| "learning_rate": 9.972210718918233e-07, | |
| "loss": 0.3270832598209381, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.6558044806517311, | |
| "grad_norm": 2.7339162826538086, | |
| "learning_rate": 9.970689785771798e-07, | |
| "loss": 0.3668155074119568, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.659877800407332, | |
| "grad_norm": 2.6689724922180176, | |
| "learning_rate": 9.969128455271137e-07, | |
| "loss": 0.32853490114212036, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.6639511201629328, | |
| "grad_norm": 3.042081117630005, | |
| "learning_rate": 9.967526740105358e-07, | |
| "loss": 0.3487651199102402, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.6680244399185336, | |
| "grad_norm": 2.4641284942626953, | |
| "learning_rate": 9.965884653291783e-07, | |
| "loss": 0.35704147815704346, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.6720977596741344, | |
| "grad_norm": 2.6836225986480713, | |
| "learning_rate": 9.964202208175833e-07, | |
| "loss": 0.33587950468063354, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.6761710794297352, | |
| "grad_norm": 2.2905988693237305, | |
| "learning_rate": 9.962479418430932e-07, | |
| "loss": 0.3061918318271637, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.6802443991853361, | |
| "grad_norm": 2.4772934913635254, | |
| "learning_rate": 9.960716298058381e-07, | |
| "loss": 0.2896444499492645, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.6843177189409368, | |
| "grad_norm": 2.6987321376800537, | |
| "learning_rate": 9.958912861387258e-07, | |
| "loss": 0.3374595195055008, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.6883910386965377, | |
| "grad_norm": 2.6165449619293213, | |
| "learning_rate": 9.9570691230743e-07, | |
| "loss": 0.33027225732803345, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.6924643584521385, | |
| "grad_norm": 3.1326680183410645, | |
| "learning_rate": 9.955185098103771e-07, | |
| "loss": 0.3138381540775299, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.6965376782077393, | |
| "grad_norm": 2.5313732624053955, | |
| "learning_rate": 9.953260801787356e-07, | |
| "loss": 0.31824737787246704, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.7006109979633401, | |
| "grad_norm": 2.529325008392334, | |
| "learning_rate": 9.951296249764025e-07, | |
| "loss": 0.298155277967453, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.7046843177189409, | |
| "grad_norm": 2.6821744441986084, | |
| "learning_rate": 9.949291457999916e-07, | |
| "loss": 0.33296874165534973, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.7087576374745418, | |
| "grad_norm": 2.588157892227173, | |
| "learning_rate": 9.947246442788193e-07, | |
| "loss": 0.31226691603660583, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.7128309572301426, | |
| "grad_norm": 2.7822420597076416, | |
| "learning_rate": 9.945161220748927e-07, | |
| "loss": 0.322743222117424, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.7169042769857433, | |
| "grad_norm": 2.379702091217041, | |
| "learning_rate": 9.943035808828953e-07, | |
| "loss": 0.3056500107049942, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.7209775967413442, | |
| "grad_norm": 2.4450721740722656, | |
| "learning_rate": 9.94087022430173e-07, | |
| "loss": 0.3037564754486084, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.725050916496945, | |
| "grad_norm": 2.5885887145996094, | |
| "learning_rate": 9.938664484767205e-07, | |
| "loss": 0.327587828040123, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.7291242362525459, | |
| "grad_norm": 2.613290309906006, | |
| "learning_rate": 9.936418608151675e-07, | |
| "loss": 0.33323927223682404, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.7331975560081466, | |
| "grad_norm": 2.6541707515716553, | |
| "learning_rate": 9.93413261270763e-07, | |
| "loss": 0.3316569924354553, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.7372708757637475, | |
| "grad_norm": 2.646383047103882, | |
| "learning_rate": 9.931806517013612e-07, | |
| "loss": 0.35486292839050293, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.7413441955193483, | |
| "grad_norm": 2.5270328521728516, | |
| "learning_rate": 9.92944033997406e-07, | |
| "loss": 0.3157142102718353, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.745417515274949, | |
| "grad_norm": 2.5851869583129883, | |
| "learning_rate": 9.927034100819163e-07, | |
| "loss": 0.3013855814933777, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.7494908350305499, | |
| "grad_norm": 2.75219988822937, | |
| "learning_rate": 9.924587819104695e-07, | |
| "loss": 0.3420049250125885, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.7535641547861507, | |
| "grad_norm": 2.436596632003784, | |
| "learning_rate": 9.922101514711865e-07, | |
| "loss": 0.3062688261270523, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.7576374745417516, | |
| "grad_norm": 2.9479236602783203, | |
| "learning_rate": 9.919575207847145e-07, | |
| "loss": 0.31793762743473053, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.7617107942973523, | |
| "grad_norm": 2.5482208728790283, | |
| "learning_rate": 9.917008919042116e-07, | |
| "loss": 0.3306496888399124, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.7657841140529531, | |
| "grad_norm": 2.609839677810669, | |
| "learning_rate": 9.914402669153295e-07, | |
| "loss": 0.29324449598789215, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.769857433808554, | |
| "grad_norm": 2.5740039348602295, | |
| "learning_rate": 9.91175647936197e-07, | |
| "loss": 0.3193310797214508, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.7739307535641547, | |
| "grad_norm": 2.3878629207611084, | |
| "learning_rate": 9.909070371174019e-07, | |
| "loss": 0.3040658235549927, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.7780040733197556, | |
| "grad_norm": 2.755152463912964, | |
| "learning_rate": 9.906344366419746e-07, | |
| "loss": 0.33930477499961853, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.7820773930753564, | |
| "grad_norm": 2.58367657661438, | |
| "learning_rate": 9.9035784872537e-07, | |
| "loss": 0.3244568109512329, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.7861507128309573, | |
| "grad_norm": 2.350712537765503, | |
| "learning_rate": 9.90077275615449e-07, | |
| "loss": 0.2779058516025543, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.790224032586558, | |
| "grad_norm": 2.7418465614318848, | |
| "learning_rate": 9.897927195924608e-07, | |
| "loss": 0.32641272246837616, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.7942973523421588, | |
| "grad_norm": 2.516510009765625, | |
| "learning_rate": 9.895041829690238e-07, | |
| "loss": 0.3083319664001465, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.7983706720977597, | |
| "grad_norm": 2.7772316932678223, | |
| "learning_rate": 9.892116680901084e-07, | |
| "loss": 0.30357757210731506, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.8024439918533605, | |
| "grad_norm": 2.5389041900634766, | |
| "learning_rate": 9.88915177333015e-07, | |
| "loss": 0.30694054067134857, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.8065173116089613, | |
| "grad_norm": 2.7129383087158203, | |
| "learning_rate": 9.886147131073579e-07, | |
| "loss": 0.3402569591999054, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.8105906313645621, | |
| "grad_norm": 2.654186248779297, | |
| "learning_rate": 9.883102778550434e-07, | |
| "loss": 0.3343619704246521, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.814663951120163, | |
| "grad_norm": 2.380168914794922, | |
| "learning_rate": 9.880018740502508e-07, | |
| "loss": 0.3020651191473007, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.8187372708757638, | |
| "grad_norm": 2.771951198577881, | |
| "learning_rate": 9.876895041994127e-07, | |
| "loss": 0.30565840005874634, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.8228105906313645, | |
| "grad_norm": 2.4966540336608887, | |
| "learning_rate": 9.873731708411939e-07, | |
| "loss": 0.3085058331489563, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.8268839103869654, | |
| "grad_norm": 2.5919551849365234, | |
| "learning_rate": 9.870528765464711e-07, | |
| "loss": 0.34540820121765137, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.8309572301425662, | |
| "grad_norm": 3.0668885707855225, | |
| "learning_rate": 9.867286239183122e-07, | |
| "loss": 0.3307037353515625, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.835030549898167, | |
| "grad_norm": 2.4281554222106934, | |
| "learning_rate": 9.864004155919544e-07, | |
| "loss": 0.28929875791072845, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.8391038696537678, | |
| "grad_norm": 2.5561623573303223, | |
| "learning_rate": 9.860682542347838e-07, | |
| "loss": 0.3272414803504944, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.8431771894093686, | |
| "grad_norm": 2.824591636657715, | |
| "learning_rate": 9.85732142546313e-07, | |
| "loss": 0.3192295432090759, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.8472505091649695, | |
| "grad_norm": 2.643718719482422, | |
| "learning_rate": 9.853920832581597e-07, | |
| "loss": 0.31284041702747345, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.8513238289205702, | |
| "grad_norm": 2.6777195930480957, | |
| "learning_rate": 9.850480791340236e-07, | |
| "loss": 0.3136574327945709, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.8553971486761711, | |
| "grad_norm": 2.5229766368865967, | |
| "learning_rate": 9.847001329696652e-07, | |
| "loss": 0.3047819435596466, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.8594704684317719, | |
| "grad_norm": 2.659447431564331, | |
| "learning_rate": 9.843482475928818e-07, | |
| "loss": 0.3642407953739166, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.8635437881873728, | |
| "grad_norm": 2.697049379348755, | |
| "learning_rate": 9.839924258634853e-07, | |
| "loss": 0.3134022653102875, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.8676171079429735, | |
| "grad_norm": 2.629868745803833, | |
| "learning_rate": 9.83632670673279e-07, | |
| "loss": 0.306331992149353, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.8716904276985743, | |
| "grad_norm": 2.4997003078460693, | |
| "learning_rate": 9.832689849460339e-07, | |
| "loss": 0.3142865002155304, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.8757637474541752, | |
| "grad_norm": 2.826869010925293, | |
| "learning_rate": 9.829013716374647e-07, | |
| "loss": 0.2904099076986313, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.879837067209776, | |
| "grad_norm": 2.6697499752044678, | |
| "learning_rate": 9.825298337352058e-07, | |
| "loss": 0.29838354885578156, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.8839103869653768, | |
| "grad_norm": 2.5330023765563965, | |
| "learning_rate": 9.821543742587876e-07, | |
| "loss": 0.3052047789096832, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.8879837067209776, | |
| "grad_norm": 2.806683301925659, | |
| "learning_rate": 9.817749962596114e-07, | |
| "loss": 0.3121778964996338, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.8920570264765784, | |
| "grad_norm": 2.718122720718384, | |
| "learning_rate": 9.81391702820925e-07, | |
| "loss": 0.32955022156238556, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.8961303462321792, | |
| "grad_norm": 2.346466541290283, | |
| "learning_rate": 9.81004497057797e-07, | |
| "loss": 0.291049063205719, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.90020366598778, | |
| "grad_norm": 2.4048361778259277, | |
| "learning_rate": 9.806133821170924e-07, | |
| "loss": 0.30249159038066864, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.9042769857433809, | |
| "grad_norm": 2.681546688079834, | |
| "learning_rate": 9.80218361177446e-07, | |
| "loss": 0.362154021859169, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.9083503054989817, | |
| "grad_norm": 2.792266368865967, | |
| "learning_rate": 9.798194374492375e-07, | |
| "loss": 0.28344525396823883, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.9124236252545825, | |
| "grad_norm": 2.507050037384033, | |
| "learning_rate": 9.794166141745646e-07, | |
| "loss": 0.2935172915458679, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.9164969450101833, | |
| "grad_norm": 2.7160379886627197, | |
| "learning_rate": 9.790098946272177e-07, | |
| "loss": 0.3005199581384659, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.9205702647657841, | |
| "grad_norm": 2.666494131088257, | |
| "learning_rate": 9.785992821126518e-07, | |
| "loss": 0.30710943043231964, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.924643584521385, | |
| "grad_norm": 2.699313163757324, | |
| "learning_rate": 9.781847799679615e-07, | |
| "loss": 0.3164513558149338, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.9287169042769857, | |
| "grad_norm": 2.49406099319458, | |
| "learning_rate": 9.777663915618517e-07, | |
| "loss": 0.3061770647764206, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.9327902240325866, | |
| "grad_norm": 2.552093029022217, | |
| "learning_rate": 9.773441202946121e-07, | |
| "loss": 0.2973909080028534, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.9368635437881874, | |
| "grad_norm": 2.5773231983184814, | |
| "learning_rate": 9.76917969598089e-07, | |
| "loss": 0.31120532751083374, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.9409368635437881, | |
| "grad_norm": 2.653515100479126, | |
| "learning_rate": 9.76487942935657e-07, | |
| "loss": 0.3365926146507263, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.945010183299389, | |
| "grad_norm": 2.670433282852173, | |
| "learning_rate": 9.760540438021907e-07, | |
| "loss": 0.3196941614151001, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.9490835030549898, | |
| "grad_norm": 2.892035961151123, | |
| "learning_rate": 9.756162757240373e-07, | |
| "loss": 0.33982205390930176, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.9531568228105907, | |
| "grad_norm": 2.5157856941223145, | |
| "learning_rate": 9.751746422589872e-07, | |
| "loss": 0.2537951096892357, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.9572301425661914, | |
| "grad_norm": 2.6808388233184814, | |
| "learning_rate": 9.747291469962452e-07, | |
| "loss": 0.2846526652574539, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.9613034623217923, | |
| "grad_norm": 2.451559066772461, | |
| "learning_rate": 9.742797935564011e-07, | |
| "loss": 0.29611095786094666, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.9653767820773931, | |
| "grad_norm": 2.7313358783721924, | |
| "learning_rate": 9.738265855914012e-07, | |
| "loss": 0.3275996297597885, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.9694501018329938, | |
| "grad_norm": 2.5593299865722656, | |
| "learning_rate": 9.733695267845171e-07, | |
| "loss": 0.2993656247854233, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.9735234215885947, | |
| "grad_norm": 2.6013288497924805, | |
| "learning_rate": 9.729086208503173e-07, | |
| "loss": 0.31615155935287476, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.9775967413441955, | |
| "grad_norm": 2.5403575897216797, | |
| "learning_rate": 9.72443871534636e-07, | |
| "loss": 0.2843424677848816, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.9816700610997964, | |
| "grad_norm": 2.4495410919189453, | |
| "learning_rate": 9.719752826145432e-07, | |
| "loss": 0.2987358868122101, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.9857433808553971, | |
| "grad_norm": 2.719775676727295, | |
| "learning_rate": 9.715028578983136e-07, | |
| "loss": 0.34320636093616486, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.9898167006109979, | |
| "grad_norm": 2.7152929306030273, | |
| "learning_rate": 9.71026601225396e-07, | |
| "loss": 0.2937510758638382, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.9938900203665988, | |
| "grad_norm": 2.4305663108825684, | |
| "learning_rate": 9.705465164663817e-07, | |
| "loss": 0.29807206988334656, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.9979633401221996, | |
| "grad_norm": 2.322704792022705, | |
| "learning_rate": 9.700626075229738e-07, | |
| "loss": 0.3189048618078232, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 2.322704792022705, | |
| "learning_rate": 9.695748783279544e-07, | |
| "loss": 0.3195984363555908, | |
| "step": 246 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 1225, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 100.0, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |