| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0008469791078487, |
| "eval_steps": 500, |
| "global_step": 886, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.001129305477131564, |
| "grad_norm": 0.20765775442123413, |
| "learning_rate": 2e-05, |
| "loss": 1.9066, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.002258610954263128, |
| "grad_norm": 0.06511678546667099, |
| "learning_rate": 4e-05, |
| "loss": 1.6166, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0033879164313946925, |
| "grad_norm": 0.1536235362291336, |
| "learning_rate": 6e-05, |
| "loss": 1.3065, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.004517221908526256, |
| "grad_norm": 0.1566154658794403, |
| "learning_rate": 8e-05, |
| "loss": 1.7229, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.00564652738565782, |
| "grad_norm": 0.10513211041688919, |
| "learning_rate": 0.0001, |
| "loss": 2.1128, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.006775832862789385, |
| "grad_norm": 0.10510624945163727, |
| "learning_rate": 9.999968210223322e-05, |
| "loss": 1.9685, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.007905138339920948, |
| "grad_norm": 0.0922314003109932, |
| "learning_rate": 9.999872841297521e-05, |
| "loss": 1.672, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.009034443817052512, |
| "grad_norm": 0.20644737780094147, |
| "learning_rate": 9.999713894435302e-05, |
| "loss": 1.7507, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.010163749294184076, |
| "grad_norm": 0.1501447558403015, |
| "learning_rate": 9.999491371657821e-05, |
| "loss": 2.3546, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.01129305477131564, |
| "grad_norm": 0.1670556217432022, |
| "learning_rate": 9.999205275794653e-05, |
| "loss": 1.4289, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.012422360248447204, |
| "grad_norm": 0.1365075558423996, |
| "learning_rate": 9.998855610483771e-05, |
| "loss": 1.7987, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.01355166572557877, |
| "grad_norm": 0.2672828137874603, |
| "learning_rate": 9.998442380171484e-05, |
| "loss": 1.6974, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.014680971202710334, |
| "grad_norm": 0.37785211205482483, |
| "learning_rate": 9.997965590112397e-05, |
| "loss": 2.1214, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.015810276679841896, |
| "grad_norm": 0.1777479499578476, |
| "learning_rate": 9.997425246369325e-05, |
| "loss": 2.0929, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.01693958215697346, |
| "grad_norm": 0.3635629713535309, |
| "learning_rate": 9.996821355813235e-05, |
| "loss": 1.7696, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.018068887634105024, |
| "grad_norm": 0.148004412651062, |
| "learning_rate": 9.996153926123141e-05, |
| "loss": 1.7952, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.019198193111236588, |
| "grad_norm": 0.14363674819469452, |
| "learning_rate": 9.995422965786025e-05, |
| "loss": 1.9941, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.020327498588368152, |
| "grad_norm": 0.17726008594036102, |
| "learning_rate": 9.994628484096706e-05, |
| "loss": 2.1081, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.021456804065499716, |
| "grad_norm": 0.1837041974067688, |
| "learning_rate": 9.993770491157749e-05, |
| "loss": 2.2376, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.02258610954263128, |
| "grad_norm": 0.28806236386299133, |
| "learning_rate": 9.992848997879312e-05, |
| "loss": 1.8928, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.023715415019762844, |
| "grad_norm": 0.16167967021465302, |
| "learning_rate": 9.991864015979021e-05, |
| "loss": 1.7153, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.024844720496894408, |
| "grad_norm": 0.25105348229408264, |
| "learning_rate": 9.99081555798182e-05, |
| "loss": 2.1614, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.025974025974025976, |
| "grad_norm": 0.1812150478363037, |
| "learning_rate": 9.989703637219806e-05, |
| "loss": 2.2657, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.02710333145115754, |
| "grad_norm": 0.1782921701669693, |
| "learning_rate": 9.988528267832062e-05, |
| "loss": 2.2051, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.028232636928289104, |
| "grad_norm": 0.18120209872722626, |
| "learning_rate": 9.987289464764485e-05, |
| "loss": 2.0319, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.029361942405420668, |
| "grad_norm": 0.2875784635543823, |
| "learning_rate": 9.985987243769578e-05, |
| "loss": 1.9888, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.030491247882552232, |
| "grad_norm": 0.18351072072982788, |
| "learning_rate": 9.984621621406273e-05, |
| "loss": 1.8919, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.03162055335968379, |
| "grad_norm": 0.199467733502388, |
| "learning_rate": 9.983192615039699e-05, |
| "loss": 1.9984, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.03274985883681536, |
| "grad_norm": 0.25572243332862854, |
| "learning_rate": 9.981700242840974e-05, |
| "loss": 2.2746, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.03387916431394692, |
| "grad_norm": 0.5004045367240906, |
| "learning_rate": 9.980144523786967e-05, |
| "loss": 1.8283, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.03500846979107849, |
| "grad_norm": 0.7726016640663147, |
| "learning_rate": 9.978525477660066e-05, |
| "loss": 1.889, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.03613777526821005, |
| "grad_norm": 0.2675948441028595, |
| "learning_rate": 9.976843125047916e-05, |
| "loss": 2.2772, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.037267080745341616, |
| "grad_norm": 0.27251410484313965, |
| "learning_rate": 9.975097487343161e-05, |
| "loss": 1.5502, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.038396386222473176, |
| "grad_norm": 0.3356260061264038, |
| "learning_rate": 9.973288586743175e-05, |
| "loss": 1.8092, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.039525691699604744, |
| "grad_norm": 0.2943379878997803, |
| "learning_rate": 9.971416446249777e-05, |
| "loss": 1.6588, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.040654997176736304, |
| "grad_norm": 0.3058868646621704, |
| "learning_rate": 9.969481089668938e-05, |
| "loss": 1.8885, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.04178430265386787, |
| "grad_norm": 0.4374743103981018, |
| "learning_rate": 9.96748254161048e-05, |
| "loss": 1.4605, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.04291360813099943, |
| "grad_norm": 0.4457000195980072, |
| "learning_rate": 9.965420827487759e-05, |
| "loss": 2.223, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.044042913608131, |
| "grad_norm": 0.4432419240474701, |
| "learning_rate": 9.963295973517352e-05, |
| "loss": 2.1732, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.04517221908526256, |
| "grad_norm": 0.7509264945983887, |
| "learning_rate": 9.961108006718708e-05, |
| "loss": 2.3801, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.04630152456239413, |
| "grad_norm": 0.4913429319858551, |
| "learning_rate": 9.95885695491382e-05, |
| "loss": 2.2028, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.04743083003952569, |
| "grad_norm": 0.7258439660072327, |
| "learning_rate": 9.95654284672686e-05, |
| "loss": 2.2493, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.048560135516657256, |
| "grad_norm": 1.0519062280654907, |
| "learning_rate": 9.954165711583821e-05, |
| "loss": 1.8228, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.049689440993788817, |
| "grad_norm": 0.9518550634384155, |
| "learning_rate": 9.951725579712143e-05, |
| "loss": 1.9532, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.050818746470920384, |
| "grad_norm": 2.408336639404297, |
| "learning_rate": 9.949222482140325e-05, |
| "loss": 1.943, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.05194805194805195, |
| "grad_norm": 1.485158085823059, |
| "learning_rate": 9.946656450697529e-05, |
| "loss": 1.9683, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.05307735742518351, |
| "grad_norm": 1.8976699113845825, |
| "learning_rate": 9.944027518013187e-05, |
| "loss": 2.4141, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.05420666290231508, |
| "grad_norm": 2.9642817974090576, |
| "learning_rate": 9.941335717516564e-05, |
| "loss": 2.496, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.05533596837944664, |
| "grad_norm": 3.4149694442749023, |
| "learning_rate": 9.938581083436363e-05, |
| "loss": 1.9677, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.05646527385657821, |
| "grad_norm": 3.277317523956299, |
| "learning_rate": 9.935763650800259e-05, |
| "loss": 2.442, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.05759457933370977, |
| "grad_norm": 0.18346086144447327, |
| "learning_rate": 9.932883455434476e-05, |
| "loss": 1.5118, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.058723884810841336, |
| "grad_norm": 0.24609524011611938, |
| "learning_rate": 9.929940533963322e-05, |
| "loss": 1.8492, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.059853190287972896, |
| "grad_norm": 0.33370301127433777, |
| "learning_rate": 9.926934923808722e-05, |
| "loss": 1.6053, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.060982495765104464, |
| "grad_norm": 0.2675216495990753, |
| "learning_rate": 9.923866663189748e-05, |
| "loss": 1.3325, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.062111801242236024, |
| "grad_norm": 0.32292208075523376, |
| "learning_rate": 9.920735791122126e-05, |
| "loss": 2.016, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.06324110671936758, |
| "grad_norm": 0.32643455266952515, |
| "learning_rate": 9.917542347417746e-05, |
| "loss": 1.9595, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.06437041219649915, |
| "grad_norm": 0.43551144003868103, |
| "learning_rate": 9.914286372684153e-05, |
| "loss": 1.6951, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.06549971767363072, |
| "grad_norm": 0.3900545835494995, |
| "learning_rate": 9.910967908324033e-05, |
| "loss": 1.9828, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.06662902315076229, |
| "grad_norm": 0.4311246871948242, |
| "learning_rate": 9.907586996534679e-05, |
| "loss": 1.4884, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.06775832862789384, |
| "grad_norm": 0.46540212631225586, |
| "learning_rate": 9.904143680307464e-05, |
| "loss": 2.019, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.06888763410502541, |
| "grad_norm": 0.3635936379432678, |
| "learning_rate": 9.900638003427291e-05, |
| "loss": 1.7411, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.07001693958215698, |
| "grad_norm": 0.4469711482524872, |
| "learning_rate": 9.897070010472033e-05, |
| "loss": 2.1121, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.07114624505928854, |
| "grad_norm": 0.3956710398197174, |
| "learning_rate": 9.89343974681197e-05, |
| "loss": 1.8976, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.0722755505364201, |
| "grad_norm": 0.3273422122001648, |
| "learning_rate": 9.88974725860921e-05, |
| "loss": 2.4247, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.07340485601355166, |
| "grad_norm": 0.40820926427841187, |
| "learning_rate": 9.885992592817103e-05, |
| "loss": 1.8519, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.07453416149068323, |
| "grad_norm": 0.41497233510017395, |
| "learning_rate": 9.882175797179647e-05, |
| "loss": 2.0412, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.0756634669678148, |
| "grad_norm": 0.26064857840538025, |
| "learning_rate": 9.878296920230869e-05, |
| "loss": 1.6159, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.07679277244494635, |
| "grad_norm": 0.25881126523017883, |
| "learning_rate": 9.874356011294226e-05, |
| "loss": 1.7785, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.07792207792207792, |
| "grad_norm": 0.31029218435287476, |
| "learning_rate": 9.870353120481961e-05, |
| "loss": 1.8466, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.07905138339920949, |
| "grad_norm": 0.24932654201984406, |
| "learning_rate": 9.866288298694479e-05, |
| "loss": 2.0475, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.08018068887634106, |
| "grad_norm": 0.24546779692173004, |
| "learning_rate": 9.862161597619689e-05, |
| "loss": 1.7603, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.08130999435347261, |
| "grad_norm": 0.2344997376203537, |
| "learning_rate": 9.857973069732354e-05, |
| "loss": 1.8594, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.08243929983060418, |
| "grad_norm": 0.27005735039711, |
| "learning_rate": 9.853722768293419e-05, |
| "loss": 2.0338, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.08356860530773574, |
| "grad_norm": 0.22687004506587982, |
| "learning_rate": 9.849410747349338e-05, |
| "loss": 2.0741, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.08469791078486731, |
| "grad_norm": 0.2578216791152954, |
| "learning_rate": 9.845037061731386e-05, |
| "loss": 2.0056, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.08582721626199886, |
| "grad_norm": 0.21841199696063995, |
| "learning_rate": 9.840601767054957e-05, |
| "loss": 2.0873, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.08695652173913043, |
| "grad_norm": 0.2152131050825119, |
| "learning_rate": 9.83610491971886e-05, |
| "loss": 1.6326, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.088085827216262, |
| "grad_norm": 0.2605539560317993, |
| "learning_rate": 9.831546576904609e-05, |
| "loss": 1.7596, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.08921513269339357, |
| "grad_norm": 0.2637302279472351, |
| "learning_rate": 9.826926796575679e-05, |
| "loss": 2.0953, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.09034443817052512, |
| "grad_norm": 0.27054956555366516, |
| "learning_rate": 9.822245637476787e-05, |
| "loss": 1.8897, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.09147374364765669, |
| "grad_norm": 0.25898051261901855, |
| "learning_rate": 9.817503159133132e-05, |
| "loss": 1.7747, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.09260304912478826, |
| "grad_norm": 0.2623620331287384, |
| "learning_rate": 9.812699421849647e-05, |
| "loss": 1.603, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.09373235460191982, |
| "grad_norm": 0.26288437843322754, |
| "learning_rate": 9.807834486710226e-05, |
| "loss": 1.8412, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.09486166007905138, |
| "grad_norm": 0.323650598526001, |
| "learning_rate": 9.802908415576948e-05, |
| "loss": 1.3957, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.09599096555618294, |
| "grad_norm": 0.3127897381782532, |
| "learning_rate": 9.797921271089294e-05, |
| "loss": 1.6553, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.09712027103331451, |
| "grad_norm": 0.3926398456096649, |
| "learning_rate": 9.792873116663348e-05, |
| "loss": 1.4887, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.09824957651044608, |
| "grad_norm": 0.6420180797576904, |
| "learning_rate": 9.787764016490992e-05, |
| "loss": 2.0315, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.09937888198757763, |
| "grad_norm": 0.4471886157989502, |
| "learning_rate": 9.782594035539085e-05, |
| "loss": 1.5553, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.1005081874647092, |
| "grad_norm": 0.4515712559223175, |
| "learning_rate": 9.777363239548644e-05, |
| "loss": 1.7503, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.10163749294184077, |
| "grad_norm": 0.41610848903656006, |
| "learning_rate": 9.772071695034006e-05, |
| "loss": 1.7643, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.10276679841897234, |
| "grad_norm": 0.41934433579444885, |
| "learning_rate": 9.766719469281974e-05, |
| "loss": 1.8208, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.1038961038961039, |
| "grad_norm": 0.42789050936698914, |
| "learning_rate": 9.761306630350976e-05, |
| "loss": 1.5227, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.10502540937323546, |
| "grad_norm": 0.47943541407585144, |
| "learning_rate": 9.755833247070188e-05, |
| "loss": 1.4097, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.10615471485036702, |
| "grad_norm": 0.7126927971839905, |
| "learning_rate": 9.750299389038659e-05, |
| "loss": 1.4319, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.10728402032749859, |
| "grad_norm": 0.7886515855789185, |
| "learning_rate": 9.744705126624439e-05, |
| "loss": 1.5488, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.10841332580463016, |
| "grad_norm": 0.9192765355110168, |
| "learning_rate": 9.739050530963665e-05, |
| "loss": 1.4749, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.10954263128176171, |
| "grad_norm": 0.9051669239997864, |
| "learning_rate": 9.733335673959671e-05, |
| "loss": 1.3721, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.11067193675889328, |
| "grad_norm": 1.4240723848342896, |
| "learning_rate": 9.727560628282071e-05, |
| "loss": 1.6698, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.11180124223602485, |
| "grad_norm": 1.5896399021148682, |
| "learning_rate": 9.721725467365826e-05, |
| "loss": 1.8088, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.11293054771315642, |
| "grad_norm": 3.1276590824127197, |
| "learning_rate": 9.715830265410324e-05, |
| "loss": 3.2245, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.11405985319028797, |
| "grad_norm": 0.10527385026216507, |
| "learning_rate": 9.709875097378425e-05, |
| "loss": 0.9533, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.11518915866741954, |
| "grad_norm": 0.11625129729509354, |
| "learning_rate": 9.703860038995515e-05, |
| "loss": 1.5122, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.1163184641445511, |
| "grad_norm": 0.14005830883979797, |
| "learning_rate": 9.697785166748536e-05, |
| "loss": 1.3598, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.11744776962168267, |
| "grad_norm": 0.20591479539871216, |
| "learning_rate": 9.691650557885026e-05, |
| "loss": 1.6923, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.11857707509881422, |
| "grad_norm": 0.1916189342737198, |
| "learning_rate": 9.685456290412119e-05, |
| "loss": 1.5945, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.11970638057594579, |
| "grad_norm": 0.18392325937747955, |
| "learning_rate": 9.679202443095566e-05, |
| "loss": 1.539, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.12083568605307736, |
| "grad_norm": 0.24299819767475128, |
| "learning_rate": 9.672889095458734e-05, |
| "loss": 2.0555, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.12196499153020893, |
| "grad_norm": 0.1931591033935547, |
| "learning_rate": 9.666516327781588e-05, |
| "loss": 1.4817, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.12309429700734048, |
| "grad_norm": 0.18632109463214874, |
| "learning_rate": 9.660084221099671e-05, |
| "loss": 1.8985, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.12422360248447205, |
| "grad_norm": 0.3496991693973541, |
| "learning_rate": 9.653592857203076e-05, |
| "loss": 1.8324, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.12535290796160362, |
| "grad_norm": 0.2754102945327759, |
| "learning_rate": 9.647042318635407e-05, |
| "loss": 1.8554, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.12648221343873517, |
| "grad_norm": 0.2471209615468979, |
| "learning_rate": 9.640432688692728e-05, |
| "loss": 1.8519, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.12761151891586675, |
| "grad_norm": 0.2555141746997833, |
| "learning_rate": 9.633764051422504e-05, |
| "loss": 2.0029, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.1287408243929983, |
| "grad_norm": 0.28241923451423645, |
| "learning_rate": 9.627036491622529e-05, |
| "loss": 1.8218, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.12987012987012986, |
| "grad_norm": 0.2457263320684433, |
| "learning_rate": 9.620250094839852e-05, |
| "loss": 1.984, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.13099943534726144, |
| "grad_norm": 0.2737242579460144, |
| "learning_rate": 9.61340494736969e-05, |
| "loss": 1.6238, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.132128740824393, |
| "grad_norm": 0.3082609176635742, |
| "learning_rate": 9.606501136254327e-05, |
| "loss": 1.6898, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.13325804630152457, |
| "grad_norm": 0.2536124289035797, |
| "learning_rate": 9.599538749282005e-05, |
| "loss": 1.8739, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.13438735177865613, |
| "grad_norm": 0.31439918279647827, |
| "learning_rate": 9.592517874985819e-05, |
| "loss": 1.7295, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.13551665725578768, |
| "grad_norm": 0.23434846103191376, |
| "learning_rate": 9.585438602642578e-05, |
| "loss": 1.8987, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.13664596273291926, |
| "grad_norm": 0.2667776048183441, |
| "learning_rate": 9.578301022271676e-05, |
| "loss": 1.8776, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.13777526821005082, |
| "grad_norm": 0.26877158880233765, |
| "learning_rate": 9.571105224633948e-05, |
| "loss": 1.9248, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.13890457368718237, |
| "grad_norm": 0.28717201948165894, |
| "learning_rate": 9.563851301230512e-05, |
| "loss": 1.9187, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.14003387916431395, |
| "grad_norm": 0.2537637948989868, |
| "learning_rate": 9.556539344301613e-05, |
| "loss": 2.0498, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.1411631846414455, |
| "grad_norm": 0.2713514566421509, |
| "learning_rate": 9.549169446825441e-05, |
| "loss": 1.9569, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.1422924901185771, |
| "grad_norm": 0.27854233980178833, |
| "learning_rate": 9.541741702516954e-05, |
| "loss": 1.9205, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.14342179559570864, |
| "grad_norm": 0.27771830558776855, |
| "learning_rate": 9.534256205826684e-05, |
| "loss": 1.8535, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.1445511010728402, |
| "grad_norm": 0.2616361975669861, |
| "learning_rate": 9.52671305193954e-05, |
| "loss": 1.7369, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.14568040654997177, |
| "grad_norm": 0.2548147141933441, |
| "learning_rate": 9.519112336773593e-05, |
| "loss": 2.1447, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.14680971202710333, |
| "grad_norm": 0.43677401542663574, |
| "learning_rate": 9.511454156978855e-05, |
| "loss": 1.7814, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.14793901750423488, |
| "grad_norm": 0.32601964473724365, |
| "learning_rate": 9.50373860993606e-05, |
| "loss": 1.8351, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.14906832298136646, |
| "grad_norm": 0.3155123293399811, |
| "learning_rate": 9.495965793755414e-05, |
| "loss": 1.8318, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.15019762845849802, |
| "grad_norm": 0.29326286911964417, |
| "learning_rate": 9.488135807275351e-05, |
| "loss": 1.613, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.1513269339356296, |
| "grad_norm": 0.3525848686695099, |
| "learning_rate": 9.480248750061283e-05, |
| "loss": 1.8371, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.15245623941276115, |
| "grad_norm": 0.30814632773399353, |
| "learning_rate": 9.472304722404323e-05, |
| "loss": 1.7233, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.1535855448898927, |
| "grad_norm": 0.34674981236457825, |
| "learning_rate": 9.464303825320018e-05, |
| "loss": 1.6202, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.1547148503670243, |
| "grad_norm": 0.3566695749759674, |
| "learning_rate": 9.456246160547057e-05, |
| "loss": 1.4083, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.15584415584415584, |
| "grad_norm": 0.5000084638595581, |
| "learning_rate": 9.448131830545992e-05, |
| "loss": 1.5184, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.15697346132128742, |
| "grad_norm": 0.341934472322464, |
| "learning_rate": 9.439960938497914e-05, |
| "loss": 1.7534, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.15810276679841898, |
| "grad_norm": 0.5167602300643921, |
| "learning_rate": 9.431733588303156e-05, |
| "loss": 1.5504, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.15923207227555053, |
| "grad_norm": 0.5055645108222961, |
| "learning_rate": 9.423449884579972e-05, |
| "loss": 2.1625, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.1603613777526821, |
| "grad_norm": 0.4868288040161133, |
| "learning_rate": 9.415109932663193e-05, |
| "loss": 1.6623, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.16149068322981366, |
| "grad_norm": 0.6852800250053406, |
| "learning_rate": 9.406713838602907e-05, |
| "loss": 1.7651, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.16261998870694522, |
| "grad_norm": 0.7421604990959167, |
| "learning_rate": 9.398261709163095e-05, |
| "loss": 0.8697, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.1637492941840768, |
| "grad_norm": 0.8236505389213562, |
| "learning_rate": 9.389753651820279e-05, |
| "loss": 1.2798, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.16487859966120835, |
| "grad_norm": 0.7554454207420349, |
| "learning_rate": 9.381189774762158e-05, |
| "loss": 0.9496, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.16600790513833993, |
| "grad_norm": 0.9927592873573303, |
| "learning_rate": 9.372570186886225e-05, |
| "loss": 2.1169, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.1671372106154715, |
| "grad_norm": 1.3748329877853394, |
| "learning_rate": 9.363894997798392e-05, |
| "loss": 1.5176, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.16826651609260304, |
| "grad_norm": 2.4232335090637207, |
| "learning_rate": 9.355164317811587e-05, |
| "loss": 1.9086, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.16939582156973462, |
| "grad_norm": 3.602113962173462, |
| "learning_rate": 9.346378257944357e-05, |
| "loss": 2.502, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.17052512704686618, |
| "grad_norm": 0.0982755646109581, |
| "learning_rate": 9.337536929919454e-05, |
| "loss": 0.9732, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.17165443252399773, |
| "grad_norm": 0.1385735720396042, |
| "learning_rate": 9.328640446162416e-05, |
| "loss": 1.6766, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.1727837380011293, |
| "grad_norm": 0.15490496158599854, |
| "learning_rate": 9.319688919800137e-05, |
| "loss": 1.4763, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.17391304347826086, |
| "grad_norm": 0.15035314857959747, |
| "learning_rate": 9.310682464659424e-05, |
| "loss": 1.7746, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.17504234895539245, |
| "grad_norm": 0.16564689576625824, |
| "learning_rate": 9.30162119526556e-05, |
| "loss": 1.7991, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.176171654432524, |
| "grad_norm": 0.24599392712116241, |
| "learning_rate": 9.292505226840832e-05, |
| "loss": 1.6264, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.17730095990965555, |
| "grad_norm": 0.17428162693977356, |
| "learning_rate": 9.283334675303084e-05, |
| "loss": 1.6458, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.17843026538678713, |
| "grad_norm": 0.16533192992210388, |
| "learning_rate": 9.274109657264227e-05, |
| "loss": 1.8436, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.1795595708639187, |
| "grad_norm": 0.2076898068189621, |
| "learning_rate": 9.264830290028771e-05, |
| "loss": 1.9228, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.18068887634105024, |
| "grad_norm": 0.2833722233772278, |
| "learning_rate": 9.255496691592316e-05, |
| "loss": 1.5502, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.18181818181818182, |
| "grad_norm": 0.23064666986465454, |
| "learning_rate": 9.246108980640069e-05, |
| "loss": 1.7692, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.18294748729531338, |
| "grad_norm": 0.3194381892681122, |
| "learning_rate": 9.236667276545323e-05, |
| "loss": 1.4503, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.18407679277244496, |
| "grad_norm": 0.24672725796699524, |
| "learning_rate": 9.227171699367943e-05, |
| "loss": 1.6418, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.1852060982495765, |
| "grad_norm": 0.2164398431777954, |
| "learning_rate": 9.217622369852842e-05, |
| "loss": 1.4357, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.18633540372670807, |
| "grad_norm": 0.18639886379241943, |
| "learning_rate": 9.208019409428439e-05, |
| "loss": 1.7777, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.18746470920383965, |
| "grad_norm": 0.2551623284816742, |
| "learning_rate": 9.198362940205123e-05, |
| "loss": 1.7788, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.1885940146809712, |
| "grad_norm": 0.20203350484371185, |
| "learning_rate": 9.188653084973692e-05, |
| "loss": 1.8567, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.18972332015810275, |
| "grad_norm": 0.25348034501075745, |
| "learning_rate": 9.178889967203798e-05, |
| "loss": 1.9362, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.19085262563523434, |
| "grad_norm": 0.22443613409996033, |
| "learning_rate": 9.169073711042378e-05, |
| "loss": 1.9439, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.1919819311123659, |
| "grad_norm": 0.23565413057804108, |
| "learning_rate": 9.159204441312064e-05, |
| "loss": 1.8284, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.19311123658949747, |
| "grad_norm": 0.24979020655155182, |
| "learning_rate": 9.14928228350961e-05, |
| "loss": 1.666, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.19424054206662902, |
| "grad_norm": 0.2663923501968384, |
| "learning_rate": 9.139307363804289e-05, |
| "loss": 1.822, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.19536984754376058, |
| "grad_norm": 0.23612940311431885, |
| "learning_rate": 9.129279809036287e-05, |
| "loss": 1.8881, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.19649915302089216, |
| "grad_norm": 0.22009217739105225, |
| "learning_rate": 9.119199746715096e-05, |
| "loss": 1.854, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.1976284584980237, |
| "grad_norm": 0.3293054699897766, |
| "learning_rate": 9.109067305017889e-05, |
| "loss": 1.8201, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.19875776397515527, |
| "grad_norm": 0.22837886214256287, |
| "learning_rate": 9.098882612787886e-05, |
| "loss": 1.8194, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.19988706945228685, |
| "grad_norm": 0.2876310646533966, |
| "learning_rate": 9.088645799532729e-05, |
| "loss": 1.7314, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.2010163749294184, |
| "grad_norm": 0.21625038981437683, |
| "learning_rate": 9.078356995422817e-05, |
| "loss": 2.0749, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.20214568040654998, |
| "grad_norm": 0.28515487909317017, |
| "learning_rate": 9.068016331289663e-05, |
| "loss": 1.8572, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.20327498588368154, |
| "grad_norm": 0.2739616632461548, |
| "learning_rate": 9.057623938624234e-05, |
| "loss": 1.8118, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.2044042913608131, |
| "grad_norm": 0.2997945547103882, |
| "learning_rate": 9.047179949575261e-05, |
| "loss": 1.713, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.20553359683794467, |
| "grad_norm": 0.2930619418621063, |
| "learning_rate": 9.036684496947577e-05, |
| "loss": 1.7162, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.20666290231507622, |
| "grad_norm": 0.2975890338420868, |
| "learning_rate": 9.026137714200423e-05, |
| "loss": 1.6059, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.2077922077922078, |
| "grad_norm": 0.3093002736568451, |
| "learning_rate": 9.015539735445742e-05, |
| "loss": 1.6468, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.20892151326933936, |
| "grad_norm": 0.3571140170097351, |
| "learning_rate": 9.004890695446489e-05, |
| "loss": 1.849, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.2100508187464709, |
| "grad_norm": 0.34700098633766174, |
| "learning_rate": 8.994190729614903e-05, |
| "loss": 1.8542, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.2111801242236025, |
| "grad_norm": 0.38373562693595886, |
| "learning_rate": 8.983439974010794e-05, |
| "loss": 1.4129, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.21230942970073405, |
| "grad_norm": 0.3819943368434906, |
| "learning_rate": 8.972638565339812e-05, |
| "loss": 1.6305, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.2134387351778656, |
| "grad_norm": 0.4611022472381592, |
| "learning_rate": 8.961786640951701e-05, |
| "loss": 1.9939, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.21456804065499718, |
| "grad_norm": 0.478437602519989, |
| "learning_rate": 8.950884338838567e-05, |
| "loss": 1.9253, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.21569734613212874, |
| "grad_norm": 0.5726389288902283, |
| "learning_rate": 8.939931797633106e-05, |
| "loss": 1.8656, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.21682665160926032, |
| "grad_norm": 0.7200562357902527, |
| "learning_rate": 8.928929156606854e-05, |
| "loss": 1.4691, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.21795595708639187, |
| "grad_norm": 0.6185989379882812, |
| "learning_rate": 8.917876555668412e-05, |
| "loss": 1.392, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.21908526256352343, |
| "grad_norm": 0.6441951394081116, |
| "learning_rate": 8.906774135361667e-05, |
| "loss": 0.839, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.220214568040655, |
| "grad_norm": 0.717701256275177, |
| "learning_rate": 8.895622036864004e-05, |
| "loss": 1.1955, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.22134387351778656, |
| "grad_norm": 0.7623339295387268, |
| "learning_rate": 8.884420401984509e-05, |
| "loss": 1.2057, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.2224731789949181, |
| "grad_norm": 1.0133534669876099, |
| "learning_rate": 8.873169373162174e-05, |
| "loss": 1.6524, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.2236024844720497, |
| "grad_norm": 2.122885227203369, |
| "learning_rate": 8.861869093464073e-05, |
| "loss": 1.6981, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.22473178994918125, |
| "grad_norm": 1.8476300239562988, |
| "learning_rate": 8.850519706583553e-05, |
| "loss": 1.4381, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.22586109542631283, |
| "grad_norm": 3.508267879486084, |
| "learning_rate": 8.839121356838406e-05, |
| "loss": 2.286, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.22699040090344438, |
| "grad_norm": 0.0983705148100853, |
| "learning_rate": 8.827674189169031e-05, |
| "loss": 1.8119, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.22811970638057594, |
| "grad_norm": 0.11233831197023392, |
| "learning_rate": 8.816178349136586e-05, |
| "loss": 1.5454, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.22924901185770752, |
| "grad_norm": 0.14189288020133972, |
| "learning_rate": 8.804633982921146e-05, |
| "loss": 1.9528, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.23037831733483907, |
| "grad_norm": 0.17341268062591553, |
| "learning_rate": 8.793041237319843e-05, |
| "loss": 1.3116, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.23150762281197063, |
| "grad_norm": 0.22769945859909058, |
| "learning_rate": 8.781400259744993e-05, |
| "loss": 1.5896, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.2326369282891022, |
| "grad_norm": 0.1609055995941162, |
| "learning_rate": 8.769711198222225e-05, |
| "loss": 1.188, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.23376623376623376, |
| "grad_norm": 0.15893089771270752, |
| "learning_rate": 8.757974201388605e-05, |
| "loss": 2.0351, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.23489553924336534, |
| "grad_norm": 0.21750855445861816, |
| "learning_rate": 8.746189418490736e-05, |
| "loss": 1.7586, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.2360248447204969, |
| "grad_norm": 0.20967909693717957, |
| "learning_rate": 8.73435699938286e-05, |
| "loss": 1.8875, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.23715415019762845, |
| "grad_norm": 0.2173241674900055, |
| "learning_rate": 8.722477094524967e-05, |
| "loss": 1.8267, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.23828345567476003, |
| "grad_norm": 0.23391224443912506, |
| "learning_rate": 8.710549854980863e-05, |
| "loss": 1.5153, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.23941276115189158, |
| "grad_norm": 0.25549450516700745, |
| "learning_rate": 8.69857543241626e-05, |
| "loss": 1.6052, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.24054206662902314, |
| "grad_norm": 0.18461880087852478, |
| "learning_rate": 8.686553979096848e-05, |
| "loss": 1.8639, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.24167137210615472, |
| "grad_norm": 0.2688834071159363, |
| "learning_rate": 8.674485647886351e-05, |
| "loss": 2.0688, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.24280067758328627, |
| "grad_norm": 0.28456270694732666, |
| "learning_rate": 8.662370592244593e-05, |
| "loss": 1.8468, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.24392998306041785, |
| "grad_norm": 0.2307300567626953, |
| "learning_rate": 8.650208966225537e-05, |
| "loss": 1.7642, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.2450592885375494, |
| "grad_norm": 0.2560746669769287, |
| "learning_rate": 8.638000924475336e-05, |
| "loss": 1.6622, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.24618859401468096, |
| "grad_norm": 0.20194853842258453, |
| "learning_rate": 8.625746622230355e-05, |
| "loss": 1.9663, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.24731789949181254, |
| "grad_norm": 0.33031973242759705, |
| "learning_rate": 8.61344621531521e-05, |
| "loss": 1.9537, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.2484472049689441, |
| "grad_norm": 0.26182833313941956, |
| "learning_rate": 8.601099860140774e-05, |
| "loss": 1.9116, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.24957651044607565, |
| "grad_norm": 0.23049500584602356, |
| "learning_rate": 8.588707713702198e-05, |
| "loss": 1.7189, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.25070581592320723, |
| "grad_norm": 0.21192899346351624, |
| "learning_rate": 8.576269933576909e-05, |
| "loss": 1.9939, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.2518351214003388, |
| "grad_norm": 0.27587172389030457, |
| "learning_rate": 8.563786677922608e-05, |
| "loss": 1.9758, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.25296442687747034, |
| "grad_norm": 0.29069381952285767, |
| "learning_rate": 8.551258105475256e-05, |
| "loss": 1.7336, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.2540937323546019, |
| "grad_norm": 0.2707800567150116, |
| "learning_rate": 8.538684375547064e-05, |
| "loss": 1.8063, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.2552230378317335, |
| "grad_norm": 0.2815471887588501, |
| "learning_rate": 8.526065648024459e-05, |
| "loss": 1.8871, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.256352343308865, |
| "grad_norm": 0.22785307466983795, |
| "learning_rate": 8.513402083366054e-05, |
| "loss": 1.6936, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.2574816487859966, |
| "grad_norm": 0.22646154463291168, |
| "learning_rate": 8.5006938426006e-05, |
| "loss": 1.9189, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.2586109542631282, |
| "grad_norm": 0.2953996956348419, |
| "learning_rate": 8.487941087324958e-05, |
| "loss": 1.8463, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.2597402597402597, |
| "grad_norm": 0.2842676043510437, |
| "learning_rate": 8.475143979702022e-05, |
| "loss": 1.4269, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.2608695652173913, |
| "grad_norm": 0.2800816297531128, |
| "learning_rate": 8.46230268245867e-05, |
| "loss": 1.8405, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.2619988706945229, |
| "grad_norm": 0.30798566341400146, |
| "learning_rate": 8.449417358883689e-05, |
| "loss": 1.706, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.26312817617165446, |
| "grad_norm": 0.32843559980392456, |
| "learning_rate": 8.436488172825705e-05, |
| "loss": 1.7581, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.264257481648786, |
| "grad_norm": 0.3350053131580353, |
| "learning_rate": 8.42351528869109e-05, |
| "loss": 1.4183, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.26538678712591757, |
| "grad_norm": 0.32234182953834534, |
| "learning_rate": 8.410498871441886e-05, |
| "loss": 1.6521, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.26651609260304915, |
| "grad_norm": 0.3101066052913666, |
| "learning_rate": 8.397439086593683e-05, |
| "loss": 1.7472, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.2676453980801807, |
| "grad_norm": 0.4354749917984009, |
| "learning_rate": 8.384336100213546e-05, |
| "loss": 1.5799, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.26877470355731226, |
| "grad_norm": 0.3919726014137268, |
| "learning_rate": 8.371190078917875e-05, |
| "loss": 1.2785, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.26990400903444384, |
| "grad_norm": 0.38805148005485535, |
| "learning_rate": 8.358001189870303e-05, |
| "loss": 1.5701, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.27103331451157536, |
| "grad_norm": 0.4695982336997986, |
| "learning_rate": 8.344769600779568e-05, |
| "loss": 1.7407, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.27216261998870694, |
| "grad_norm": 0.6635199189186096, |
| "learning_rate": 8.331495479897373e-05, |
| "loss": 1.5664, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.2732919254658385, |
| "grad_norm": 0.4444817900657654, |
| "learning_rate": 8.318178996016253e-05, |
| "loss": 1.6331, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.27442123094297005, |
| "grad_norm": 0.6371070146560669, |
| "learning_rate": 8.304820318467427e-05, |
| "loss": 1.6983, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.27555053642010163, |
| "grad_norm": 0.5904344916343689, |
| "learning_rate": 8.291419617118646e-05, |
| "loss": 1.4706, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.2766798418972332, |
| "grad_norm": 0.7313349843025208, |
| "learning_rate": 8.277977062372031e-05, |
| "loss": 1.1339, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.27780914737436474, |
| "grad_norm": 0.9247500896453857, |
| "learning_rate": 8.264492825161909e-05, |
| "loss": 1.1903, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.2789384528514963, |
| "grad_norm": 0.9339075088500977, |
| "learning_rate": 8.250967076952635e-05, |
| "loss": 1.6087, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.2800677583286279, |
| "grad_norm": 1.1781666278839111, |
| "learning_rate": 8.237399989736414e-05, |
| "loss": 1.8521, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.2811970638057595, |
| "grad_norm": 1.551308512687683, |
| "learning_rate": 8.223791736031117e-05, |
| "loss": 1.3684, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.282326369282891, |
| "grad_norm": 2.004934787750244, |
| "learning_rate": 8.210142488878078e-05, |
| "loss": 1.7356, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.2834556747600226, |
| "grad_norm": 0.11847388744354248, |
| "learning_rate": 8.196452421839911e-05, |
| "loss": 1.3472, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.2845849802371542, |
| "grad_norm": 0.114841990172863, |
| "learning_rate": 8.18272170899828e-05, |
| "loss": 1.5753, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.2857142857142857, |
| "grad_norm": 0.16253335773944855, |
| "learning_rate": 8.168950524951708e-05, |
| "loss": 1.7908, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.2868435911914173, |
| "grad_norm": 0.15040843188762665, |
| "learning_rate": 8.155139044813336e-05, |
| "loss": 1.3829, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.28797289666854886, |
| "grad_norm": 0.19803692400455475, |
| "learning_rate": 8.141287444208717e-05, |
| "loss": 1.3891, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.2891022021456804, |
| "grad_norm": 0.1951441764831543, |
| "learning_rate": 8.127395899273561e-05, |
| "loss": 1.7411, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.29023150762281197, |
| "grad_norm": 0.1544586420059204, |
| "learning_rate": 8.113464586651516e-05, |
| "loss": 1.7334, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.29136081309994355, |
| "grad_norm": 0.1889820396900177, |
| "learning_rate": 8.099493683491909e-05, |
| "loss": 1.603, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.2924901185770751, |
| "grad_norm": 0.2836708724498749, |
| "learning_rate": 8.085483367447498e-05, |
| "loss": 1.5592, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.29361942405420666, |
| "grad_norm": 0.20887868106365204, |
| "learning_rate": 8.071433816672204e-05, |
| "loss": 1.9915, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.29474872953133824, |
| "grad_norm": 0.19964289665222168, |
| "learning_rate": 8.057345209818868e-05, |
| "loss": 1.4532, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.29587803500846976, |
| "grad_norm": 0.23576165735721588, |
| "learning_rate": 8.04321772603695e-05, |
| "loss": 1.6616, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.29700734048560135, |
| "grad_norm": 0.1948481649160385, |
| "learning_rate": 8.029051544970274e-05, |
| "loss": 1.6309, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.2981366459627329, |
| "grad_norm": 0.21681681275367737, |
| "learning_rate": 8.014846846754734e-05, |
| "loss": 1.8207, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.2992659514398645, |
| "grad_norm": 0.1701822280883789, |
| "learning_rate": 8.000603812016002e-05, |
| "loss": 2.0191, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.30039525691699603, |
| "grad_norm": 0.2877520024776459, |
| "learning_rate": 7.986322621867237e-05, |
| "loss": 1.7949, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.3015245623941276, |
| "grad_norm": 0.22249895334243774, |
| "learning_rate": 7.972003457906773e-05, |
| "loss": 1.7619, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.3026538678712592, |
| "grad_norm": 0.20995785295963287, |
| "learning_rate": 7.957646502215826e-05, |
| "loss": 1.919, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.3037831733483907, |
| "grad_norm": 0.20118004083633423, |
| "learning_rate": 7.943251937356158e-05, |
| "loss": 1.9674, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.3049124788255223, |
| "grad_norm": 0.24078606069087982, |
| "learning_rate": 7.928819946367772e-05, |
| "loss": 1.8938, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.3060417843026539, |
| "grad_norm": 0.29334747791290283, |
| "learning_rate": 7.914350712766575e-05, |
| "loss": 1.8251, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.3071710897797854, |
| "grad_norm": 0.2302616685628891, |
| "learning_rate": 7.899844420542047e-05, |
| "loss": 1.9416, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.308300395256917, |
| "grad_norm": 0.3773344159126282, |
| "learning_rate": 7.885301254154908e-05, |
| "loss": 1.9136, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.3094297007340486, |
| "grad_norm": 0.2522296607494354, |
| "learning_rate": 7.870721398534762e-05, |
| "loss": 2.0552, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.3105590062111801, |
| "grad_norm": 0.2509544789791107, |
| "learning_rate": 7.856105039077748e-05, |
| "loss": 2.0108, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.3116883116883117, |
| "grad_norm": 0.2968475818634033, |
| "learning_rate": 7.841452361644188e-05, |
| "loss": 2.0004, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.31281761716544326, |
| "grad_norm": 0.2396615594625473, |
| "learning_rate": 7.826763552556222e-05, |
| "loss": 1.8284, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.31394692264257484, |
| "grad_norm": 0.22894443571567535, |
| "learning_rate": 7.812038798595431e-05, |
| "loss": 2.1324, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.31507622811970637, |
| "grad_norm": 0.27567097544670105, |
| "learning_rate": 7.797278287000475e-05, |
| "loss": 1.6882, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.31620553359683795, |
| "grad_norm": 0.28118517994880676, |
| "learning_rate": 7.782482205464696e-05, |
| "loss": 1.8576, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.31733483907396953, |
| "grad_norm": 0.2652731239795685, |
| "learning_rate": 7.767650742133747e-05, |
| "loss": 1.9145, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.31846414455110106, |
| "grad_norm": 0.29614031314849854, |
| "learning_rate": 7.752784085603193e-05, |
| "loss": 1.6665, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.31959345002823264, |
| "grad_norm": 0.3341023921966553, |
| "learning_rate": 7.737882424916107e-05, |
| "loss": 1.4237, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.3207227555053642, |
| "grad_norm": 0.3023217022418976, |
| "learning_rate": 7.722945949560678e-05, |
| "loss": 1.7299, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.32185206098249575, |
| "grad_norm": 0.30460652709007263, |
| "learning_rate": 7.707974849467791e-05, |
| "loss": 1.4939, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.32298136645962733, |
| "grad_norm": 0.3721717894077301, |
| "learning_rate": 7.692969315008616e-05, |
| "loss": 1.2991, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.3241106719367589, |
| "grad_norm": 0.31576859951019287, |
| "learning_rate": 7.677929536992194e-05, |
| "loss": 1.7151, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.32523997741389044, |
| "grad_norm": 0.3870582580566406, |
| "learning_rate": 7.662855706662992e-05, |
| "loss": 1.6205, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.326369282891022, |
| "grad_norm": 0.45515769720077515, |
| "learning_rate": 7.647748015698495e-05, |
| "loss": 1.8497, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.3274985883681536, |
| "grad_norm": 0.4135870337486267, |
| "learning_rate": 7.632606656206748e-05, |
| "loss": 1.669, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.3286278938452851, |
| "grad_norm": 0.5970892906188965, |
| "learning_rate": 7.617431820723928e-05, |
| "loss": 2.045, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.3297571993224167, |
| "grad_norm": 0.560570478439331, |
| "learning_rate": 7.602223702211888e-05, |
| "loss": 1.739, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.3308865047995483, |
| "grad_norm": 0.7267481088638306, |
| "learning_rate": 7.586982494055703e-05, |
| "loss": 1.2534, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.33201581027667987, |
| "grad_norm": 0.6260027289390564, |
| "learning_rate": 7.571708390061215e-05, |
| "loss": 1.326, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.3331451157538114, |
| "grad_norm": 0.637332558631897, |
| "learning_rate": 7.556401584452565e-05, |
| "loss": 1.209, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.334274421230943, |
| "grad_norm": 0.6579151153564453, |
| "learning_rate": 7.541062271869727e-05, |
| "loss": 1.3, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.33540372670807456, |
| "grad_norm": 0.9693841934204102, |
| "learning_rate": 7.525690647366032e-05, |
| "loss": 1.4362, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.3365330321852061, |
| "grad_norm": 1.3491055965423584, |
| "learning_rate": 7.510286906405679e-05, |
| "loss": 1.8567, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.33766233766233766, |
| "grad_norm": 1.6843390464782715, |
| "learning_rate": 7.494851244861265e-05, |
| "loss": 2.1742, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.33879164313946925, |
| "grad_norm": 2.8176753520965576, |
| "learning_rate": 7.479383859011282e-05, |
| "loss": 2.1492, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.33992094861660077, |
| "grad_norm": 0.11592619866132736, |
| "learning_rate": 7.463884945537629e-05, |
| "loss": 1.9557, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.34105025409373235, |
| "grad_norm": 0.09616155922412872, |
| "learning_rate": 7.448354701523103e-05, |
| "loss": 1.5362, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.34217955957086393, |
| "grad_norm": 0.1662580966949463, |
| "learning_rate": 7.4327933244489e-05, |
| "loss": 1.714, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.34330886504799546, |
| "grad_norm": 0.1190900206565857, |
| "learning_rate": 7.417201012192102e-05, |
| "loss": 1.7049, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.34443817052512704, |
| "grad_norm": 0.17841818928718567, |
| "learning_rate": 7.401577963023159e-05, |
| "loss": 1.4065, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.3455674760022586, |
| "grad_norm": 0.14446666836738586, |
| "learning_rate": 7.385924375603365e-05, |
| "loss": 1.9471, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.34669678147939015, |
| "grad_norm": 0.1485661119222641, |
| "learning_rate": 7.370240448982344e-05, |
| "loss": 1.536, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.34782608695652173, |
| "grad_norm": 0.14800912141799927, |
| "learning_rate": 7.354526382595502e-05, |
| "loss": 1.9858, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.3489553924336533, |
| "grad_norm": 0.16698437929153442, |
| "learning_rate": 7.338782376261508e-05, |
| "loss": 1.7016, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.3500846979107849, |
| "grad_norm": 0.2107677012681961, |
| "learning_rate": 7.323008630179735e-05, |
| "loss": 1.9076, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.3512140033879164, |
| "grad_norm": 0.17815802991390228, |
| "learning_rate": 7.307205344927733e-05, |
| "loss": 1.9451, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.352343308865048, |
| "grad_norm": 0.22763967514038086, |
| "learning_rate": 7.291372721458663e-05, |
| "loss": 1.7138, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.3534726143421796, |
| "grad_norm": 0.2137778401374817, |
| "learning_rate": 7.275510961098754e-05, |
| "loss": 1.7655, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.3546019198193111, |
| "grad_norm": 0.3158370852470398, |
| "learning_rate": 7.25962026554473e-05, |
| "loss": 1.7698, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.3557312252964427, |
| "grad_norm": 0.2486497312784195, |
| "learning_rate": 7.243700836861259e-05, |
| "loss": 1.4603, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.35686053077357427, |
| "grad_norm": 0.2606787383556366, |
| "learning_rate": 7.227752877478372e-05, |
| "loss": 1.8183, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.3579898362507058, |
| "grad_norm": 0.21474474668502808, |
| "learning_rate": 7.211776590188898e-05, |
| "loss": 1.8927, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.3591191417278374, |
| "grad_norm": 0.21336178481578827, |
| "learning_rate": 7.195772178145877e-05, |
| "loss": 1.7893, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.36024844720496896, |
| "grad_norm": 0.22856034338474274, |
| "learning_rate": 7.179739844859986e-05, |
| "loss": 1.8327, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.3613777526821005, |
| "grad_norm": 0.19083748757839203, |
| "learning_rate": 7.163679794196937e-05, |
| "loss": 1.4431, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.36250705815923207, |
| "grad_norm": 0.21726277470588684, |
| "learning_rate": 7.147592230374907e-05, |
| "loss": 1.8487, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.36363636363636365, |
| "grad_norm": 0.2296500951051712, |
| "learning_rate": 7.131477357961913e-05, |
| "loss": 1.8114, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.36476566911349523, |
| "grad_norm": 0.21099169552326202, |
| "learning_rate": 7.115335381873241e-05, |
| "loss": 1.7014, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.36589497459062675, |
| "grad_norm": 0.2625615894794464, |
| "learning_rate": 7.09916650736881e-05, |
| "loss": 1.816, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.36702428006775834, |
| "grad_norm": 0.2657228112220764, |
| "learning_rate": 7.082970940050589e-05, |
| "loss": 1.6727, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.3681535855448899, |
| "grad_norm": 0.2254035770893097, |
| "learning_rate": 7.06674888585996e-05, |
| "loss": 1.9412, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.36928289102202144, |
| "grad_norm": 0.3340676426887512, |
| "learning_rate": 7.050500551075121e-05, |
| "loss": 1.8683, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.370412196499153, |
| "grad_norm": 0.2350630760192871, |
| "learning_rate": 7.03422614230844e-05, |
| "loss": 1.7303, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.3715415019762846, |
| "grad_norm": 0.27818697690963745, |
| "learning_rate": 7.017925866503852e-05, |
| "loss": 1.642, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.37267080745341613, |
| "grad_norm": 0.2882806062698364, |
| "learning_rate": 7.001599930934201e-05, |
| "loss": 1.8531, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.3738001129305477, |
| "grad_norm": 0.2653593122959137, |
| "learning_rate": 6.985248543198628e-05, |
| "loss": 1.7041, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.3749294184076793, |
| "grad_norm": 0.34544265270233154, |
| "learning_rate": 6.96887191121992e-05, |
| "loss": 1.4506, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.3760587238848108, |
| "grad_norm": 0.33744436502456665, |
| "learning_rate": 6.952470243241865e-05, |
| "loss": 1.9651, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.3771880293619424, |
| "grad_norm": 0.3108724057674408, |
| "learning_rate": 6.936043747826608e-05, |
| "loss": 1.6384, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.378317334839074, |
| "grad_norm": 0.3719252347946167, |
| "learning_rate": 6.919592633851999e-05, |
| "loss": 1.7721, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.3794466403162055, |
| "grad_norm": 0.39266571402549744, |
| "learning_rate": 6.903117110508931e-05, |
| "loss": 1.4012, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.3805759457933371, |
| "grad_norm": 0.4938894808292389, |
| "learning_rate": 6.886617387298689e-05, |
| "loss": 1.3363, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.38170525127046867, |
| "grad_norm": 0.4643020033836365, |
| "learning_rate": 6.870093674030277e-05, |
| "loss": 1.8535, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.38283455674760025, |
| "grad_norm": 0.4194999039173126, |
| "learning_rate": 6.853546180817763e-05, |
| "loss": 1.3464, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.3839638622247318, |
| "grad_norm": 0.4396350383758545, |
| "learning_rate": 6.836975118077585e-05, |
| "loss": 1.5401, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.38509316770186336, |
| "grad_norm": 0.5123130679130554, |
| "learning_rate": 6.8203806965259e-05, |
| "loss": 1.4395, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.38622247317899494, |
| "grad_norm": 0.5199635624885559, |
| "learning_rate": 6.803763127175892e-05, |
| "loss": 1.3563, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.38735177865612647, |
| "grad_norm": 0.5113270282745361, |
| "learning_rate": 6.787122621335084e-05, |
| "loss": 1.2656, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.38848108413325805, |
| "grad_norm": 0.8492355942726135, |
| "learning_rate": 6.770459390602665e-05, |
| "loss": 1.1771, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.38961038961038963, |
| "grad_norm": 0.6425052881240845, |
| "learning_rate": 6.75377364686679e-05, |
| "loss": 1.0433, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.39073969508752115, |
| "grad_norm": 0.7178412675857544, |
| "learning_rate": 6.73706560230188e-05, |
| "loss": 1.3795, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.39186900056465274, |
| "grad_norm": 0.8413724899291992, |
| "learning_rate": 6.720335469365943e-05, |
| "loss": 1.3675, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.3929983060417843, |
| "grad_norm": 1.199455738067627, |
| "learning_rate": 6.703583460797851e-05, |
| "loss": 1.1964, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.39412761151891584, |
| "grad_norm": 1.3447051048278809, |
| "learning_rate": 6.686809789614652e-05, |
| "loss": 1.7217, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.3952569169960474, |
| "grad_norm": 3.1556925773620605, |
| "learning_rate": 6.670014669108846e-05, |
| "loss": 2.2129, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.396386222473179, |
| "grad_norm": 0.12518011033535004, |
| "learning_rate": 6.65319831284569e-05, |
| "loss": 1.803, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.39751552795031053, |
| "grad_norm": 0.11627189069986343, |
| "learning_rate": 6.636360934660464e-05, |
| "loss": 1.5064, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.3986448334274421, |
| "grad_norm": 0.2026718109846115, |
| "learning_rate": 6.619502748655768e-05, |
| "loss": 1.2533, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.3997741389045737, |
| "grad_norm": 0.1420208066701889, |
| "learning_rate": 6.602623969198786e-05, |
| "loss": 1.7044, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.4009034443817053, |
| "grad_norm": 0.2128676176071167, |
| "learning_rate": 6.585724810918575e-05, |
| "loss": 1.4417, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.4020327498588368, |
| "grad_norm": 0.16009800136089325, |
| "learning_rate": 6.568805488703316e-05, |
| "loss": 1.7954, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.4031620553359684, |
| "grad_norm": 0.22958825528621674, |
| "learning_rate": 6.551866217697602e-05, |
| "loss": 1.4241, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.40429136081309996, |
| "grad_norm": 0.18111076951026917, |
| "learning_rate": 6.53490721329969e-05, |
| "loss": 2.1858, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.4054206662902315, |
| "grad_norm": 0.20647546648979187, |
| "learning_rate": 6.517928691158766e-05, |
| "loss": 1.6176, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.40654997176736307, |
| "grad_norm": 0.22477790713310242, |
| "learning_rate": 6.500930867172197e-05, |
| "loss": 2.0384, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.40767927724449465, |
| "grad_norm": 0.19441178441047668, |
| "learning_rate": 6.4839139574828e-05, |
| "loss": 1.8078, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.4088085827216262, |
| "grad_norm": 0.1936100721359253, |
| "learning_rate": 6.466878178476072e-05, |
| "loss": 2.1223, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.40993788819875776, |
| "grad_norm": 0.16981804370880127, |
| "learning_rate": 6.449823746777463e-05, |
| "loss": 1.7857, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.41106719367588934, |
| "grad_norm": 0.18102099001407623, |
| "learning_rate": 6.4327508792496e-05, |
| "loss": 1.9586, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.41219649915302087, |
| "grad_norm": 0.2525807023048401, |
| "learning_rate": 6.415659792989543e-05, |
| "loss": 1.3721, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.41332580463015245, |
| "grad_norm": 0.2487485557794571, |
| "learning_rate": 6.398550705326017e-05, |
| "loss": 1.5905, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.41445511010728403, |
| "grad_norm": 0.2146550863981247, |
| "learning_rate": 6.381423833816653e-05, |
| "loss": 2.0711, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.4155844155844156, |
| "grad_norm": 0.2551126778125763, |
| "learning_rate": 6.364279396245216e-05, |
| "loss": 1.9295, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.41671372106154714, |
| "grad_norm": 0.21161001920700073, |
| "learning_rate": 6.347117610618847e-05, |
| "loss": 1.8385, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.4178430265386787, |
| "grad_norm": 0.2629604935646057, |
| "learning_rate": 6.329938695165279e-05, |
| "loss": 1.6472, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.4189723320158103, |
| "grad_norm": 0.265480101108551, |
| "learning_rate": 6.312742868330063e-05, |
| "loss": 1.9359, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.4201016374929418, |
| "grad_norm": 0.21452881395816803, |
| "learning_rate": 6.295530348773799e-05, |
| "loss": 1.9496, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.4212309429700734, |
| "grad_norm": 0.25752097368240356, |
| "learning_rate": 6.278301355369347e-05, |
| "loss": 1.7407, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.422360248447205, |
| "grad_norm": 0.2401757687330246, |
| "learning_rate": 6.26105610719905e-05, |
| "loss": 1.9668, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.4234895539243365, |
| "grad_norm": 0.2806456983089447, |
| "learning_rate": 6.243794823551943e-05, |
| "loss": 1.8072, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.4246188594014681, |
| "grad_norm": 0.22247202694416046, |
| "learning_rate": 6.226517723920965e-05, |
| "loss": 1.9812, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.4257481648785997, |
| "grad_norm": 0.28262099623680115, |
| "learning_rate": 6.209225028000173e-05, |
| "loss": 1.8815, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.4268774703557312, |
| "grad_norm": 0.26785987615585327, |
| "learning_rate": 6.191916955681942e-05, |
| "loss": 1.7281, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.4280067758328628, |
| "grad_norm": 0.26913347840309143, |
| "learning_rate": 6.174593727054176e-05, |
| "loss": 1.9988, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.42913608130999437, |
| "grad_norm": 0.2614772617816925, |
| "learning_rate": 6.157255562397501e-05, |
| "loss": 2.0497, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.4302653867871259, |
| "grad_norm": 0.28345417976379395, |
| "learning_rate": 6.139902682182472e-05, |
| "loss": 1.4859, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.4313946922642575, |
| "grad_norm": 0.38483351469039917, |
| "learning_rate": 6.122535307066762e-05, |
| "loss": 1.8345, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.43252399774138905, |
| "grad_norm": 0.3730812966823578, |
| "learning_rate": 6.105153657892361e-05, |
| "loss": 1.9294, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.43365330321852064, |
| "grad_norm": 0.300212562084198, |
| "learning_rate": 6.0877579556827666e-05, |
| "loss": 1.8527, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.43478260869565216, |
| "grad_norm": 0.3289623260498047, |
| "learning_rate": 6.0703484216401775e-05, |
| "loss": 1.4542, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.43591191417278374, |
| "grad_norm": 0.33357787132263184, |
| "learning_rate": 6.0529252771426704e-05, |
| "loss": 1.6104, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.4370412196499153, |
| "grad_norm": 0.3852611184120178, |
| "learning_rate": 6.0354887437413965e-05, |
| "loss": 1.7609, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.43817052512704685, |
| "grad_norm": 0.4012943506240845, |
| "learning_rate": 6.018039043157755e-05, |
| "loss": 1.5484, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.43929983060417843, |
| "grad_norm": 0.48133382201194763, |
| "learning_rate": 6.000576397280582e-05, |
| "loss": 1.5734, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.44042913608131, |
| "grad_norm": 0.5372048020362854, |
| "learning_rate": 5.9831010281633194e-05, |
| "loss": 1.4444, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.44155844155844154, |
| "grad_norm": 0.4447720944881439, |
| "learning_rate": 5.965613158021204e-05, |
| "loss": 1.7868, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.4426877470355731, |
| "grad_norm": 0.48003891110420227, |
| "learning_rate": 5.948113009228426e-05, |
| "loss": 1.861, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.4438170525127047, |
| "grad_norm": 0.5163267850875854, |
| "learning_rate": 5.9306008043153164e-05, |
| "loss": 1.5805, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.4449463579898362, |
| "grad_norm": 0.9821428060531616, |
| "learning_rate": 5.9130767659655086e-05, |
| "loss": 1.3601, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.4460756634669678, |
| "grad_norm": 0.8409574627876282, |
| "learning_rate": 5.895541117013109e-05, |
| "loss": 1.5384, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.4472049689440994, |
| "grad_norm": 0.7223172187805176, |
| "learning_rate": 5.877994080439861e-05, |
| "loss": 1.5718, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.4483342744212309, |
| "grad_norm": 0.7617491483688354, |
| "learning_rate": 5.860435879372319e-05, |
| "loss": 0.8487, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.4494635798983625, |
| "grad_norm": 1.3099548816680908, |
| "learning_rate": 5.842866737078995e-05, |
| "loss": 1.9787, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.4505928853754941, |
| "grad_norm": 1.2898166179656982, |
| "learning_rate": 5.825286876967534e-05, |
| "loss": 1.2648, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.45172219085262566, |
| "grad_norm": 2.04656982421875, |
| "learning_rate": 5.807696522581867e-05, |
| "loss": 1.3147, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.4528514963297572, |
| "grad_norm": 0.14516253769397736, |
| "learning_rate": 5.7900958975993705e-05, |
| "loss": 1.6677, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.45398080180688877, |
| "grad_norm": 0.11851727962493896, |
| "learning_rate": 5.772485225828017e-05, |
| "loss": 1.3764, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.45511010728402035, |
| "grad_norm": 0.1660085767507553, |
| "learning_rate": 5.754864731203537e-05, |
| "loss": 0.8836, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.4562394127611519, |
| "grad_norm": 0.1634090393781662, |
| "learning_rate": 5.737234637786567e-05, |
| "loss": 1.7064, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.45736871823828346, |
| "grad_norm": 0.1739731878042221, |
| "learning_rate": 5.7195951697597984e-05, |
| "loss": 1.4277, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.45849802371541504, |
| "grad_norm": 0.15163052082061768, |
| "learning_rate": 5.7019465514251317e-05, |
| "loss": 1.7148, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.45962732919254656, |
| "grad_norm": 0.18431293964385986, |
| "learning_rate": 5.684289007200819e-05, |
| "loss": 1.5384, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.46075663466967814, |
| "grad_norm": 0.194001704454422, |
| "learning_rate": 5.6666227616186196e-05, |
| "loss": 1.5875, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.4618859401468097, |
| "grad_norm": 0.19858571887016296, |
| "learning_rate": 5.648948039320932e-05, |
| "loss": 1.6302, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.46301524562394125, |
| "grad_norm": 0.20067928731441498, |
| "learning_rate": 5.6312650650579434e-05, |
| "loss": 1.6761, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.46414455110107283, |
| "grad_norm": 0.20594027638435364, |
| "learning_rate": 5.61357406368478e-05, |
| "loss": 2.0831, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.4652738565782044, |
| "grad_norm": 0.20570452511310577, |
| "learning_rate": 5.595875260158632e-05, |
| "loss": 1.5535, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.466403162055336, |
| "grad_norm": 0.21128208935260773, |
| "learning_rate": 5.578168879535905e-05, |
| "loss": 1.5396, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.4675324675324675, |
| "grad_norm": 0.22810891270637512, |
| "learning_rate": 5.560455146969351e-05, |
| "loss": 2.0007, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.4686617730095991, |
| "grad_norm": 0.23436829447746277, |
| "learning_rate": 5.5427342877052146e-05, |
| "loss": 1.7637, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.4697910784867307, |
| "grad_norm": 0.20258279144763947, |
| "learning_rate": 5.525006527080356e-05, |
| "loss": 1.9177, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.4709203839638622, |
| "grad_norm": 0.3795338571071625, |
| "learning_rate": 5.5072720905193984e-05, |
| "loss": 1.933, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.4720496894409938, |
| "grad_norm": 0.2246110886335373, |
| "learning_rate": 5.4895312035318505e-05, |
| "loss": 1.8509, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.4731789949181254, |
| "grad_norm": 0.20534281432628632, |
| "learning_rate": 5.471784091709249e-05, |
| "loss": 1.7642, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.4743083003952569, |
| "grad_norm": 0.21959885954856873, |
| "learning_rate": 5.4540309807222787e-05, |
| "loss": 1.8869, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.4754376058723885, |
| "grad_norm": 0.23442259430885315, |
| "learning_rate": 5.4362720963179184e-05, |
| "loss": 1.7901, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.47656691134952006, |
| "grad_norm": 0.2303483933210373, |
| "learning_rate": 5.418507664316551e-05, |
| "loss": 1.8651, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.4776962168266516, |
| "grad_norm": 0.23268766701221466, |
| "learning_rate": 5.40073791060911e-05, |
| "loss": 1.8254, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.47882552230378317, |
| "grad_norm": 0.5629847049713135, |
| "learning_rate": 5.382963061154194e-05, |
| "loss": 1.9735, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.47995482778091475, |
| "grad_norm": 0.3409874141216278, |
| "learning_rate": 5.3651833419752026e-05, |
| "loss": 1.9609, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.4810841332580463, |
| "grad_norm": 0.24846181273460388, |
| "learning_rate": 5.347398979157455e-05, |
| "loss": 1.5292, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.48221343873517786, |
| "grad_norm": 0.32662272453308105, |
| "learning_rate": 5.329610198845322e-05, |
| "loss": 1.5789, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.48334274421230944, |
| "grad_norm": 0.2438475340604782, |
| "learning_rate": 5.311817227239343e-05, |
| "loss": 1.5546, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.484472049689441, |
| "grad_norm": 0.3485732972621918, |
| "learning_rate": 5.2940202905933576e-05, |
| "loss": 1.9349, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.48560135516657255, |
| "grad_norm": 0.30109038949012756, |
| "learning_rate": 5.276219615211622e-05, |
| "loss": 1.7651, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.4867306606437041, |
| "grad_norm": 0.29728347063064575, |
| "learning_rate": 5.258415427445933e-05, |
| "loss": 1.7196, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.4878599661208357, |
| "grad_norm": 0.3463418483734131, |
| "learning_rate": 5.240607953692751e-05, |
| "loss": 1.4324, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.48898927159796723, |
| "grad_norm": 0.28663697838783264, |
| "learning_rate": 5.222797420390325e-05, |
| "loss": 1.6675, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.4901185770750988, |
| "grad_norm": 0.3074226379394531, |
| "learning_rate": 5.204984054015803e-05, |
| "loss": 1.503, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.4912478825522304, |
| "grad_norm": 0.3598564863204956, |
| "learning_rate": 5.187168081082361e-05, |
| "loss": 1.5335, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.4923771880293619, |
| "grad_norm": 0.3530828654766083, |
| "learning_rate": 5.169349728136319e-05, |
| "loss": 1.3829, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.4935064935064935, |
| "grad_norm": 0.42433059215545654, |
| "learning_rate": 5.151529221754262e-05, |
| "loss": 1.5466, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.4946357989836251, |
| "grad_norm": 0.37672513723373413, |
| "learning_rate": 5.133706788540157e-05, |
| "loss": 1.5813, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.4957651044607566, |
| "grad_norm": 0.5108361840248108, |
| "learning_rate": 5.1158826551224736e-05, |
| "loss": 1.8655, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.4968944099378882, |
| "grad_norm": 0.43977028131484985, |
| "learning_rate": 5.098057048151298e-05, |
| "loss": 1.6398, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.4980237154150198, |
| "grad_norm": 0.5553740859031677, |
| "learning_rate": 5.0802301942954586e-05, |
| "loss": 1.6456, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.4991530208921513, |
| "grad_norm": 0.4294775724411011, |
| "learning_rate": 5.0624023202396346e-05, |
| "loss": 1.5727, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.5002823263692829, |
| "grad_norm": 0.5539741516113281, |
| "learning_rate": 5.0445736526814814e-05, |
| "loss": 1.0095, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.5014116318464145, |
| "grad_norm": 0.7881059646606445, |
| "learning_rate": 5.026744418328741e-05, |
| "loss": 1.175, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.502540937323546, |
| "grad_norm": 0.5578503608703613, |
| "learning_rate": 5.0089148438963664e-05, |
| "loss": 0.7371, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.5036702428006776, |
| "grad_norm": 0.6809929609298706, |
| "learning_rate": 4.991085156103635e-05, |
| "loss": 1.0037, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.5047995482778092, |
| "grad_norm": 0.6220012307167053, |
| "learning_rate": 4.97325558167126e-05, |
| "loss": 1.1134, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.5059288537549407, |
| "grad_norm": 1.3821215629577637, |
| "learning_rate": 4.955426347318521e-05, |
| "loss": 1.3825, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.5070581592320723, |
| "grad_norm": 1.268893837928772, |
| "learning_rate": 4.9375976797603666e-05, |
| "loss": 1.0811, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.5081874647092038, |
| "grad_norm": 3.948934555053711, |
| "learning_rate": 4.9197698057045426e-05, |
| "loss": 1.8798, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.5093167701863354, |
| "grad_norm": 0.11119662970304489, |
| "learning_rate": 4.9019429518487034e-05, |
| "loss": 1.6768, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.510446075663467, |
| "grad_norm": 0.12353526800870895, |
| "learning_rate": 4.884117344877528e-05, |
| "loss": 1.3001, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.5115753811405985, |
| "grad_norm": 0.17551442980766296, |
| "learning_rate": 4.866293211459844e-05, |
| "loss": 1.1936, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.51270468661773, |
| "grad_norm": 0.16263023018836975, |
| "learning_rate": 4.8484707782457384e-05, |
| "loss": 1.7866, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.5138339920948617, |
| "grad_norm": 0.16919438540935516, |
| "learning_rate": 4.830650271863681e-05, |
| "loss": 1.479, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.5149632975719932, |
| "grad_norm": 0.1805877834558487, |
| "learning_rate": 4.812831918917641e-05, |
| "loss": 1.4336, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.5160926030491247, |
| "grad_norm": 0.2215701937675476, |
| "learning_rate": 4.795015945984198e-05, |
| "loss": 1.7715, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.5172219085262564, |
| "grad_norm": 0.2349424511194229, |
| "learning_rate": 4.777202579609676e-05, |
| "loss": 2.181, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.5183512140033879, |
| "grad_norm": 0.21069027483463287, |
| "learning_rate": 4.7593920463072485e-05, |
| "loss": 1.8312, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.5194805194805194, |
| "grad_norm": 0.23758326470851898, |
| "learning_rate": 4.74158457255407e-05, |
| "loss": 1.7325, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.5206098249576511, |
| "grad_norm": 0.19673678278923035, |
| "learning_rate": 4.72378038478838e-05, |
| "loss": 1.6742, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.5217391304347826, |
| "grad_norm": 0.22907458245754242, |
| "learning_rate": 4.705979709406643e-05, |
| "loss": 1.6255, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.5228684359119141, |
| "grad_norm": 0.2475663274526596, |
| "learning_rate": 4.688182772760656e-05, |
| "loss": 1.5928, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.5239977413890458, |
| "grad_norm": 0.18993178009986877, |
| "learning_rate": 4.67038980115468e-05, |
| "loss": 2.0439, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.5251270468661773, |
| "grad_norm": 0.19921617209911346, |
| "learning_rate": 4.652601020842546e-05, |
| "loss": 1.8901, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.5262563523433089, |
| "grad_norm": 0.1859847903251648, |
| "learning_rate": 4.6348166580247986e-05, |
| "loss": 1.7906, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.5273856578204404, |
| "grad_norm": 0.2471705973148346, |
| "learning_rate": 4.617036938845806e-05, |
| "loss": 1.541, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.528514963297572, |
| "grad_norm": 0.18537718057632446, |
| "learning_rate": 4.599262089390892e-05, |
| "loss": 1.7591, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.5296442687747036, |
| "grad_norm": 0.20029859244823456, |
| "learning_rate": 4.5814923356834507e-05, |
| "loss": 1.8927, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.5307735742518351, |
| "grad_norm": 0.19296778738498688, |
| "learning_rate": 4.563727903682083e-05, |
| "loss": 1.724, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.5319028797289667, |
| "grad_norm": 0.22295331954956055, |
| "learning_rate": 4.5459690192777205e-05, |
| "loss": 1.837, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.5330321852060983, |
| "grad_norm": 0.22168877720832825, |
| "learning_rate": 4.528215908290753e-05, |
| "loss": 1.6778, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.5341614906832298, |
| "grad_norm": 0.20707228779792786, |
| "learning_rate": 4.510468796468151e-05, |
| "loss": 1.9187, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.5352907961603613, |
| "grad_norm": 0.2480616569519043, |
| "learning_rate": 4.492727909480603e-05, |
| "loss": 1.7309, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.536420101637493, |
| "grad_norm": 0.2842020094394684, |
| "learning_rate": 4.4749934729196444e-05, |
| "loss": 1.8762, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.5375494071146245, |
| "grad_norm": 0.2506377696990967, |
| "learning_rate": 4.457265712294787e-05, |
| "loss": 1.7255, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.538678712591756, |
| "grad_norm": 0.3282622992992401, |
| "learning_rate": 4.43954485303065e-05, |
| "loss": 1.8469, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.5398080180688877, |
| "grad_norm": 0.22683817148208618, |
| "learning_rate": 4.4218311204640964e-05, |
| "loss": 1.8733, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.5409373235460192, |
| "grad_norm": 0.3191218972206116, |
| "learning_rate": 4.404124739841368e-05, |
| "loss": 1.98, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.5420666290231507, |
| "grad_norm": 0.2846674621105194, |
| "learning_rate": 4.386425936315221e-05, |
| "loss": 1.38, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.5431959345002824, |
| "grad_norm": 0.31951338052749634, |
| "learning_rate": 4.368734934942057e-05, |
| "loss": 1.6798, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.5443252399774139, |
| "grad_norm": 0.27403882145881653, |
| "learning_rate": 4.35105196067907e-05, |
| "loss": 1.3672, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.5454545454545454, |
| "grad_norm": 0.36696842312812805, |
| "learning_rate": 4.333377238381381e-05, |
| "loss": 1.7081, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.546583850931677, |
| "grad_norm": 0.3341814875602722, |
| "learning_rate": 4.315710992799182e-05, |
| "loss": 1.6637, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.5477131564088086, |
| "grad_norm": 0.381578654050827, |
| "learning_rate": 4.29805344857487e-05, |
| "loss": 1.82, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.5488424618859401, |
| "grad_norm": 0.47679901123046875, |
| "learning_rate": 4.280404830240202e-05, |
| "loss": 1.5453, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.5499717673630717, |
| "grad_norm": 0.2999393045902252, |
| "learning_rate": 4.2627653622134346e-05, |
| "loss": 1.4535, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.5511010728402033, |
| "grad_norm": 0.39911600947380066, |
| "learning_rate": 4.245135268796464e-05, |
| "loss": 1.526, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.5522303783173348, |
| "grad_norm": 0.4650627672672272, |
| "learning_rate": 4.2275147741719836e-05, |
| "loss": 1.8196, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.5533596837944664, |
| "grad_norm": 0.4915354549884796, |
| "learning_rate": 4.20990410240063e-05, |
| "loss": 1.5382, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.554488989271598, |
| "grad_norm": 0.5710887312889099, |
| "learning_rate": 4.192303477418132e-05, |
| "loss": 1.8352, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.5556182947487295, |
| "grad_norm": 0.7017544507980347, |
| "learning_rate": 4.1747131230324674e-05, |
| "loss": 1.5447, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.5567476002258611, |
| "grad_norm": 0.5755261182785034, |
| "learning_rate": 4.157133262921007e-05, |
| "loss": 1.0984, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.5578769057029926, |
| "grad_norm": 0.5910277366638184, |
| "learning_rate": 4.139564120627682e-05, |
| "loss": 1.2644, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.5590062111801242, |
| "grad_norm": 0.7593830227851868, |
| "learning_rate": 4.122005919560138e-05, |
| "loss": 1.0888, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.5601355166572558, |
| "grad_norm": 0.9940705895423889, |
| "learning_rate": 4.104458882986893e-05, |
| "loss": 1.3263, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.5612648221343873, |
| "grad_norm": 0.9789013862609863, |
| "learning_rate": 4.086923234034493e-05, |
| "loss": 2.0872, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.562394127611519, |
| "grad_norm": 1.05492103099823, |
| "learning_rate": 4.069399195684684e-05, |
| "loss": 1.5978, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.5635234330886505, |
| "grad_norm": 1.5677920579910278, |
| "learning_rate": 4.051886990771575e-05, |
| "loss": 1.4674, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.564652738565782, |
| "grad_norm": 2.827597141265869, |
| "learning_rate": 4.034386841978799e-05, |
| "loss": 1.6893, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.5657820440429137, |
| "grad_norm": 0.11306215822696686, |
| "learning_rate": 4.016898971836682e-05, |
| "loss": 1.8998, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.5669113495200452, |
| "grad_norm": 0.12952418625354767, |
| "learning_rate": 3.999423602719419e-05, |
| "loss": 1.4633, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.5680406549971767, |
| "grad_norm": 0.1617014855146408, |
| "learning_rate": 3.9819609568422444e-05, |
| "loss": 1.4874, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.5691699604743083, |
| "grad_norm": 0.24738018214702606, |
| "learning_rate": 3.964511256258605e-05, |
| "loss": 1.2834, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.5702992659514399, |
| "grad_norm": 0.15966880321502686, |
| "learning_rate": 3.94707472285733e-05, |
| "loss": 1.913, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.5714285714285714, |
| "grad_norm": 0.2285860925912857, |
| "learning_rate": 3.929651578359823e-05, |
| "loss": 1.1719, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.572557876905703, |
| "grad_norm": 0.149735227227211, |
| "learning_rate": 3.912242044317233e-05, |
| "loss": 1.6384, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.5736871823828346, |
| "grad_norm": 0.1617288887500763, |
| "learning_rate": 3.8948463421076416e-05, |
| "loss": 1.7541, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.5748164878599661, |
| "grad_norm": 0.20702283084392548, |
| "learning_rate": 3.87746469293324e-05, |
| "loss": 1.3209, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.5759457933370977, |
| "grad_norm": 0.21300503611564636, |
| "learning_rate": 3.860097317817529e-05, |
| "loss": 1.2512, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.5770750988142292, |
| "grad_norm": 0.18188899755477905, |
| "learning_rate": 3.842744437602498e-05, |
| "loss": 2.1871, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.5782044042913608, |
| "grad_norm": 0.21721036732196808, |
| "learning_rate": 3.825406272945825e-05, |
| "loss": 1.8422, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.5793337097684924, |
| "grad_norm": 0.19083920121192932, |
| "learning_rate": 3.8080830443180586e-05, |
| "loss": 1.6101, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.5804630152456239, |
| "grad_norm": 0.2515491545200348, |
| "learning_rate": 3.7907749719998283e-05, |
| "loss": 1.2861, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.5815923207227555, |
| "grad_norm": 0.16136078536510468, |
| "learning_rate": 3.773482276079035e-05, |
| "loss": 1.9461, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.5827216261998871, |
| "grad_norm": 0.212098628282547, |
| "learning_rate": 3.7562051764480584e-05, |
| "loss": 2.0323, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.5838509316770186, |
| "grad_norm": 0.28270667791366577, |
| "learning_rate": 3.738943892800951e-05, |
| "loss": 1.5369, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.5849802371541502, |
| "grad_norm": 0.22667361795902252, |
| "learning_rate": 3.721698644630653e-05, |
| "loss": 1.7268, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.5861095426312818, |
| "grad_norm": 0.23249778151512146, |
| "learning_rate": 3.704469651226202e-05, |
| "loss": 2.135, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.5872388481084133, |
| "grad_norm": 0.2777628004550934, |
| "learning_rate": 3.687257131669939e-05, |
| "loss": 1.4989, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.5883681535855448, |
| "grad_norm": 0.2231837958097458, |
| "learning_rate": 3.6700613048347226e-05, |
| "loss": 1.761, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.5894974590626765, |
| "grad_norm": 0.2245432585477829, |
| "learning_rate": 3.652882389381154e-05, |
| "loss": 1.5851, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.590626764539808, |
| "grad_norm": 0.2458360344171524, |
| "learning_rate": 3.635720603754785e-05, |
| "loss": 1.8387, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.5917560700169395, |
| "grad_norm": 0.2425604611635208, |
| "learning_rate": 3.61857616618335e-05, |
| "loss": 1.984, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.5928853754940712, |
| "grad_norm": 0.2515004575252533, |
| "learning_rate": 3.601449294673984e-05, |
| "loss": 1.5634, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.5940146809712027, |
| "grad_norm": 0.29173892736434937, |
| "learning_rate": 3.5843402070104575e-05, |
| "loss": 2.1866, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.5951439864483343, |
| "grad_norm": 0.2624877691268921, |
| "learning_rate": 3.5672491207504e-05, |
| "loss": 1.9706, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.5962732919254659, |
| "grad_norm": 0.2421773225069046, |
| "learning_rate": 3.550176253222538e-05, |
| "loss": 1.8329, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.5974025974025974, |
| "grad_norm": 0.24929580092430115, |
| "learning_rate": 3.533121821523928e-05, |
| "loss": 1.9352, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.598531902879729, |
| "grad_norm": 0.28088921308517456, |
| "learning_rate": 3.516086042517202e-05, |
| "loss": 2.0107, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.5996612083568605, |
| "grad_norm": 0.28900331258773804, |
| "learning_rate": 3.4990691328278026e-05, |
| "loss": 1.6626, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.6007905138339921, |
| "grad_norm": 0.3050013780593872, |
| "learning_rate": 3.482071308841237e-05, |
| "loss": 1.3869, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.6019198193111237, |
| "grad_norm": 0.2838362157344818, |
| "learning_rate": 3.4650927867003116e-05, |
| "loss": 1.7871, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.6030491247882552, |
| "grad_norm": 0.3072238564491272, |
| "learning_rate": 3.448133782302399e-05, |
| "loss": 1.7517, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.6041784302653868, |
| "grad_norm": 0.37723538279533386, |
| "learning_rate": 3.431194511296685e-05, |
| "loss": 1.6763, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.6053077357425184, |
| "grad_norm": 0.35179805755615234, |
| "learning_rate": 3.4142751890814285e-05, |
| "loss": 1.4846, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.6064370412196499, |
| "grad_norm": 0.3520725667476654, |
| "learning_rate": 3.397376030801215e-05, |
| "loss": 1.4679, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.6075663466967814, |
| "grad_norm": 0.4937370717525482, |
| "learning_rate": 3.380497251344233e-05, |
| "loss": 1.369, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.6086956521739131, |
| "grad_norm": 0.5829998254776001, |
| "learning_rate": 3.363639065339536e-05, |
| "loss": 1.4504, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.6098249576510446, |
| "grad_norm": 0.5117580890655518, |
| "learning_rate": 3.346801687154312e-05, |
| "loss": 1.6758, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.6109542631281761, |
| "grad_norm": 0.5459957122802734, |
| "learning_rate": 3.329985330891154e-05, |
| "loss": 1.4033, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.6120835686053078, |
| "grad_norm": 0.6639454364776611, |
| "learning_rate": 3.3131902103853496e-05, |
| "loss": 1.2722, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.6132128740824393, |
| "grad_norm": 0.6231001019477844, |
| "learning_rate": 3.296416539202149e-05, |
| "loss": 1.5103, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.6143421795595708, |
| "grad_norm": 0.6634608507156372, |
| "learning_rate": 3.279664530634059e-05, |
| "loss": 1.4552, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.6154714850367025, |
| "grad_norm": 0.7582663893699646, |
| "learning_rate": 3.2629343976981205e-05, |
| "loss": 1.6744, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.616600790513834, |
| "grad_norm": 0.8784703016281128, |
| "learning_rate": 3.2462263531332114e-05, |
| "loss": 1.0153, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.6177300959909655, |
| "grad_norm": 0.9141214489936829, |
| "learning_rate": 3.229540609397334e-05, |
| "loss": 1.8154, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.6188594014680971, |
| "grad_norm": 1.0494526624679565, |
| "learning_rate": 3.212877378664917e-05, |
| "loss": 1.2725, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.6199887069452287, |
| "grad_norm": 1.2754578590393066, |
| "learning_rate": 3.19623687282411e-05, |
| "loss": 1.3594, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.6211180124223602, |
| "grad_norm": 3.0668442249298096, |
| "learning_rate": 3.1796193034740995e-05, |
| "loss": 2.2957, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.6222473178994918, |
| "grad_norm": 0.14963117241859436, |
| "learning_rate": 3.163024881922415e-05, |
| "loss": 1.7012, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.6233766233766234, |
| "grad_norm": 0.19053514301776886, |
| "learning_rate": 3.1464538191822395e-05, |
| "loss": 1.471, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.6245059288537549, |
| "grad_norm": 0.14380377531051636, |
| "learning_rate": 3.1299063259697224e-05, |
| "loss": 1.3501, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.6256352343308865, |
| "grad_norm": 0.13697926700115204, |
| "learning_rate": 3.113382612701312e-05, |
| "loss": 1.822, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.626764539808018, |
| "grad_norm": 0.1762678027153015, |
| "learning_rate": 3.0968828894910696e-05, |
| "loss": 1.4053, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.6278938452851497, |
| "grad_norm": 0.19729723036289215, |
| "learning_rate": 3.0804073661480024e-05, |
| "loss": 1.2143, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.6290231507622812, |
| "grad_norm": 0.1666150987148285, |
| "learning_rate": 3.0639562521733935e-05, |
| "loss": 2.0227, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.6301524562394127, |
| "grad_norm": 0.19616784155368805, |
| "learning_rate": 3.0475297567581363e-05, |
| "loss": 1.8752, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.6312817617165444, |
| "grad_norm": 0.17856550216674805, |
| "learning_rate": 3.0311280887800807e-05, |
| "loss": 1.8782, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.6324110671936759, |
| "grad_norm": 0.2001560926437378, |
| "learning_rate": 3.0147514568013736e-05, |
| "loss": 1.6296, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.6335403726708074, |
| "grad_norm": 0.18400637805461884, |
| "learning_rate": 2.9984000690658003e-05, |
| "loss": 1.9136, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.6346696781479391, |
| "grad_norm": 0.3142743408679962, |
| "learning_rate": 2.98207413349615e-05, |
| "loss": 1.7852, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.6357989836250706, |
| "grad_norm": 0.21863651275634766, |
| "learning_rate": 2.9657738576915593e-05, |
| "loss": 2.0031, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.6369282891022021, |
| "grad_norm": 0.20468103885650635, |
| "learning_rate": 2.9494994489248807e-05, |
| "loss": 1.7731, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.6380575945793338, |
| "grad_norm": 0.17776687443256378, |
| "learning_rate": 2.9332511141400405e-05, |
| "loss": 1.6393, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.6391869000564653, |
| "grad_norm": 0.200006902217865, |
| "learning_rate": 2.917029059949413e-05, |
| "loss": 1.7989, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.6403162055335968, |
| "grad_norm": 0.20339208841323853, |
| "learning_rate": 2.900833492631191e-05, |
| "loss": 1.7774, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.6414455110107284, |
| "grad_norm": 0.21164321899414062, |
| "learning_rate": 2.8846646181267617e-05, |
| "loss": 2.0764, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.64257481648786, |
| "grad_norm": 0.2051491141319275, |
| "learning_rate": 2.8685226420380885e-05, |
| "loss": 1.985, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.6437041219649915, |
| "grad_norm": 0.27175769209861755, |
| "learning_rate": 2.852407769625095e-05, |
| "loss": 1.9218, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.6448334274421231, |
| "grad_norm": 0.23221158981323242, |
| "learning_rate": 2.8363202058030636e-05, |
| "loss": 1.8568, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.6459627329192547, |
| "grad_norm": 0.2595285177230835, |
| "learning_rate": 2.8202601551400176e-05, |
| "loss": 1.609, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.6470920383963862, |
| "grad_norm": 0.20131221413612366, |
| "learning_rate": 2.804227821854125e-05, |
| "loss": 1.7826, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.6482213438735178, |
| "grad_norm": 0.20102736353874207, |
| "learning_rate": 2.7882234098111025e-05, |
| "loss": 1.6074, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.6493506493506493, |
| "grad_norm": 0.3271494209766388, |
| "learning_rate": 2.7722471225216284e-05, |
| "loss": 2.046, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.6504799548277809, |
| "grad_norm": 0.2570958137512207, |
| "learning_rate": 2.7562991631387424e-05, |
| "loss": 1.8585, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.6516092603049125, |
| "grad_norm": 0.2624165117740631, |
| "learning_rate": 2.7403797344552696e-05, |
| "loss": 1.9614, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.652738565782044, |
| "grad_norm": 0.2565469741821289, |
| "learning_rate": 2.7244890389012468e-05, |
| "loss": 2.0746, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.6538678712591756, |
| "grad_norm": 0.2802393138408661, |
| "learning_rate": 2.708627278541337e-05, |
| "loss": 1.8132, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.6549971767363072, |
| "grad_norm": 0.31447547674179077, |
| "learning_rate": 2.692794655072268e-05, |
| "loss": 1.9015, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.6561264822134387, |
| "grad_norm": 0.3330397605895996, |
| "learning_rate": 2.6769913698202646e-05, |
| "loss": 1.5298, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.6572557876905702, |
| "grad_norm": 0.2743810713291168, |
| "learning_rate": 2.6612176237384934e-05, |
| "loss": 1.7217, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.6583850931677019, |
| "grad_norm": 0.3404594659805298, |
| "learning_rate": 2.645473617404498e-05, |
| "loss": 1.8576, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.6595143986448334, |
| "grad_norm": 0.2993510365486145, |
| "learning_rate": 2.629759551017658e-05, |
| "loss": 1.5187, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.6606437041219649, |
| "grad_norm": 0.28750112652778625, |
| "learning_rate": 2.6140756243966348e-05, |
| "loss": 1.9287, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.6617730095990966, |
| "grad_norm": 0.39892521500587463, |
| "learning_rate": 2.598422036976843e-05, |
| "loss": 1.5573, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.6629023150762281, |
| "grad_norm": 0.40390318632125854, |
| "learning_rate": 2.5827989878078972e-05, |
| "loss": 1.6552, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.6640316205533597, |
| "grad_norm": 0.49753913283348083, |
| "learning_rate": 2.5672066755511015e-05, |
| "loss": 1.5754, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.6651609260304913, |
| "grad_norm": 0.4602469205856323, |
| "learning_rate": 2.5516452984768972e-05, |
| "loss": 1.7824, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.6662902315076228, |
| "grad_norm": 0.46448126435279846, |
| "learning_rate": 2.536115054462372e-05, |
| "loss": 1.2975, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.6674195369847544, |
| "grad_norm": 0.5113171339035034, |
| "learning_rate": 2.5206161409887164e-05, |
| "loss": 1.3596, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.668548842461886, |
| "grad_norm": 0.6399116516113281, |
| "learning_rate": 2.5051487551387366e-05, |
| "loss": 1.5185, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.6696781479390175, |
| "grad_norm": 0.530022919178009, |
| "learning_rate": 2.4897130935943215e-05, |
| "loss": 1.6079, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.6708074534161491, |
| "grad_norm": 0.7399399280548096, |
| "learning_rate": 2.4743093526339695e-05, |
| "loss": 1.6957, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.6719367588932806, |
| "grad_norm": 0.9889739155769348, |
| "learning_rate": 2.458937728130271e-05, |
| "loss": 1.6572, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.6730660643704122, |
| "grad_norm": 0.7296352982521057, |
| "learning_rate": 2.4435984155474362e-05, |
| "loss": 1.4702, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.6741953698475438, |
| "grad_norm": 0.6716480851173401, |
| "learning_rate": 2.428291609938786e-05, |
| "loss": 1.6475, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.6753246753246753, |
| "grad_norm": 0.9753092527389526, |
| "learning_rate": 2.4130175059442983e-05, |
| "loss": 1.6509, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.6764539808018069, |
| "grad_norm": 1.042049765586853, |
| "learning_rate": 2.397776297788112e-05, |
| "loss": 1.9047, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.6775832862789385, |
| "grad_norm": 1.636167287826538, |
| "learning_rate": 2.382568179276074e-05, |
| "loss": 1.5812, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.67871259175607, |
| "grad_norm": 0.10443209111690521, |
| "learning_rate": 2.367393343793253e-05, |
| "loss": 1.6011, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.6798418972332015, |
| "grad_norm": 0.11894982308149338, |
| "learning_rate": 2.352251984301508e-05, |
| "loss": 1.1336, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.6809712027103332, |
| "grad_norm": 0.15143415331840515, |
| "learning_rate": 2.337144293337008e-05, |
| "loss": 1.4442, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.6821005081874647, |
| "grad_norm": 0.17936274409294128, |
| "learning_rate": 2.3220704630078093e-05, |
| "loss": 1.5153, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.6832298136645962, |
| "grad_norm": 0.19904710352420807, |
| "learning_rate": 2.3070306849913843e-05, |
| "loss": 1.4951, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.6843591191417279, |
| "grad_norm": 0.20843562483787537, |
| "learning_rate": 2.292025150532211e-05, |
| "loss": 1.4662, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.6854884246188594, |
| "grad_norm": 0.1950257271528244, |
| "learning_rate": 2.2770540504393224e-05, |
| "loss": 1.9538, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.6866177300959909, |
| "grad_norm": 0.15244010090827942, |
| "learning_rate": 2.2621175750838954e-05, |
| "loss": 1.801, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.6877470355731226, |
| "grad_norm": 0.2256423383951187, |
| "learning_rate": 2.2472159143968085e-05, |
| "loss": 1.2293, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.6888763410502541, |
| "grad_norm": 0.24459119141101837, |
| "learning_rate": 2.232349257866254e-05, |
| "loss": 1.7217, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.6900056465273856, |
| "grad_norm": 0.19310763478279114, |
| "learning_rate": 2.217517794535305e-05, |
| "loss": 1.9453, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.6911349520045172, |
| "grad_norm": 0.19308945536613464, |
| "learning_rate": 2.2027217129995266e-05, |
| "loss": 1.6961, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.6922642574816488, |
| "grad_norm": 0.1873122751712799, |
| "learning_rate": 2.1879612014045693e-05, |
| "loss": 1.6995, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.6933935629587803, |
| "grad_norm": 0.18931369483470917, |
| "learning_rate": 2.1732364474437794e-05, |
| "loss": 1.9309, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.6945228684359119, |
| "grad_norm": 0.25303930044174194, |
| "learning_rate": 2.158547638355811e-05, |
| "loss": 1.7571, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.6956521739130435, |
| "grad_norm": 0.22161240875720978, |
| "learning_rate": 2.143894960922253e-05, |
| "loss": 2.0939, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.6967814793901751, |
| "grad_norm": 0.23233333230018616, |
| "learning_rate": 2.1292786014652398e-05, |
| "loss": 1.803, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.6979107848673066, |
| "grad_norm": 0.29414427280426025, |
| "learning_rate": 2.1146987458450935e-05, |
| "loss": 2.0023, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.6990400903444381, |
| "grad_norm": 0.21665748953819275, |
| "learning_rate": 2.100155579457953e-05, |
| "loss": 1.8504, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.7001693958215698, |
| "grad_norm": 0.2273651361465454, |
| "learning_rate": 2.0856492872334273e-05, |
| "loss": 1.401, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.7012987012987013, |
| "grad_norm": 0.25878530740737915, |
| "learning_rate": 2.0711800536322296e-05, |
| "loss": 2.0893, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.7024280067758328, |
| "grad_norm": 0.28381770849227905, |
| "learning_rate": 2.0567480626438416e-05, |
| "loss": 1.8416, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.7035573122529645, |
| "grad_norm": 1.1040054559707642, |
| "learning_rate": 2.042353497784174e-05, |
| "loss": 1.871, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.704686617730096, |
| "grad_norm": 0.27611055970191956, |
| "learning_rate": 2.0279965420932267e-05, |
| "loss": 2.0418, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.7058159232072275, |
| "grad_norm": 0.21912366151809692, |
| "learning_rate": 2.0136773781327656e-05, |
| "loss": 1.6895, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.7069452286843592, |
| "grad_norm": 0.22892306745052338, |
| "learning_rate": 1.999396187983998e-05, |
| "loss": 1.7621, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.7080745341614907, |
| "grad_norm": 0.24957579374313354, |
| "learning_rate": 1.9851531532452665e-05, |
| "loss": 1.8752, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.7092038396386222, |
| "grad_norm": 0.27737995982170105, |
| "learning_rate": 1.9709484550297263e-05, |
| "loss": 1.8712, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.7103331451157539, |
| "grad_norm": 0.6962355375289917, |
| "learning_rate": 1.956782273963051e-05, |
| "loss": 1.8764, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.7114624505928854, |
| "grad_norm": 0.27960485219955444, |
| "learning_rate": 1.942654790181132e-05, |
| "loss": 1.6422, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.7125917560700169, |
| "grad_norm": 0.27620258927345276, |
| "learning_rate": 1.9285661833277953e-05, |
| "loss": 1.7739, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.7137210615471485, |
| "grad_norm": 0.28414490818977356, |
| "learning_rate": 1.914516632552504e-05, |
| "loss": 1.9286, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.7148503670242801, |
| "grad_norm": 0.32783564925193787, |
| "learning_rate": 1.9005063165080915e-05, |
| "loss": 1.6393, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.7159796725014116, |
| "grad_norm": 0.5030328631401062, |
| "learning_rate": 1.8865354133484835e-05, |
| "loss": 1.9269, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.7171089779785432, |
| "grad_norm": 0.3257984519004822, |
| "learning_rate": 1.8726041007264394e-05, |
| "loss": 1.6148, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.7182382834556748, |
| "grad_norm": 0.32803335785865784, |
| "learning_rate": 1.8587125557912856e-05, |
| "loss": 1.9035, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.7193675889328063, |
| "grad_norm": 0.4144839644432068, |
| "learning_rate": 1.8448609551866647e-05, |
| "loss": 1.6207, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.7204968944099379, |
| "grad_norm": 0.5742985010147095, |
| "learning_rate": 1.8310494750482925e-05, |
| "loss": 1.5247, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.7216261998870694, |
| "grad_norm": 0.45039287209510803, |
| "learning_rate": 1.8172782910017193e-05, |
| "loss": 1.8702, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.722755505364201, |
| "grad_norm": 0.4651569128036499, |
| "learning_rate": 1.80354757816009e-05, |
| "loss": 1.6114, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.7238848108413326, |
| "grad_norm": 0.5533832311630249, |
| "learning_rate": 1.7898575111219224e-05, |
| "loss": 1.724, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.7250141163184641, |
| "grad_norm": 0.5084080696105957, |
| "learning_rate": 1.7762082639688844e-05, |
| "loss": 1.6029, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.7261434217955957, |
| "grad_norm": 0.593112587928772, |
| "learning_rate": 1.7626000102635863e-05, |
| "loss": 1.7722, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.7272727272727273, |
| "grad_norm": 0.7219982743263245, |
| "learning_rate": 1.7490329230473664e-05, |
| "loss": 1.1574, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.7284020327498588, |
| "grad_norm": 0.8209093809127808, |
| "learning_rate": 1.735507174838092e-05, |
| "loss": 1.4226, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.7295313382269905, |
| "grad_norm": 0.817489504814148, |
| "learning_rate": 1.7220229376279683e-05, |
| "loss": 0.6924, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.730660643704122, |
| "grad_norm": 2.4483489990234375, |
| "learning_rate": 1.7085803828813546e-05, |
| "loss": 1.541, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.7317899491812535, |
| "grad_norm": 1.2162669897079468, |
| "learning_rate": 1.6951796815325748e-05, |
| "loss": 1.9561, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.7329192546583851, |
| "grad_norm": 1.5169554948806763, |
| "learning_rate": 1.6818210039837496e-05, |
| "loss": 1.9823, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.7340485601355167, |
| "grad_norm": 1.8656399250030518, |
| "learning_rate": 1.668504520102628e-05, |
| "loss": 1.8061, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.7351778656126482, |
| "grad_norm": 0.12022317200899124, |
| "learning_rate": 1.6552303992204327e-05, |
| "loss": 1.383, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.7363071710897798, |
| "grad_norm": 0.13317741453647614, |
| "learning_rate": 1.6419988101296974e-05, |
| "loss": 1.6712, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.7374364765669114, |
| "grad_norm": 0.13640844821929932, |
| "learning_rate": 1.6288099210821274e-05, |
| "loss": 1.5652, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.7385657820440429, |
| "grad_norm": 0.20749111473560333, |
| "learning_rate": 1.615663899786456e-05, |
| "loss": 1.2273, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.7396950875211745, |
| "grad_norm": 0.18163056671619415, |
| "learning_rate": 1.602560913406318e-05, |
| "loss": 1.3978, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.740824392998306, |
| "grad_norm": 0.1996774822473526, |
| "learning_rate": 1.5895011285581173e-05, |
| "loss": 1.9716, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.7419536984754376, |
| "grad_norm": 0.18158242106437683, |
| "learning_rate": 1.5764847113089094e-05, |
| "loss": 1.5364, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.7430830039525692, |
| "grad_norm": 0.18996961414813995, |
| "learning_rate": 1.563511827174296e-05, |
| "loss": 1.5873, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.7442123094297007, |
| "grad_norm": 0.22422468662261963, |
| "learning_rate": 1.5505826411163122e-05, |
| "loss": 1.6887, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.7453416149068323, |
| "grad_norm": 0.19467850029468536, |
| "learning_rate": 1.5376973175413322e-05, |
| "loss": 1.8702, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.7464709203839639, |
| "grad_norm": 0.27403318881988525, |
| "learning_rate": 1.5248560202979784e-05, |
| "loss": 1.479, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.7476002258610954, |
| "grad_norm": 0.20241166651248932, |
| "learning_rate": 1.5120589126750429e-05, |
| "loss": 1.78, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.748729531338227, |
| "grad_norm": 0.20999853312969208, |
| "learning_rate": 1.4993061573993988e-05, |
| "loss": 1.6491, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.7498588368153586, |
| "grad_norm": 0.2149931937456131, |
| "learning_rate": 1.4865979166339489e-05, |
| "loss": 1.5793, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.7509881422924901, |
| "grad_norm": 0.318891316652298, |
| "learning_rate": 1.473934351975541e-05, |
| "loss": 1.3242, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.7521174477696216, |
| "grad_norm": 0.23082397878170013, |
| "learning_rate": 1.4613156244529363e-05, |
| "loss": 1.9251, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.7532467532467533, |
| "grad_norm": 0.18606293201446533, |
| "learning_rate": 1.4487418945247438e-05, |
| "loss": 2.0595, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.7543760587238848, |
| "grad_norm": 0.19954818487167358, |
| "learning_rate": 1.4362133220773955e-05, |
| "loss": 1.9616, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.7555053642010163, |
| "grad_norm": 0.22097231447696686, |
| "learning_rate": 1.4237300664230923e-05, |
| "loss": 2.1689, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.756634669678148, |
| "grad_norm": 0.23413076996803284, |
| "learning_rate": 1.411292286297803e-05, |
| "loss": 1.7825, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.7577639751552795, |
| "grad_norm": 0.20669344067573547, |
| "learning_rate": 1.3989001398592255e-05, |
| "loss": 2.2408, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.758893280632411, |
| "grad_norm": 0.23911398649215698, |
| "learning_rate": 1.386553784684792e-05, |
| "loss": 1.9885, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.7600225861095427, |
| "grad_norm": 0.23120582103729248, |
| "learning_rate": 1.3742533777696454e-05, |
| "loss": 1.7861, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.7611518915866742, |
| "grad_norm": 0.30187371373176575, |
| "learning_rate": 1.3619990755246654e-05, |
| "loss": 1.7233, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.7622811970638057, |
| "grad_norm": 0.2317027598619461, |
| "learning_rate": 1.3497910337744624e-05, |
| "loss": 1.9729, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.7634105025409373, |
| "grad_norm": 0.24414820969104767, |
| "learning_rate": 1.337629407755409e-05, |
| "loss": 1.5769, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.7645398080180689, |
| "grad_norm": 0.3238002061843872, |
| "learning_rate": 1.3255143521136498e-05, |
| "loss": 2.1359, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.7656691134952005, |
| "grad_norm": 0.2557482123374939, |
| "learning_rate": 1.3134460209031541e-05, |
| "loss": 1.7676, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.766798418972332, |
| "grad_norm": 0.360801637172699, |
| "learning_rate": 1.30142456758374e-05, |
| "loss": 1.5168, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.7679277244494636, |
| "grad_norm": 0.24271051585674286, |
| "learning_rate": 1.2894501450191399e-05, |
| "loss": 1.9566, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.7690570299265952, |
| "grad_norm": 0.32158005237579346, |
| "learning_rate": 1.2775229054750343e-05, |
| "loss": 1.4323, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.7701863354037267, |
| "grad_norm": 0.29279786348342896, |
| "learning_rate": 1.2656430006171404e-05, |
| "loss": 1.9157, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.7713156408808582, |
| "grad_norm": 0.37682029604911804, |
| "learning_rate": 1.253810581509265e-05, |
| "loss": 1.5932, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.7724449463579899, |
| "grad_norm": 0.3008486032485962, |
| "learning_rate": 1.2420257986113959e-05, |
| "loss": 1.5601, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.7735742518351214, |
| "grad_norm": 0.3406381905078888, |
| "learning_rate": 1.2302888017777747e-05, |
| "loss": 1.7338, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.7747035573122529, |
| "grad_norm": 0.3354251980781555, |
| "learning_rate": 1.2185997402550087e-05, |
| "loss": 1.1953, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.7758328627893846, |
| "grad_norm": 0.45363056659698486, |
| "learning_rate": 1.206958762680157e-05, |
| "loss": 1.6571, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.7769621682665161, |
| "grad_norm": 0.3696065843105316, |
| "learning_rate": 1.1953660170788538e-05, |
| "loss": 1.6085, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.7780914737436476, |
| "grad_norm": 0.5141958594322205, |
| "learning_rate": 1.1838216508634154e-05, |
| "loss": 1.7784, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.7792207792207793, |
| "grad_norm": 0.42403480410575867, |
| "learning_rate": 1.1723258108309703e-05, |
| "loss": 1.7745, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.7803500846979108, |
| "grad_norm": 0.5388137698173523, |
| "learning_rate": 1.1608786431615931e-05, |
| "loss": 1.5025, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.7814793901750423, |
| "grad_norm": 0.5538594126701355, |
| "learning_rate": 1.1494802934164473e-05, |
| "loss": 1.6429, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.782608695652174, |
| "grad_norm": 0.6132928729057312, |
| "learning_rate": 1.1381309065359297e-05, |
| "loss": 1.0125, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.7837380011293055, |
| "grad_norm": 0.6523614525794983, |
| "learning_rate": 1.1268306268378286e-05, |
| "loss": 1.2307, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.784867306606437, |
| "grad_norm": 0.6368614435195923, |
| "learning_rate": 1.1155795980154916e-05, |
| "loss": 1.3637, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.7859966120835686, |
| "grad_norm": 0.620228111743927, |
| "learning_rate": 1.1043779631359973e-05, |
| "loss": 0.9165, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.7871259175607002, |
| "grad_norm": 0.7397240400314331, |
| "learning_rate": 1.0932258646383336e-05, |
| "loss": 1.1481, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.7882552230378317, |
| "grad_norm": 0.9581983685493469, |
| "learning_rate": 1.0821234443315876e-05, |
| "loss": 2.1559, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.7893845285149633, |
| "grad_norm": 1.2329559326171875, |
| "learning_rate": 1.0710708433931466e-05, |
| "loss": 1.61, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.7905138339920948, |
| "grad_norm": 1.888492226600647, |
| "learning_rate": 1.060068202366895e-05, |
| "loss": 1.8341, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.7916431394692264, |
| "grad_norm": 0.16424258053302765, |
| "learning_rate": 1.0491156611614344e-05, |
| "loss": 1.6505, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.792772444946358, |
| "grad_norm": 0.16047315299510956, |
| "learning_rate": 1.0382133590482978e-05, |
| "loss": 1.276, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.7939017504234895, |
| "grad_norm": 0.21792340278625488, |
| "learning_rate": 1.0273614346601883e-05, |
| "loss": 1.4329, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.7950310559006211, |
| "grad_norm": 0.173606276512146, |
| "learning_rate": 1.0165600259892061e-05, |
| "loss": 2.0379, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.7961603613777527, |
| "grad_norm": 0.13415886461734772, |
| "learning_rate": 1.0058092703850985e-05, |
| "loss": 1.9901, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.7972896668548842, |
| "grad_norm": 0.15839488804340363, |
| "learning_rate": 9.951093045535115e-06, |
| "loss": 1.811, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.7984189723320159, |
| "grad_norm": 0.20288027822971344, |
| "learning_rate": 9.844602645542584e-06, |
| "loss": 2.0712, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.7995482778091474, |
| "grad_norm": 0.2803378999233246, |
| "learning_rate": 9.738622857995788e-06, |
| "loss": 1.8512, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.8006775832862789, |
| "grad_norm": 0.21160300076007843, |
| "learning_rate": 9.633155030524243e-06, |
| "loss": 1.5314, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.8018068887634106, |
| "grad_norm": 0.16740640997886658, |
| "learning_rate": 9.528200504247404e-06, |
| "loss": 1.7808, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.8029361942405421, |
| "grad_norm": 0.22359423339366913, |
| "learning_rate": 9.423760613757677e-06, |
| "loss": 1.7337, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.8040654997176736, |
| "grad_norm": 0.19431254267692566, |
| "learning_rate": 9.319836687103368e-06, |
| "loss": 2.122, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.8051948051948052, |
| "grad_norm": 0.2232765257358551, |
| "learning_rate": 9.216430045771845e-06, |
| "loss": 1.7579, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.8063241106719368, |
| "grad_norm": 0.203886941075325, |
| "learning_rate": 9.11354200467271e-06, |
| "loss": 1.9332, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.8074534161490683, |
| "grad_norm": 0.2308446764945984, |
| "learning_rate": 9.011173872121132e-06, |
| "loss": 1.3203, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.8085827216261999, |
| "grad_norm": 0.182839035987854, |
| "learning_rate": 8.909326949821123e-06, |
| "loss": 2.0008, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.8097120271033315, |
| "grad_norm": 0.22202719748020172, |
| "learning_rate": 8.808002532849047e-06, |
| "loss": 2.0819, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.810841332580463, |
| "grad_norm": 0.1916837841272354, |
| "learning_rate": 8.707201909637137e-06, |
| "loss": 1.9595, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.8119706380575946, |
| "grad_norm": 0.26201632618904114, |
| "learning_rate": 8.606926361957124e-06, |
| "loss": 1.894, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.8130999435347261, |
| "grad_norm": 0.5175297856330872, |
| "learning_rate": 8.507177164903907e-06, |
| "loss": 1.9983, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.8142292490118577, |
| "grad_norm": 0.21798285841941833, |
| "learning_rate": 8.407955586879373e-06, |
| "loss": 1.9481, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.8153585544889893, |
| "grad_norm": 0.22814205288887024, |
| "learning_rate": 8.309262889576225e-06, |
| "loss": 1.9928, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.8164878599661208, |
| "grad_norm": 0.25377923250198364, |
| "learning_rate": 8.211100327962013e-06, |
| "loss": 1.9169, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.8176171654432524, |
| "grad_norm": 0.23791451752185822, |
| "learning_rate": 8.113469150263087e-06, |
| "loss": 1.8194, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.818746470920384, |
| "grad_norm": 0.2693537473678589, |
| "learning_rate": 8.016370597948785e-06, |
| "loss": 1.6758, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.8198757763975155, |
| "grad_norm": 0.20643536746501923, |
| "learning_rate": 7.91980590571561e-06, |
| "loss": 1.5467, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.821005081874647, |
| "grad_norm": 0.2922951877117157, |
| "learning_rate": 7.823776301471591e-06, |
| "loss": 1.7632, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.8221343873517787, |
| "grad_norm": 0.33710768818855286, |
| "learning_rate": 7.72828300632058e-06, |
| "loss": 1.532, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.8232636928289102, |
| "grad_norm": 0.33453261852264404, |
| "learning_rate": 7.633327234546788e-06, |
| "loss": 1.727, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.8243929983060417, |
| "grad_norm": 0.2797680199146271, |
| "learning_rate": 7.538910193599313e-06, |
| "loss": 1.723, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.8255223037831734, |
| "grad_norm": 0.3717952370643616, |
| "learning_rate": 7.445033084076847e-06, |
| "loss": 1.5709, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.8266516092603049, |
| "grad_norm": 0.3003871738910675, |
| "learning_rate": 7.351697099712307e-06, |
| "loss": 1.7581, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.8277809147374364, |
| "grad_norm": 0.36279451847076416, |
| "learning_rate": 7.258903427357727e-06, |
| "loss": 1.5827, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.8289102202145681, |
| "grad_norm": 0.313398152589798, |
| "learning_rate": 7.166653246969174e-06, |
| "loss": 1.6037, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.8300395256916996, |
| "grad_norm": 0.28683504462242126, |
| "learning_rate": 7.074947731591691e-06, |
| "loss": 1.331, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.8311688311688312, |
| "grad_norm": 0.367031991481781, |
| "learning_rate": 6.983788047344419e-06, |
| "loss": 1.6444, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.8322981366459627, |
| "grad_norm": 0.41479209065437317, |
| "learning_rate": 6.893175353405756e-06, |
| "loss": 1.4512, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.8334274421230943, |
| "grad_norm": 0.346797913312912, |
| "learning_rate": 6.8031108019986356e-06, |
| "loss": 1.8215, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.8345567476002259, |
| "grad_norm": 0.4141133427619934, |
| "learning_rate": 6.713595538375833e-06, |
| "loss": 1.73, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.8356860530773574, |
| "grad_norm": 0.532598078250885, |
| "learning_rate": 6.624630700805473e-06, |
| "loss": 1.6717, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.836815358554489, |
| "grad_norm": 0.6603345274925232, |
| "learning_rate": 6.53621742055644e-06, |
| "loss": 1.8964, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.8379446640316206, |
| "grad_norm": 0.4613952040672302, |
| "learning_rate": 6.448356821884144e-06, |
| "loss": 1.4167, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.8390739695087521, |
| "grad_norm": 0.5672091841697693, |
| "learning_rate": 6.361050022016085e-06, |
| "loss": 1.547, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.8402032749858837, |
| "grad_norm": 0.47616711258888245, |
| "learning_rate": 6.274298131137763e-06, |
| "loss": 1.1175, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.8413325804630153, |
| "grad_norm": 1.295542597770691, |
| "learning_rate": 6.188102252378431e-06, |
| "loss": 1.2317, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.8424618859401468, |
| "grad_norm": 0.7239735126495361, |
| "learning_rate": 6.102463481797216e-06, |
| "loss": 0.9637, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.8435911914172783, |
| "grad_norm": 0.9294856190681458, |
| "learning_rate": 6.017382908369051e-06, |
| "loss": 1.5054, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.84472049689441, |
| "grad_norm": 0.9071400165557861, |
| "learning_rate": 5.932861613970941e-06, |
| "loss": 1.1075, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.8458498023715415, |
| "grad_norm": 1.2108523845672607, |
| "learning_rate": 5.848900673368074e-06, |
| "loss": 1.5638, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.846979107848673, |
| "grad_norm": 2.483159065246582, |
| "learning_rate": 5.765501154200298e-06, |
| "loss": 1.4443, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.8481084133258047, |
| "grad_norm": 0.17873716354370117, |
| "learning_rate": 5.682664116968434e-06, |
| "loss": 1.1442, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.8492377188029362, |
| "grad_norm": 0.13179203867912292, |
| "learning_rate": 5.600390615020879e-06, |
| "loss": 2.232, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.8503670242800677, |
| "grad_norm": 0.16131728887557983, |
| "learning_rate": 5.518681694540084e-06, |
| "loss": 1.5456, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.8514963297571994, |
| "grad_norm": 0.4394928514957428, |
| "learning_rate": 5.437538394529429e-06, |
| "loss": 1.8573, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.8526256352343309, |
| "grad_norm": 0.19867239892482758, |
| "learning_rate": 5.3569617467998325e-06, |
| "loss": 1.6727, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.8537549407114624, |
| "grad_norm": 0.18569554388523102, |
| "learning_rate": 5.276952775956784e-06, |
| "loss": 1.6288, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.854884246188594, |
| "grad_norm": 0.20019683241844177, |
| "learning_rate": 5.197512499387175e-06, |
| "loss": 1.6844, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.8560135516657256, |
| "grad_norm": 0.16318097710609436, |
| "learning_rate": 5.118641927246492e-06, |
| "loss": 1.7957, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.8571428571428571, |
| "grad_norm": 0.15840142965316772, |
| "learning_rate": 5.040342062445869e-06, |
| "loss": 1.5726, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.8582721626199887, |
| "grad_norm": 0.20747163891792297, |
| "learning_rate": 4.962613900639412e-06, |
| "loss": 1.2894, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.8594014680971203, |
| "grad_norm": 0.17959041893482208, |
| "learning_rate": 4.8854584302114536e-06, |
| "loss": 1.9222, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.8605307735742518, |
| "grad_norm": 0.253172904253006, |
| "learning_rate": 4.808876632264092e-06, |
| "loss": 1.7508, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.8616600790513834, |
| "grad_norm": 0.1783914417028427, |
| "learning_rate": 4.732869480604607e-06, |
| "loss": 1.9951, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.862789384528515, |
| "grad_norm": 0.1955009400844574, |
| "learning_rate": 4.657437941733167e-06, |
| "loss": 1.4675, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.8639186900056465, |
| "grad_norm": 0.2847943603992462, |
| "learning_rate": 4.58258297483048e-06, |
| "loss": 1.621, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.8650479954827781, |
| "grad_norm": 0.20446226000785828, |
| "learning_rate": 4.5083055317456045e-06, |
| "loss": 1.7117, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.8661773009599096, |
| "grad_norm": 0.1737479567527771, |
| "learning_rate": 4.434606556983878e-06, |
| "loss": 1.9038, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.8673066064370413, |
| "grad_norm": 0.22281497716903687, |
| "learning_rate": 4.361486987694891e-06, |
| "loss": 2.2309, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.8684359119141728, |
| "grad_norm": 0.2025764435529709, |
| "learning_rate": 4.288947753660544e-06, |
| "loss": 2.1066, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.8695652173913043, |
| "grad_norm": 0.2314663976430893, |
| "learning_rate": 4.216989777283259e-06, |
| "loss": 1.7796, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.870694522868436, |
| "grad_norm": 0.21215543150901794, |
| "learning_rate": 4.14561397357423e-06, |
| "loss": 1.909, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.8718238283455675, |
| "grad_norm": 0.2464420199394226, |
| "learning_rate": 4.074821250141814e-06, |
| "loss": 1.5897, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.872953133822699, |
| "grad_norm": 0.2283257395029068, |
| "learning_rate": 4.00461250717995e-06, |
| "loss": 1.617, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.8740824392998306, |
| "grad_norm": 0.29658201336860657, |
| "learning_rate": 3.934988637456738e-06, |
| "loss": 1.7849, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.8752117447769622, |
| "grad_norm": 0.25188517570495605, |
| "learning_rate": 3.8659505263031025e-06, |
| "loss": 1.7917, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.8763410502540937, |
| "grad_norm": 0.27738890051841736, |
| "learning_rate": 3.797499051601483e-06, |
| "loss": 1.6018, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.8774703557312253, |
| "grad_norm": 0.2526523172855377, |
| "learning_rate": 3.729635083774724e-06, |
| "loss": 1.7928, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.8785996612083569, |
| "grad_norm": 0.22987103462219238, |
| "learning_rate": 3.6623594857749633e-06, |
| "loss": 1.4585, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.8797289666854884, |
| "grad_norm": 0.2625342607498169, |
| "learning_rate": 3.5956731130727173e-06, |
| "loss": 1.9541, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.88085827216262, |
| "grad_norm": 0.34188714623451233, |
| "learning_rate": 3.529576813645935e-06, |
| "loss": 1.5689, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.8819875776397516, |
| "grad_norm": 0.2936117649078369, |
| "learning_rate": 3.4640714279692566e-06, |
| "loss": 1.5456, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.8831168831168831, |
| "grad_norm": 0.26569730043411255, |
| "learning_rate": 3.399157789003299e-06, |
| "loss": 1.7835, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.8842461885940147, |
| "grad_norm": 0.3196849524974823, |
| "learning_rate": 3.3348367221841257e-06, |
| "loss": 1.3151, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.8853754940711462, |
| "grad_norm": 0.32176318764686584, |
| "learning_rate": 3.271109045412657e-06, |
| "loss": 1.3831, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.8865047995482778, |
| "grad_norm": 0.33749616146087646, |
| "learning_rate": 3.207975569044347e-06, |
| "loss": 1.2601, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.8876341050254094, |
| "grad_norm": 0.40785613656044006, |
| "learning_rate": 3.145437095878828e-06, |
| "loss": 1.9139, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.8887634105025409, |
| "grad_norm": 0.33301037549972534, |
| "learning_rate": 3.083494421149752e-06, |
| "loss": 1.4349, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.8898927159796725, |
| "grad_norm": 0.3668874502182007, |
| "learning_rate": 3.022148332514635e-06, |
| "loss": 1.6247, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.8910220214568041, |
| "grad_norm": 0.42873692512512207, |
| "learning_rate": 2.9613996100448625e-06, |
| "loss": 1.4897, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.8921513269339356, |
| "grad_norm": 0.49289876222610474, |
| "learning_rate": 2.9012490262157465e-06, |
| "loss": 1.4293, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.8932806324110671, |
| "grad_norm": 0.535629391670227, |
| "learning_rate": 2.84169734589676e-06, |
| "loss": 1.7038, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.8944099378881988, |
| "grad_norm": 0.830600917339325, |
| "learning_rate": 2.78274532634174e-06, |
| "loss": 1.4014, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.8955392433653303, |
| "grad_norm": 0.5958054661750793, |
| "learning_rate": 2.724393717179302e-06, |
| "loss": 1.5625, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.8966685488424618, |
| "grad_norm": 0.7064595818519592, |
| "learning_rate": 2.66664326040329e-06, |
| "loss": 1.2575, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.8977978543195935, |
| "grad_norm": 0.7008121013641357, |
| "learning_rate": 2.609494690363362e-06, |
| "loss": 1.0175, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.898927159796725, |
| "grad_norm": 0.8058467507362366, |
| "learning_rate": 2.5529487337556277e-06, |
| "loss": 1.0846, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.9000564652738566, |
| "grad_norm": 0.8895508050918579, |
| "learning_rate": 2.49700610961342e-06, |
| "loss": 1.4598, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.9011857707509882, |
| "grad_norm": 0.9604859948158264, |
| "learning_rate": 2.4416675292981417e-06, |
| "loss": 1.2935, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.9023150762281197, |
| "grad_norm": 1.620484709739685, |
| "learning_rate": 2.3869336964902455e-06, |
| "loss": 1.5513, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.9034443817052513, |
| "grad_norm": 2.574310541152954, |
| "learning_rate": 2.3328053071802637e-06, |
| "loss": 2.1875, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.9045736871823828, |
| "grad_norm": 0.14299297332763672, |
| "learning_rate": 2.2792830496599583e-06, |
| "loss": 1.1229, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.9057029926595144, |
| "grad_norm": 0.12972106039524078, |
| "learning_rate": 2.226367604513557e-06, |
| "loss": 1.8229, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.906832298136646, |
| "grad_norm": 0.15199488401412964, |
| "learning_rate": 2.174059644609161e-06, |
| "loss": 1.5613, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.9079616036137775, |
| "grad_norm": 0.16729173064231873, |
| "learning_rate": 2.1223598350900988e-06, |
| "loss": 1.9501, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.9090909090909091, |
| "grad_norm": 0.15182441473007202, |
| "learning_rate": 2.0712688333665297e-06, |
| "loss": 1.7393, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.9102202145680407, |
| "grad_norm": 0.1326162964105606, |
| "learning_rate": 2.0207872891070736e-06, |
| "loss": 1.861, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.9113495200451722, |
| "grad_norm": 0.18286187946796417, |
| "learning_rate": 1.9709158442305365e-06, |
| "loss": 1.7116, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.9124788255223037, |
| "grad_norm": 0.1932005137205124, |
| "learning_rate": 1.9216551328977537e-06, |
| "loss": 1.8832, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.9136081309994354, |
| "grad_norm": 0.1856583058834076, |
| "learning_rate": 1.8730057815035285e-06, |
| "loss": 1.8687, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.9147374364765669, |
| "grad_norm": 0.19752384722232819, |
| "learning_rate": 1.8249684086686813e-06, |
| "loss": 1.9856, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.9158667419536984, |
| "grad_norm": 0.2770135700702667, |
| "learning_rate": 1.777543625232142e-06, |
| "loss": 0.9592, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.9169960474308301, |
| "grad_norm": 0.19897840917110443, |
| "learning_rate": 1.730732034243221e-06, |
| "loss": 1.7433, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.9181253529079616, |
| "grad_norm": 0.17861053347587585, |
| "learning_rate": 1.6845342309539213e-06, |
| "loss": 1.4184, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.9192546583850931, |
| "grad_norm": 0.6110484004020691, |
| "learning_rate": 1.638950802811401e-06, |
| "loss": 1.5792, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.9203839638622248, |
| "grad_norm": 0.25728702545166016, |
| "learning_rate": 1.5939823294504386e-06, |
| "loss": 1.6094, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.9215132693393563, |
| "grad_norm": 0.24172663688659668, |
| "learning_rate": 1.549629382686152e-06, |
| "loss": 1.7054, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.9226425748164878, |
| "grad_norm": 0.24173271656036377, |
| "learning_rate": 1.5058925265066194e-06, |
| "loss": 1.7461, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.9237718802936195, |
| "grad_norm": 0.30865177512168884, |
| "learning_rate": 1.4627723170658192e-06, |
| "loss": 1.6537, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.924901185770751, |
| "grad_norm": 0.18484273552894592, |
| "learning_rate": 1.4202693026764636e-06, |
| "loss": 1.866, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.9260304912478825, |
| "grad_norm": 0.2225736677646637, |
| "learning_rate": 1.3783840238031143e-06, |
| "loss": 1.7052, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.9271597967250141, |
| "grad_norm": 0.21656034886837006, |
| "learning_rate": 1.3371170130552114e-06, |
| "loss": 1.6997, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.9282891022021457, |
| "grad_norm": 0.218043714761734, |
| "learning_rate": 1.2964687951803888e-06, |
| "loss": 1.7289, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.9294184076792772, |
| "grad_norm": 0.21153298020362854, |
| "learning_rate": 1.2564398870577476e-06, |
| "loss": 1.9596, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.9305477131564088, |
| "grad_norm": 0.20686852931976318, |
| "learning_rate": 1.2170307976913154e-06, |
| "loss": 2.0872, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.9316770186335404, |
| "grad_norm": 0.2282426804304123, |
| "learning_rate": 1.1782420282035467e-06, |
| "loss": 1.8915, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.932806324110672, |
| "grad_norm": 0.31399673223495483, |
| "learning_rate": 1.1400740718289672e-06, |
| "loss": 1.8344, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.9339356295878035, |
| "grad_norm": 0.24079066514968872, |
| "learning_rate": 1.1025274139079057e-06, |
| "loss": 2.0116, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.935064935064935, |
| "grad_norm": 0.2544754147529602, |
| "learning_rate": 1.0656025318803165e-06, |
| "loss": 1.7656, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.9361942405420667, |
| "grad_norm": 0.32379066944122314, |
| "learning_rate": 1.029299895279684e-06, |
| "loss": 1.8436, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.9373235460191982, |
| "grad_norm": 0.33525246381759644, |
| "learning_rate": 9.93619965727105e-07, |
| "loss": 1.5118, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.9384528514963297, |
| "grad_norm": 0.29879966378211975, |
| "learning_rate": 9.58563196925366e-07, |
| "loss": 1.8298, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.9395821569734614, |
| "grad_norm": 0.2798129916191101, |
| "learning_rate": 9.241300346532255e-07, |
| "loss": 1.9266, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.9407114624505929, |
| "grad_norm": 0.3247831463813782, |
| "learning_rate": 8.903209167596848e-07, |
| "loss": 1.6367, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.9418407679277244, |
| "grad_norm": 0.6924816966056824, |
| "learning_rate": 8.571362731584653e-07, |
| "loss": 1.73, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.9429700734048561, |
| "grad_norm": 0.33074456453323364, |
| "learning_rate": 8.245765258225402e-07, |
| "loss": 1.3488, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.9440993788819876, |
| "grad_norm": 0.31922397017478943, |
| "learning_rate": 7.926420887787444e-07, |
| "loss": 1.8534, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.9452286843591191, |
| "grad_norm": 0.48577114939689636, |
| "learning_rate": 7.613333681025236e-07, |
| "loss": 1.5371, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.9463579898362507, |
| "grad_norm": 0.3482813239097595, |
| "learning_rate": 7.306507619127767e-07, |
| "loss": 1.777, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.9474872953133823, |
| "grad_norm": 0.40237051248550415, |
| "learning_rate": 7.005946603667768e-07, |
| "loss": 1.9003, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.9486166007905138, |
| "grad_norm": 0.3908424377441406, |
| "learning_rate": 6.711654456552364e-07, |
| "loss": 1.3752, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.9497459062676454, |
| "grad_norm": 0.5101377964019775, |
| "learning_rate": 6.423634919974164e-07, |
| "loss": 1.7104, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.950875211744777, |
| "grad_norm": 0.45392563939094543, |
| "learning_rate": 6.141891656363863e-07, |
| "loss": 1.6423, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.9520045172219085, |
| "grad_norm": 0.4994739592075348, |
| "learning_rate": 5.866428248343603e-07, |
| "loss": 0.9681, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.9531338226990401, |
| "grad_norm": 0.7889621257781982, |
| "learning_rate": 5.59724819868157e-07, |
| "loss": 1.4515, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.9542631281761716, |
| "grad_norm": 0.7431004643440247, |
| "learning_rate": 5.334354930247087e-07, |
| "loss": 0.5782, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.9553924336533032, |
| "grad_norm": 0.8190014362335205, |
| "learning_rate": 5.077751785967588e-07, |
| "loss": 1.1897, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.9565217391304348, |
| "grad_norm": 0.7467794418334961, |
| "learning_rate": 4.827442028785655e-07, |
| "loss": 1.0159, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.9576510446075663, |
| "grad_norm": 1.3388633728027344, |
| "learning_rate": 4.5834288416178874e-07, |
| "loss": 1.6963, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.9587803500846979, |
| "grad_norm": 1.1506050825119019, |
| "learning_rate": 4.3457153273140925e-07, |
| "loss": 1.1707, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.9599096555618295, |
| "grad_norm": 2.7924423217773438, |
| "learning_rate": 4.114304508618105e-07, |
| "loss": 2.2042, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.961038961038961, |
| "grad_norm": 0.20497886836528778, |
| "learning_rate": 3.8891993281293117e-07, |
| "loss": 0.7544, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.9621682665160926, |
| "grad_norm": 0.17141655087471008, |
| "learning_rate": 3.670402648264959e-07, |
| "loss": 1.5099, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.9632975719932242, |
| "grad_norm": 0.19558130204677582, |
| "learning_rate": 3.45791725122413e-07, |
| "loss": 1.5086, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.9644268774703557, |
| "grad_norm": 0.2082233726978302, |
| "learning_rate": 3.2517458389521027e-07, |
| "loss": 1.5123, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.9655561829474872, |
| "grad_norm": 0.20731768012046814, |
| "learning_rate": 3.0518910331062135e-07, |
| "loss": 1.4448, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.9666854884246189, |
| "grad_norm": 0.18717971444129944, |
| "learning_rate": 2.8583553750223255e-07, |
| "loss": 1.8793, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.9678147939017504, |
| "grad_norm": 0.17500977218151093, |
| "learning_rate": 2.67114132568258e-07, |
| "loss": 1.9831, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.968944099378882, |
| "grad_norm": 0.19684267044067383, |
| "learning_rate": 2.490251265683974e-07, |
| "loss": 1.741, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.9700734048560136, |
| "grad_norm": 0.19779165089130402, |
| "learning_rate": 2.3156874952085516e-07, |
| "loss": 1.6896, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.9712027103331451, |
| "grad_norm": 0.2639060616493225, |
| "learning_rate": 2.14745223399343e-07, |
| "loss": 1.3446, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.9723320158102767, |
| "grad_norm": 0.18881270289421082, |
| "learning_rate": 1.9855476213033185e-07, |
| "loss": 1.4884, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.9734613212874083, |
| "grad_norm": 0.23728030920028687, |
| "learning_rate": 1.82997571590271e-07, |
| "loss": 1.6801, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.9745906267645398, |
| "grad_norm": 0.3492504358291626, |
| "learning_rate": 1.6807384960301208e-07, |
| "loss": 1.8541, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.9757199322416714, |
| "grad_norm": 0.20422044396400452, |
| "learning_rate": 1.5378378593726706e-07, |
| "loss": 1.6448, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.9768492377188029, |
| "grad_norm": 0.21626874804496765, |
| "learning_rate": 1.4012756230421532e-07, |
| "loss": 1.8438, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.9779785431959345, |
| "grad_norm": 0.2580612301826477, |
| "learning_rate": 1.271053523551613e-07, |
| "loss": 1.9015, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.9791078486730661, |
| "grad_norm": 0.23967643082141876, |
| "learning_rate": 1.1471732167938065e-07, |
| "loss": 1.7772, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.9802371541501976, |
| "grad_norm": 0.2442159354686737, |
| "learning_rate": 1.0296362780195524e-07, |
| "loss": 1.6739, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.9813664596273292, |
| "grad_norm": 0.4127430021762848, |
| "learning_rate": 9.184442018180805e-08, |
| "loss": 2.2228, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.9824957651044608, |
| "grad_norm": 0.2699223756790161, |
| "learning_rate": 8.13598402097937e-08, |
| "loss": 1.9218, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.9836250705815923, |
| "grad_norm": 0.24486035108566284, |
| "learning_rate": 7.151002120688865e-08, |
| "loss": 1.7938, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.9847543760587238, |
| "grad_norm": 0.3202971816062927, |
| "learning_rate": 6.229508842251486e-08, |
| "loss": 1.57, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.9858836815358555, |
| "grad_norm": 0.3089115619659424, |
| "learning_rate": 5.371515903293545e-08, |
| "loss": 1.5306, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.987012987012987, |
| "grad_norm": 0.48857423663139343, |
| "learning_rate": 4.5770342139761504e-08, |
| "loss": 1.3286, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.9881422924901185, |
| "grad_norm": 0.33920764923095703, |
| "learning_rate": 3.8460738768586465e-08, |
| "loss": 1.7378, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.9892715979672502, |
| "grad_norm": 0.45418545603752136, |
| "learning_rate": 3.1786441867659446e-08, |
| "loss": 1.5101, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.9904009034443817, |
| "grad_norm": 0.7004565000534058, |
| "learning_rate": 2.57475363067472e-08, |
| "loss": 1.2948, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.9915302089215132, |
| "grad_norm": 0.5156087875366211, |
| "learning_rate": 2.0344098876040608e-08, |
| "loss": 1.9831, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.9926595143986449, |
| "grad_norm": 0.5740581750869751, |
| "learning_rate": 1.557619828516099e-08, |
| "loss": 1.2267, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.9937888198757764, |
| "grad_norm": 0.675317645072937, |
| "learning_rate": 1.1443895162305263e-08, |
| "loss": 1.1948, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.9949181253529079, |
| "grad_norm": 0.5960811972618103, |
| "learning_rate": 7.947242053479853e-09, |
| "loss": 1.5265, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.9960474308300395, |
| "grad_norm": 0.5866406559944153, |
| "learning_rate": 5.086283421801286e-09, |
| "loss": 0.8661, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.9971767363071711, |
| "grad_norm": 0.910371720790863, |
| "learning_rate": 2.861055646968813e-09, |
| "loss": 1.4349, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.9983060417843026, |
| "grad_norm": 0.9026908278465271, |
| "learning_rate": 1.2715870247870244e-09, |
| "loss": 1.5981, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.9994353472614342, |
| "grad_norm": 1.4996330738067627, |
| "learning_rate": 3.178977667883665e-10, |
| "loss": 1.5431, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.9994353472614342, |
| "eval_loss": 1.513724684715271, |
| "eval_runtime": 17.4374, |
| "eval_samples_per_second": 42.782, |
| "eval_steps_per_second": 10.724, |
| "step": 885 |
| }, |
| { |
| "epoch": 1.0008469791078487, |
| "grad_norm": 3.4714646339416504, |
| "learning_rate": 0.0, |
| "loss": 2.7846, |
| "step": 886 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 886, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 222, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.4350477361676288e+17, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|