| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 1611, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0006207324643078833, |
| "grad_norm": 4.0, |
| "learning_rate": 0.0, |
| "loss": 1.179443359375, |
| "num_tokens": 79611.0, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0012414649286157666, |
| "grad_norm": 4.59375, |
| "learning_rate": 6.122448979591837e-07, |
| "loss": 1.44921875, |
| "num_tokens": 151724.0, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.00186219739292365, |
| "grad_norm": 4.5, |
| "learning_rate": 1.2244897959183673e-06, |
| "loss": 1.38330078125, |
| "num_tokens": 236392.0, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.002482929857231533, |
| "grad_norm": 4.375, |
| "learning_rate": 1.836734693877551e-06, |
| "loss": 1.268798828125, |
| "num_tokens": 318898.0, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0031036623215394167, |
| "grad_norm": 4.875, |
| "learning_rate": 2.4489795918367347e-06, |
| "loss": 1.47998046875, |
| "num_tokens": 397311.0, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0037243947858473, |
| "grad_norm": 4.0625, |
| "learning_rate": 3.0612244897959185e-06, |
| "loss": 1.22265625, |
| "num_tokens": 475395.0, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.004345127250155183, |
| "grad_norm": 4.25, |
| "learning_rate": 3.673469387755102e-06, |
| "loss": 1.25286865234375, |
| "num_tokens": 552206.0, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.004965859714463066, |
| "grad_norm": 3.78125, |
| "learning_rate": 4.2857142857142855e-06, |
| "loss": 1.16796875, |
| "num_tokens": 627230.0, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.00558659217877095, |
| "grad_norm": 3.390625, |
| "learning_rate": 4.897959183673469e-06, |
| "loss": 1.222900390625, |
| "num_tokens": 707653.0, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.006207324643078833, |
| "grad_norm": 3.28125, |
| "learning_rate": 5.510204081632653e-06, |
| "loss": 1.28173828125, |
| "num_tokens": 791267.0, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.006828057107386716, |
| "grad_norm": 2.734375, |
| "learning_rate": 6.122448979591837e-06, |
| "loss": 1.248046875, |
| "num_tokens": 867206.0, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.0074487895716946, |
| "grad_norm": 2.265625, |
| "learning_rate": 6.734693877551021e-06, |
| "loss": 1.0118408203125, |
| "num_tokens": 938194.0, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.008069522036002483, |
| "grad_norm": 2.078125, |
| "learning_rate": 7.346938775510204e-06, |
| "loss": 1.130615234375, |
| "num_tokens": 1015387.0, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.008690254500310366, |
| "grad_norm": 1.8359375, |
| "learning_rate": 7.959183673469388e-06, |
| "loss": 1.085205078125, |
| "num_tokens": 1100738.0, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.00931098696461825, |
| "grad_norm": 1.7421875, |
| "learning_rate": 8.571428571428571e-06, |
| "loss": 1.151123046875, |
| "num_tokens": 1179837.0, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.009931719428926133, |
| "grad_norm": 1.5859375, |
| "learning_rate": 9.183673469387756e-06, |
| "loss": 1.054931640625, |
| "num_tokens": 1267114.0, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.010552451893234015, |
| "grad_norm": 1.5390625, |
| "learning_rate": 9.795918367346939e-06, |
| "loss": 0.9664306640625, |
| "num_tokens": 1343079.0, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.0111731843575419, |
| "grad_norm": 1.3984375, |
| "learning_rate": 1.0408163265306123e-05, |
| "loss": 1.0679931640625, |
| "num_tokens": 1432157.0, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.011793916821849782, |
| "grad_norm": 1.3671875, |
| "learning_rate": 1.1020408163265306e-05, |
| "loss": 1.0408935546875, |
| "num_tokens": 1505923.0, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.012414649286157667, |
| "grad_norm": 1.2578125, |
| "learning_rate": 1.163265306122449e-05, |
| "loss": 1.0843505859375, |
| "num_tokens": 1588504.0, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.01303538175046555, |
| "grad_norm": 1.078125, |
| "learning_rate": 1.2244897959183674e-05, |
| "loss": 0.90338134765625, |
| "num_tokens": 1668359.0, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.013656114214773432, |
| "grad_norm": 1.0, |
| "learning_rate": 1.2857142857142857e-05, |
| "loss": 0.9005126953125, |
| "num_tokens": 1745555.0, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.014276846679081317, |
| "grad_norm": 0.9453125, |
| "learning_rate": 1.3469387755102042e-05, |
| "loss": 0.880615234375, |
| "num_tokens": 1826162.0, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.0148975791433892, |
| "grad_norm": 1.0, |
| "learning_rate": 1.4081632653061225e-05, |
| "loss": 0.9219970703125, |
| "num_tokens": 1907592.0, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.015518311607697082, |
| "grad_norm": 0.98046875, |
| "learning_rate": 1.4693877551020408e-05, |
| "loss": 0.845458984375, |
| "num_tokens": 1984816.0, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.016139044072004966, |
| "grad_norm": 0.98828125, |
| "learning_rate": 1.530612244897959e-05, |
| "loss": 0.8896484375, |
| "num_tokens": 2065689.0, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.01675977653631285, |
| "grad_norm": 0.93359375, |
| "learning_rate": 1.5918367346938776e-05, |
| "loss": 0.86383056640625, |
| "num_tokens": 2145214.0, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.01738050900062073, |
| "grad_norm": 0.92578125, |
| "learning_rate": 1.6530612244897957e-05, |
| "loss": 0.79644775390625, |
| "num_tokens": 2227084.0, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.018001241464928614, |
| "grad_norm": 0.87109375, |
| "learning_rate": 1.7142857142857142e-05, |
| "loss": 0.81396484375, |
| "num_tokens": 2309170.0, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.0186219739292365, |
| "grad_norm": 0.890625, |
| "learning_rate": 1.7755102040816327e-05, |
| "loss": 0.732086181640625, |
| "num_tokens": 2386568.0, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.019242706393544383, |
| "grad_norm": 0.85546875, |
| "learning_rate": 1.836734693877551e-05, |
| "loss": 0.6558837890625, |
| "num_tokens": 2465738.0, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.019863438857852266, |
| "grad_norm": 0.9375, |
| "learning_rate": 1.8979591836734696e-05, |
| "loss": 0.671142578125, |
| "num_tokens": 2544797.0, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.020484171322160148, |
| "grad_norm": 0.82421875, |
| "learning_rate": 1.9591836734693877e-05, |
| "loss": 0.74334716796875, |
| "num_tokens": 2629296.0, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.02110490378646803, |
| "grad_norm": 0.79296875, |
| "learning_rate": 2.0204081632653062e-05, |
| "loss": 0.687713623046875, |
| "num_tokens": 2717226.0, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.021725636250775917, |
| "grad_norm": 0.90625, |
| "learning_rate": 2.0816326530612247e-05, |
| "loss": 0.6878662109375, |
| "num_tokens": 2796213.0, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.0223463687150838, |
| "grad_norm": 0.8359375, |
| "learning_rate": 2.1428571428571428e-05, |
| "loss": 0.61114501953125, |
| "num_tokens": 2881296.0, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.022967101179391682, |
| "grad_norm": 0.79296875, |
| "learning_rate": 2.2040816326530613e-05, |
| "loss": 0.55914306640625, |
| "num_tokens": 2960034.0, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.023587833643699565, |
| "grad_norm": 0.77734375, |
| "learning_rate": 2.2653061224489794e-05, |
| "loss": 0.5269775390625, |
| "num_tokens": 3038779.0, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.024208566108007448, |
| "grad_norm": 0.73828125, |
| "learning_rate": 2.326530612244898e-05, |
| "loss": 0.63275146484375, |
| "num_tokens": 3115602.0, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.024829298572315334, |
| "grad_norm": 0.828125, |
| "learning_rate": 2.3877551020408164e-05, |
| "loss": 0.6881103515625, |
| "num_tokens": 3189503.0, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.025450031036623216, |
| "grad_norm": 0.859375, |
| "learning_rate": 2.448979591836735e-05, |
| "loss": 0.78192138671875, |
| "num_tokens": 3269301.0, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.0260707635009311, |
| "grad_norm": 0.6875, |
| "learning_rate": 2.5102040816326533e-05, |
| "loss": 0.49462890625, |
| "num_tokens": 3346752.0, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.02669149596523898, |
| "grad_norm": 0.79296875, |
| "learning_rate": 2.5714285714285714e-05, |
| "loss": 0.63861083984375, |
| "num_tokens": 3429216.0, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.027312228429546864, |
| "grad_norm": 0.76171875, |
| "learning_rate": 2.63265306122449e-05, |
| "loss": 0.61676025390625, |
| "num_tokens": 3508782.0, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.027932960893854747, |
| "grad_norm": 0.7890625, |
| "learning_rate": 2.6938775510204084e-05, |
| "loss": 0.59075927734375, |
| "num_tokens": 3597953.0, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.028553693358162633, |
| "grad_norm": 0.8671875, |
| "learning_rate": 2.7551020408163265e-05, |
| "loss": 0.5614013671875, |
| "num_tokens": 3672684.0, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.029174425822470516, |
| "grad_norm": 0.87890625, |
| "learning_rate": 2.816326530612245e-05, |
| "loss": 0.49676513671875, |
| "num_tokens": 3749794.0, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.0297951582867784, |
| "grad_norm": 0.73046875, |
| "learning_rate": 2.877551020408163e-05, |
| "loss": 0.454132080078125, |
| "num_tokens": 3827513.0, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.03041589075108628, |
| "grad_norm": 0.859375, |
| "learning_rate": 2.9387755102040816e-05, |
| "loss": 0.4815673828125, |
| "num_tokens": 3905091.0, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.031036623215394164, |
| "grad_norm": 0.9375, |
| "learning_rate": 3e-05, |
| "loss": 0.5006103515625, |
| "num_tokens": 3984283.0, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.03165735567970205, |
| "grad_norm": 0.74609375, |
| "learning_rate": 2.9999969661171756e-05, |
| "loss": 0.46038818359375, |
| "num_tokens": 4068007.0, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.03227808814400993, |
| "grad_norm": 0.76953125, |
| "learning_rate": 2.9999878644809757e-05, |
| "loss": 0.421356201171875, |
| "num_tokens": 4149338.0, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.032898820608317815, |
| "grad_norm": 0.94921875, |
| "learning_rate": 2.9999726951282172e-05, |
| "loss": 0.4410400390625, |
| "num_tokens": 4224195.0, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.0335195530726257, |
| "grad_norm": 0.72265625, |
| "learning_rate": 2.999951458120263e-05, |
| "loss": 0.359619140625, |
| "num_tokens": 4298713.0, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.03414028553693358, |
| "grad_norm": 0.7421875, |
| "learning_rate": 2.999924153543021e-05, |
| "loss": 0.36236572265625, |
| "num_tokens": 4375052.0, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.03476101800124146, |
| "grad_norm": 0.7421875, |
| "learning_rate": 2.9998907815069425e-05, |
| "loss": 0.424468994140625, |
| "num_tokens": 4452913.0, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.035381750465549346, |
| "grad_norm": 1.046875, |
| "learning_rate": 2.9998513421470235e-05, |
| "loss": 0.473388671875, |
| "num_tokens": 4538717.0, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.03600248292985723, |
| "grad_norm": 0.71484375, |
| "learning_rate": 2.9998058356228036e-05, |
| "loss": 0.37030029296875, |
| "num_tokens": 4608998.0, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.03662321539416512, |
| "grad_norm": 0.76171875, |
| "learning_rate": 2.999754262118364e-05, |
| "loss": 0.47113037109375, |
| "num_tokens": 4686972.0, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.037243947858473, |
| "grad_norm": 0.7578125, |
| "learning_rate": 2.9996966218423296e-05, |
| "loss": 0.3825836181640625, |
| "num_tokens": 4773118.0, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.03786468032278088, |
| "grad_norm": 0.640625, |
| "learning_rate": 2.9996329150278646e-05, |
| "loss": 0.30316162109375, |
| "num_tokens": 4841124.0, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.038485412787088766, |
| "grad_norm": 0.796875, |
| "learning_rate": 2.999563141932675e-05, |
| "loss": 0.401214599609375, |
| "num_tokens": 4926293.0, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.03910614525139665, |
| "grad_norm": 0.72265625, |
| "learning_rate": 2.999487302839005e-05, |
| "loss": 0.38299560546875, |
| "num_tokens": 5003517.0, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.03972687771570453, |
| "grad_norm": 1.2890625, |
| "learning_rate": 2.9994053980536368e-05, |
| "loss": 0.44708251953125, |
| "num_tokens": 5090773.0, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.040347610180012414, |
| "grad_norm": 0.671875, |
| "learning_rate": 2.9993174279078906e-05, |
| "loss": 0.3594970703125, |
| "num_tokens": 5179656.0, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.040968342644320296, |
| "grad_norm": 0.67578125, |
| "learning_rate": 2.9992233927576207e-05, |
| "loss": 0.31390380859375, |
| "num_tokens": 5258523.0, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.04158907510862818, |
| "grad_norm": 0.84375, |
| "learning_rate": 2.9991232929832165e-05, |
| "loss": 0.340301513671875, |
| "num_tokens": 5334871.0, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.04220980757293606, |
| "grad_norm": 0.66015625, |
| "learning_rate": 2.9990171289895983e-05, |
| "loss": 0.305419921875, |
| "num_tokens": 5414795.0, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.04283054003724395, |
| "grad_norm": 0.8359375, |
| "learning_rate": 2.998904901206219e-05, |
| "loss": 0.321746826171875, |
| "num_tokens": 5493669.0, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.043451272501551834, |
| "grad_norm": 0.734375, |
| "learning_rate": 2.9987866100870597e-05, |
| "loss": 0.335052490234375, |
| "num_tokens": 5570527.0, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.04407200496585972, |
| "grad_norm": 0.6171875, |
| "learning_rate": 2.998662256110629e-05, |
| "loss": 0.316741943359375, |
| "num_tokens": 5648297.0, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.0446927374301676, |
| "grad_norm": 0.62890625, |
| "learning_rate": 2.9985318397799606e-05, |
| "loss": 0.27838134765625, |
| "num_tokens": 5725943.0, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.04531346989447548, |
| "grad_norm": 0.6796875, |
| "learning_rate": 2.9983953616226113e-05, |
| "loss": 0.3489532470703125, |
| "num_tokens": 5801236.0, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.045934202358783364, |
| "grad_norm": 0.70703125, |
| "learning_rate": 2.9982528221906606e-05, |
| "loss": 0.322601318359375, |
| "num_tokens": 5879666.0, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.04655493482309125, |
| "grad_norm": 0.69140625, |
| "learning_rate": 2.9981042220607046e-05, |
| "loss": 0.2409820556640625, |
| "num_tokens": 5959047.0, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.04717566728739913, |
| "grad_norm": 0.66015625, |
| "learning_rate": 2.9979495618338574e-05, |
| "loss": 0.29443359375, |
| "num_tokens": 6039447.0, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.04779639975170701, |
| "grad_norm": 0.625, |
| "learning_rate": 2.997788842135747e-05, |
| "loss": 0.278045654296875, |
| "num_tokens": 6118541.0, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.048417132216014895, |
| "grad_norm": 0.81640625, |
| "learning_rate": 2.9976220636165136e-05, |
| "loss": 0.3525390625, |
| "num_tokens": 6203755.0, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.04903786468032278, |
| "grad_norm": 0.640625, |
| "learning_rate": 2.9974492269508053e-05, |
| "loss": 0.29705810546875, |
| "num_tokens": 6279655.0, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.04965859714463067, |
| "grad_norm": 0.89453125, |
| "learning_rate": 2.997270332837777e-05, |
| "loss": 0.302825927734375, |
| "num_tokens": 6363657.0, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.05027932960893855, |
| "grad_norm": 0.6640625, |
| "learning_rate": 2.9970853820010878e-05, |
| "loss": 0.325653076171875, |
| "num_tokens": 6445428.0, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.05090006207324643, |
| "grad_norm": 0.640625, |
| "learning_rate": 2.9968943751888953e-05, |
| "loss": 0.306793212890625, |
| "num_tokens": 6521708.0, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.051520794537554315, |
| "grad_norm": 0.55859375, |
| "learning_rate": 2.996697313173857e-05, |
| "loss": 0.2458343505859375, |
| "num_tokens": 6599847.0, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.0521415270018622, |
| "grad_norm": 0.58984375, |
| "learning_rate": 2.9964941967531228e-05, |
| "loss": 0.2946014404296875, |
| "num_tokens": 6683876.0, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.05276225946617008, |
| "grad_norm": 0.71484375, |
| "learning_rate": 2.9962850267483353e-05, |
| "loss": 0.323455810546875, |
| "num_tokens": 6757931.0, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.05338299193047796, |
| "grad_norm": 0.65234375, |
| "learning_rate": 2.9960698040056236e-05, |
| "loss": 0.272979736328125, |
| "num_tokens": 6835781.0, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.054003724394785846, |
| "grad_norm": 0.54296875, |
| "learning_rate": 2.9958485293956025e-05, |
| "loss": 0.280120849609375, |
| "num_tokens": 6916505.0, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.05462445685909373, |
| "grad_norm": 0.65625, |
| "learning_rate": 2.9956212038133666e-05, |
| "loss": 0.31390380859375, |
| "num_tokens": 6996864.0, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.05524518932340161, |
| "grad_norm": 0.57421875, |
| "learning_rate": 2.995387828178488e-05, |
| "loss": 0.2998046875, |
| "num_tokens": 7076621.0, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.055865921787709494, |
| "grad_norm": 0.54296875, |
| "learning_rate": 2.995148403435013e-05, |
| "loss": 0.2631988525390625, |
| "num_tokens": 7155470.0, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.05648665425201738, |
| "grad_norm": 0.5546875, |
| "learning_rate": 2.994902930551457e-05, |
| "loss": 0.212493896484375, |
| "num_tokens": 7231794.0, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.057107386716325266, |
| "grad_norm": 0.6875, |
| "learning_rate": 2.9946514105208e-05, |
| "loss": 0.25518798828125, |
| "num_tokens": 7311756.0, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.05772811918063315, |
| "grad_norm": 0.486328125, |
| "learning_rate": 2.994393844360487e-05, |
| "loss": 0.1932830810546875, |
| "num_tokens": 7389961.0, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.05834885164494103, |
| "grad_norm": 0.62109375, |
| "learning_rate": 2.9941302331124173e-05, |
| "loss": 0.2710113525390625, |
| "num_tokens": 7468362.0, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.058969584109248914, |
| "grad_norm": 0.486328125, |
| "learning_rate": 2.993860577842946e-05, |
| "loss": 0.245635986328125, |
| "num_tokens": 7556759.0, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.0595903165735568, |
| "grad_norm": 0.69140625, |
| "learning_rate": 2.9935848796428756e-05, |
| "loss": 0.31903076171875, |
| "num_tokens": 7640993.0, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.06021104903786468, |
| "grad_norm": 0.482421875, |
| "learning_rate": 2.993303139627455e-05, |
| "loss": 0.293212890625, |
| "num_tokens": 7723016.0, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.06083178150217256, |
| "grad_norm": 0.5859375, |
| "learning_rate": 2.9930153589363717e-05, |
| "loss": 0.2859039306640625, |
| "num_tokens": 7800241.0, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.061452513966480445, |
| "grad_norm": 0.5, |
| "learning_rate": 2.9927215387337502e-05, |
| "loss": 0.294921875, |
| "num_tokens": 7888881.0, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.06207324643078833, |
| "grad_norm": 0.484375, |
| "learning_rate": 2.9924216802081445e-05, |
| "loss": 0.22564697265625, |
| "num_tokens": 7980394.0, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.06269397889509622, |
| "grad_norm": 0.54296875, |
| "learning_rate": 2.9921157845725362e-05, |
| "loss": 0.2675628662109375, |
| "num_tokens": 8057881.0, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.0633147113594041, |
| "grad_norm": 0.5, |
| "learning_rate": 2.9918038530643273e-05, |
| "loss": 0.2433929443359375, |
| "num_tokens": 8140601.0, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.06393544382371198, |
| "grad_norm": 0.56640625, |
| "learning_rate": 2.991485886945335e-05, |
| "loss": 0.22802734375, |
| "num_tokens": 8225138.0, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.06455617628801986, |
| "grad_norm": 0.4921875, |
| "learning_rate": 2.9911618875017895e-05, |
| "loss": 0.263763427734375, |
| "num_tokens": 8304312.0, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.06517690875232775, |
| "grad_norm": 0.494140625, |
| "learning_rate": 2.990831856044326e-05, |
| "loss": 0.23895263671875, |
| "num_tokens": 8391772.0, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.06579764121663563, |
| "grad_norm": 0.458984375, |
| "learning_rate": 2.99049579390798e-05, |
| "loss": 0.2308502197265625, |
| "num_tokens": 8470529.0, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.06641837368094351, |
| "grad_norm": 0.498046875, |
| "learning_rate": 2.9901537024521817e-05, |
| "loss": 0.2149200439453125, |
| "num_tokens": 8566396.0, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.0670391061452514, |
| "grad_norm": 0.5, |
| "learning_rate": 2.9898055830607526e-05, |
| "loss": 0.265045166015625, |
| "num_tokens": 8643246.0, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.06765983860955928, |
| "grad_norm": 0.5546875, |
| "learning_rate": 2.9894514371418968e-05, |
| "loss": 0.3111572265625, |
| "num_tokens": 8720654.0, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.06828057107386716, |
| "grad_norm": 0.5078125, |
| "learning_rate": 2.989091266128197e-05, |
| "loss": 0.23321533203125, |
| "num_tokens": 8793996.0, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.06890130353817504, |
| "grad_norm": 0.46484375, |
| "learning_rate": 2.9887250714766094e-05, |
| "loss": 0.2656402587890625, |
| "num_tokens": 8871414.0, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.06952203600248293, |
| "grad_norm": 0.462890625, |
| "learning_rate": 2.988352854668456e-05, |
| "loss": 0.234954833984375, |
| "num_tokens": 8961178.0, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.07014276846679081, |
| "grad_norm": 0.54296875, |
| "learning_rate": 2.9879746172094195e-05, |
| "loss": 0.2712860107421875, |
| "num_tokens": 9042073.0, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.07076350093109869, |
| "grad_norm": 0.482421875, |
| "learning_rate": 2.987590360629537e-05, |
| "loss": 0.248870849609375, |
| "num_tokens": 9113971.0, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.07138423339540657, |
| "grad_norm": 0.4765625, |
| "learning_rate": 2.9872000864831953e-05, |
| "loss": 0.2508392333984375, |
| "num_tokens": 9200614.0, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.07200496585971446, |
| "grad_norm": 0.390625, |
| "learning_rate": 2.986803796349122e-05, |
| "loss": 0.17407989501953125, |
| "num_tokens": 9288686.0, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.07262569832402235, |
| "grad_norm": 0.494140625, |
| "learning_rate": 2.986401491830381e-05, |
| "loss": 0.272369384765625, |
| "num_tokens": 9366942.0, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.07324643078833024, |
| "grad_norm": 0.466796875, |
| "learning_rate": 2.9859931745543648e-05, |
| "loss": 0.23114013671875, |
| "num_tokens": 9438305.0, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.07386716325263812, |
| "grad_norm": 0.44140625, |
| "learning_rate": 2.9855788461727905e-05, |
| "loss": 0.2272491455078125, |
| "num_tokens": 9514539.0, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.074487895716946, |
| "grad_norm": 0.474609375, |
| "learning_rate": 2.985158508361688e-05, |
| "loss": 0.2364654541015625, |
| "num_tokens": 9591929.0, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.07510862818125388, |
| "grad_norm": 0.419921875, |
| "learning_rate": 2.984732162821399e-05, |
| "loss": 0.2118682861328125, |
| "num_tokens": 9677202.0, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.07572936064556177, |
| "grad_norm": 0.423828125, |
| "learning_rate": 2.984299811276567e-05, |
| "loss": 0.1953582763671875, |
| "num_tokens": 9756886.0, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.07635009310986965, |
| "grad_norm": 0.400390625, |
| "learning_rate": 2.9838614554761306e-05, |
| "loss": 0.204193115234375, |
| "num_tokens": 9835849.0, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.07697082557417753, |
| "grad_norm": 0.470703125, |
| "learning_rate": 2.9834170971933157e-05, |
| "loss": 0.2611083984375, |
| "num_tokens": 9920661.0, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.07759155803848541, |
| "grad_norm": 0.384765625, |
| "learning_rate": 2.9829667382256313e-05, |
| "loss": 0.1618194580078125, |
| "num_tokens": 10007676.0, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.0782122905027933, |
| "grad_norm": 0.439453125, |
| "learning_rate": 2.9825103803948588e-05, |
| "loss": 0.2403717041015625, |
| "num_tokens": 10093591.0, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.07883302296710118, |
| "grad_norm": 0.48828125, |
| "learning_rate": 2.982048025547046e-05, |
| "loss": 0.23187255859375, |
| "num_tokens": 10166333.0, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.07945375543140906, |
| "grad_norm": 0.53125, |
| "learning_rate": 2.9815796755525012e-05, |
| "loss": 0.3082122802734375, |
| "num_tokens": 10243848.0, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.08007448789571694, |
| "grad_norm": 0.5, |
| "learning_rate": 2.981105332305782e-05, |
| "loss": 0.309478759765625, |
| "num_tokens": 10323097.0, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.08069522036002483, |
| "grad_norm": 0.466796875, |
| "learning_rate": 2.9806249977256914e-05, |
| "loss": 0.2358245849609375, |
| "num_tokens": 10402310.0, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.08131595282433271, |
| "grad_norm": 0.44921875, |
| "learning_rate": 2.9801386737552676e-05, |
| "loss": 0.2324066162109375, |
| "num_tokens": 10480727.0, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.08193668528864059, |
| "grad_norm": 0.48828125, |
| "learning_rate": 2.9796463623617772e-05, |
| "loss": 0.2565460205078125, |
| "num_tokens": 10559472.0, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.08255741775294848, |
| "grad_norm": 0.423828125, |
| "learning_rate": 2.979148065536707e-05, |
| "loss": 0.1997222900390625, |
| "num_tokens": 10635365.0, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.08317815021725636, |
| "grad_norm": 0.48046875, |
| "learning_rate": 2.9786437852957564e-05, |
| "loss": 0.25286865234375, |
| "num_tokens": 10719764.0, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.08379888268156424, |
| "grad_norm": 0.546875, |
| "learning_rate": 2.978133523678828e-05, |
| "loss": 0.28729248046875, |
| "num_tokens": 10798969.0, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.08441961514587212, |
| "grad_norm": 0.41796875, |
| "learning_rate": 2.9776172827500196e-05, |
| "loss": 0.1951141357421875, |
| "num_tokens": 10872762.0, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.08504034761018, |
| "grad_norm": 0.494140625, |
| "learning_rate": 2.9770950645976186e-05, |
| "loss": 0.23980712890625, |
| "num_tokens": 10955939.0, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.0856610800744879, |
| "grad_norm": 0.486328125, |
| "learning_rate": 2.9765668713340883e-05, |
| "loss": 0.2687225341796875, |
| "num_tokens": 11039455.0, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.08628181253879579, |
| "grad_norm": 0.46875, |
| "learning_rate": 2.9760327050960654e-05, |
| "loss": 0.20829010009765625, |
| "num_tokens": 11112387.0, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.08690254500310367, |
| "grad_norm": 0.5234375, |
| "learning_rate": 2.9754925680443464e-05, |
| "loss": 0.289093017578125, |
| "num_tokens": 11192385.0, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.08752327746741155, |
| "grad_norm": 0.48828125, |
| "learning_rate": 2.974946462363881e-05, |
| "loss": 0.253509521484375, |
| "num_tokens": 11272622.0, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.08814400993171943, |
| "grad_norm": 0.474609375, |
| "learning_rate": 2.974394390263764e-05, |
| "loss": 0.2116241455078125, |
| "num_tokens": 11351393.0, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.08876474239602732, |
| "grad_norm": 0.4765625, |
| "learning_rate": 2.973836353977225e-05, |
| "loss": 0.257659912109375, |
| "num_tokens": 11436612.0, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.0893854748603352, |
| "grad_norm": 0.486328125, |
| "learning_rate": 2.9732723557616182e-05, |
| "loss": 0.28607177734375, |
| "num_tokens": 11516523.0, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.09000620732464308, |
| "grad_norm": 0.44140625, |
| "learning_rate": 2.9727023978984175e-05, |
| "loss": 0.235870361328125, |
| "num_tokens": 11602468.0, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.09062693978895096, |
| "grad_norm": 0.494140625, |
| "learning_rate": 2.9721264826932037e-05, |
| "loss": 0.295379638671875, |
| "num_tokens": 11678165.0, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.09124767225325885, |
| "grad_norm": 0.4921875, |
| "learning_rate": 2.9715446124756545e-05, |
| "loss": 0.28414154052734375, |
| "num_tokens": 11757826.0, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.09186840471756673, |
| "grad_norm": 0.4296875, |
| "learning_rate": 2.970956789599539e-05, |
| "loss": 0.1726837158203125, |
| "num_tokens": 11835725.0, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.09248913718187461, |
| "grad_norm": 0.53125, |
| "learning_rate": 2.9703630164427042e-05, |
| "loss": 0.2672882080078125, |
| "num_tokens": 11909671.0, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.0931098696461825, |
| "grad_norm": 0.4765625, |
| "learning_rate": 2.9697632954070684e-05, |
| "loss": 0.24822998046875, |
| "num_tokens": 11985432.0, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.09373060211049038, |
| "grad_norm": 0.466796875, |
| "learning_rate": 2.9691576289186088e-05, |
| "loss": 0.2350006103515625, |
| "num_tokens": 12057130.0, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.09435133457479826, |
| "grad_norm": 0.435546875, |
| "learning_rate": 2.9685460194273538e-05, |
| "loss": 0.235595703125, |
| "num_tokens": 12148835.0, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.09497206703910614, |
| "grad_norm": 0.494140625, |
| "learning_rate": 2.9679284694073717e-05, |
| "loss": 0.27794647216796875, |
| "num_tokens": 12232513.0, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.09559279950341402, |
| "grad_norm": 0.421875, |
| "learning_rate": 2.9673049813567627e-05, |
| "loss": 0.204833984375, |
| "num_tokens": 12315268.0, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.09621353196772191, |
| "grad_norm": 0.443359375, |
| "learning_rate": 2.9666755577976457e-05, |
| "loss": 0.2125244140625, |
| "num_tokens": 12392742.0, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.09683426443202979, |
| "grad_norm": 0.458984375, |
| "learning_rate": 2.9660402012761504e-05, |
| "loss": 0.219146728515625, |
| "num_tokens": 12467919.0, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.09745499689633767, |
| "grad_norm": 0.421875, |
| "learning_rate": 2.9653989143624066e-05, |
| "loss": 0.216033935546875, |
| "num_tokens": 12548976.0, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.09807572936064556, |
| "grad_norm": 0.478515625, |
| "learning_rate": 2.9647516996505335e-05, |
| "loss": 0.1933441162109375, |
| "num_tokens": 12621858.0, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.09869646182495345, |
| "grad_norm": 0.4921875, |
| "learning_rate": 2.9640985597586293e-05, |
| "loss": 0.2196807861328125, |
| "num_tokens": 12699962.0, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.09931719428926133, |
| "grad_norm": 0.4765625, |
| "learning_rate": 2.9634394973287605e-05, |
| "loss": 0.2627105712890625, |
| "num_tokens": 12779843.0, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.09993792675356922, |
| "grad_norm": 0.5078125, |
| "learning_rate": 2.962774515026951e-05, |
| "loss": 0.2523651123046875, |
| "num_tokens": 12855499.0, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.1005586592178771, |
| "grad_norm": 0.455078125, |
| "learning_rate": 2.962103615543172e-05, |
| "loss": 0.22833251953125, |
| "num_tokens": 12938026.0, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.10117939168218498, |
| "grad_norm": 0.416015625, |
| "learning_rate": 2.9614268015913314e-05, |
| "loss": 0.20711517333984375, |
| "num_tokens": 13017379.0, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.10180012414649287, |
| "grad_norm": 0.474609375, |
| "learning_rate": 2.9607440759092608e-05, |
| "loss": 0.26580047607421875, |
| "num_tokens": 13092012.0, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.10242085661080075, |
| "grad_norm": 0.4765625, |
| "learning_rate": 2.960055441258707e-05, |
| "loss": 0.28448486328125, |
| "num_tokens": 13171154.0, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.10304158907510863, |
| "grad_norm": 0.447265625, |
| "learning_rate": 2.9593609004253185e-05, |
| "loss": 0.1904296875, |
| "num_tokens": 13244589.0, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.10366232153941651, |
| "grad_norm": 0.46484375, |
| "learning_rate": 2.9586604562186365e-05, |
| "loss": 0.2339019775390625, |
| "num_tokens": 13320711.0, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.1042830540037244, |
| "grad_norm": 0.41796875, |
| "learning_rate": 2.9579541114720817e-05, |
| "loss": 0.2139434814453125, |
| "num_tokens": 13397936.0, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.10490378646803228, |
| "grad_norm": 0.5078125, |
| "learning_rate": 2.957241869042944e-05, |
| "loss": 0.27524566650390625, |
| "num_tokens": 13476414.0, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.10552451893234016, |
| "grad_norm": 0.490234375, |
| "learning_rate": 2.9565237318123695e-05, |
| "loss": 0.24920654296875, |
| "num_tokens": 13550890.0, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.10614525139664804, |
| "grad_norm": 0.49609375, |
| "learning_rate": 2.955799702685351e-05, |
| "loss": 0.29901123046875, |
| "num_tokens": 13630283.0, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.10676598386095593, |
| "grad_norm": 0.453125, |
| "learning_rate": 2.9550697845907146e-05, |
| "loss": 0.225738525390625, |
| "num_tokens": 13710519.0, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.10738671632526381, |
| "grad_norm": 0.46484375, |
| "learning_rate": 2.9543339804811078e-05, |
| "loss": 0.25389862060546875, |
| "num_tokens": 13789045.0, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.10800744878957169, |
| "grad_norm": 0.421875, |
| "learning_rate": 2.9535922933329887e-05, |
| "loss": 0.2156219482421875, |
| "num_tokens": 13871966.0, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.10862818125387957, |
| "grad_norm": 0.50390625, |
| "learning_rate": 2.9528447261466134e-05, |
| "loss": 0.30413818359375, |
| "num_tokens": 13961486.0, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.10924891371818746, |
| "grad_norm": 0.412109375, |
| "learning_rate": 2.9520912819460233e-05, |
| "loss": 0.178863525390625, |
| "num_tokens": 14039379.0, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.10986964618249534, |
| "grad_norm": 0.443359375, |
| "learning_rate": 2.9513319637790337e-05, |
| "loss": 0.20584869384765625, |
| "num_tokens": 14119943.0, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.11049037864680322, |
| "grad_norm": 0.45703125, |
| "learning_rate": 2.9505667747172212e-05, |
| "loss": 0.25897216796875, |
| "num_tokens": 14203058.0, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.1111111111111111, |
| "grad_norm": 0.486328125, |
| "learning_rate": 2.949795717855911e-05, |
| "loss": 0.25653839111328125, |
| "num_tokens": 14286819.0, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.11173184357541899, |
| "grad_norm": 0.4453125, |
| "learning_rate": 2.9490187963141644e-05, |
| "loss": 0.213531494140625, |
| "num_tokens": 14367159.0, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.11235257603972688, |
| "grad_norm": 0.423828125, |
| "learning_rate": 2.948236013234767e-05, |
| "loss": 0.1999053955078125, |
| "num_tokens": 14445138.0, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.11297330850403477, |
| "grad_norm": 0.421875, |
| "learning_rate": 2.947447371784215e-05, |
| "loss": 0.1924285888671875, |
| "num_tokens": 14524617.0, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.11359404096834265, |
| "grad_norm": 0.515625, |
| "learning_rate": 2.946652875152702e-05, |
| "loss": 0.277069091796875, |
| "num_tokens": 14608040.0, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.11421477343265053, |
| "grad_norm": 0.458984375, |
| "learning_rate": 2.945852526554109e-05, |
| "loss": 0.2434234619140625, |
| "num_tokens": 14689595.0, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.11483550589695841, |
| "grad_norm": 0.47265625, |
| "learning_rate": 2.9450463292259863e-05, |
| "loss": 0.240081787109375, |
| "num_tokens": 14771203.0, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.1154562383612663, |
| "grad_norm": 0.41796875, |
| "learning_rate": 2.944234286429546e-05, |
| "loss": 0.19640350341796875, |
| "num_tokens": 14854070.0, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.11607697082557418, |
| "grad_norm": 0.451171875, |
| "learning_rate": 2.9434164014496445e-05, |
| "loss": 0.183197021484375, |
| "num_tokens": 14928923.0, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.11669770328988206, |
| "grad_norm": 0.453125, |
| "learning_rate": 2.9425926775947713e-05, |
| "loss": 0.26019287109375, |
| "num_tokens": 15010774.0, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.11731843575418995, |
| "grad_norm": 2.171875, |
| "learning_rate": 2.9417631181970354e-05, |
| "loss": 0.225250244140625, |
| "num_tokens": 15093644.0, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.11793916821849783, |
| "grad_norm": 0.451171875, |
| "learning_rate": 2.9409277266121516e-05, |
| "loss": 0.221954345703125, |
| "num_tokens": 15170822.0, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.11855990068280571, |
| "grad_norm": 0.439453125, |
| "learning_rate": 2.9400865062194268e-05, |
| "loss": 0.2058868408203125, |
| "num_tokens": 15243534.0, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.1191806331471136, |
| "grad_norm": 0.396484375, |
| "learning_rate": 2.939239460421746e-05, |
| "loss": 0.15682220458984375, |
| "num_tokens": 15323269.0, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.11980136561142148, |
| "grad_norm": 0.44921875, |
| "learning_rate": 2.93838659264556e-05, |
| "loss": 0.2241363525390625, |
| "num_tokens": 15403461.0, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.12042209807572936, |
| "grad_norm": 0.4609375, |
| "learning_rate": 2.9375279063408706e-05, |
| "loss": 0.2346649169921875, |
| "num_tokens": 15479872.0, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.12104283054003724, |
| "grad_norm": 0.42578125, |
| "learning_rate": 2.9366634049812145e-05, |
| "loss": 0.183074951171875, |
| "num_tokens": 15562255.0, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.12166356300434512, |
| "grad_norm": 0.48046875, |
| "learning_rate": 2.9357930920636537e-05, |
| "loss": 0.245849609375, |
| "num_tokens": 15638673.0, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.122284295468653, |
| "grad_norm": 0.4375, |
| "learning_rate": 2.9349169711087577e-05, |
| "loss": 0.1927642822265625, |
| "num_tokens": 15711371.0, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.12290502793296089, |
| "grad_norm": 0.474609375, |
| "learning_rate": 2.9340350456605908e-05, |
| "loss": 0.2207489013671875, |
| "num_tokens": 15783275.0, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.12352576039726877, |
| "grad_norm": 0.470703125, |
| "learning_rate": 2.9331473192866984e-05, |
| "loss": 0.251373291015625, |
| "num_tokens": 15863857.0, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.12414649286157665, |
| "grad_norm": 0.419921875, |
| "learning_rate": 2.9322537955780902e-05, |
| "loss": 0.19305038452148438, |
| "num_tokens": 15947385.0, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.12476722532588454, |
| "grad_norm": 0.498046875, |
| "learning_rate": 2.931354478149229e-05, |
| "loss": 0.2515716552734375, |
| "num_tokens": 16020854.0, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.12538795779019243, |
| "grad_norm": 0.435546875, |
| "learning_rate": 2.9304493706380112e-05, |
| "loss": 0.2241973876953125, |
| "num_tokens": 16107063.0, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.12600869025450032, |
| "grad_norm": 0.388671875, |
| "learning_rate": 2.929538476705758e-05, |
| "loss": 0.1594696044921875, |
| "num_tokens": 16184032.0, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.1266294227188082, |
| "grad_norm": 0.416015625, |
| "learning_rate": 2.9286218000371973e-05, |
| "loss": 0.189910888671875, |
| "num_tokens": 16267360.0, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.12725015518311608, |
| "grad_norm": 0.494140625, |
| "learning_rate": 2.9276993443404474e-05, |
| "loss": 0.310882568359375, |
| "num_tokens": 16351226.0, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.12787088764742396, |
| "grad_norm": 0.431640625, |
| "learning_rate": 2.9267711133470052e-05, |
| "loss": 0.218902587890625, |
| "num_tokens": 16428433.0, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.12849162011173185, |
| "grad_norm": 0.474609375, |
| "learning_rate": 2.92583711081173e-05, |
| "loss": 0.245269775390625, |
| "num_tokens": 16504509.0, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.12911235257603973, |
| "grad_norm": 0.408203125, |
| "learning_rate": 2.9248973405128273e-05, |
| "loss": 0.204315185546875, |
| "num_tokens": 16588839.0, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.1297330850403476, |
| "grad_norm": 0.4765625, |
| "learning_rate": 2.9239518062518342e-05, |
| "loss": 0.2843780517578125, |
| "num_tokens": 16665978.0, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.1303538175046555, |
| "grad_norm": 0.51953125, |
| "learning_rate": 2.923000511853604e-05, |
| "loss": 0.2311859130859375, |
| "num_tokens": 16740112.0, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.13097454996896338, |
| "grad_norm": 0.466796875, |
| "learning_rate": 2.9220434611662913e-05, |
| "loss": 0.241546630859375, |
| "num_tokens": 16818959.0, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.13159528243327126, |
| "grad_norm": 0.45703125, |
| "learning_rate": 2.921080658061336e-05, |
| "loss": 0.2093353271484375, |
| "num_tokens": 16891523.0, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.13221601489757914, |
| "grad_norm": 0.47265625, |
| "learning_rate": 2.9201121064334465e-05, |
| "loss": 0.2313995361328125, |
| "num_tokens": 16973790.0, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.13283674736188703, |
| "grad_norm": 0.416015625, |
| "learning_rate": 2.919137810200586e-05, |
| "loss": 0.1971282958984375, |
| "num_tokens": 17054316.0, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.1334574798261949, |
| "grad_norm": 0.486328125, |
| "learning_rate": 2.9181577733039554e-05, |
| "loss": 0.2489013671875, |
| "num_tokens": 17136683.0, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.1340782122905028, |
| "grad_norm": 0.431640625, |
| "learning_rate": 2.9171719997079775e-05, |
| "loss": 0.2093505859375, |
| "num_tokens": 17219420.0, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.13469894475481067, |
| "grad_norm": 0.53125, |
| "learning_rate": 2.9161804934002807e-05, |
| "loss": 0.2738037109375, |
| "num_tokens": 17296903.0, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.13531967721911856, |
| "grad_norm": 0.400390625, |
| "learning_rate": 2.915183258391684e-05, |
| "loss": 0.1625823974609375, |
| "num_tokens": 17376132.0, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.13594040968342644, |
| "grad_norm": 0.380859375, |
| "learning_rate": 2.9141802987161794e-05, |
| "loss": 0.184234619140625, |
| "num_tokens": 17463223.0, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.13656114214773432, |
| "grad_norm": 0.392578125, |
| "learning_rate": 2.9131716184309167e-05, |
| "loss": 0.14013671875, |
| "num_tokens": 17536271.0, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.1371818746120422, |
| "grad_norm": 0.423828125, |
| "learning_rate": 2.912157221616186e-05, |
| "loss": 0.217742919921875, |
| "num_tokens": 17620143.0, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.1378026070763501, |
| "grad_norm": 0.46484375, |
| "learning_rate": 2.9111371123754022e-05, |
| "loss": 0.265472412109375, |
| "num_tokens": 17699602.0, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.13842333954065797, |
| "grad_norm": 0.4453125, |
| "learning_rate": 2.9101112948350876e-05, |
| "loss": 0.18377685546875, |
| "num_tokens": 17775105.0, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.13904407200496585, |
| "grad_norm": 0.451171875, |
| "learning_rate": 2.909079773144856e-05, |
| "loss": 0.186309814453125, |
| "num_tokens": 17846816.0, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.13966480446927373, |
| "grad_norm": 0.49609375, |
| "learning_rate": 2.9080425514773955e-05, |
| "loss": 0.27982330322265625, |
| "num_tokens": 17925471.0, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.14028553693358162, |
| "grad_norm": 0.4453125, |
| "learning_rate": 2.9069996340284513e-05, |
| "loss": 0.22800445556640625, |
| "num_tokens": 18001683.0, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.1409062693978895, |
| "grad_norm": 0.490234375, |
| "learning_rate": 2.905951025016809e-05, |
| "loss": 0.27423095703125, |
| "num_tokens": 18085662.0, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.14152700186219738, |
| "grad_norm": 0.48828125, |
| "learning_rate": 2.9048967286842784e-05, |
| "loss": 0.23690032958984375, |
| "num_tokens": 18163019.0, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.14214773432650527, |
| "grad_norm": 0.474609375, |
| "learning_rate": 2.9038367492956735e-05, |
| "loss": 0.289642333984375, |
| "num_tokens": 18244487.0, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.14276846679081315, |
| "grad_norm": 0.447265625, |
| "learning_rate": 2.9027710911388e-05, |
| "loss": 0.211639404296875, |
| "num_tokens": 18324110.0, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.14338919925512103, |
| "grad_norm": 0.40625, |
| "learning_rate": 2.9016997585244335e-05, |
| "loss": 0.1941986083984375, |
| "num_tokens": 18410072.0, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.1440099317194289, |
| "grad_norm": 0.4375, |
| "learning_rate": 2.900622755786304e-05, |
| "loss": 0.21749114990234375, |
| "num_tokens": 18488841.0, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.1446306641837368, |
| "grad_norm": 0.46875, |
| "learning_rate": 2.8995400872810786e-05, |
| "loss": 0.266448974609375, |
| "num_tokens": 18580198.0, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.1452513966480447, |
| "grad_norm": 0.447265625, |
| "learning_rate": 2.8984517573883426e-05, |
| "loss": 0.225830078125, |
| "num_tokens": 18661317.0, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.1458721291123526, |
| "grad_norm": 0.48828125, |
| "learning_rate": 2.8973577705105835e-05, |
| "loss": 0.221343994140625, |
| "num_tokens": 18736060.0, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.14649286157666047, |
| "grad_norm": 0.462890625, |
| "learning_rate": 2.896258131073172e-05, |
| "loss": 0.20946502685546875, |
| "num_tokens": 18813565.0, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.14711359404096835, |
| "grad_norm": 0.42578125, |
| "learning_rate": 2.8951528435243447e-05, |
| "loss": 0.1993255615234375, |
| "num_tokens": 18892569.0, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.14773432650527624, |
| "grad_norm": 0.462890625, |
| "learning_rate": 2.8940419123351843e-05, |
| "loss": 0.246368408203125, |
| "num_tokens": 18973433.0, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.14835505896958412, |
| "grad_norm": 0.439453125, |
| "learning_rate": 2.8929253419996055e-05, |
| "loss": 0.23016357421875, |
| "num_tokens": 19058301.0, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.148975791433892, |
| "grad_norm": 0.51171875, |
| "learning_rate": 2.8918031370343328e-05, |
| "loss": 0.31201171875, |
| "num_tokens": 19140189.0, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.14959652389819988, |
| "grad_norm": 0.453125, |
| "learning_rate": 2.890675301978883e-05, |
| "loss": 0.2154998779296875, |
| "num_tokens": 19215946.0, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.15021725636250777, |
| "grad_norm": 0.451171875, |
| "learning_rate": 2.8895418413955498e-05, |
| "loss": 0.22357177734375, |
| "num_tokens": 19295232.0, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.15083798882681565, |
| "grad_norm": 0.46484375, |
| "learning_rate": 2.888402759869382e-05, |
| "loss": 0.2400360107421875, |
| "num_tokens": 19371531.0, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.15145872129112353, |
| "grad_norm": 0.4296875, |
| "learning_rate": 2.8872580620081654e-05, |
| "loss": 0.22708892822265625, |
| "num_tokens": 19456098.0, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.15207945375543142, |
| "grad_norm": 0.470703125, |
| "learning_rate": 2.886107752442406e-05, |
| "loss": 0.2177734375, |
| "num_tokens": 19524671.0, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.1527001862197393, |
| "grad_norm": 0.423828125, |
| "learning_rate": 2.8849518358253095e-05, |
| "loss": 0.19512939453125, |
| "num_tokens": 19598978.0, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.15332091868404718, |
| "grad_norm": 0.42578125, |
| "learning_rate": 2.8837903168327634e-05, |
| "loss": 0.204193115234375, |
| "num_tokens": 19686637.0, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.15394165114835506, |
| "grad_norm": 0.408203125, |
| "learning_rate": 2.8826232001633174e-05, |
| "loss": 0.19580841064453125, |
| "num_tokens": 19769433.0, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.15456238361266295, |
| "grad_norm": 0.47265625, |
| "learning_rate": 2.8814504905381663e-05, |
| "loss": 0.2563629150390625, |
| "num_tokens": 19849719.0, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.15518311607697083, |
| "grad_norm": 0.37109375, |
| "learning_rate": 2.8802721927011267e-05, |
| "loss": 0.13668441772460938, |
| "num_tokens": 19927576.0, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.1558038485412787, |
| "grad_norm": 0.470703125, |
| "learning_rate": 2.879088311418623e-05, |
| "loss": 0.23772048950195312, |
| "num_tokens": 20001718.0, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.1564245810055866, |
| "grad_norm": 0.4375, |
| "learning_rate": 2.8778988514796644e-05, |
| "loss": 0.227874755859375, |
| "num_tokens": 20088169.0, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.15704531346989448, |
| "grad_norm": 0.5, |
| "learning_rate": 2.876703817695827e-05, |
| "loss": 0.272552490234375, |
| "num_tokens": 20166172.0, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.15766604593420236, |
| "grad_norm": 0.453125, |
| "learning_rate": 2.8755032149012336e-05, |
| "loss": 0.27325439453125, |
| "num_tokens": 20246825.0, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.15828677839851024, |
| "grad_norm": 0.40234375, |
| "learning_rate": 2.8742970479525362e-05, |
| "loss": 0.19056320190429688, |
| "num_tokens": 20330694.0, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.15890751086281812, |
| "grad_norm": 0.4765625, |
| "learning_rate": 2.8730853217288933e-05, |
| "loss": 0.2268218994140625, |
| "num_tokens": 20406311.0, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.159528243327126, |
| "grad_norm": 0.44921875, |
| "learning_rate": 2.8718680411319516e-05, |
| "loss": 0.2295379638671875, |
| "num_tokens": 20484812.0, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.1601489757914339, |
| "grad_norm": 0.408203125, |
| "learning_rate": 2.870645211085827e-05, |
| "loss": 0.17767333984375, |
| "num_tokens": 20560209.0, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.16076970825574177, |
| "grad_norm": 0.4765625, |
| "learning_rate": 2.869416836537083e-05, |
| "loss": 0.2900238037109375, |
| "num_tokens": 20640291.0, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.16139044072004965, |
| "grad_norm": 0.376953125, |
| "learning_rate": 2.8681829224547133e-05, |
| "loss": 0.184112548828125, |
| "num_tokens": 20726081.0, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.16201117318435754, |
| "grad_norm": 0.458984375, |
| "learning_rate": 2.8669434738301184e-05, |
| "loss": 0.23430633544921875, |
| "num_tokens": 20808045.0, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.16263190564866542, |
| "grad_norm": 0.458984375, |
| "learning_rate": 2.8656984956770873e-05, |
| "loss": 0.1941375732421875, |
| "num_tokens": 20890073.0, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.1632526381129733, |
| "grad_norm": 0.455078125, |
| "learning_rate": 2.8644479930317776e-05, |
| "loss": 0.19208526611328125, |
| "num_tokens": 20962225.0, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.16387337057728119, |
| "grad_norm": 0.44140625, |
| "learning_rate": 2.8631919709526935e-05, |
| "loss": 0.20654296875, |
| "num_tokens": 21043747.0, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.16449410304158907, |
| "grad_norm": 0.48828125, |
| "learning_rate": 2.8619304345206668e-05, |
| "loss": 0.2682342529296875, |
| "num_tokens": 21120671.0, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.16511483550589695, |
| "grad_norm": 0.4609375, |
| "learning_rate": 2.860663388838836e-05, |
| "loss": 0.2409515380859375, |
| "num_tokens": 21204173.0, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.16573556797020483, |
| "grad_norm": 0.482421875, |
| "learning_rate": 2.859390839032625e-05, |
| "loss": 0.2242584228515625, |
| "num_tokens": 21281345.0, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.16635630043451272, |
| "grad_norm": 0.447265625, |
| "learning_rate": 2.858112790249723e-05, |
| "loss": 0.2214813232421875, |
| "num_tokens": 21357815.0, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.1669770328988206, |
| "grad_norm": 0.462890625, |
| "learning_rate": 2.8568292476600642e-05, |
| "loss": 0.2177734375, |
| "num_tokens": 21431126.0, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.16759776536312848, |
| "grad_norm": 0.515625, |
| "learning_rate": 2.8555402164558058e-05, |
| "loss": 0.2936553955078125, |
| "num_tokens": 21510733.0, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.16821849782743636, |
| "grad_norm": 0.462890625, |
| "learning_rate": 2.854245701851307e-05, |
| "loss": 0.2332000732421875, |
| "num_tokens": 21592082.0, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.16883923029174425, |
| "grad_norm": 0.455078125, |
| "learning_rate": 2.852945709083108e-05, |
| "loss": 0.2206573486328125, |
| "num_tokens": 21661617.0, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.16945996275605213, |
| "grad_norm": 0.4140625, |
| "learning_rate": 2.8516402434099106e-05, |
| "loss": 0.2027130126953125, |
| "num_tokens": 21738094.0, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.17008069522036, |
| "grad_norm": 0.45703125, |
| "learning_rate": 2.8503293101125542e-05, |
| "loss": 0.231048583984375, |
| "num_tokens": 21814944.0, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.1707014276846679, |
| "grad_norm": 0.419921875, |
| "learning_rate": 2.8490129144939968e-05, |
| "loss": 0.217987060546875, |
| "num_tokens": 21901526.0, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.1713221601489758, |
| "grad_norm": 0.4140625, |
| "learning_rate": 2.847691061879291e-05, |
| "loss": 0.1964874267578125, |
| "num_tokens": 21972946.0, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.1719428926132837, |
| "grad_norm": 0.4140625, |
| "learning_rate": 2.8463637576155654e-05, |
| "loss": 0.21360015869140625, |
| "num_tokens": 22058419.0, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.17256362507759157, |
| "grad_norm": 0.484375, |
| "learning_rate": 2.8450310070720002e-05, |
| "loss": 0.252197265625, |
| "num_tokens": 22136372.0, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.17318435754189945, |
| "grad_norm": 0.466796875, |
| "learning_rate": 2.843692815639808e-05, |
| "loss": 0.24320220947265625, |
| "num_tokens": 22214837.0, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.17380509000620734, |
| "grad_norm": 0.486328125, |
| "learning_rate": 2.842349188732209e-05, |
| "loss": 0.2666015625, |
| "num_tokens": 22292215.0, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.17442582247051522, |
| "grad_norm": 0.40625, |
| "learning_rate": 2.8410001317844136e-05, |
| "loss": 0.19649505615234375, |
| "num_tokens": 22374478.0, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.1750465549348231, |
| "grad_norm": 0.40625, |
| "learning_rate": 2.839645650253595e-05, |
| "loss": 0.2139434814453125, |
| "num_tokens": 22455908.0, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.17566728739913098, |
| "grad_norm": 0.470703125, |
| "learning_rate": 2.8382857496188714e-05, |
| "loss": 0.2554473876953125, |
| "num_tokens": 22531972.0, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.17628801986343887, |
| "grad_norm": 0.4453125, |
| "learning_rate": 2.836920435381281e-05, |
| "loss": 0.21221923828125, |
| "num_tokens": 22607411.0, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.17690875232774675, |
| "grad_norm": 0.470703125, |
| "learning_rate": 2.8355497130637625e-05, |
| "loss": 0.257293701171875, |
| "num_tokens": 22687098.0, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.17752948479205463, |
| "grad_norm": 0.46484375, |
| "learning_rate": 2.83417358821113e-05, |
| "loss": 0.2622222900390625, |
| "num_tokens": 22768796.0, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.17815021725636251, |
| "grad_norm": 0.46875, |
| "learning_rate": 2.8327920663900523e-05, |
| "loss": 0.220947265625, |
| "num_tokens": 22841705.0, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.1787709497206704, |
| "grad_norm": 0.45703125, |
| "learning_rate": 2.8314051531890297e-05, |
| "loss": 0.2252655029296875, |
| "num_tokens": 22922884.0, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.17939168218497828, |
| "grad_norm": 0.48828125, |
| "learning_rate": 2.830012854218372e-05, |
| "loss": 0.23340606689453125, |
| "num_tokens": 22996151.0, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.18001241464928616, |
| "grad_norm": 0.51171875, |
| "learning_rate": 2.828615175110175e-05, |
| "loss": 0.3091583251953125, |
| "num_tokens": 23079834.0, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.18063314711359404, |
| "grad_norm": 0.423828125, |
| "learning_rate": 2.827212121518298e-05, |
| "loss": 0.19625091552734375, |
| "num_tokens": 23160510.0, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.18125387957790193, |
| "grad_norm": 0.48046875, |
| "learning_rate": 2.8258036991183414e-05, |
| "loss": 0.2607421875, |
| "num_tokens": 23240819.0, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.1818746120422098, |
| "grad_norm": 0.3984375, |
| "learning_rate": 2.824389913607624e-05, |
| "loss": 0.1927032470703125, |
| "num_tokens": 23323433.0, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.1824953445065177, |
| "grad_norm": 0.396484375, |
| "learning_rate": 2.8229707707051572e-05, |
| "loss": 0.1893768310546875, |
| "num_tokens": 23399330.0, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.18311607697082558, |
| "grad_norm": 0.478515625, |
| "learning_rate": 2.821546276151626e-05, |
| "loss": 0.2425689697265625, |
| "num_tokens": 23472334.0, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.18373680943513346, |
| "grad_norm": 0.423828125, |
| "learning_rate": 2.8201164357093632e-05, |
| "loss": 0.206390380859375, |
| "num_tokens": 23550900.0, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.18435754189944134, |
| "grad_norm": 0.3671875, |
| "learning_rate": 2.8186812551623267e-05, |
| "loss": 0.1666717529296875, |
| "num_tokens": 23630296.0, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.18497827436374922, |
| "grad_norm": 0.466796875, |
| "learning_rate": 2.817240740316076e-05, |
| "loss": 0.26280975341796875, |
| "num_tokens": 23704573.0, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.1855990068280571, |
| "grad_norm": 0.470703125, |
| "learning_rate": 2.8157948969977484e-05, |
| "loss": 0.2276763916015625, |
| "num_tokens": 23784762.0, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.186219739292365, |
| "grad_norm": 0.44140625, |
| "learning_rate": 2.8143437310560365e-05, |
| "loss": 0.23040008544921875, |
| "num_tokens": 23864416.0, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.18684047175667287, |
| "grad_norm": 0.478515625, |
| "learning_rate": 2.812887248361164e-05, |
| "loss": 0.2584381103515625, |
| "num_tokens": 23945170.0, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.18746120422098075, |
| "grad_norm": 0.482421875, |
| "learning_rate": 2.81142545480486e-05, |
| "loss": 0.2735137939453125, |
| "num_tokens": 24026647.0, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.18808193668528864, |
| "grad_norm": 0.46875, |
| "learning_rate": 2.809958356300339e-05, |
| "loss": 0.2475738525390625, |
| "num_tokens": 24114143.0, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.18870266914959652, |
| "grad_norm": 0.451171875, |
| "learning_rate": 2.808485958782275e-05, |
| "loss": 0.224700927734375, |
| "num_tokens": 24187227.0, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.1893234016139044, |
| "grad_norm": 0.474609375, |
| "learning_rate": 2.807008268206776e-05, |
| "loss": 0.259368896484375, |
| "num_tokens": 24260031.0, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.18994413407821228, |
| "grad_norm": 0.376953125, |
| "learning_rate": 2.805525290551362e-05, |
| "loss": 0.15789794921875, |
| "num_tokens": 24337315.0, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.19056486654252017, |
| "grad_norm": 0.44921875, |
| "learning_rate": 2.80403703181494e-05, |
| "loss": 0.215179443359375, |
| "num_tokens": 24412328.0, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.19118559900682805, |
| "grad_norm": 0.48046875, |
| "learning_rate": 2.8025434980177813e-05, |
| "loss": 0.233306884765625, |
| "num_tokens": 24490780.0, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.19180633147113593, |
| "grad_norm": 0.4375, |
| "learning_rate": 2.8010446952014937e-05, |
| "loss": 0.2044830322265625, |
| "num_tokens": 24565732.0, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.19242706393544382, |
| "grad_norm": 0.46875, |
| "learning_rate": 2.799540629429e-05, |
| "loss": 0.19797515869140625, |
| "num_tokens": 24639970.0, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.1930477963997517, |
| "grad_norm": 0.416015625, |
| "learning_rate": 2.7980313067845133e-05, |
| "loss": 0.1732025146484375, |
| "num_tokens": 24723023.0, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.19366852886405958, |
| "grad_norm": 0.478515625, |
| "learning_rate": 2.79651673337351e-05, |
| "loss": 0.2372283935546875, |
| "num_tokens": 24801917.0, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.19428926132836746, |
| "grad_norm": 0.47265625, |
| "learning_rate": 2.7949969153227085e-05, |
| "loss": 0.2459564208984375, |
| "num_tokens": 24886212.0, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.19490999379267535, |
| "grad_norm": 0.42578125, |
| "learning_rate": 2.793471858780042e-05, |
| "loss": 0.21136474609375, |
| "num_tokens": 24973533.0, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.19553072625698323, |
| "grad_norm": 0.4609375, |
| "learning_rate": 2.7919415699146334e-05, |
| "loss": 0.216400146484375, |
| "num_tokens": 25053324.0, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.1961514587212911, |
| "grad_norm": 0.4296875, |
| "learning_rate": 2.7904060549167736e-05, |
| "loss": 0.1983642578125, |
| "num_tokens": 25129276.0, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.196772191185599, |
| "grad_norm": 0.390625, |
| "learning_rate": 2.788865319997892e-05, |
| "loss": 0.1563720703125, |
| "num_tokens": 25205592.0, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.1973929236499069, |
| "grad_norm": 0.4765625, |
| "learning_rate": 2.7873193713905338e-05, |
| "loss": 0.24542999267578125, |
| "num_tokens": 25290666.0, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.1980136561142148, |
| "grad_norm": 0.51953125, |
| "learning_rate": 2.7857682153483353e-05, |
| "loss": 0.27899169921875, |
| "num_tokens": 25375158.0, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.19863438857852267, |
| "grad_norm": 0.390625, |
| "learning_rate": 2.7842118581459978e-05, |
| "loss": 0.167083740234375, |
| "num_tokens": 25457530.0, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.19925512104283055, |
| "grad_norm": 0.453125, |
| "learning_rate": 2.782650306079261e-05, |
| "loss": 0.2344512939453125, |
| "num_tokens": 25536451.0, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.19987585350713843, |
| "grad_norm": 0.416015625, |
| "learning_rate": 2.78108356546488e-05, |
| "loss": 0.184478759765625, |
| "num_tokens": 25613175.0, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.20049658597144632, |
| "grad_norm": 0.419921875, |
| "learning_rate": 2.779511642640598e-05, |
| "loss": 0.196533203125, |
| "num_tokens": 25692610.0, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.2011173184357542, |
| "grad_norm": 0.4375, |
| "learning_rate": 2.777934543965121e-05, |
| "loss": 0.21210479736328125, |
| "num_tokens": 25771170.0, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.20173805090006208, |
| "grad_norm": 0.388671875, |
| "learning_rate": 2.776352275818093e-05, |
| "loss": 0.18120574951171875, |
| "num_tokens": 25853303.0, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.20235878336436997, |
| "grad_norm": 0.486328125, |
| "learning_rate": 2.774764844600069e-05, |
| "loss": 0.243988037109375, |
| "num_tokens": 25932872.0, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.20297951582867785, |
| "grad_norm": 0.4140625, |
| "learning_rate": 2.7731722567324885e-05, |
| "loss": 0.18221282958984375, |
| "num_tokens": 26012918.0, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.20360024829298573, |
| "grad_norm": 0.462890625, |
| "learning_rate": 2.7715745186576516e-05, |
| "loss": 0.23598480224609375, |
| "num_tokens": 26093519.0, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.2042209807572936, |
| "grad_norm": 0.435546875, |
| "learning_rate": 2.7699716368386923e-05, |
| "loss": 0.20574951171875, |
| "num_tokens": 26172889.0, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.2048417132216015, |
| "grad_norm": 0.423828125, |
| "learning_rate": 2.7683636177595513e-05, |
| "loss": 0.208740234375, |
| "num_tokens": 26255983.0, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.20546244568590938, |
| "grad_norm": 0.431640625, |
| "learning_rate": 2.7667504679249503e-05, |
| "loss": 0.189605712890625, |
| "num_tokens": 26334304.0, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.20608317815021726, |
| "grad_norm": 0.4609375, |
| "learning_rate": 2.765132193860366e-05, |
| "loss": 0.20249176025390625, |
| "num_tokens": 26409949.0, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.20670391061452514, |
| "grad_norm": 0.474609375, |
| "learning_rate": 2.7635088021120044e-05, |
| "loss": 0.2257232666015625, |
| "num_tokens": 26482852.0, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.20732464307883303, |
| "grad_norm": 0.53125, |
| "learning_rate": 2.7618802992467718e-05, |
| "loss": 0.2966766357421875, |
| "num_tokens": 26559417.0, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.2079453755431409, |
| "grad_norm": 0.44921875, |
| "learning_rate": 2.760246691852251e-05, |
| "loss": 0.226654052734375, |
| "num_tokens": 26642833.0, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.2085661080074488, |
| "grad_norm": 0.484375, |
| "learning_rate": 2.758607986536673e-05, |
| "loss": 0.28375244140625, |
| "num_tokens": 26719865.0, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.20918684047175667, |
| "grad_norm": 0.412109375, |
| "learning_rate": 2.7569641899288914e-05, |
| "loss": 0.2262420654296875, |
| "num_tokens": 26804222.0, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.20980757293606456, |
| "grad_norm": 0.42578125, |
| "learning_rate": 2.755315308678354e-05, |
| "loss": 0.17522430419921875, |
| "num_tokens": 26886407.0, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.21042830540037244, |
| "grad_norm": 0.419921875, |
| "learning_rate": 2.7536613494550783e-05, |
| "loss": 0.1876220703125, |
| "num_tokens": 26962878.0, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.21104903786468032, |
| "grad_norm": 0.396484375, |
| "learning_rate": 2.7520023189496216e-05, |
| "loss": 0.17600250244140625, |
| "num_tokens": 27041188.0, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.2116697703289882, |
| "grad_norm": 0.43359375, |
| "learning_rate": 2.7503382238730563e-05, |
| "loss": 0.19580078125, |
| "num_tokens": 27112851.0, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.2122905027932961, |
| "grad_norm": 0.396484375, |
| "learning_rate": 2.748669070956942e-05, |
| "loss": 0.17499542236328125, |
| "num_tokens": 27192259.0, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.21291123525760397, |
| "grad_norm": 0.43359375, |
| "learning_rate": 2.746994866953297e-05, |
| "loss": 0.210540771484375, |
| "num_tokens": 27276402.0, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.21353196772191185, |
| "grad_norm": 0.416015625, |
| "learning_rate": 2.7453156186345737e-05, |
| "loss": 0.195098876953125, |
| "num_tokens": 27358171.0, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.21415270018621974, |
| "grad_norm": 0.451171875, |
| "learning_rate": 2.7436313327936292e-05, |
| "loss": 0.2425689697265625, |
| "num_tokens": 27435691.0, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.21477343265052762, |
| "grad_norm": 0.41796875, |
| "learning_rate": 2.7419420162436974e-05, |
| "loss": 0.18865966796875, |
| "num_tokens": 27512760.0, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.2153941651148355, |
| "grad_norm": 0.466796875, |
| "learning_rate": 2.740247675818363e-05, |
| "loss": 0.218170166015625, |
| "num_tokens": 27591082.0, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.21601489757914338, |
| "grad_norm": 0.4296875, |
| "learning_rate": 2.7385483183715335e-05, |
| "loss": 0.2278900146484375, |
| "num_tokens": 27673367.0, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.21663563004345127, |
| "grad_norm": 0.408203125, |
| "learning_rate": 2.7368439507774104e-05, |
| "loss": 0.200927734375, |
| "num_tokens": 27759491.0, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.21725636250775915, |
| "grad_norm": 0.45703125, |
| "learning_rate": 2.7351345799304623e-05, |
| "loss": 0.24560546875, |
| "num_tokens": 27841661.0, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.21787709497206703, |
| "grad_norm": 0.5078125, |
| "learning_rate": 2.7334202127453975e-05, |
| "loss": 0.2792816162109375, |
| "num_tokens": 27923609.0, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.21849782743637491, |
| "grad_norm": 0.419921875, |
| "learning_rate": 2.7317008561571343e-05, |
| "loss": 0.2261962890625, |
| "num_tokens": 28008160.0, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.2191185599006828, |
| "grad_norm": 0.439453125, |
| "learning_rate": 2.729976517120775e-05, |
| "loss": 0.194000244140625, |
| "num_tokens": 28092708.0, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.21973929236499068, |
| "grad_norm": 0.419921875, |
| "learning_rate": 2.7282472026115762e-05, |
| "loss": 0.1892242431640625, |
| "num_tokens": 28169079.0, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.22036002482929856, |
| "grad_norm": 0.439453125, |
| "learning_rate": 2.7265129196249213e-05, |
| "loss": 0.22521209716796875, |
| "num_tokens": 28247713.0, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.22098075729360644, |
| "grad_norm": 0.498046875, |
| "learning_rate": 2.724773675176293e-05, |
| "loss": 0.2608642578125, |
| "num_tokens": 28323224.0, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.22160148975791433, |
| "grad_norm": 0.44140625, |
| "learning_rate": 2.7230294763012418e-05, |
| "loss": 0.2362060546875, |
| "num_tokens": 28407032.0, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.2222222222222222, |
| "grad_norm": 0.392578125, |
| "learning_rate": 2.721280330055362e-05, |
| "loss": 0.219757080078125, |
| "num_tokens": 28493250.0, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.2228429546865301, |
| "grad_norm": 0.49609375, |
| "learning_rate": 2.7195262435142587e-05, |
| "loss": 0.30975341796875, |
| "num_tokens": 28574535.0, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.22346368715083798, |
| "grad_norm": 0.388671875, |
| "learning_rate": 2.7177672237735235e-05, |
| "loss": 0.206451416015625, |
| "num_tokens": 28658332.0, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.22408441961514589, |
| "grad_norm": 0.427734375, |
| "learning_rate": 2.716003277948703e-05, |
| "loss": 0.21539306640625, |
| "num_tokens": 28742370.0, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.22470515207945377, |
| "grad_norm": 0.3984375, |
| "learning_rate": 2.71423441317527e-05, |
| "loss": 0.1778411865234375, |
| "num_tokens": 28823115.0, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.22532588454376165, |
| "grad_norm": 0.40234375, |
| "learning_rate": 2.7124606366085967e-05, |
| "loss": 0.17374038696289062, |
| "num_tokens": 28905963.0, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.22594661700806953, |
| "grad_norm": 0.388671875, |
| "learning_rate": 2.7106819554239222e-05, |
| "loss": 0.16725921630859375, |
| "num_tokens": 28986427.0, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.22656734947237742, |
| "grad_norm": 0.42578125, |
| "learning_rate": 2.7088983768163275e-05, |
| "loss": 0.23508071899414062, |
| "num_tokens": 29072582.0, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.2271880819366853, |
| "grad_norm": 0.447265625, |
| "learning_rate": 2.7071099080007035e-05, |
| "loss": 0.240570068359375, |
| "num_tokens": 29149869.0, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.22780881440099318, |
| "grad_norm": 0.447265625, |
| "learning_rate": 2.705316556211724e-05, |
| "loss": 0.23856353759765625, |
| "num_tokens": 29227206.0, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.22842954686530106, |
| "grad_norm": 0.43359375, |
| "learning_rate": 2.703518328703814e-05, |
| "loss": 0.237396240234375, |
| "num_tokens": 29312112.0, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.22905027932960895, |
| "grad_norm": 0.37890625, |
| "learning_rate": 2.701715232751122e-05, |
| "loss": 0.17934417724609375, |
| "num_tokens": 29394833.0, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.22967101179391683, |
| "grad_norm": 0.474609375, |
| "learning_rate": 2.6999072756474917e-05, |
| "loss": 0.2477874755859375, |
| "num_tokens": 29471415.0, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.2302917442582247, |
| "grad_norm": 0.451171875, |
| "learning_rate": 2.6980944647064286e-05, |
| "loss": 0.2259368896484375, |
| "num_tokens": 29551931.0, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.2309124767225326, |
| "grad_norm": 0.48046875, |
| "learning_rate": 2.6962768072610742e-05, |
| "loss": 0.18096923828125, |
| "num_tokens": 29623327.0, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.23153320918684048, |
| "grad_norm": 0.384765625, |
| "learning_rate": 2.694454310664175e-05, |
| "loss": 0.17133331298828125, |
| "num_tokens": 29700283.0, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.23215394165114836, |
| "grad_norm": 0.400390625, |
| "learning_rate": 2.6926269822880526e-05, |
| "loss": 0.192291259765625, |
| "num_tokens": 29781555.0, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.23277467411545624, |
| "grad_norm": 0.419921875, |
| "learning_rate": 2.6907948295245736e-05, |
| "loss": 0.1882476806640625, |
| "num_tokens": 29856292.0, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.23339540657976413, |
| "grad_norm": 0.43359375, |
| "learning_rate": 2.6889578597851206e-05, |
| "loss": 0.225677490234375, |
| "num_tokens": 29934472.0, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.234016139044072, |
| "grad_norm": 0.39453125, |
| "learning_rate": 2.6871160805005617e-05, |
| "loss": 0.19443511962890625, |
| "num_tokens": 30012351.0, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.2346368715083799, |
| "grad_norm": 0.404296875, |
| "learning_rate": 2.6852694991212197e-05, |
| "loss": 0.1674346923828125, |
| "num_tokens": 30093512.0, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.23525760397268777, |
| "grad_norm": 0.3828125, |
| "learning_rate": 2.6834181231168437e-05, |
| "loss": 0.20513153076171875, |
| "num_tokens": 30175960.0, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.23587833643699566, |
| "grad_norm": 0.447265625, |
| "learning_rate": 2.6815619599765775e-05, |
| "loss": 0.248565673828125, |
| "num_tokens": 30260818.0, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.23649906890130354, |
| "grad_norm": 0.38671875, |
| "learning_rate": 2.6797010172089297e-05, |
| "loss": 0.21155548095703125, |
| "num_tokens": 30345711.0, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.23711980136561142, |
| "grad_norm": 0.439453125, |
| "learning_rate": 2.6778353023417434e-05, |
| "loss": 0.21074676513671875, |
| "num_tokens": 30423097.0, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.2377405338299193, |
| "grad_norm": 0.455078125, |
| "learning_rate": 2.6759648229221656e-05, |
| "loss": 0.212890625, |
| "num_tokens": 30500698.0, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.2383612662942272, |
| "grad_norm": 0.375, |
| "learning_rate": 2.6740895865166167e-05, |
| "loss": 0.1587066650390625, |
| "num_tokens": 30579854.0, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.23898199875853507, |
| "grad_norm": 0.48828125, |
| "learning_rate": 2.67220960071076e-05, |
| "loss": 0.253936767578125, |
| "num_tokens": 30654761.0, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.23960273122284295, |
| "grad_norm": 0.4140625, |
| "learning_rate": 2.670324873109472e-05, |
| "loss": 0.2159576416015625, |
| "num_tokens": 30738386.0, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.24022346368715083, |
| "grad_norm": 0.4921875, |
| "learning_rate": 2.668435411336808e-05, |
| "loss": 0.2732086181640625, |
| "num_tokens": 30816440.0, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.24084419615145872, |
| "grad_norm": 0.427734375, |
| "learning_rate": 2.6665412230359768e-05, |
| "loss": 0.204620361328125, |
| "num_tokens": 30895695.0, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.2414649286157666, |
| "grad_norm": 0.443359375, |
| "learning_rate": 2.6646423158693048e-05, |
| "loss": 0.21826171875, |
| "num_tokens": 30975880.0, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.24208566108007448, |
| "grad_norm": 0.421875, |
| "learning_rate": 2.6627386975182083e-05, |
| "loss": 0.18476104736328125, |
| "num_tokens": 31053925.0, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.24270639354438237, |
| "grad_norm": 0.3671875, |
| "learning_rate": 2.66083037568316e-05, |
| "loss": 0.17284393310546875, |
| "num_tokens": 31134437.0, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.24332712600869025, |
| "grad_norm": 0.400390625, |
| "learning_rate": 2.6589173580836607e-05, |
| "loss": 0.19146728515625, |
| "num_tokens": 31219283.0, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.24394785847299813, |
| "grad_norm": 0.46484375, |
| "learning_rate": 2.6569996524582047e-05, |
| "loss": 0.25022125244140625, |
| "num_tokens": 31294567.0, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.244568590937306, |
| "grad_norm": 0.40625, |
| "learning_rate": 2.6550772665642505e-05, |
| "loss": 0.2071380615234375, |
| "num_tokens": 31377068.0, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.2451893234016139, |
| "grad_norm": 0.419921875, |
| "learning_rate": 2.6531502081781902e-05, |
| "loss": 0.17620849609375, |
| "num_tokens": 31457437.0, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.24581005586592178, |
| "grad_norm": 0.439453125, |
| "learning_rate": 2.6512184850953164e-05, |
| "loss": 0.2180328369140625, |
| "num_tokens": 31536927.0, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.24643078833022966, |
| "grad_norm": 0.455078125, |
| "learning_rate": 2.64928210512979e-05, |
| "loss": 0.2259521484375, |
| "num_tokens": 31615885.0, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.24705152079453754, |
| "grad_norm": 0.384765625, |
| "learning_rate": 2.647341076114612e-05, |
| "loss": 0.1813812255859375, |
| "num_tokens": 31693647.0, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.24767225325884543, |
| "grad_norm": 0.384765625, |
| "learning_rate": 2.645395405901588e-05, |
| "loss": 0.16904449462890625, |
| "num_tokens": 31768454.0, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.2482929857231533, |
| "grad_norm": 0.427734375, |
| "learning_rate": 2.6434451023612983e-05, |
| "loss": 0.2210235595703125, |
| "num_tokens": 31843631.0, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.2489137181874612, |
| "grad_norm": 0.478515625, |
| "learning_rate": 2.641490173383067e-05, |
| "loss": 0.2405548095703125, |
| "num_tokens": 31920118.0, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.24953445065176907, |
| "grad_norm": 0.4609375, |
| "learning_rate": 2.6395306268749274e-05, |
| "loss": 0.242462158203125, |
| "num_tokens": 32001556.0, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.250155183116077, |
| "grad_norm": 0.439453125, |
| "learning_rate": 2.6375664707635922e-05, |
| "loss": 0.2298736572265625, |
| "num_tokens": 32082266.0, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.25077591558038487, |
| "grad_norm": 0.3828125, |
| "learning_rate": 2.6355977129944212e-05, |
| "loss": 0.1910858154296875, |
| "num_tokens": 32165058.0, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.25139664804469275, |
| "grad_norm": 0.396484375, |
| "learning_rate": 2.6336243615313876e-05, |
| "loss": 0.18426513671875, |
| "num_tokens": 32244505.0, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.25201738050900063, |
| "grad_norm": 0.447265625, |
| "learning_rate": 2.6316464243570476e-05, |
| "loss": 0.260040283203125, |
| "num_tokens": 32329605.0, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.2526381129733085, |
| "grad_norm": 0.4296875, |
| "learning_rate": 2.6296639094725075e-05, |
| "loss": 0.19622802734375, |
| "num_tokens": 32406959.0, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.2532588454376164, |
| "grad_norm": 0.4375, |
| "learning_rate": 2.6276768248973918e-05, |
| "loss": 0.2441864013671875, |
| "num_tokens": 32491486.0, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.2538795779019243, |
| "grad_norm": 0.419921875, |
| "learning_rate": 2.6256851786698084e-05, |
| "loss": 0.208648681640625, |
| "num_tokens": 32572553.0, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.25450031036623216, |
| "grad_norm": 0.42578125, |
| "learning_rate": 2.62368897884632e-05, |
| "loss": 0.2089385986328125, |
| "num_tokens": 32655823.0, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.25512104283054005, |
| "grad_norm": 0.455078125, |
| "learning_rate": 2.621688233501907e-05, |
| "loss": 0.22441864013671875, |
| "num_tokens": 32736633.0, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.25574177529484793, |
| "grad_norm": 0.423828125, |
| "learning_rate": 2.61968295072994e-05, |
| "loss": 0.204986572265625, |
| "num_tokens": 32816818.0, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.2563625077591558, |
| "grad_norm": 0.435546875, |
| "learning_rate": 2.617673138642143e-05, |
| "loss": 0.23944854736328125, |
| "num_tokens": 32899253.0, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.2569832402234637, |
| "grad_norm": 0.466796875, |
| "learning_rate": 2.6156588053685606e-05, |
| "loss": 0.21295166015625, |
| "num_tokens": 32974912.0, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.2576039726877716, |
| "grad_norm": 0.412109375, |
| "learning_rate": 2.6136399590575288e-05, |
| "loss": 0.202301025390625, |
| "num_tokens": 33052181.0, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.25822470515207946, |
| "grad_norm": 0.3984375, |
| "learning_rate": 2.611616607875638e-05, |
| "loss": 0.21979904174804688, |
| "num_tokens": 33145296.0, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.25884543761638734, |
| "grad_norm": 0.50390625, |
| "learning_rate": 2.6095887600077022e-05, |
| "loss": 0.2843780517578125, |
| "num_tokens": 33223399.0, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.2594661700806952, |
| "grad_norm": 0.451171875, |
| "learning_rate": 2.607556423656725e-05, |
| "loss": 0.2333831787109375, |
| "num_tokens": 33301036.0, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.2600869025450031, |
| "grad_norm": 0.33984375, |
| "learning_rate": 2.6055196070438663e-05, |
| "loss": 0.1499481201171875, |
| "num_tokens": 33385892.0, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.260707635009311, |
| "grad_norm": 0.37890625, |
| "learning_rate": 2.603478318408411e-05, |
| "loss": 0.17242431640625, |
| "num_tokens": 33470896.0, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.26132836747361887, |
| "grad_norm": 0.462890625, |
| "learning_rate": 2.601432566007733e-05, |
| "loss": 0.24725341796875, |
| "num_tokens": 33544783.0, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.26194909993792675, |
| "grad_norm": 0.400390625, |
| "learning_rate": 2.599382358117263e-05, |
| "loss": 0.20053482055664062, |
| "num_tokens": 33627586.0, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.26256983240223464, |
| "grad_norm": 0.4453125, |
| "learning_rate": 2.5973277030304543e-05, |
| "loss": 0.220703125, |
| "num_tokens": 33705999.0, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.2631905648665425, |
| "grad_norm": 0.439453125, |
| "learning_rate": 2.5952686090587515e-05, |
| "loss": 0.2160186767578125, |
| "num_tokens": 33788984.0, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.2638112973308504, |
| "grad_norm": 0.435546875, |
| "learning_rate": 2.593205084531554e-05, |
| "loss": 0.19208526611328125, |
| "num_tokens": 33861102.0, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.2644320297951583, |
| "grad_norm": 0.408203125, |
| "learning_rate": 2.5911371377961837e-05, |
| "loss": 0.20062255859375, |
| "num_tokens": 33937816.0, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.26505276225946617, |
| "grad_norm": 0.388671875, |
| "learning_rate": 2.589064777217852e-05, |
| "loss": 0.189849853515625, |
| "num_tokens": 34020322.0, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.26567349472377405, |
| "grad_norm": 0.435546875, |
| "learning_rate": 2.586988011179624e-05, |
| "loss": 0.19635772705078125, |
| "num_tokens": 34091086.0, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.26629422718808193, |
| "grad_norm": 0.404296875, |
| "learning_rate": 2.5849068480823862e-05, |
| "loss": 0.1734619140625, |
| "num_tokens": 34164143.0, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.2669149596523898, |
| "grad_norm": 0.451171875, |
| "learning_rate": 2.5828212963448116e-05, |
| "loss": 0.22176361083984375, |
| "num_tokens": 34244415.0, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.2675356921166977, |
| "grad_norm": 0.408203125, |
| "learning_rate": 2.5807313644033273e-05, |
| "loss": 0.1698150634765625, |
| "num_tokens": 34323399.0, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.2681564245810056, |
| "grad_norm": 0.404296875, |
| "learning_rate": 2.578637060712077e-05, |
| "loss": 0.209869384765625, |
| "num_tokens": 34408888.0, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.26877715704531346, |
| "grad_norm": 0.41796875, |
| "learning_rate": 2.576538393742891e-05, |
| "loss": 0.18206024169921875, |
| "num_tokens": 34488933.0, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.26939788950962135, |
| "grad_norm": 0.35546875, |
| "learning_rate": 2.5744353719852477e-05, |
| "loss": 0.15802001953125, |
| "num_tokens": 34574178.0, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.27001862197392923, |
| "grad_norm": 0.39453125, |
| "learning_rate": 2.572328003946244e-05, |
| "loss": 0.1738433837890625, |
| "num_tokens": 34655487.0, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.2706393544382371, |
| "grad_norm": 0.41015625, |
| "learning_rate": 2.5702162981505555e-05, |
| "loss": 0.21429443359375, |
| "num_tokens": 34738038.0, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.271260086902545, |
| "grad_norm": 0.390625, |
| "learning_rate": 2.5681002631404067e-05, |
| "loss": 0.178192138671875, |
| "num_tokens": 34827030.0, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.2718808193668529, |
| "grad_norm": 0.380859375, |
| "learning_rate": 2.5659799074755342e-05, |
| "loss": 0.16982269287109375, |
| "num_tokens": 34905718.0, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.27250155183116076, |
| "grad_norm": 0.345703125, |
| "learning_rate": 2.5638552397331518e-05, |
| "loss": 0.14931488037109375, |
| "num_tokens": 34991598.0, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.27312228429546864, |
| "grad_norm": 0.35546875, |
| "learning_rate": 2.5617262685079173e-05, |
| "loss": 0.15102386474609375, |
| "num_tokens": 35072735.0, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.2737430167597765, |
| "grad_norm": 0.41015625, |
| "learning_rate": 2.5595930024118957e-05, |
| "loss": 0.17125701904296875, |
| "num_tokens": 35146236.0, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.2743637492240844, |
| "grad_norm": 0.400390625, |
| "learning_rate": 2.5574554500745263e-05, |
| "loss": 0.1802215576171875, |
| "num_tokens": 35223566.0, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.2749844816883923, |
| "grad_norm": 0.4296875, |
| "learning_rate": 2.5553136201425868e-05, |
| "loss": 0.1890106201171875, |
| "num_tokens": 35299788.0, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.2756052141527002, |
| "grad_norm": 0.40625, |
| "learning_rate": 2.553167521280159e-05, |
| "loss": 0.18357086181640625, |
| "num_tokens": 35379215.0, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.27622594661700806, |
| "grad_norm": 0.42578125, |
| "learning_rate": 2.5510171621685926e-05, |
| "loss": 0.209228515625, |
| "num_tokens": 35458045.0, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.27684667908131594, |
| "grad_norm": 0.4140625, |
| "learning_rate": 2.5488625515064713e-05, |
| "loss": 0.19693756103515625, |
| "num_tokens": 35536918.0, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.2774674115456238, |
| "grad_norm": 0.416015625, |
| "learning_rate": 2.5467036980095766e-05, |
| "loss": 0.2014617919921875, |
| "num_tokens": 35622887.0, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.2780881440099317, |
| "grad_norm": 0.43359375, |
| "learning_rate": 2.5445406104108527e-05, |
| "loss": 0.20465087890625, |
| "num_tokens": 35702044.0, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.2787088764742396, |
| "grad_norm": 0.462890625, |
| "learning_rate": 2.5423732974603732e-05, |
| "loss": 0.219146728515625, |
| "num_tokens": 35783189.0, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.27932960893854747, |
| "grad_norm": 0.400390625, |
| "learning_rate": 2.5402017679253015e-05, |
| "loss": 0.2011260986328125, |
| "num_tokens": 35867998.0, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.27995034140285535, |
| "grad_norm": 0.458984375, |
| "learning_rate": 2.5380260305898602e-05, |
| "loss": 0.222808837890625, |
| "num_tokens": 35942035.0, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.28057107386716323, |
| "grad_norm": 0.435546875, |
| "learning_rate": 2.535846094255292e-05, |
| "loss": 0.1995086669921875, |
| "num_tokens": 36020075.0, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.2811918063314711, |
| "grad_norm": 0.44140625, |
| "learning_rate": 2.5336619677398248e-05, |
| "loss": 0.2311248779296875, |
| "num_tokens": 36102949.0, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.281812538795779, |
| "grad_norm": 0.453125, |
| "learning_rate": 2.5314736598786376e-05, |
| "loss": 0.2327728271484375, |
| "num_tokens": 36188479.0, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.2824332712600869, |
| "grad_norm": 0.43359375, |
| "learning_rate": 2.529281179523823e-05, |
| "loss": 0.2134552001953125, |
| "num_tokens": 36266869.0, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.28305400372439476, |
| "grad_norm": 0.42578125, |
| "learning_rate": 2.5270845355443524e-05, |
| "loss": 0.191680908203125, |
| "num_tokens": 36341221.0, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.28367473618870265, |
| "grad_norm": 0.43359375, |
| "learning_rate": 2.5248837368260404e-05, |
| "loss": 0.1881561279296875, |
| "num_tokens": 36418826.0, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.28429546865301053, |
| "grad_norm": 0.359375, |
| "learning_rate": 2.5226787922715068e-05, |
| "loss": 0.14521026611328125, |
| "num_tokens": 36494976.0, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.2849162011173184, |
| "grad_norm": 0.4140625, |
| "learning_rate": 2.5204697108001425e-05, |
| "loss": 0.19922637939453125, |
| "num_tokens": 36575143.0, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.2855369335816263, |
| "grad_norm": 0.443359375, |
| "learning_rate": 2.5182565013480746e-05, |
| "loss": 0.25048828125, |
| "num_tokens": 36654947.0, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.2861576660459342, |
| "grad_norm": 0.392578125, |
| "learning_rate": 2.516039172868126e-05, |
| "loss": 0.157745361328125, |
| "num_tokens": 36729518.0, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.28677839851024206, |
| "grad_norm": 0.41015625, |
| "learning_rate": 2.5138177343297835e-05, |
| "loss": 0.2284088134765625, |
| "num_tokens": 36816918.0, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.28739913097454994, |
| "grad_norm": 0.435546875, |
| "learning_rate": 2.5115921947191597e-05, |
| "loss": 0.20748138427734375, |
| "num_tokens": 36894753.0, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.2880198634388578, |
| "grad_norm": 0.4296875, |
| "learning_rate": 2.509362563038956e-05, |
| "loss": 0.21819496154785156, |
| "num_tokens": 36968113.0, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.2886405959031657, |
| "grad_norm": 0.44921875, |
| "learning_rate": 2.507128848308428e-05, |
| "loss": 0.220123291015625, |
| "num_tokens": 37047321.0, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.2892613283674736, |
| "grad_norm": 0.4453125, |
| "learning_rate": 2.504891059563347e-05, |
| "loss": 0.223358154296875, |
| "num_tokens": 37123423.0, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.28988206083178153, |
| "grad_norm": 0.40234375, |
| "learning_rate": 2.5026492058559643e-05, |
| "loss": 0.1943206787109375, |
| "num_tokens": 37202375.0, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.2905027932960894, |
| "grad_norm": 0.470703125, |
| "learning_rate": 2.500403296254976e-05, |
| "loss": 0.239013671875, |
| "num_tokens": 37283800.0, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.2911235257603973, |
| "grad_norm": 0.4609375, |
| "learning_rate": 2.4981533398454844e-05, |
| "loss": 0.24169921875, |
| "num_tokens": 37362390.0, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.2917442582247052, |
| "grad_norm": 0.4375, |
| "learning_rate": 2.4958993457289602e-05, |
| "loss": 0.210205078125, |
| "num_tokens": 37441284.0, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.29236499068901306, |
| "grad_norm": 0.427734375, |
| "learning_rate": 2.4936413230232103e-05, |
| "loss": 0.18692779541015625, |
| "num_tokens": 37516234.0, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.29298572315332094, |
| "grad_norm": 0.4375, |
| "learning_rate": 2.491379280862336e-05, |
| "loss": 0.2349700927734375, |
| "num_tokens": 37596771.0, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.2936064556176288, |
| "grad_norm": 0.423828125, |
| "learning_rate": 2.4891132283966976e-05, |
| "loss": 0.1943359375, |
| "num_tokens": 37678320.0, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.2942271880819367, |
| "grad_norm": 0.435546875, |
| "learning_rate": 2.4868431747928794e-05, |
| "loss": 0.23413848876953125, |
| "num_tokens": 37760030.0, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.2948479205462446, |
| "grad_norm": 0.400390625, |
| "learning_rate": 2.4845691292336505e-05, |
| "loss": 0.16797637939453125, |
| "num_tokens": 37834231.0, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.2954686530105525, |
| "grad_norm": 0.365234375, |
| "learning_rate": 2.482291100917928e-05, |
| "loss": 0.154815673828125, |
| "num_tokens": 37913213.0, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.29608938547486036, |
| "grad_norm": 0.3984375, |
| "learning_rate": 2.480009099060739e-05, |
| "loss": 0.1909942626953125, |
| "num_tokens": 37992954.0, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.29671011793916824, |
| "grad_norm": 0.451171875, |
| "learning_rate": 2.4777231328931854e-05, |
| "loss": 0.257049560546875, |
| "num_tokens": 38081649.0, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.2973308504034761, |
| "grad_norm": 0.53515625, |
| "learning_rate": 2.4754332116624055e-05, |
| "loss": 0.35211181640625, |
| "num_tokens": 38167712.0, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.297951582867784, |
| "grad_norm": 0.439453125, |
| "learning_rate": 2.473139344631536e-05, |
| "loss": 0.237213134765625, |
| "num_tokens": 38250426.0, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.2985723153320919, |
| "grad_norm": 0.4609375, |
| "learning_rate": 2.470841541079676e-05, |
| "loss": 0.22540283203125, |
| "num_tokens": 38330121.0, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.29919304779639977, |
| "grad_norm": 0.39453125, |
| "learning_rate": 2.4685398103018465e-05, |
| "loss": 0.161529541015625, |
| "num_tokens": 38413637.0, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.29981378026070765, |
| "grad_norm": 0.396484375, |
| "learning_rate": 2.466234161608957e-05, |
| "loss": 0.1733856201171875, |
| "num_tokens": 38488030.0, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.30043451272501553, |
| "grad_norm": 0.408203125, |
| "learning_rate": 2.463924604327765e-05, |
| "loss": 0.2153167724609375, |
| "num_tokens": 38566674.0, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.3010552451893234, |
| "grad_norm": 0.4453125, |
| "learning_rate": 2.4616111478008386e-05, |
| "loss": 0.2422332763671875, |
| "num_tokens": 38648975.0, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.3016759776536313, |
| "grad_norm": 0.3984375, |
| "learning_rate": 2.4592938013865184e-05, |
| "loss": 0.1868896484375, |
| "num_tokens": 38726777.0, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.3022967101179392, |
| "grad_norm": 0.44140625, |
| "learning_rate": 2.4569725744588813e-05, |
| "loss": 0.2122344970703125, |
| "num_tokens": 38808863.0, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.30291744258224707, |
| "grad_norm": 0.392578125, |
| "learning_rate": 2.4546474764077022e-05, |
| "loss": 0.2018280029296875, |
| "num_tokens": 38896069.0, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.30353817504655495, |
| "grad_norm": 0.49609375, |
| "learning_rate": 2.4523185166384137e-05, |
| "loss": 0.23865509033203125, |
| "num_tokens": 38969806.0, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.30415890751086283, |
| "grad_norm": 0.41015625, |
| "learning_rate": 2.4499857045720705e-05, |
| "loss": 0.202606201171875, |
| "num_tokens": 39049210.0, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.3047796399751707, |
| "grad_norm": 0.41796875, |
| "learning_rate": 2.447649049645311e-05, |
| "loss": 0.19609832763671875, |
| "num_tokens": 39125183.0, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.3054003724394786, |
| "grad_norm": 0.408203125, |
| "learning_rate": 2.445308561310318e-05, |
| "loss": 0.1680145263671875, |
| "num_tokens": 39206329.0, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.3060211049037865, |
| "grad_norm": 0.455078125, |
| "learning_rate": 2.4429642490347806e-05, |
| "loss": 0.2135009765625, |
| "num_tokens": 39283309.0, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.30664183736809436, |
| "grad_norm": 0.453125, |
| "learning_rate": 2.440616122301858e-05, |
| "loss": 0.248931884765625, |
| "num_tokens": 39367306.0, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.30726256983240224, |
| "grad_norm": 0.408203125, |
| "learning_rate": 2.4382641906101394e-05, |
| "loss": 0.16819000244140625, |
| "num_tokens": 39441358.0, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.3078833022967101, |
| "grad_norm": 0.412109375, |
| "learning_rate": 2.435908463473603e-05, |
| "loss": 0.1839447021484375, |
| "num_tokens": 39517505.0, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.308504034761018, |
| "grad_norm": 0.400390625, |
| "learning_rate": 2.4335489504215834e-05, |
| "loss": 0.177398681640625, |
| "num_tokens": 39591498.0, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.3091247672253259, |
| "grad_norm": 0.423828125, |
| "learning_rate": 2.431185660998729e-05, |
| "loss": 0.21129608154296875, |
| "num_tokens": 39672995.0, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.3097454996896338, |
| "grad_norm": 0.365234375, |
| "learning_rate": 2.428818604764964e-05, |
| "loss": 0.1651458740234375, |
| "num_tokens": 39754665.0, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.31036623215394166, |
| "grad_norm": 0.59765625, |
| "learning_rate": 2.4264477912954498e-05, |
| "loss": 0.19366455078125, |
| "num_tokens": 39834074.0, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.31098696461824954, |
| "grad_norm": 0.498046875, |
| "learning_rate": 2.4240732301805463e-05, |
| "loss": 0.22674560546875, |
| "num_tokens": 39901227.0, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.3116076970825574, |
| "grad_norm": 0.384765625, |
| "learning_rate": 2.421694931025775e-05, |
| "loss": 0.1508941650390625, |
| "num_tokens": 39975272.0, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.3122284295468653, |
| "grad_norm": 0.453125, |
| "learning_rate": 2.4193129034517758e-05, |
| "loss": 0.24749755859375, |
| "num_tokens": 40058538.0, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.3128491620111732, |
| "grad_norm": 0.4453125, |
| "learning_rate": 2.4169271570942724e-05, |
| "loss": 0.198455810546875, |
| "num_tokens": 40135374.0, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.31346989447548107, |
| "grad_norm": 0.443359375, |
| "learning_rate": 2.414537701604032e-05, |
| "loss": 0.2157745361328125, |
| "num_tokens": 40213920.0, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.31409062693978895, |
| "grad_norm": 0.416015625, |
| "learning_rate": 2.412144546646825e-05, |
| "loss": 0.18807220458984375, |
| "num_tokens": 40291930.0, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.31471135940409684, |
| "grad_norm": 0.380859375, |
| "learning_rate": 2.409747701903387e-05, |
| "loss": 0.15155029296875, |
| "num_tokens": 40368748.0, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.3153320918684047, |
| "grad_norm": 0.44140625, |
| "learning_rate": 2.4073471770693788e-05, |
| "loss": 0.22333526611328125, |
| "num_tokens": 40446867.0, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.3159528243327126, |
| "grad_norm": 0.404296875, |
| "learning_rate": 2.4049429818553494e-05, |
| "loss": 0.18402099609375, |
| "num_tokens": 40531129.0, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.3165735567970205, |
| "grad_norm": 0.376953125, |
| "learning_rate": 2.4025351259866935e-05, |
| "loss": 0.164276123046875, |
| "num_tokens": 40610648.0, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.31719428926132837, |
| "grad_norm": 0.419921875, |
| "learning_rate": 2.4001236192036154e-05, |
| "loss": 0.11762237548828125, |
| "num_tokens": 40696900.0, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.31781502172563625, |
| "grad_norm": 0.384765625, |
| "learning_rate": 2.3977084712610862e-05, |
| "loss": 0.17205810546875, |
| "num_tokens": 40775511.0, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.31843575418994413, |
| "grad_norm": 0.34765625, |
| "learning_rate": 2.3952896919288074e-05, |
| "loss": 0.13307952880859375, |
| "num_tokens": 40855259.0, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.319056486654252, |
| "grad_norm": 0.39453125, |
| "learning_rate": 2.3928672909911703e-05, |
| "loss": 0.1673126220703125, |
| "num_tokens": 40930806.0, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.3196772191185599, |
| "grad_norm": 0.447265625, |
| "learning_rate": 2.3904412782472147e-05, |
| "loss": 0.2042236328125, |
| "num_tokens": 41007744.0, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.3202979515828678, |
| "grad_norm": 0.435546875, |
| "learning_rate": 2.3880116635105923e-05, |
| "loss": 0.2387542724609375, |
| "num_tokens": 41086843.0, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.32091868404717566, |
| "grad_norm": 0.36328125, |
| "learning_rate": 2.385578456609525e-05, |
| "loss": 0.16551589965820312, |
| "num_tokens": 41169468.0, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.32153941651148354, |
| "grad_norm": 0.45703125, |
| "learning_rate": 2.3831416673867657e-05, |
| "loss": 0.2154693603515625, |
| "num_tokens": 41247676.0, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.3221601489757914, |
| "grad_norm": 0.466796875, |
| "learning_rate": 2.3807013056995583e-05, |
| "loss": 0.23715972900390625, |
| "num_tokens": 41325006.0, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.3227808814400993, |
| "grad_norm": 0.466796875, |
| "learning_rate": 2.3782573814195978e-05, |
| "loss": 0.23017120361328125, |
| "num_tokens": 41402991.0, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.3234016139044072, |
| "grad_norm": 0.51171875, |
| "learning_rate": 2.3758099044329912e-05, |
| "loss": 0.1800079345703125, |
| "num_tokens": 41479499.0, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.3240223463687151, |
| "grad_norm": 0.439453125, |
| "learning_rate": 2.3733588846402158e-05, |
| "loss": 0.2174835205078125, |
| "num_tokens": 41559656.0, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.32464307883302296, |
| "grad_norm": 0.4296875, |
| "learning_rate": 2.370904331956081e-05, |
| "loss": 0.198455810546875, |
| "num_tokens": 41636299.0, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.32526381129733084, |
| "grad_norm": 0.4453125, |
| "learning_rate": 2.3684462563096878e-05, |
| "loss": 0.21435546875, |
| "num_tokens": 41716272.0, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.3258845437616387, |
| "grad_norm": 0.33203125, |
| "learning_rate": 2.365984667644386e-05, |
| "loss": 0.11435699462890625, |
| "num_tokens": 41798664.0, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.3265052762259466, |
| "grad_norm": 0.41796875, |
| "learning_rate": 2.3635195759177382e-05, |
| "loss": 0.1849365234375, |
| "num_tokens": 41871946.0, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.3271260086902545, |
| "grad_norm": 0.412109375, |
| "learning_rate": 2.3610509911014785e-05, |
| "loss": 0.181732177734375, |
| "num_tokens": 41954277.0, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.32774674115456237, |
| "grad_norm": 0.431640625, |
| "learning_rate": 2.3585789231814676e-05, |
| "loss": 0.1625213623046875, |
| "num_tokens": 42027831.0, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.32836747361887025, |
| "grad_norm": 0.50390625, |
| "learning_rate": 2.356103382157659e-05, |
| "loss": 0.2745208740234375, |
| "num_tokens": 42108413.0, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.32898820608317814, |
| "grad_norm": 0.419921875, |
| "learning_rate": 2.3536243780440546e-05, |
| "loss": 0.21575927734375, |
| "num_tokens": 42192168.0, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.329608938547486, |
| "grad_norm": 0.419921875, |
| "learning_rate": 2.351141920868665e-05, |
| "loss": 0.140472412109375, |
| "num_tokens": 42270694.0, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.3302296710117939, |
| "grad_norm": 0.48828125, |
| "learning_rate": 2.3486560206734688e-05, |
| "loss": 0.2412261962890625, |
| "num_tokens": 42347981.0, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.3308504034761018, |
| "grad_norm": 0.388671875, |
| "learning_rate": 2.346166687514373e-05, |
| "loss": 0.1951904296875, |
| "num_tokens": 42436572.0, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.33147113594040967, |
| "grad_norm": 0.478515625, |
| "learning_rate": 2.343673931461171e-05, |
| "loss": 0.2357025146484375, |
| "num_tokens": 42509995.0, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.33209186840471755, |
| "grad_norm": 0.4453125, |
| "learning_rate": 2.3411777625975026e-05, |
| "loss": 0.23712158203125, |
| "num_tokens": 42593004.0, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.33271260086902543, |
| "grad_norm": 0.455078125, |
| "learning_rate": 2.338678191020812e-05, |
| "loss": 0.21124267578125, |
| "num_tokens": 42668679.0, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.3333333333333333, |
| "grad_norm": 0.443359375, |
| "learning_rate": 2.33617522684231e-05, |
| "loss": 0.2097625732421875, |
| "num_tokens": 42748351.0, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.3339540657976412, |
| "grad_norm": 0.404296875, |
| "learning_rate": 2.3336688801869296e-05, |
| "loss": 0.1845245361328125, |
| "num_tokens": 42831403.0, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.3345747982619491, |
| "grad_norm": 0.41796875, |
| "learning_rate": 2.331159161193287e-05, |
| "loss": 0.1764068603515625, |
| "num_tokens": 42909716.0, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.33519553072625696, |
| "grad_norm": 0.4453125, |
| "learning_rate": 2.3286460800136394e-05, |
| "loss": 0.20522308349609375, |
| "num_tokens": 42991405.0, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.33581626319056485, |
| "grad_norm": 0.4296875, |
| "learning_rate": 2.3261296468138463e-05, |
| "loss": 0.2218170166015625, |
| "num_tokens": 43073517.0, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.33643699565487273, |
| "grad_norm": 0.451171875, |
| "learning_rate": 2.3236098717733246e-05, |
| "loss": 0.2127838134765625, |
| "num_tokens": 43151441.0, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.3370577281191806, |
| "grad_norm": 0.400390625, |
| "learning_rate": 2.3210867650850116e-05, |
| "loss": 0.1876220703125, |
| "num_tokens": 43242315.0, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.3376784605834885, |
| "grad_norm": 0.47265625, |
| "learning_rate": 2.3185603369553196e-05, |
| "loss": 0.24231719970703125, |
| "num_tokens": 43319732.0, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.3382991930477964, |
| "grad_norm": 0.404296875, |
| "learning_rate": 2.3160305976040984e-05, |
| "loss": 0.185272216796875, |
| "num_tokens": 43396973.0, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.33891992551210426, |
| "grad_norm": 0.369140625, |
| "learning_rate": 2.3134975572645917e-05, |
| "loss": 0.1720733642578125, |
| "num_tokens": 43484963.0, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.33954065797641214, |
| "grad_norm": 0.484375, |
| "learning_rate": 2.3109612261833967e-05, |
| "loss": 0.1727294921875, |
| "num_tokens": 43563362.0, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.34016139044072, |
| "grad_norm": 0.40234375, |
| "learning_rate": 2.3084216146204204e-05, |
| "loss": 0.19522857666015625, |
| "num_tokens": 43645331.0, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.3407821229050279, |
| "grad_norm": 0.40234375, |
| "learning_rate": 2.305878732848843e-05, |
| "loss": 0.1963348388671875, |
| "num_tokens": 43723853.0, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.3414028553693358, |
| "grad_norm": 0.4375, |
| "learning_rate": 2.303332591155071e-05, |
| "loss": 0.20317840576171875, |
| "num_tokens": 43795201.0, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.34202358783364367, |
| "grad_norm": 0.416015625, |
| "learning_rate": 2.3007831998386976e-05, |
| "loss": 0.1800994873046875, |
| "num_tokens": 43885422.0, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.3426443202979516, |
| "grad_norm": 0.365234375, |
| "learning_rate": 2.2982305692124633e-05, |
| "loss": 0.19091796875, |
| "num_tokens": 43975863.0, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.3432650527622595, |
| "grad_norm": 0.39453125, |
| "learning_rate": 2.2956747096022103e-05, |
| "loss": 0.2070465087890625, |
| "num_tokens": 44063234.0, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.3438857852265674, |
| "grad_norm": 0.400390625, |
| "learning_rate": 2.2931156313468447e-05, |
| "loss": 0.17583465576171875, |
| "num_tokens": 44140881.0, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.34450651769087526, |
| "grad_norm": 0.408203125, |
| "learning_rate": 2.2905533447982895e-05, |
| "loss": 0.180572509765625, |
| "num_tokens": 44227161.0, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.34512725015518314, |
| "grad_norm": 0.404296875, |
| "learning_rate": 2.287987860321448e-05, |
| "loss": 0.18878173828125, |
| "num_tokens": 44307341.0, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.345747982619491, |
| "grad_norm": 0.408203125, |
| "learning_rate": 2.2854191882941606e-05, |
| "loss": 0.22259521484375, |
| "num_tokens": 44391210.0, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.3463687150837989, |
| "grad_norm": 0.392578125, |
| "learning_rate": 2.2828473391071588e-05, |
| "loss": 0.1643524169921875, |
| "num_tokens": 44475341.0, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.3469894475481068, |
| "grad_norm": 0.427734375, |
| "learning_rate": 2.280272323164029e-05, |
| "loss": 0.20947265625, |
| "num_tokens": 44552267.0, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.34761018001241467, |
| "grad_norm": 0.419921875, |
| "learning_rate": 2.2776941508811662e-05, |
| "loss": 0.2020416259765625, |
| "num_tokens": 44633095.0, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.34823091247672255, |
| "grad_norm": 0.435546875, |
| "learning_rate": 2.275112832687735e-05, |
| "loss": 0.23046112060546875, |
| "num_tokens": 44716095.0, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.34885164494103044, |
| "grad_norm": 0.462890625, |
| "learning_rate": 2.2725283790256235e-05, |
| "loss": 0.2403411865234375, |
| "num_tokens": 44792169.0, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.3494723774053383, |
| "grad_norm": 0.427734375, |
| "learning_rate": 2.269940800349404e-05, |
| "loss": 0.22369384765625, |
| "num_tokens": 44873057.0, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.3500931098696462, |
| "grad_norm": 0.392578125, |
| "learning_rate": 2.2673501071262923e-05, |
| "loss": 0.185455322265625, |
| "num_tokens": 44958191.0, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.3507138423339541, |
| "grad_norm": 0.447265625, |
| "learning_rate": 2.2647563098361e-05, |
| "loss": 0.2142486572265625, |
| "num_tokens": 45035635.0, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.35133457479826197, |
| "grad_norm": 0.515625, |
| "learning_rate": 2.2621594189711965e-05, |
| "loss": 0.13890838623046875, |
| "num_tokens": 45113009.0, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.35195530726256985, |
| "grad_norm": 0.408203125, |
| "learning_rate": 2.2595594450364658e-05, |
| "loss": 0.16967010498046875, |
| "num_tokens": 45185701.0, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.35257603972687773, |
| "grad_norm": 0.423828125, |
| "learning_rate": 2.2569563985492626e-05, |
| "loss": 0.20782470703125, |
| "num_tokens": 45267032.0, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.3531967721911856, |
| "grad_norm": 0.42578125, |
| "learning_rate": 2.254350290039371e-05, |
| "loss": 0.20135498046875, |
| "num_tokens": 45344905.0, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.3538175046554935, |
| "grad_norm": 0.376953125, |
| "learning_rate": 2.251741130048962e-05, |
| "loss": 0.18006134033203125, |
| "num_tokens": 45431946.0, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.3544382371198014, |
| "grad_norm": 0.46875, |
| "learning_rate": 2.2491289291325485e-05, |
| "loss": 0.25028228759765625, |
| "num_tokens": 45509704.0, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.35505896958410926, |
| "grad_norm": 0.41015625, |
| "learning_rate": 2.246513697856947e-05, |
| "loss": 0.1885833740234375, |
| "num_tokens": 45585908.0, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.35567970204841715, |
| "grad_norm": 0.388671875, |
| "learning_rate": 2.2438954468012308e-05, |
| "loss": 0.1893157958984375, |
| "num_tokens": 45670191.0, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.35630043451272503, |
| "grad_norm": 0.4453125, |
| "learning_rate": 2.2412741865566887e-05, |
| "loss": 0.22332763671875, |
| "num_tokens": 45751129.0, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.3569211669770329, |
| "grad_norm": 0.390625, |
| "learning_rate": 2.2386499277267837e-05, |
| "loss": 0.18932342529296875, |
| "num_tokens": 45829269.0, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.3575418994413408, |
| "grad_norm": 0.37890625, |
| "learning_rate": 2.2360226809271064e-05, |
| "loss": 0.1678466796875, |
| "num_tokens": 45911335.0, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.3581626319056487, |
| "grad_norm": 0.4453125, |
| "learning_rate": 2.2333924567853363e-05, |
| "loss": 0.22794342041015625, |
| "num_tokens": 45990480.0, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.35878336436995656, |
| "grad_norm": 0.3828125, |
| "learning_rate": 2.230759265941195e-05, |
| "loss": 0.17134857177734375, |
| "num_tokens": 46074975.0, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.35940409683426444, |
| "grad_norm": 0.412109375, |
| "learning_rate": 2.228123119046406e-05, |
| "loss": 0.1915130615234375, |
| "num_tokens": 46152517.0, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.3600248292985723, |
| "grad_norm": 0.458984375, |
| "learning_rate": 2.2254840267646506e-05, |
| "loss": 0.24593353271484375, |
| "num_tokens": 46230369.0, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.3606455617628802, |
| "grad_norm": 0.404296875, |
| "learning_rate": 2.222841999771525e-05, |
| "loss": 0.172882080078125, |
| "num_tokens": 46308236.0, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.3612662942271881, |
| "grad_norm": 0.42578125, |
| "learning_rate": 2.2201970487544954e-05, |
| "loss": 0.202362060546875, |
| "num_tokens": 46389254.0, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.36188702669149597, |
| "grad_norm": 0.44140625, |
| "learning_rate": 2.2175491844128573e-05, |
| "loss": 0.18841552734375, |
| "num_tokens": 46463330.0, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.36250775915580385, |
| "grad_norm": 0.451171875, |
| "learning_rate": 2.2148984174576906e-05, |
| "loss": 0.2029876708984375, |
| "num_tokens": 46543207.0, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.36312849162011174, |
| "grad_norm": 0.384765625, |
| "learning_rate": 2.212244758611818e-05, |
| "loss": 0.1617889404296875, |
| "num_tokens": 46624716.0, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.3637492240844196, |
| "grad_norm": 0.328125, |
| "learning_rate": 2.2095882186097584e-05, |
| "loss": 0.1309967041015625, |
| "num_tokens": 46707564.0, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.3643699565487275, |
| "grad_norm": 0.466796875, |
| "learning_rate": 2.2069288081976875e-05, |
| "loss": 0.2613372802734375, |
| "num_tokens": 46790238.0, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.3649906890130354, |
| "grad_norm": 0.380859375, |
| "learning_rate": 2.204266538133391e-05, |
| "loss": 0.2042388916015625, |
| "num_tokens": 46879381.0, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.36561142147734327, |
| "grad_norm": 0.400390625, |
| "learning_rate": 2.2016014191862225e-05, |
| "loss": 0.18834686279296875, |
| "num_tokens": 46956936.0, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.36623215394165115, |
| "grad_norm": 0.4140625, |
| "learning_rate": 2.198933462137061e-05, |
| "loss": 0.159576416015625, |
| "num_tokens": 47033368.0, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.36685288640595903, |
| "grad_norm": 0.400390625, |
| "learning_rate": 2.196262677778264e-05, |
| "loss": 0.1726226806640625, |
| "num_tokens": 47111585.0, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.3674736188702669, |
| "grad_norm": 0.431640625, |
| "learning_rate": 2.1935890769136284e-05, |
| "loss": 0.222747802734375, |
| "num_tokens": 47194450.0, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.3680943513345748, |
| "grad_norm": 0.431640625, |
| "learning_rate": 2.190912670358343e-05, |
| "loss": 0.2170867919921875, |
| "num_tokens": 47275636.0, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.3687150837988827, |
| "grad_norm": 0.40625, |
| "learning_rate": 2.188233468938946e-05, |
| "loss": 0.16595458984375, |
| "num_tokens": 47353178.0, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.36933581626319056, |
| "grad_norm": 0.423828125, |
| "learning_rate": 2.1855514834932806e-05, |
| "loss": 0.20269775390625, |
| "num_tokens": 47430597.0, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.36995654872749845, |
| "grad_norm": 0.390625, |
| "learning_rate": 2.182866724870455e-05, |
| "loss": 0.19757080078125, |
| "num_tokens": 47522254.0, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.37057728119180633, |
| "grad_norm": 0.47265625, |
| "learning_rate": 2.180179203930792e-05, |
| "loss": 0.2463836669921875, |
| "num_tokens": 47605790.0, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.3711980136561142, |
| "grad_norm": 0.427734375, |
| "learning_rate": 2.17748893154579e-05, |
| "loss": 0.19834136962890625, |
| "num_tokens": 47682588.0, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.3718187461204221, |
| "grad_norm": 0.423828125, |
| "learning_rate": 2.174795918598077e-05, |
| "loss": 0.228607177734375, |
| "num_tokens": 47762530.0, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.37243947858473, |
| "grad_norm": 0.40234375, |
| "learning_rate": 2.1721001759813677e-05, |
| "loss": 0.1551666259765625, |
| "num_tokens": 47837223.0, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.37306021104903786, |
| "grad_norm": 0.3984375, |
| "learning_rate": 2.1694017146004186e-05, |
| "loss": 0.16622161865234375, |
| "num_tokens": 47913845.0, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.37368094351334574, |
| "grad_norm": 0.375, |
| "learning_rate": 2.166700545370983e-05, |
| "loss": 0.189453125, |
| "num_tokens": 48000697.0, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.3743016759776536, |
| "grad_norm": 0.48046875, |
| "learning_rate": 2.1639966792197694e-05, |
| "loss": 0.24367523193359375, |
| "num_tokens": 48082887.0, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.3749224084419615, |
| "grad_norm": 0.359375, |
| "learning_rate": 2.161290127084396e-05, |
| "loss": 0.16951751708984375, |
| "num_tokens": 48167173.0, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.3755431409062694, |
| "grad_norm": 0.4140625, |
| "learning_rate": 2.1585808999133435e-05, |
| "loss": 0.1868438720703125, |
| "num_tokens": 48247447.0, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.3761638733705773, |
| "grad_norm": 0.3828125, |
| "learning_rate": 2.1558690086659172e-05, |
| "loss": 0.1612396240234375, |
| "num_tokens": 48323444.0, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.37678460583488516, |
| "grad_norm": 0.482421875, |
| "learning_rate": 2.153154464312197e-05, |
| "loss": 0.21346282958984375, |
| "num_tokens": 48391728.0, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.37740533829919304, |
| "grad_norm": 0.38671875, |
| "learning_rate": 2.1504372778329963e-05, |
| "loss": 0.171112060546875, |
| "num_tokens": 48476706.0, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.3780260707635009, |
| "grad_norm": 0.4140625, |
| "learning_rate": 2.1477174602198142e-05, |
| "loss": 0.179595947265625, |
| "num_tokens": 48554257.0, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.3786468032278088, |
| "grad_norm": 0.3984375, |
| "learning_rate": 2.1449950224747954e-05, |
| "loss": 0.15386962890625, |
| "num_tokens": 48630367.0, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.3792675356921167, |
| "grad_norm": 0.400390625, |
| "learning_rate": 2.1422699756106828e-05, |
| "loss": 0.199737548828125, |
| "num_tokens": 48715094.0, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.37988826815642457, |
| "grad_norm": 0.380859375, |
| "learning_rate": 2.139542330650774e-05, |
| "loss": 0.19342041015625, |
| "num_tokens": 48800641.0, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.38050900062073245, |
| "grad_norm": 0.416015625, |
| "learning_rate": 2.1368120986288746e-05, |
| "loss": 0.173858642578125, |
| "num_tokens": 48884711.0, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.38112973308504033, |
| "grad_norm": 0.431640625, |
| "learning_rate": 2.1340792905892578e-05, |
| "loss": 0.2032928466796875, |
| "num_tokens": 48964753.0, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.3817504655493482, |
| "grad_norm": 0.4609375, |
| "learning_rate": 2.1313439175866156e-05, |
| "loss": 0.2122802734375, |
| "num_tokens": 49039111.0, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.3823711980136561, |
| "grad_norm": 0.45703125, |
| "learning_rate": 2.1286059906860162e-05, |
| "loss": 0.22325897216796875, |
| "num_tokens": 49120270.0, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.382991930477964, |
| "grad_norm": 0.427734375, |
| "learning_rate": 2.1258655209628593e-05, |
| "loss": 0.21783447265625, |
| "num_tokens": 49196980.0, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.38361266294227186, |
| "grad_norm": 0.384765625, |
| "learning_rate": 2.12312251950283e-05, |
| "loss": 0.1694793701171875, |
| "num_tokens": 49285898.0, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.38423339540657975, |
| "grad_norm": 0.416015625, |
| "learning_rate": 2.1203769974018545e-05, |
| "loss": 0.2267608642578125, |
| "num_tokens": 49365023.0, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.38485412787088763, |
| "grad_norm": 0.4140625, |
| "learning_rate": 2.1176289657660564e-05, |
| "loss": 0.1989288330078125, |
| "num_tokens": 49447858.0, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.3854748603351955, |
| "grad_norm": 0.43359375, |
| "learning_rate": 2.1148784357117103e-05, |
| "loss": 0.231170654296875, |
| "num_tokens": 49527337.0, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.3860955927995034, |
| "grad_norm": 0.47265625, |
| "learning_rate": 2.1121254183651974e-05, |
| "loss": 0.209716796875, |
| "num_tokens": 49602193.0, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.3867163252638113, |
| "grad_norm": 0.42578125, |
| "learning_rate": 2.1093699248629603e-05, |
| "loss": 0.23325347900390625, |
| "num_tokens": 49688375.0, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.38733705772811916, |
| "grad_norm": 0.396484375, |
| "learning_rate": 2.106611966351459e-05, |
| "loss": 0.16530609130859375, |
| "num_tokens": 49761687.0, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.38795779019242704, |
| "grad_norm": 0.419921875, |
| "learning_rate": 2.1038515539871224e-05, |
| "loss": 0.2009124755859375, |
| "num_tokens": 49841425.0, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.3885785226567349, |
| "grad_norm": 0.412109375, |
| "learning_rate": 2.1010886989363086e-05, |
| "loss": 0.18260955810546875, |
| "num_tokens": 49917147.0, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.3891992551210428, |
| "grad_norm": 0.455078125, |
| "learning_rate": 2.0983234123752553e-05, |
| "loss": 0.23314666748046875, |
| "num_tokens": 49995888.0, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.3898199875853507, |
| "grad_norm": 0.453125, |
| "learning_rate": 2.095555705490037e-05, |
| "loss": 0.23809814453125, |
| "num_tokens": 50073478.0, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.3904407200496586, |
| "grad_norm": 0.51953125, |
| "learning_rate": 2.0927855894765175e-05, |
| "loss": 0.2695159912109375, |
| "num_tokens": 50149261.0, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.39106145251396646, |
| "grad_norm": 0.39453125, |
| "learning_rate": 2.0900130755403066e-05, |
| "loss": 0.17889404296875, |
| "num_tokens": 50228053.0, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.39168218497827434, |
| "grad_norm": 0.43359375, |
| "learning_rate": 2.0872381748967144e-05, |
| "loss": 0.218994140625, |
| "num_tokens": 50312331.0, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.3923029174425822, |
| "grad_norm": 0.412109375, |
| "learning_rate": 2.0844608987707053e-05, |
| "loss": 0.218292236328125, |
| "num_tokens": 50399935.0, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.3929236499068901, |
| "grad_norm": 0.43359375, |
| "learning_rate": 2.0816812583968532e-05, |
| "loss": 0.2323150634765625, |
| "num_tokens": 50482609.0, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.393544382371198, |
| "grad_norm": 0.36328125, |
| "learning_rate": 2.0788992650192958e-05, |
| "loss": 0.1590576171875, |
| "num_tokens": 50561251.0, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.39416511483550587, |
| "grad_norm": 0.36328125, |
| "learning_rate": 2.076114929891689e-05, |
| "loss": 0.16410064697265625, |
| "num_tokens": 50639648.0, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.3947858472998138, |
| "grad_norm": 0.431640625, |
| "learning_rate": 2.0733282642771614e-05, |
| "loss": 0.2053680419921875, |
| "num_tokens": 50715555.0, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.3954065797641217, |
| "grad_norm": 0.443359375, |
| "learning_rate": 2.0705392794482686e-05, |
| "loss": 0.2141571044921875, |
| "num_tokens": 50793206.0, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.3960273122284296, |
| "grad_norm": 0.404296875, |
| "learning_rate": 2.0677479866869486e-05, |
| "loss": 0.17827606201171875, |
| "num_tokens": 50870476.0, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.39664804469273746, |
| "grad_norm": 0.439453125, |
| "learning_rate": 2.064954397284475e-05, |
| "loss": 0.2004547119140625, |
| "num_tokens": 50949724.0, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.39726877715704534, |
| "grad_norm": 0.419921875, |
| "learning_rate": 2.0621585225414114e-05, |
| "loss": 0.195831298828125, |
| "num_tokens": 51030554.0, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.3978895096213532, |
| "grad_norm": 0.44921875, |
| "learning_rate": 2.0593603737675665e-05, |
| "loss": 0.22705841064453125, |
| "num_tokens": 51107694.0, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.3985102420856611, |
| "grad_norm": 0.42578125, |
| "learning_rate": 2.0565599622819466e-05, |
| "loss": 0.1757965087890625, |
| "num_tokens": 51181258.0, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.399130974549969, |
| "grad_norm": 0.4296875, |
| "learning_rate": 2.0537572994127142e-05, |
| "loss": 0.20317840576171875, |
| "num_tokens": 51262914.0, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.39975170701427687, |
| "grad_norm": 0.41015625, |
| "learning_rate": 2.0509523964971355e-05, |
| "loss": 0.15679168701171875, |
| "num_tokens": 51339790.0, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.40037243947858475, |
| "grad_norm": 0.4140625, |
| "learning_rate": 2.0481452648815395e-05, |
| "loss": 0.221588134765625, |
| "num_tokens": 51421532.0, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.40099317194289263, |
| "grad_norm": 0.470703125, |
| "learning_rate": 2.0453359159212715e-05, |
| "loss": 0.235626220703125, |
| "num_tokens": 51498904.0, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.4016139044072005, |
| "grad_norm": 0.369140625, |
| "learning_rate": 2.0425243609806445e-05, |
| "loss": 0.15847015380859375, |
| "num_tokens": 51579825.0, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.4022346368715084, |
| "grad_norm": 0.412109375, |
| "learning_rate": 2.039710611432897e-05, |
| "loss": 0.185516357421875, |
| "num_tokens": 51662095.0, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.4028553693358163, |
| "grad_norm": 0.384765625, |
| "learning_rate": 2.0368946786601443e-05, |
| "loss": 0.1752471923828125, |
| "num_tokens": 51739679.0, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.40347610180012417, |
| "grad_norm": 0.416015625, |
| "learning_rate": 2.0340765740533327e-05, |
| "loss": 0.18506622314453125, |
| "num_tokens": 51817801.0, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.40409683426443205, |
| "grad_norm": 0.408203125, |
| "learning_rate": 2.031256309012195e-05, |
| "loss": 0.1865386962890625, |
| "num_tokens": 51896683.0, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.40471756672873993, |
| "grad_norm": 0.46875, |
| "learning_rate": 2.0284338949452016e-05, |
| "loss": 0.216888427734375, |
| "num_tokens": 51977826.0, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.4053382991930478, |
| "grad_norm": 0.435546875, |
| "learning_rate": 2.0256093432695182e-05, |
| "loss": 0.21588134765625, |
| "num_tokens": 52058594.0, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.4059590316573557, |
| "grad_norm": 0.435546875, |
| "learning_rate": 2.0227826654109566e-05, |
| "loss": 0.215362548828125, |
| "num_tokens": 52136915.0, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.4065797641216636, |
| "grad_norm": 0.36328125, |
| "learning_rate": 2.019953872803929e-05, |
| "loss": 0.146881103515625, |
| "num_tokens": 52220786.0, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.40720049658597146, |
| "grad_norm": 0.384765625, |
| "learning_rate": 2.017122976891403e-05, |
| "loss": 0.147857666015625, |
| "num_tokens": 52296180.0, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.40782122905027934, |
| "grad_norm": 0.435546875, |
| "learning_rate": 2.0142899891248525e-05, |
| "loss": 0.18096160888671875, |
| "num_tokens": 52376517.0, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.4084419615145872, |
| "grad_norm": 0.36328125, |
| "learning_rate": 2.0114549209642165e-05, |
| "loss": 0.1657867431640625, |
| "num_tokens": 52462404.0, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.4090626939788951, |
| "grad_norm": 0.396484375, |
| "learning_rate": 2.008617783877847e-05, |
| "loss": 0.1858978271484375, |
| "num_tokens": 52546991.0, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.409683426443203, |
| "grad_norm": 0.392578125, |
| "learning_rate": 2.0057785893424656e-05, |
| "loss": 0.2008209228515625, |
| "num_tokens": 52638809.0, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.4103041589075109, |
| "grad_norm": 0.33984375, |
| "learning_rate": 2.002937348843118e-05, |
| "loss": 0.14853668212890625, |
| "num_tokens": 52727289.0, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.41092489137181876, |
| "grad_norm": 0.384765625, |
| "learning_rate": 2.000094073873124e-05, |
| "loss": 0.19116973876953125, |
| "num_tokens": 52815668.0, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.41154562383612664, |
| "grad_norm": 0.39453125, |
| "learning_rate": 1.9972487759340355e-05, |
| "loss": 0.2099609375, |
| "num_tokens": 52900680.0, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.4121663563004345, |
| "grad_norm": 0.357421875, |
| "learning_rate": 1.9944014665355855e-05, |
| "loss": 0.1764984130859375, |
| "num_tokens": 52980938.0, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.4127870887647424, |
| "grad_norm": 0.384765625, |
| "learning_rate": 1.9915521571956457e-05, |
| "loss": 0.2012176513671875, |
| "num_tokens": 53067584.0, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.4134078212290503, |
| "grad_norm": 0.384765625, |
| "learning_rate": 1.9887008594401765e-05, |
| "loss": 0.16516876220703125, |
| "num_tokens": 53147013.0, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.41402855369335817, |
| "grad_norm": 0.421875, |
| "learning_rate": 1.9858475848031824e-05, |
| "loss": 0.20810699462890625, |
| "num_tokens": 53225332.0, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.41464928615766605, |
| "grad_norm": 0.458984375, |
| "learning_rate": 1.9829923448266642e-05, |
| "loss": 0.221160888671875, |
| "num_tokens": 53300690.0, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.41527001862197394, |
| "grad_norm": 0.357421875, |
| "learning_rate": 1.9801351510605744e-05, |
| "loss": 0.137420654296875, |
| "num_tokens": 53382326.0, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.4158907510862818, |
| "grad_norm": 0.373046875, |
| "learning_rate": 1.977276015062767e-05, |
| "loss": 0.1817779541015625, |
| "num_tokens": 53462021.0, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.4165114835505897, |
| "grad_norm": 0.412109375, |
| "learning_rate": 1.9744149483989534e-05, |
| "loss": 0.19124603271484375, |
| "num_tokens": 53535953.0, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.4171322160148976, |
| "grad_norm": 0.4140625, |
| "learning_rate": 1.971551962642655e-05, |
| "loss": 0.19427490234375, |
| "num_tokens": 53610177.0, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.41775294847920547, |
| "grad_norm": 0.408203125, |
| "learning_rate": 1.9686870693751562e-05, |
| "loss": 0.18967437744140625, |
| "num_tokens": 53687501.0, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.41837368094351335, |
| "grad_norm": 0.388671875, |
| "learning_rate": 1.965820280185458e-05, |
| "loss": 0.1967315673828125, |
| "num_tokens": 53769286.0, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.41899441340782123, |
| "grad_norm": 0.455078125, |
| "learning_rate": 1.96295160667023e-05, |
| "loss": 0.218597412109375, |
| "num_tokens": 53841088.0, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.4196151458721291, |
| "grad_norm": 0.44140625, |
| "learning_rate": 1.9600810604337646e-05, |
| "loss": 0.235260009765625, |
| "num_tokens": 53921346.0, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.420235878336437, |
| "grad_norm": 0.369140625, |
| "learning_rate": 1.95720865308793e-05, |
| "loss": 0.16656494140625, |
| "num_tokens": 54000408.0, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.4208566108007449, |
| "grad_norm": 0.396484375, |
| "learning_rate": 1.9543343962521225e-05, |
| "loss": 0.18169403076171875, |
| "num_tokens": 54080150.0, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.42147734326505276, |
| "grad_norm": 0.421875, |
| "learning_rate": 1.9514583015532197e-05, |
| "loss": 0.1837158203125, |
| "num_tokens": 54159480.0, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.42209807572936064, |
| "grad_norm": 0.46484375, |
| "learning_rate": 1.9485803806255345e-05, |
| "loss": 0.220855712890625, |
| "num_tokens": 54232989.0, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.4227188081936685, |
| "grad_norm": 0.396484375, |
| "learning_rate": 1.9457006451107664e-05, |
| "loss": 0.1890869140625, |
| "num_tokens": 54315143.0, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.4233395406579764, |
| "grad_norm": 0.419921875, |
| "learning_rate": 1.942819106657956e-05, |
| "loss": 0.186798095703125, |
| "num_tokens": 54401624.0, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.4239602731222843, |
| "grad_norm": 0.44140625, |
| "learning_rate": 1.939935776923436e-05, |
| "loss": 0.2145843505859375, |
| "num_tokens": 54480488.0, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.4245810055865922, |
| "grad_norm": 0.3671875, |
| "learning_rate": 1.937050667570786e-05, |
| "loss": 0.149749755859375, |
| "num_tokens": 54557090.0, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.42520173805090006, |
| "grad_norm": 0.443359375, |
| "learning_rate": 1.9341637902707846e-05, |
| "loss": 0.27191162109375, |
| "num_tokens": 54646069.0, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.42582247051520794, |
| "grad_norm": 0.34375, |
| "learning_rate": 1.9312751567013615e-05, |
| "loss": 0.14463043212890625, |
| "num_tokens": 54735450.0, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.4264432029795158, |
| "grad_norm": 0.396484375, |
| "learning_rate": 1.9283847785475514e-05, |
| "loss": 0.18885040283203125, |
| "num_tokens": 54816470.0, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.4270639354438237, |
| "grad_norm": 0.392578125, |
| "learning_rate": 1.9254926675014452e-05, |
| "loss": 0.20259857177734375, |
| "num_tokens": 54896296.0, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.4276846679081316, |
| "grad_norm": 0.443359375, |
| "learning_rate": 1.9225988352621445e-05, |
| "loss": 0.233428955078125, |
| "num_tokens": 54981627.0, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.42830540037243947, |
| "grad_norm": 0.48046875, |
| "learning_rate": 1.919703293535714e-05, |
| "loss": 0.229583740234375, |
| "num_tokens": 55055866.0, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.42892613283674735, |
| "grad_norm": 0.45703125, |
| "learning_rate": 1.9168060540351314e-05, |
| "loss": 0.20269775390625, |
| "num_tokens": 55132535.0, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.42954686530105524, |
| "grad_norm": 0.404296875, |
| "learning_rate": 1.9139071284802447e-05, |
| "loss": 0.1852874755859375, |
| "num_tokens": 55215409.0, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.4301675977653631, |
| "grad_norm": 0.3671875, |
| "learning_rate": 1.91100652859772e-05, |
| "loss": 0.17153167724609375, |
| "num_tokens": 55294673.0, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.430788330229671, |
| "grad_norm": 0.38671875, |
| "learning_rate": 1.908104266120999e-05, |
| "loss": 0.2008056640625, |
| "num_tokens": 55377822.0, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.4314090626939789, |
| "grad_norm": 0.3828125, |
| "learning_rate": 1.9052003527902464e-05, |
| "loss": 0.17771148681640625, |
| "num_tokens": 55455943.0, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.43202979515828677, |
| "grad_norm": 0.412109375, |
| "learning_rate": 1.9022948003523063e-05, |
| "loss": 0.195098876953125, |
| "num_tokens": 55534587.0, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.43265052762259465, |
| "grad_norm": 0.400390625, |
| "learning_rate": 1.8993876205606527e-05, |
| "loss": 0.20026016235351562, |
| "num_tokens": 55617815.0, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.43327126008690253, |
| "grad_norm": 0.427734375, |
| "learning_rate": 1.8964788251753427e-05, |
| "loss": 0.198516845703125, |
| "num_tokens": 55704237.0, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.4338919925512104, |
| "grad_norm": 0.373046875, |
| "learning_rate": 1.8935684259629688e-05, |
| "loss": 0.1548919677734375, |
| "num_tokens": 55786934.0, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.4345127250155183, |
| "grad_norm": 0.392578125, |
| "learning_rate": 1.8906564346966113e-05, |
| "loss": 0.19970703125, |
| "num_tokens": 55872060.0, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.4351334574798262, |
| "grad_norm": 0.435546875, |
| "learning_rate": 1.8877428631557906e-05, |
| "loss": 0.17505645751953125, |
| "num_tokens": 55948342.0, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.43575418994413406, |
| "grad_norm": 0.431640625, |
| "learning_rate": 1.8848277231264197e-05, |
| "loss": 0.1900177001953125, |
| "num_tokens": 56023677.0, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.43637492240844195, |
| "grad_norm": 0.390625, |
| "learning_rate": 1.881911026400756e-05, |
| "loss": 0.16182708740234375, |
| "num_tokens": 56101447.0, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.43699565487274983, |
| "grad_norm": 0.419921875, |
| "learning_rate": 1.878992784777354e-05, |
| "loss": 0.189727783203125, |
| "num_tokens": 56175901.0, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.4376163873370577, |
| "grad_norm": 0.4140625, |
| "learning_rate": 1.876073010061019e-05, |
| "loss": 0.179351806640625, |
| "num_tokens": 56252404.0, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.4382371198013656, |
| "grad_norm": 0.41015625, |
| "learning_rate": 1.873151714062756e-05, |
| "loss": 0.20854949951171875, |
| "num_tokens": 56338172.0, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.4388578522656735, |
| "grad_norm": 0.392578125, |
| "learning_rate": 1.8702289085997245e-05, |
| "loss": 0.16644287109375, |
| "num_tokens": 56417116.0, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.43947858472998136, |
| "grad_norm": 0.470703125, |
| "learning_rate": 1.8673046054951908e-05, |
| "loss": 0.227203369140625, |
| "num_tokens": 56493308.0, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.44009931719428924, |
| "grad_norm": 0.390625, |
| "learning_rate": 1.864378816578478e-05, |
| "loss": 0.1591949462890625, |
| "num_tokens": 56568455.0, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.4407200496585971, |
| "grad_norm": 0.3125, |
| "learning_rate": 1.8614515536849215e-05, |
| "loss": 0.109405517578125, |
| "num_tokens": 56653227.0, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.441340782122905, |
| "grad_norm": 0.390625, |
| "learning_rate": 1.8585228286558174e-05, |
| "loss": 0.19554901123046875, |
| "num_tokens": 56739014.0, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.4419615145872129, |
| "grad_norm": 0.412109375, |
| "learning_rate": 1.8555926533383776e-05, |
| "loss": 0.18121337890625, |
| "num_tokens": 56815246.0, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.44258224705152077, |
| "grad_norm": 0.380859375, |
| "learning_rate": 1.8526610395856803e-05, |
| "loss": 0.172515869140625, |
| "num_tokens": 56894715.0, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.44320297951582865, |
| "grad_norm": 0.3984375, |
| "learning_rate": 1.849727999256621e-05, |
| "loss": 0.18987274169921875, |
| "num_tokens": 56979213.0, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.44382371198013654, |
| "grad_norm": 0.373046875, |
| "learning_rate": 1.846793544215869e-05, |
| "loss": 0.1981353759765625, |
| "num_tokens": 57070660.0, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.4444444444444444, |
| "grad_norm": 0.40625, |
| "learning_rate": 1.8438576863338133e-05, |
| "loss": 0.21075439453125, |
| "num_tokens": 57157609.0, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.4450651769087523, |
| "grad_norm": 0.416015625, |
| "learning_rate": 1.84092043748652e-05, |
| "loss": 0.2082672119140625, |
| "num_tokens": 57234608.0, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.4456859093730602, |
| "grad_norm": 0.41015625, |
| "learning_rate": 1.83798180955568e-05, |
| "loss": 0.1847076416015625, |
| "num_tokens": 57318773.0, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.44630664183736807, |
| "grad_norm": 0.353515625, |
| "learning_rate": 1.835041814428564e-05, |
| "loss": 0.1503753662109375, |
| "num_tokens": 57406853.0, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.44692737430167595, |
| "grad_norm": 0.396484375, |
| "learning_rate": 1.832100463997973e-05, |
| "loss": 0.19110107421875, |
| "num_tokens": 57488789.0, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.4475481067659839, |
| "grad_norm": 0.478515625, |
| "learning_rate": 1.829157770162191e-05, |
| "loss": 0.2366485595703125, |
| "num_tokens": 57563698.0, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.44816883923029177, |
| "grad_norm": 0.353515625, |
| "learning_rate": 1.8262137448249348e-05, |
| "loss": 0.1322479248046875, |
| "num_tokens": 57644710.0, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.44878957169459965, |
| "grad_norm": 0.349609375, |
| "learning_rate": 1.823268399895309e-05, |
| "loss": 0.1372528076171875, |
| "num_tokens": 57731064.0, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.44941030415890754, |
| "grad_norm": 0.453125, |
| "learning_rate": 1.8203217472877544e-05, |
| "loss": 0.21416473388671875, |
| "num_tokens": 57810130.0, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.4500310366232154, |
| "grad_norm": 0.4375, |
| "learning_rate": 1.8173737989220038e-05, |
| "loss": 0.24169921875, |
| "num_tokens": 57890792.0, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.4506517690875233, |
| "grad_norm": 0.47265625, |
| "learning_rate": 1.81442456672303e-05, |
| "loss": 0.239410400390625, |
| "num_tokens": 57969745.0, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.4512725015518312, |
| "grad_norm": 0.37109375, |
| "learning_rate": 1.811474062620999e-05, |
| "loss": 0.13714599609375, |
| "num_tokens": 58044387.0, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.45189323401613907, |
| "grad_norm": 0.5078125, |
| "learning_rate": 1.8085222985512234e-05, |
| "loss": 0.25836181640625, |
| "num_tokens": 58124420.0, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.45251396648044695, |
| "grad_norm": 0.412109375, |
| "learning_rate": 1.8055692864541114e-05, |
| "loss": 0.18988037109375, |
| "num_tokens": 58198686.0, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.45313469894475483, |
| "grad_norm": 0.416015625, |
| "learning_rate": 1.802615038275119e-05, |
| "loss": 0.201568603515625, |
| "num_tokens": 58276539.0, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.4537554314090627, |
| "grad_norm": 0.380859375, |
| "learning_rate": 1.7996595659647043e-05, |
| "loss": 0.1974639892578125, |
| "num_tokens": 58361867.0, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.4543761638733706, |
| "grad_norm": 0.40234375, |
| "learning_rate": 1.796702881478276e-05, |
| "loss": 0.17498779296875, |
| "num_tokens": 58436536.0, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.4549968963376785, |
| "grad_norm": 0.412109375, |
| "learning_rate": 1.793744996776146e-05, |
| "loss": 0.1988677978515625, |
| "num_tokens": 58519083.0, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.45561762880198636, |
| "grad_norm": 0.39453125, |
| "learning_rate": 1.7907859238234826e-05, |
| "loss": 0.182403564453125, |
| "num_tokens": 58595835.0, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.45623836126629425, |
| "grad_norm": 0.369140625, |
| "learning_rate": 1.7878256745902588e-05, |
| "loss": 0.1378021240234375, |
| "num_tokens": 58673443.0, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.45685909373060213, |
| "grad_norm": 0.443359375, |
| "learning_rate": 1.784864261051208e-05, |
| "loss": 0.239715576171875, |
| "num_tokens": 58757115.0, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.45747982619491, |
| "grad_norm": 0.423828125, |
| "learning_rate": 1.781901695185772e-05, |
| "loss": 0.1883392333984375, |
| "num_tokens": 58834293.0, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.4581005586592179, |
| "grad_norm": 0.396484375, |
| "learning_rate": 1.7789379889780544e-05, |
| "loss": 0.177215576171875, |
| "num_tokens": 58909500.0, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.4587212911235258, |
| "grad_norm": 0.4140625, |
| "learning_rate": 1.7759731544167715e-05, |
| "loss": 0.22180938720703125, |
| "num_tokens": 58995888.0, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.45934202358783366, |
| "grad_norm": 0.4296875, |
| "learning_rate": 1.7730072034952047e-05, |
| "loss": 0.198516845703125, |
| "num_tokens": 59075355.0, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.45996275605214154, |
| "grad_norm": 0.3984375, |
| "learning_rate": 1.7700401482111503e-05, |
| "loss": 0.205902099609375, |
| "num_tokens": 59165083.0, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.4605834885164494, |
| "grad_norm": 0.37109375, |
| "learning_rate": 1.7670720005668725e-05, |
| "loss": 0.167877197265625, |
| "num_tokens": 59249069.0, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.4612042209807573, |
| "grad_norm": 0.44140625, |
| "learning_rate": 1.7641027725690544e-05, |
| "loss": 0.2045135498046875, |
| "num_tokens": 59327815.0, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.4618249534450652, |
| "grad_norm": 0.4453125, |
| "learning_rate": 1.761132476228749e-05, |
| "loss": 0.2032623291015625, |
| "num_tokens": 59399727.0, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.46244568590937307, |
| "grad_norm": 0.40625, |
| "learning_rate": 1.75816112356133e-05, |
| "loss": 0.1790771484375, |
| "num_tokens": 59475443.0, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.46306641837368095, |
| "grad_norm": 0.408203125, |
| "learning_rate": 1.7551887265864474e-05, |
| "loss": 0.19443511962890625, |
| "num_tokens": 59555681.0, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.46368715083798884, |
| "grad_norm": 0.40625, |
| "learning_rate": 1.7522152973279713e-05, |
| "loss": 0.1773529052734375, |
| "num_tokens": 59634719.0, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.4643078833022967, |
| "grad_norm": 0.3828125, |
| "learning_rate": 1.7492408478139508e-05, |
| "loss": 0.14281463623046875, |
| "num_tokens": 59715727.0, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.4649286157666046, |
| "grad_norm": 0.4375, |
| "learning_rate": 1.7462653900765607e-05, |
| "loss": 0.20855712890625, |
| "num_tokens": 59794308.0, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.4655493482309125, |
| "grad_norm": 0.3984375, |
| "learning_rate": 1.743288936152054e-05, |
| "loss": 0.18010711669921875, |
| "num_tokens": 59872812.0, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.46617008069522037, |
| "grad_norm": 0.431640625, |
| "learning_rate": 1.7403114980807142e-05, |
| "loss": 0.210784912109375, |
| "num_tokens": 59953561.0, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.46679081315952825, |
| "grad_norm": 0.380859375, |
| "learning_rate": 1.737333087906806e-05, |
| "loss": 0.17118072509765625, |
| "num_tokens": 60032312.0, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.46741154562383613, |
| "grad_norm": 0.416015625, |
| "learning_rate": 1.7343537176785255e-05, |
| "loss": 0.18750762939453125, |
| "num_tokens": 60112734.0, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.468032278088144, |
| "grad_norm": 0.474609375, |
| "learning_rate": 1.7313733994479534e-05, |
| "loss": 0.2533111572265625, |
| "num_tokens": 60189052.0, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.4686530105524519, |
| "grad_norm": 0.4609375, |
| "learning_rate": 1.7283921452710047e-05, |
| "loss": 0.209014892578125, |
| "num_tokens": 60271917.0, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.4692737430167598, |
| "grad_norm": 0.41796875, |
| "learning_rate": 1.72540996720738e-05, |
| "loss": 0.1859893798828125, |
| "num_tokens": 60356438.0, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.46989447548106766, |
| "grad_norm": 0.37890625, |
| "learning_rate": 1.722426877320518e-05, |
| "loss": 0.14642333984375, |
| "num_tokens": 60435647.0, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.47051520794537555, |
| "grad_norm": 0.41796875, |
| "learning_rate": 1.7194428876775458e-05, |
| "loss": 0.21857452392578125, |
| "num_tokens": 60519950.0, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.47113594040968343, |
| "grad_norm": 0.39453125, |
| "learning_rate": 1.7164580103492302e-05, |
| "loss": 0.1731414794921875, |
| "num_tokens": 60603961.0, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.4717566728739913, |
| "grad_norm": 0.408203125, |
| "learning_rate": 1.713472257409928e-05, |
| "loss": 0.18233489990234375, |
| "num_tokens": 60681412.0, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.4723774053382992, |
| "grad_norm": 0.43359375, |
| "learning_rate": 1.7104856409375383e-05, |
| "loss": 0.19803619384765625, |
| "num_tokens": 60758500.0, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.4729981378026071, |
| "grad_norm": 0.416015625, |
| "learning_rate": 1.7074981730134547e-05, |
| "loss": 0.1900634765625, |
| "num_tokens": 60839969.0, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.47361887026691496, |
| "grad_norm": 0.4140625, |
| "learning_rate": 1.7045098657225134e-05, |
| "loss": 0.195281982421875, |
| "num_tokens": 60923306.0, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.47423960273122284, |
| "grad_norm": 0.373046875, |
| "learning_rate": 1.701520731152947e-05, |
| "loss": 0.155914306640625, |
| "num_tokens": 61003078.0, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.4748603351955307, |
| "grad_norm": 0.404296875, |
| "learning_rate": 1.6985307813963338e-05, |
| "loss": 0.199554443359375, |
| "num_tokens": 61090253.0, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.4754810676598386, |
| "grad_norm": 0.45703125, |
| "learning_rate": 1.6955400285475504e-05, |
| "loss": 0.22792816162109375, |
| "num_tokens": 61168492.0, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.4761018001241465, |
| "grad_norm": 0.4765625, |
| "learning_rate": 1.6925484847047213e-05, |
| "loss": 0.2374267578125, |
| "num_tokens": 61242755.0, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.4767225325884544, |
| "grad_norm": 0.421875, |
| "learning_rate": 1.6895561619691714e-05, |
| "loss": 0.2057952880859375, |
| "num_tokens": 61322276.0, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.47734326505276226, |
| "grad_norm": 0.37890625, |
| "learning_rate": 1.686563072445376e-05, |
| "loss": 0.1525726318359375, |
| "num_tokens": 61403828.0, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.47796399751707014, |
| "grad_norm": 0.435546875, |
| "learning_rate": 1.6835692282409126e-05, |
| "loss": 0.23336029052734375, |
| "num_tokens": 61488635.0, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.478584729981378, |
| "grad_norm": 0.490234375, |
| "learning_rate": 1.6805746414664112e-05, |
| "loss": 0.2491455078125, |
| "num_tokens": 61568661.0, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.4792054624456859, |
| "grad_norm": 0.44140625, |
| "learning_rate": 1.6775793242355053e-05, |
| "loss": 0.2314605712890625, |
| "num_tokens": 61647420.0, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.4798261949099938, |
| "grad_norm": 0.37109375, |
| "learning_rate": 1.6745832886647834e-05, |
| "loss": 0.1607513427734375, |
| "num_tokens": 61727741.0, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.48044692737430167, |
| "grad_norm": 0.400390625, |
| "learning_rate": 1.671586546873741e-05, |
| "loss": 0.1855926513671875, |
| "num_tokens": 61805443.0, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.48106765983860955, |
| "grad_norm": 0.396484375, |
| "learning_rate": 1.6685891109847286e-05, |
| "loss": 0.1652050018310547, |
| "num_tokens": 61882548.0, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.48168839230291743, |
| "grad_norm": 0.4296875, |
| "learning_rate": 1.665590993122905e-05, |
| "loss": 0.194183349609375, |
| "num_tokens": 61959694.0, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.4823091247672253, |
| "grad_norm": 0.369140625, |
| "learning_rate": 1.6625922054161878e-05, |
| "loss": 0.154144287109375, |
| "num_tokens": 62039175.0, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.4829298572315332, |
| "grad_norm": 0.40625, |
| "learning_rate": 1.659592759995205e-05, |
| "loss": 0.1820526123046875, |
| "num_tokens": 62117984.0, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.4835505896958411, |
| "grad_norm": 0.36328125, |
| "learning_rate": 1.656592668993245e-05, |
| "loss": 0.13739776611328125, |
| "num_tokens": 62194530.0, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.48417132216014896, |
| "grad_norm": 0.36328125, |
| "learning_rate": 1.653591944546206e-05, |
| "loss": 0.15596771240234375, |
| "num_tokens": 62280038.0, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.48479205462445685, |
| "grad_norm": 0.365234375, |
| "learning_rate": 1.65059059879255e-05, |
| "loss": 0.154205322265625, |
| "num_tokens": 62356807.0, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.48541278708876473, |
| "grad_norm": 0.3984375, |
| "learning_rate": 1.6475886438732527e-05, |
| "loss": 0.1698455810546875, |
| "num_tokens": 62435316.0, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.4860335195530726, |
| "grad_norm": 0.41796875, |
| "learning_rate": 1.6445860919317538e-05, |
| "loss": 0.2103271484375, |
| "num_tokens": 62514373.0, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.4866542520173805, |
| "grad_norm": 0.43359375, |
| "learning_rate": 1.6415829551139062e-05, |
| "loss": 0.22314453125, |
| "num_tokens": 62591991.0, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.4872749844816884, |
| "grad_norm": 0.373046875, |
| "learning_rate": 1.638579245567931e-05, |
| "loss": 0.1541900634765625, |
| "num_tokens": 62667694.0, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.48789571694599626, |
| "grad_norm": 0.439453125, |
| "learning_rate": 1.6355749754443654e-05, |
| "loss": 0.20981597900390625, |
| "num_tokens": 62747024.0, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.48851644941030414, |
| "grad_norm": 0.41015625, |
| "learning_rate": 1.6325701568960136e-05, |
| "loss": 0.1633758544921875, |
| "num_tokens": 62822599.0, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.489137181874612, |
| "grad_norm": 0.423828125, |
| "learning_rate": 1.6295648020778994e-05, |
| "loss": 0.2032928466796875, |
| "num_tokens": 62901868.0, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.4897579143389199, |
| "grad_norm": 0.40625, |
| "learning_rate": 1.626558923147215e-05, |
| "loss": 0.1837158203125, |
| "num_tokens": 62978585.0, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.4903786468032278, |
| "grad_norm": 0.369140625, |
| "learning_rate": 1.623552532263273e-05, |
| "loss": 0.17771148681640625, |
| "num_tokens": 63060469.0, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.4909993792675357, |
| "grad_norm": 0.435546875, |
| "learning_rate": 1.6205456415874572e-05, |
| "loss": 0.18357086181640625, |
| "num_tokens": 63137122.0, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.49162011173184356, |
| "grad_norm": 0.380859375, |
| "learning_rate": 1.6175382632831724e-05, |
| "loss": 0.16837310791015625, |
| "num_tokens": 63213580.0, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.49224084419615144, |
| "grad_norm": 0.3828125, |
| "learning_rate": 1.6145304095157965e-05, |
| "loss": 0.17380523681640625, |
| "num_tokens": 63297148.0, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.4928615766604593, |
| "grad_norm": 0.380859375, |
| "learning_rate": 1.611522092452631e-05, |
| "loss": 0.1910247802734375, |
| "num_tokens": 63381047.0, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.4934823091247672, |
| "grad_norm": 0.4453125, |
| "learning_rate": 1.6085133242628515e-05, |
| "loss": 0.2176666259765625, |
| "num_tokens": 63459966.0, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.4941030415890751, |
| "grad_norm": 0.375, |
| "learning_rate": 1.6055041171174574e-05, |
| "loss": 0.158843994140625, |
| "num_tokens": 63537593.0, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.49472377405338297, |
| "grad_norm": 0.408203125, |
| "learning_rate": 1.602494483189225e-05, |
| "loss": 0.1724090576171875, |
| "num_tokens": 63615445.0, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.49534450651769085, |
| "grad_norm": 0.4140625, |
| "learning_rate": 1.599484434652656e-05, |
| "loss": 0.1734771728515625, |
| "num_tokens": 63690727.0, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.49596523898199874, |
| "grad_norm": 0.39453125, |
| "learning_rate": 1.5964739836839305e-05, |
| "loss": 0.17597198486328125, |
| "num_tokens": 63776339.0, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.4965859714463066, |
| "grad_norm": 0.431640625, |
| "learning_rate": 1.5934631424608556e-05, |
| "loss": 0.2207794189453125, |
| "num_tokens": 63855669.0, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.4972067039106145, |
| "grad_norm": 0.443359375, |
| "learning_rate": 1.5904519231628175e-05, |
| "loss": 0.22064208984375, |
| "num_tokens": 63934046.0, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.4978274363749224, |
| "grad_norm": 0.380859375, |
| "learning_rate": 1.5874403379707306e-05, |
| "loss": 0.162017822265625, |
| "num_tokens": 64011864.0, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.49844816883923027, |
| "grad_norm": 0.37109375, |
| "learning_rate": 1.5844283990669915e-05, |
| "loss": 0.158050537109375, |
| "num_tokens": 64094920.0, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.49906890130353815, |
| "grad_norm": 0.4921875, |
| "learning_rate": 1.5814161186354257e-05, |
| "loss": 0.2811279296875, |
| "num_tokens": 64168919.0, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.49968963376784603, |
| "grad_norm": 0.3984375, |
| "learning_rate": 1.5784035088612415e-05, |
| "loss": 0.19830322265625, |
| "num_tokens": 64256050.0, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.500310366232154, |
| "grad_norm": 0.37890625, |
| "learning_rate": 1.575390581930979e-05, |
| "loss": 0.16234588623046875, |
| "num_tokens": 64346466.0, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.5009310986964618, |
| "grad_norm": 0.380859375, |
| "learning_rate": 1.5723773500324604e-05, |
| "loss": 0.151947021484375, |
| "num_tokens": 64422808.0, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.5015518311607697, |
| "grad_norm": 0.412109375, |
| "learning_rate": 1.569363825354743e-05, |
| "loss": 0.17218780517578125, |
| "num_tokens": 64500504.0, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.5021725636250776, |
| "grad_norm": 0.435546875, |
| "learning_rate": 1.5663500200880684e-05, |
| "loss": 0.216949462890625, |
| "num_tokens": 64578821.0, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.5027932960893855, |
| "grad_norm": 0.453125, |
| "learning_rate": 1.563335946423812e-05, |
| "loss": 0.222808837890625, |
| "num_tokens": 64663165.0, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.5034140285536933, |
| "grad_norm": 0.408203125, |
| "learning_rate": 1.560321616554435e-05, |
| "loss": 0.1801300048828125, |
| "num_tokens": 64740844.0, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.5040347610180013, |
| "grad_norm": 0.40234375, |
| "learning_rate": 1.5573070426734365e-05, |
| "loss": 0.19293975830078125, |
| "num_tokens": 64818416.0, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.5046554934823091, |
| "grad_norm": 0.3359375, |
| "learning_rate": 1.554292236975301e-05, |
| "loss": 0.1099090576171875, |
| "num_tokens": 64890722.0, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.505276225946617, |
| "grad_norm": 0.341796875, |
| "learning_rate": 1.551277211655452e-05, |
| "loss": 0.13115692138671875, |
| "num_tokens": 64966495.0, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.5058969584109249, |
| "grad_norm": 0.45703125, |
| "learning_rate": 1.5482619789102008e-05, |
| "loss": 0.2239532470703125, |
| "num_tokens": 65053323.0, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.5065176908752328, |
| "grad_norm": 0.4375, |
| "learning_rate": 1.545246550936698e-05, |
| "loss": 0.2304840087890625, |
| "num_tokens": 65131992.0, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.5071384233395406, |
| "grad_norm": 0.390625, |
| "learning_rate": 1.5422309399328832e-05, |
| "loss": 0.18270111083984375, |
| "num_tokens": 65219013.0, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.5077591558038486, |
| "grad_norm": 0.40625, |
| "learning_rate": 1.5392151580974373e-05, |
| "loss": 0.2208099365234375, |
| "num_tokens": 65309422.0, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.5083798882681564, |
| "grad_norm": 0.44140625, |
| "learning_rate": 1.536199217629732e-05, |
| "loss": 0.225372314453125, |
| "num_tokens": 65387728.0, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.5090006207324643, |
| "grad_norm": 0.369140625, |
| "learning_rate": 1.5331831307297803e-05, |
| "loss": 0.12811279296875, |
| "num_tokens": 65465547.0, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.5096213531967722, |
| "grad_norm": 0.40234375, |
| "learning_rate": 1.5301669095981885e-05, |
| "loss": 0.1576385498046875, |
| "num_tokens": 65545665.0, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.5102420856610801, |
| "grad_norm": 0.392578125, |
| "learning_rate": 1.527150566436105e-05, |
| "loss": 0.1743316650390625, |
| "num_tokens": 65629842.0, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.5108628181253879, |
| "grad_norm": 0.44140625, |
| "learning_rate": 1.5241341134451715e-05, |
| "loss": 0.22735595703125, |
| "num_tokens": 65707379.0, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.5114835505896959, |
| "grad_norm": 0.4296875, |
| "learning_rate": 1.5211175628274746e-05, |
| "loss": 0.20652008056640625, |
| "num_tokens": 65781668.0, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.5121042830540037, |
| "grad_norm": 0.373046875, |
| "learning_rate": 1.5181009267854968e-05, |
| "loss": 0.17317962646484375, |
| "num_tokens": 65864091.0, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.5127250155183116, |
| "grad_norm": 0.431640625, |
| "learning_rate": 1.5150842175220648e-05, |
| "loss": 0.211456298828125, |
| "num_tokens": 65945102.0, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.5133457479826194, |
| "grad_norm": 0.447265625, |
| "learning_rate": 1.5120674472403009e-05, |
| "loss": 0.20635986328125, |
| "num_tokens": 66021221.0, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.5139664804469274, |
| "grad_norm": 0.3984375, |
| "learning_rate": 1.5090506281435759e-05, |
| "loss": 0.16302490234375, |
| "num_tokens": 66103317.0, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.5145872129112352, |
| "grad_norm": 0.3984375, |
| "learning_rate": 1.5060337724354569e-05, |
| "loss": 0.2002105712890625, |
| "num_tokens": 66181326.0, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.5152079453755432, |
| "grad_norm": 0.380859375, |
| "learning_rate": 1.5030168923196605e-05, |
| "loss": 0.1685028076171875, |
| "num_tokens": 66259538.0, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.515828677839851, |
| "grad_norm": 0.46875, |
| "learning_rate": 1.5e-05, |
| "loss": 0.2446746826171875, |
| "num_tokens": 66339990.0, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.5164494103041589, |
| "grad_norm": 0.486328125, |
| "learning_rate": 1.49698310768034e-05, |
| "loss": 0.246551513671875, |
| "num_tokens": 66416214.0, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.5170701427684667, |
| "grad_norm": 0.375, |
| "learning_rate": 1.493966227564543e-05, |
| "loss": 0.14859771728515625, |
| "num_tokens": 66500017.0, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.5176908752327747, |
| "grad_norm": 0.421875, |
| "learning_rate": 1.4909493718564242e-05, |
| "loss": 0.2099151611328125, |
| "num_tokens": 66576672.0, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.5183116076970825, |
| "grad_norm": 0.38671875, |
| "learning_rate": 1.4879325527596997e-05, |
| "loss": 0.1600341796875, |
| "num_tokens": 66653128.0, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.5189323401613904, |
| "grad_norm": 0.44140625, |
| "learning_rate": 1.4849157824779356e-05, |
| "loss": 0.22028350830078125, |
| "num_tokens": 66734373.0, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.5195530726256983, |
| "grad_norm": 0.427734375, |
| "learning_rate": 1.4818990732145035e-05, |
| "loss": 0.198974609375, |
| "num_tokens": 66809502.0, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.5201738050900062, |
| "grad_norm": 0.39453125, |
| "learning_rate": 1.4788824371725253e-05, |
| "loss": 0.1495361328125, |
| "num_tokens": 66878792.0, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.520794537554314, |
| "grad_norm": 0.421875, |
| "learning_rate": 1.4758658865548286e-05, |
| "loss": 0.2187652587890625, |
| "num_tokens": 66962677.0, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.521415270018622, |
| "grad_norm": 0.44921875, |
| "learning_rate": 1.4728494335638957e-05, |
| "loss": 0.2018280029296875, |
| "num_tokens": 67041474.0, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.5220360024829298, |
| "grad_norm": 0.455078125, |
| "learning_rate": 1.4698330904018115e-05, |
| "loss": 0.2111968994140625, |
| "num_tokens": 67115058.0, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.5226567349472377, |
| "grad_norm": 0.41015625, |
| "learning_rate": 1.46681686927022e-05, |
| "loss": 0.1927642822265625, |
| "num_tokens": 67197272.0, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.5232774674115456, |
| "grad_norm": 0.39453125, |
| "learning_rate": 1.4638007823702684e-05, |
| "loss": 0.1621246337890625, |
| "num_tokens": 67278020.0, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.5238981998758535, |
| "grad_norm": 0.416015625, |
| "learning_rate": 1.4607848419025631e-05, |
| "loss": 0.22991943359375, |
| "num_tokens": 67362699.0, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.5245189323401613, |
| "grad_norm": 0.435546875, |
| "learning_rate": 1.4577690600671174e-05, |
| "loss": 0.22564697265625, |
| "num_tokens": 67443724.0, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.5251396648044693, |
| "grad_norm": 0.423828125, |
| "learning_rate": 1.4547534490633022e-05, |
| "loss": 0.1840057373046875, |
| "num_tokens": 67522083.0, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.5257603972687771, |
| "grad_norm": 0.38671875, |
| "learning_rate": 1.4517380210897995e-05, |
| "loss": 0.18781280517578125, |
| "num_tokens": 67604006.0, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.526381129733085, |
| "grad_norm": 0.3671875, |
| "learning_rate": 1.4487227883445484e-05, |
| "loss": 0.14209747314453125, |
| "num_tokens": 67680358.0, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.527001862197393, |
| "grad_norm": 0.431640625, |
| "learning_rate": 1.4457077630246992e-05, |
| "loss": 0.19575119018554688, |
| "num_tokens": 67761222.0, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.5276225946617008, |
| "grad_norm": 0.423828125, |
| "learning_rate": 1.442692957326564e-05, |
| "loss": 0.1881866455078125, |
| "num_tokens": 67844083.0, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.5282433271260087, |
| "grad_norm": 0.40234375, |
| "learning_rate": 1.4396783834455656e-05, |
| "loss": 0.18814468383789062, |
| "num_tokens": 67928085.0, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.5288640595903166, |
| "grad_norm": 0.384765625, |
| "learning_rate": 1.4366640535761885e-05, |
| "loss": 0.1851654052734375, |
| "num_tokens": 68010126.0, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.5294847920546245, |
| "grad_norm": 0.37109375, |
| "learning_rate": 1.433649979911932e-05, |
| "loss": 0.1590118408203125, |
| "num_tokens": 68090900.0, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.5301055245189323, |
| "grad_norm": 0.373046875, |
| "learning_rate": 1.4306361746452566e-05, |
| "loss": 0.15969085693359375, |
| "num_tokens": 68168826.0, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.5307262569832403, |
| "grad_norm": 0.35546875, |
| "learning_rate": 1.4276226499675395e-05, |
| "loss": 0.1632537841796875, |
| "num_tokens": 68255065.0, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.5313469894475481, |
| "grad_norm": 0.427734375, |
| "learning_rate": 1.4246094180690214e-05, |
| "loss": 0.20761871337890625, |
| "num_tokens": 68331792.0, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.531967721911856, |
| "grad_norm": 0.41015625, |
| "learning_rate": 1.4215964911387584e-05, |
| "loss": 0.2359619140625, |
| "num_tokens": 68419168.0, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.5325884543761639, |
| "grad_norm": 0.369140625, |
| "learning_rate": 1.4185838813645744e-05, |
| "loss": 0.1770172119140625, |
| "num_tokens": 68506536.0, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.5332091868404718, |
| "grad_norm": 0.39453125, |
| "learning_rate": 1.4155716009330088e-05, |
| "loss": 0.178009033203125, |
| "num_tokens": 68590568.0, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.5338299193047796, |
| "grad_norm": 0.380859375, |
| "learning_rate": 1.4125596620292695e-05, |
| "loss": 0.1668243408203125, |
| "num_tokens": 68667413.0, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.5344506517690876, |
| "grad_norm": 0.421875, |
| "learning_rate": 1.409548076837183e-05, |
| "loss": 0.1732025146484375, |
| "num_tokens": 68738326.0, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.5350713842333954, |
| "grad_norm": 0.376953125, |
| "learning_rate": 1.4065368575391443e-05, |
| "loss": 0.145050048828125, |
| "num_tokens": 68823637.0, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.5356921166977033, |
| "grad_norm": 0.4453125, |
| "learning_rate": 1.4035260163160697e-05, |
| "loss": 0.2113037109375, |
| "num_tokens": 68900932.0, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.5363128491620112, |
| "grad_norm": 0.423828125, |
| "learning_rate": 1.4005155653473445e-05, |
| "loss": 0.2074127197265625, |
| "num_tokens": 68977006.0, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.5369335816263191, |
| "grad_norm": 0.43359375, |
| "learning_rate": 1.3975055168107754e-05, |
| "loss": 0.2095184326171875, |
| "num_tokens": 69055188.0, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.5375543140906269, |
| "grad_norm": 0.392578125, |
| "learning_rate": 1.3944958828825428e-05, |
| "loss": 0.173980712890625, |
| "num_tokens": 69134818.0, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.5381750465549349, |
| "grad_norm": 0.404296875, |
| "learning_rate": 1.391486675737149e-05, |
| "loss": 0.17784881591796875, |
| "num_tokens": 69213114.0, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.5387957790192427, |
| "grad_norm": 0.380859375, |
| "learning_rate": 1.3884779075473689e-05, |
| "loss": 0.1586456298828125, |
| "num_tokens": 69290426.0, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.5394165114835506, |
| "grad_norm": 0.408203125, |
| "learning_rate": 1.3854695904842039e-05, |
| "loss": 0.20092010498046875, |
| "num_tokens": 69370532.0, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.5400372439478585, |
| "grad_norm": 0.365234375, |
| "learning_rate": 1.382461736716828e-05, |
| "loss": 0.16647720336914062, |
| "num_tokens": 69461040.0, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.5406579764121664, |
| "grad_norm": 0.41015625, |
| "learning_rate": 1.3794543584125429e-05, |
| "loss": 0.19617462158203125, |
| "num_tokens": 69541184.0, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.5412787088764742, |
| "grad_norm": 0.376953125, |
| "learning_rate": 1.3764474677367273e-05, |
| "loss": 0.165863037109375, |
| "num_tokens": 69622477.0, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.5418994413407822, |
| "grad_norm": 0.34765625, |
| "learning_rate": 1.373441076852785e-05, |
| "loss": 0.12197494506835938, |
| "num_tokens": 69700254.0, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.54252017380509, |
| "grad_norm": 0.38671875, |
| "learning_rate": 1.3704351979221007e-05, |
| "loss": 0.1822509765625, |
| "num_tokens": 69783067.0, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.5431409062693979, |
| "grad_norm": 0.396484375, |
| "learning_rate": 1.3674298431039864e-05, |
| "loss": 0.18769073486328125, |
| "num_tokens": 69864793.0, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.5437616387337058, |
| "grad_norm": 0.333984375, |
| "learning_rate": 1.3644250245556345e-05, |
| "loss": 0.13982391357421875, |
| "num_tokens": 69952289.0, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.5443823711980137, |
| "grad_norm": 0.4140625, |
| "learning_rate": 1.3614207544320692e-05, |
| "loss": 0.189849853515625, |
| "num_tokens": 70026926.0, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.5450031036623215, |
| "grad_norm": 0.3984375, |
| "learning_rate": 1.3584170448860945e-05, |
| "loss": 0.168121337890625, |
| "num_tokens": 70111349.0, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.5456238361266295, |
| "grad_norm": 0.423828125, |
| "learning_rate": 1.3554139080682468e-05, |
| "loss": 0.1836090087890625, |
| "num_tokens": 70187805.0, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.5462445685909373, |
| "grad_norm": 0.42578125, |
| "learning_rate": 1.3524113561267474e-05, |
| "loss": 0.2201385498046875, |
| "num_tokens": 70263202.0, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.5468653010552452, |
| "grad_norm": 0.412109375, |
| "learning_rate": 1.3494094012074497e-05, |
| "loss": 0.17308807373046875, |
| "num_tokens": 70342583.0, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.547486033519553, |
| "grad_norm": 0.4609375, |
| "learning_rate": 1.3464080554537943e-05, |
| "loss": 0.2161407470703125, |
| "num_tokens": 70412434.0, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.548106765983861, |
| "grad_norm": 0.4375, |
| "learning_rate": 1.3434073310067556e-05, |
| "loss": 0.217529296875, |
| "num_tokens": 70495209.0, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.5487274984481688, |
| "grad_norm": 0.443359375, |
| "learning_rate": 1.3404072400047946e-05, |
| "loss": 0.214080810546875, |
| "num_tokens": 70573462.0, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.5493482309124768, |
| "grad_norm": 0.41015625, |
| "learning_rate": 1.3374077945838124e-05, |
| "loss": 0.17427825927734375, |
| "num_tokens": 70657878.0, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.5499689633767846, |
| "grad_norm": 0.337890625, |
| "learning_rate": 1.3344090068770957e-05, |
| "loss": 0.14078521728515625, |
| "num_tokens": 70742198.0, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.5505896958410925, |
| "grad_norm": 0.40625, |
| "learning_rate": 1.3314108890152717e-05, |
| "loss": 0.17542266845703125, |
| "num_tokens": 70822176.0, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.5512104283054003, |
| "grad_norm": 0.37890625, |
| "learning_rate": 1.3284134531262595e-05, |
| "loss": 0.19176483154296875, |
| "num_tokens": 70909111.0, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.5518311607697083, |
| "grad_norm": 0.39453125, |
| "learning_rate": 1.3254167113352162e-05, |
| "loss": 0.163787841796875, |
| "num_tokens": 70989773.0, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.5524518932340161, |
| "grad_norm": 0.466796875, |
| "learning_rate": 1.3224206757644951e-05, |
| "loss": 0.22904205322265625, |
| "num_tokens": 71063942.0, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.553072625698324, |
| "grad_norm": 0.423828125, |
| "learning_rate": 1.319425358533589e-05, |
| "loss": 0.196685791015625, |
| "num_tokens": 71141174.0, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.5536933581626319, |
| "grad_norm": 0.3984375, |
| "learning_rate": 1.3164307717590873e-05, |
| "loss": 0.18463897705078125, |
| "num_tokens": 71222244.0, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.5543140906269398, |
| "grad_norm": 0.455078125, |
| "learning_rate": 1.3134369275546241e-05, |
| "loss": 0.2183685302734375, |
| "num_tokens": 71297532.0, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.5549348230912476, |
| "grad_norm": 0.453125, |
| "learning_rate": 1.3104438380308293e-05, |
| "loss": 0.1897430419921875, |
| "num_tokens": 71373291.0, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.5555555555555556, |
| "grad_norm": 0.470703125, |
| "learning_rate": 1.307451515295279e-05, |
| "loss": 0.2385406494140625, |
| "num_tokens": 71449653.0, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.5561762880198634, |
| "grad_norm": 0.341796875, |
| "learning_rate": 1.30445997145245e-05, |
| "loss": 0.1535797119140625, |
| "num_tokens": 71533929.0, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.5567970204841713, |
| "grad_norm": 0.392578125, |
| "learning_rate": 1.3014692186036664e-05, |
| "loss": 0.179718017578125, |
| "num_tokens": 71612633.0, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.5574177529484792, |
| "grad_norm": 0.408203125, |
| "learning_rate": 1.298479268847053e-05, |
| "loss": 0.189605712890625, |
| "num_tokens": 71692519.0, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.5580384854127871, |
| "grad_norm": 0.43359375, |
| "learning_rate": 1.2954901342774869e-05, |
| "loss": 0.2273101806640625, |
| "num_tokens": 71770072.0, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.5586592178770949, |
| "grad_norm": 0.3828125, |
| "learning_rate": 1.2925018269865453e-05, |
| "loss": 0.175201416015625, |
| "num_tokens": 71851096.0, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.5592799503414029, |
| "grad_norm": 0.373046875, |
| "learning_rate": 1.2895143590624618e-05, |
| "loss": 0.167205810546875, |
| "num_tokens": 71929139.0, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.5599006828057107, |
| "grad_norm": 0.38671875, |
| "learning_rate": 1.2865277425900725e-05, |
| "loss": 0.18930816650390625, |
| "num_tokens": 72010353.0, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.5605214152700186, |
| "grad_norm": 0.455078125, |
| "learning_rate": 1.2835419896507699e-05, |
| "loss": 0.220123291015625, |
| "num_tokens": 72089982.0, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.5611421477343265, |
| "grad_norm": 0.37109375, |
| "learning_rate": 1.2805571123224543e-05, |
| "loss": 0.16143035888671875, |
| "num_tokens": 72168193.0, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.5617628801986344, |
| "grad_norm": 0.390625, |
| "learning_rate": 1.2775731226794823e-05, |
| "loss": 0.1923980712890625, |
| "num_tokens": 72255592.0, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.5623836126629422, |
| "grad_norm": 0.42578125, |
| "learning_rate": 1.2745900327926201e-05, |
| "loss": 0.2190704345703125, |
| "num_tokens": 72327996.0, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.5630043451272502, |
| "grad_norm": 0.390625, |
| "learning_rate": 1.2716078547289956e-05, |
| "loss": 0.1983642578125, |
| "num_tokens": 72412086.0, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.563625077591558, |
| "grad_norm": 0.427734375, |
| "learning_rate": 1.2686266005520462e-05, |
| "loss": 0.2094268798828125, |
| "num_tokens": 72487655.0, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.5642458100558659, |
| "grad_norm": 0.5, |
| "learning_rate": 1.2656462823214744e-05, |
| "loss": 0.27734375, |
| "num_tokens": 72565211.0, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.5648665425201738, |
| "grad_norm": 0.41015625, |
| "learning_rate": 1.2626669120931943e-05, |
| "loss": 0.1863555908203125, |
| "num_tokens": 72647060.0, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.5654872749844817, |
| "grad_norm": 0.443359375, |
| "learning_rate": 1.259688501919286e-05, |
| "loss": 0.192108154296875, |
| "num_tokens": 72725747.0, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.5661080074487895, |
| "grad_norm": 0.3984375, |
| "learning_rate": 1.2567110638479462e-05, |
| "loss": 0.17256927490234375, |
| "num_tokens": 72804807.0, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.5667287399130975, |
| "grad_norm": 0.474609375, |
| "learning_rate": 1.2537346099234401e-05, |
| "loss": 0.210479736328125, |
| "num_tokens": 72881700.0, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.5673494723774053, |
| "grad_norm": 0.427734375, |
| "learning_rate": 1.2507591521860493e-05, |
| "loss": 0.2077178955078125, |
| "num_tokens": 72963037.0, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.5679702048417132, |
| "grad_norm": 0.404296875, |
| "learning_rate": 1.2477847026720287e-05, |
| "loss": 0.1495361328125, |
| "num_tokens": 73042459.0, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.5685909373060211, |
| "grad_norm": 0.404296875, |
| "learning_rate": 1.2448112734135527e-05, |
| "loss": 0.16144561767578125, |
| "num_tokens": 73118566.0, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.569211669770329, |
| "grad_norm": 0.44140625, |
| "learning_rate": 1.2418388764386698e-05, |
| "loss": 0.182769775390625, |
| "num_tokens": 73191457.0, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.5698324022346368, |
| "grad_norm": 0.451171875, |
| "learning_rate": 1.2388675237712516e-05, |
| "loss": 0.21152496337890625, |
| "num_tokens": 73274378.0, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.5704531346989448, |
| "grad_norm": 0.486328125, |
| "learning_rate": 1.2358972274309456e-05, |
| "loss": 0.2570343017578125, |
| "num_tokens": 73353511.0, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.5710738671632526, |
| "grad_norm": 0.400390625, |
| "learning_rate": 1.2329279994331277e-05, |
| "loss": 0.1658477783203125, |
| "num_tokens": 73432026.0, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.5716945996275605, |
| "grad_norm": 0.431640625, |
| "learning_rate": 1.2299598517888503e-05, |
| "loss": 0.2160491943359375, |
| "num_tokens": 73518039.0, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.5723153320918684, |
| "grad_norm": 0.392578125, |
| "learning_rate": 1.2269927965047957e-05, |
| "loss": 0.15822601318359375, |
| "num_tokens": 73599283.0, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.5729360645561763, |
| "grad_norm": 0.435546875, |
| "learning_rate": 1.2240268455832288e-05, |
| "loss": 0.230224609375, |
| "num_tokens": 73679927.0, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.5735567970204841, |
| "grad_norm": 0.380859375, |
| "learning_rate": 1.2210620110219457e-05, |
| "loss": 0.1621551513671875, |
| "num_tokens": 73763227.0, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.5741775294847921, |
| "grad_norm": 0.37890625, |
| "learning_rate": 1.2180983048142284e-05, |
| "loss": 0.1544647216796875, |
| "num_tokens": 73836676.0, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.5747982619490999, |
| "grad_norm": 0.458984375, |
| "learning_rate": 1.2151357389487927e-05, |
| "loss": 0.2332763671875, |
| "num_tokens": 73920361.0, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.5754189944134078, |
| "grad_norm": 0.4765625, |
| "learning_rate": 1.2121743254097413e-05, |
| "loss": 0.23199462890625, |
| "num_tokens": 73992322.0, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.5760397268777157, |
| "grad_norm": 0.326171875, |
| "learning_rate": 1.2092140761765178e-05, |
| "loss": 0.123077392578125, |
| "num_tokens": 74075990.0, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.5766604593420236, |
| "grad_norm": 0.380859375, |
| "learning_rate": 1.2062550032238543e-05, |
| "loss": 0.1793670654296875, |
| "num_tokens": 74158981.0, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.5772811918063314, |
| "grad_norm": 0.37890625, |
| "learning_rate": 1.2032971185217241e-05, |
| "loss": 0.1597747802734375, |
| "num_tokens": 74237004.0, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.5779019242706394, |
| "grad_norm": 0.41796875, |
| "learning_rate": 1.200340434035296e-05, |
| "loss": 0.1903839111328125, |
| "num_tokens": 74315297.0, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.5785226567349472, |
| "grad_norm": 0.419921875, |
| "learning_rate": 1.1973849617248809e-05, |
| "loss": 0.18768310546875, |
| "num_tokens": 74398632.0, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.5791433891992551, |
| "grad_norm": 0.375, |
| "learning_rate": 1.1944307135458887e-05, |
| "loss": 0.1831207275390625, |
| "num_tokens": 74488238.0, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.5797641216635631, |
| "grad_norm": 0.392578125, |
| "learning_rate": 1.1914777014487767e-05, |
| "loss": 0.180328369140625, |
| "num_tokens": 74571216.0, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.5803848541278709, |
| "grad_norm": 0.388671875, |
| "learning_rate": 1.1885259373790008e-05, |
| "loss": 0.1994476318359375, |
| "num_tokens": 74653026.0, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.5810055865921788, |
| "grad_norm": 0.416015625, |
| "learning_rate": 1.1855754332769706e-05, |
| "loss": 0.195098876953125, |
| "num_tokens": 74735242.0, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.5816263190564867, |
| "grad_norm": 0.375, |
| "learning_rate": 1.1826262010779966e-05, |
| "loss": 0.15753936767578125, |
| "num_tokens": 74822007.0, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.5822470515207946, |
| "grad_norm": 0.37890625, |
| "learning_rate": 1.1796782527122457e-05, |
| "loss": 0.1595611572265625, |
| "num_tokens": 74898040.0, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.5828677839851024, |
| "grad_norm": 0.412109375, |
| "learning_rate": 1.1767316001046916e-05, |
| "loss": 0.1820526123046875, |
| "num_tokens": 74981375.0, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.5834885164494104, |
| "grad_norm": 0.388671875, |
| "learning_rate": 1.1737862551750658e-05, |
| "loss": 0.17943572998046875, |
| "num_tokens": 75060177.0, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.5841092489137182, |
| "grad_norm": 0.453125, |
| "learning_rate": 1.1708422298378092e-05, |
| "loss": 0.20145416259765625, |
| "num_tokens": 75135145.0, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.5847299813780261, |
| "grad_norm": 0.625, |
| "learning_rate": 1.1678995360020272e-05, |
| "loss": 0.1782684326171875, |
| "num_tokens": 75216188.0, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.585350713842334, |
| "grad_norm": 0.392578125, |
| "learning_rate": 1.1649581855714361e-05, |
| "loss": 0.1988677978515625, |
| "num_tokens": 75304938.0, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.5859714463066419, |
| "grad_norm": 0.42578125, |
| "learning_rate": 1.1620181904443203e-05, |
| "loss": 0.184051513671875, |
| "num_tokens": 75378257.0, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.5865921787709497, |
| "grad_norm": 0.4453125, |
| "learning_rate": 1.1590795625134807e-05, |
| "loss": 0.2012481689453125, |
| "num_tokens": 75449830.0, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.5872129112352577, |
| "grad_norm": 0.396484375, |
| "learning_rate": 1.1561423136661866e-05, |
| "loss": 0.1639251708984375, |
| "num_tokens": 75532371.0, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.5878336436995655, |
| "grad_norm": 0.43359375, |
| "learning_rate": 1.1532064557841316e-05, |
| "loss": 0.1612091064453125, |
| "num_tokens": 75604484.0, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.5884543761638734, |
| "grad_norm": 0.3515625, |
| "learning_rate": 1.1502720007433792e-05, |
| "loss": 0.1376495361328125, |
| "num_tokens": 75688669.0, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.5890751086281812, |
| "grad_norm": 0.412109375, |
| "learning_rate": 1.14733896041432e-05, |
| "loss": 0.19152069091796875, |
| "num_tokens": 75766075.0, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.5896958410924892, |
| "grad_norm": 0.376953125, |
| "learning_rate": 1.1444073466616224e-05, |
| "loss": 0.1862640380859375, |
| "num_tokens": 75848988.0, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.590316573556797, |
| "grad_norm": 0.388671875, |
| "learning_rate": 1.1414771713441821e-05, |
| "loss": 0.185943603515625, |
| "num_tokens": 75933789.0, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.590937306021105, |
| "grad_norm": 0.462890625, |
| "learning_rate": 1.1385484463150784e-05, |
| "loss": 0.257293701171875, |
| "num_tokens": 76014391.0, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.5915580384854128, |
| "grad_norm": 0.40234375, |
| "learning_rate": 1.135621183421522e-05, |
| "loss": 0.19158172607421875, |
| "num_tokens": 76101364.0, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.5921787709497207, |
| "grad_norm": 0.412109375, |
| "learning_rate": 1.1326953945048096e-05, |
| "loss": 0.162933349609375, |
| "num_tokens": 76171217.0, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.5927995034140285, |
| "grad_norm": 0.4296875, |
| "learning_rate": 1.1297710914002758e-05, |
| "loss": 0.1987457275390625, |
| "num_tokens": 76245504.0, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.5934202358783365, |
| "grad_norm": 0.3671875, |
| "learning_rate": 1.1268482859372448e-05, |
| "loss": 0.14699554443359375, |
| "num_tokens": 76327814.0, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.5940409683426443, |
| "grad_norm": 0.39453125, |
| "learning_rate": 1.1239269899389812e-05, |
| "loss": 0.17163848876953125, |
| "num_tokens": 76407754.0, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.5946617008069522, |
| "grad_norm": 0.419921875, |
| "learning_rate": 1.1210072152226462e-05, |
| "loss": 0.2128143310546875, |
| "num_tokens": 76487788.0, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.5952824332712601, |
| "grad_norm": 0.466796875, |
| "learning_rate": 1.1180889735992444e-05, |
| "loss": 0.21141815185546875, |
| "num_tokens": 76559394.0, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.595903165735568, |
| "grad_norm": 0.421875, |
| "learning_rate": 1.1151722768735806e-05, |
| "loss": 0.20459747314453125, |
| "num_tokens": 76636321.0, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.5965238981998758, |
| "grad_norm": 0.40234375, |
| "learning_rate": 1.1122571368442096e-05, |
| "loss": 0.1572418212890625, |
| "num_tokens": 76713008.0, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.5971446306641838, |
| "grad_norm": 0.408203125, |
| "learning_rate": 1.1093435653033888e-05, |
| "loss": 0.187225341796875, |
| "num_tokens": 76790334.0, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.5977653631284916, |
| "grad_norm": 0.369140625, |
| "learning_rate": 1.1064315740370316e-05, |
| "loss": 0.1584625244140625, |
| "num_tokens": 76870560.0, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.5983860955927995, |
| "grad_norm": 0.35546875, |
| "learning_rate": 1.1035211748246577e-05, |
| "loss": 0.14681243896484375, |
| "num_tokens": 76946226.0, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.5990068280571074, |
| "grad_norm": 0.466796875, |
| "learning_rate": 1.1006123794393474e-05, |
| "loss": 0.21826171875, |
| "num_tokens": 77017326.0, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.5996275605214153, |
| "grad_norm": 0.392578125, |
| "learning_rate": 1.0977051996476942e-05, |
| "loss": 0.1714324951171875, |
| "num_tokens": 77102455.0, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.6002482929857231, |
| "grad_norm": 0.333984375, |
| "learning_rate": 1.0947996472097542e-05, |
| "loss": 0.13805389404296875, |
| "num_tokens": 77187110.0, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.6008690254500311, |
| "grad_norm": 0.41796875, |
| "learning_rate": 1.0918957338790012e-05, |
| "loss": 0.20819091796875, |
| "num_tokens": 77268567.0, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.6014897579143389, |
| "grad_norm": 0.40234375, |
| "learning_rate": 1.0889934714022801e-05, |
| "loss": 0.1937255859375, |
| "num_tokens": 77355659.0, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.6021104903786468, |
| "grad_norm": 0.353515625, |
| "learning_rate": 1.0860928715197556e-05, |
| "loss": 0.1458587646484375, |
| "num_tokens": 77433425.0, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.6027312228429547, |
| "grad_norm": 0.359375, |
| "learning_rate": 1.0831939459648689e-05, |
| "loss": 0.146820068359375, |
| "num_tokens": 77513317.0, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.6033519553072626, |
| "grad_norm": 0.3984375, |
| "learning_rate": 1.0802967064642867e-05, |
| "loss": 0.17783355712890625, |
| "num_tokens": 77594050.0, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.6039726877715704, |
| "grad_norm": 0.3671875, |
| "learning_rate": 1.0774011647378554e-05, |
| "loss": 0.1338653564453125, |
| "num_tokens": 77677696.0, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.6045934202358784, |
| "grad_norm": 0.4140625, |
| "learning_rate": 1.074507332498555e-05, |
| "loss": 0.1887969970703125, |
| "num_tokens": 77765990.0, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.6052141527001862, |
| "grad_norm": 0.419921875, |
| "learning_rate": 1.0716152214524492e-05, |
| "loss": 0.18674468994140625, |
| "num_tokens": 77847283.0, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.6058348851644941, |
| "grad_norm": 0.4375, |
| "learning_rate": 1.0687248432986385e-05, |
| "loss": 0.215911865234375, |
| "num_tokens": 77924250.0, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.606455617628802, |
| "grad_norm": 0.455078125, |
| "learning_rate": 1.0658362097292158e-05, |
| "loss": 0.2231903076171875, |
| "num_tokens": 78004107.0, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.6070763500931099, |
| "grad_norm": 0.46875, |
| "learning_rate": 1.062949332429214e-05, |
| "loss": 0.2095947265625, |
| "num_tokens": 78081586.0, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.6076970825574177, |
| "grad_norm": 0.419921875, |
| "learning_rate": 1.0600642230765646e-05, |
| "loss": 0.19371795654296875, |
| "num_tokens": 78158824.0, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.6083178150217257, |
| "grad_norm": 0.390625, |
| "learning_rate": 1.0571808933420446e-05, |
| "loss": 0.183502197265625, |
| "num_tokens": 78237451.0, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.6089385474860335, |
| "grad_norm": 0.419921875, |
| "learning_rate": 1.0542993548892335e-05, |
| "loss": 0.17093658447265625, |
| "num_tokens": 78310371.0, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.6095592799503414, |
| "grad_norm": 0.439453125, |
| "learning_rate": 1.0514196193744659e-05, |
| "loss": 0.22356414794921875, |
| "num_tokens": 78390169.0, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.6101800124146493, |
| "grad_norm": 0.38671875, |
| "learning_rate": 1.0485416984467807e-05, |
| "loss": 0.181396484375, |
| "num_tokens": 78474698.0, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.6108007448789572, |
| "grad_norm": 0.416015625, |
| "learning_rate": 1.045665603747878e-05, |
| "loss": 0.1837158203125, |
| "num_tokens": 78546427.0, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.611421477343265, |
| "grad_norm": 0.421875, |
| "learning_rate": 1.0427913469120702e-05, |
| "loss": 0.2014923095703125, |
| "num_tokens": 78625136.0, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.612042209807573, |
| "grad_norm": 0.38671875, |
| "learning_rate": 1.0399189395662353e-05, |
| "loss": 0.163421630859375, |
| "num_tokens": 78709674.0, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.6126629422718808, |
| "grad_norm": 0.408203125, |
| "learning_rate": 1.0370483933297702e-05, |
| "loss": 0.18042755126953125, |
| "num_tokens": 78784917.0, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.6132836747361887, |
| "grad_norm": 0.40234375, |
| "learning_rate": 1.0341797198145426e-05, |
| "loss": 0.197662353515625, |
| "num_tokens": 78869506.0, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.6139044072004965, |
| "grad_norm": 0.419921875, |
| "learning_rate": 1.0313129306248439e-05, |
| "loss": 0.215667724609375, |
| "num_tokens": 78951695.0, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.6145251396648045, |
| "grad_norm": 0.412109375, |
| "learning_rate": 1.0284480373573453e-05, |
| "loss": 0.17279052734375, |
| "num_tokens": 79029222.0, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.6151458721291123, |
| "grad_norm": 0.396484375, |
| "learning_rate": 1.0255850516010472e-05, |
| "loss": 0.17056655883789062, |
| "num_tokens": 79105801.0, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.6157666045934203, |
| "grad_norm": 0.365234375, |
| "learning_rate": 1.0227239849372333e-05, |
| "loss": 0.16425323486328125, |
| "num_tokens": 79189037.0, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.6163873370577281, |
| "grad_norm": 0.443359375, |
| "learning_rate": 1.019864848939426e-05, |
| "loss": 0.210418701171875, |
| "num_tokens": 79264685.0, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.617008069522036, |
| "grad_norm": 0.416015625, |
| "learning_rate": 1.0170076551733356e-05, |
| "loss": 0.19185638427734375, |
| "num_tokens": 79345699.0, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.6176288019863438, |
| "grad_norm": 0.361328125, |
| "learning_rate": 1.0141524151968182e-05, |
| "loss": 0.1581878662109375, |
| "num_tokens": 79431819.0, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.6182495344506518, |
| "grad_norm": 0.400390625, |
| "learning_rate": 1.0112991405598239e-05, |
| "loss": 0.189056396484375, |
| "num_tokens": 79512214.0, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.6188702669149596, |
| "grad_norm": 0.3984375, |
| "learning_rate": 1.0084478428043544e-05, |
| "loss": 0.175384521484375, |
| "num_tokens": 79594543.0, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.6194909993792675, |
| "grad_norm": 0.447265625, |
| "learning_rate": 1.005598533464415e-05, |
| "loss": 0.18316650390625, |
| "num_tokens": 79668677.0, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.6201117318435754, |
| "grad_norm": 0.42578125, |
| "learning_rate": 1.0027512240659654e-05, |
| "loss": 0.19012451171875, |
| "num_tokens": 79746193.0, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.6207324643078833, |
| "grad_norm": 0.435546875, |
| "learning_rate": 9.999059261268763e-06, |
| "loss": 0.188385009765625, |
| "num_tokens": 79822782.0, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.6213531967721911, |
| "grad_norm": 0.421875, |
| "learning_rate": 9.970626511568823e-06, |
| "loss": 0.16900634765625, |
| "num_tokens": 79903506.0, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.6219739292364991, |
| "grad_norm": 0.431640625, |
| "learning_rate": 9.942214106575347e-06, |
| "loss": 0.178558349609375, |
| "num_tokens": 79980294.0, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.6225946617008069, |
| "grad_norm": 0.388671875, |
| "learning_rate": 9.913822161221532e-06, |
| "loss": 0.14640045166015625, |
| "num_tokens": 80058311.0, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.6232153941651148, |
| "grad_norm": 0.375, |
| "learning_rate": 9.885450790357838e-06, |
| "loss": 0.1551666259765625, |
| "num_tokens": 80144031.0, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.6238361266294227, |
| "grad_norm": 0.4765625, |
| "learning_rate": 9.857100108751472e-06, |
| "loss": 0.23431396484375, |
| "num_tokens": 80218374.0, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.6244568590937306, |
| "grad_norm": 0.388671875, |
| "learning_rate": 9.828770231085973e-06, |
| "loss": 0.16182708740234375, |
| "num_tokens": 80294466.0, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.6250775915580384, |
| "grad_norm": 0.44921875, |
| "learning_rate": 9.800461271960713e-06, |
| "loss": 0.2367095947265625, |
| "num_tokens": 80367504.0, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.6256983240223464, |
| "grad_norm": 0.4140625, |
| "learning_rate": 9.772173345890433e-06, |
| "loss": 0.17333984375, |
| "num_tokens": 80444754.0, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.6263190564866542, |
| "grad_norm": 0.37890625, |
| "learning_rate": 9.743906567304819e-06, |
| "loss": 0.163726806640625, |
| "num_tokens": 80530419.0, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.6269397889509621, |
| "grad_norm": 0.408203125, |
| "learning_rate": 9.715661050547986e-06, |
| "loss": 0.1769561767578125, |
| "num_tokens": 80607413.0, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.62756052141527, |
| "grad_norm": 0.357421875, |
| "learning_rate": 9.687436909878054e-06, |
| "loss": 0.11574554443359375, |
| "num_tokens": 80690008.0, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.6281812538795779, |
| "grad_norm": 0.384765625, |
| "learning_rate": 9.659234259466675e-06, |
| "loss": 0.1707763671875, |
| "num_tokens": 80768202.0, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.6288019863438857, |
| "grad_norm": 0.44921875, |
| "learning_rate": 9.631053213398557e-06, |
| "loss": 0.2403106689453125, |
| "num_tokens": 80848546.0, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.6294227188081937, |
| "grad_norm": 0.396484375, |
| "learning_rate": 9.602893885671031e-06, |
| "loss": 0.181182861328125, |
| "num_tokens": 80928690.0, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.6300434512725015, |
| "grad_norm": 0.37890625, |
| "learning_rate": 9.57475639019356e-06, |
| "loss": 0.1595458984375, |
| "num_tokens": 81010382.0, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.6306641837368094, |
| "grad_norm": 0.423828125, |
| "learning_rate": 9.546640840787291e-06, |
| "loss": 0.1982421875, |
| "num_tokens": 81089515.0, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.6312849162011173, |
| "grad_norm": 0.4140625, |
| "learning_rate": 9.518547351184607e-06, |
| "loss": 0.1847686767578125, |
| "num_tokens": 81172315.0, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.6319056486654252, |
| "grad_norm": 0.4296875, |
| "learning_rate": 9.490476035028652e-06, |
| "loss": 0.21869659423828125, |
| "num_tokens": 81257038.0, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.6325263811297331, |
| "grad_norm": 0.400390625, |
| "learning_rate": 9.462427005872859e-06, |
| "loss": 0.14666748046875, |
| "num_tokens": 81329471.0, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.633147113594041, |
| "grad_norm": 0.416015625, |
| "learning_rate": 9.434400377180535e-06, |
| "loss": 0.1641082763671875, |
| "num_tokens": 81403335.0, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.6337678460583489, |
| "grad_norm": 0.4140625, |
| "learning_rate": 9.406396262324341e-06, |
| "loss": 0.183349609375, |
| "num_tokens": 81489845.0, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.6343885785226567, |
| "grad_norm": 0.40625, |
| "learning_rate": 9.378414774585889e-06, |
| "loss": 0.19657135009765625, |
| "num_tokens": 81568499.0, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.6350093109869647, |
| "grad_norm": 0.3984375, |
| "learning_rate": 9.350456027155254e-06, |
| "loss": 0.16814422607421875, |
| "num_tokens": 81643811.0, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.6356300434512725, |
| "grad_norm": 0.416015625, |
| "learning_rate": 9.322520133130515e-06, |
| "loss": 0.202880859375, |
| "num_tokens": 81728916.0, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.6362507759155804, |
| "grad_norm": 0.4296875, |
| "learning_rate": 9.294607205517318e-06, |
| "loss": 0.243621826171875, |
| "num_tokens": 81819717.0, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.6368715083798883, |
| "grad_norm": 0.373046875, |
| "learning_rate": 9.266717357228392e-06, |
| "loss": 0.13022613525390625, |
| "num_tokens": 81898465.0, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.6374922408441962, |
| "grad_norm": 0.37890625, |
| "learning_rate": 9.23885070108311e-06, |
| "loss": 0.15389251708984375, |
| "num_tokens": 81983540.0, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.638112973308504, |
| "grad_norm": 0.4375, |
| "learning_rate": 9.211007349807044e-06, |
| "loss": 0.19677734375, |
| "num_tokens": 82061511.0, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.638733705772812, |
| "grad_norm": 0.462890625, |
| "learning_rate": 9.183187416031465e-06, |
| "loss": 0.2108306884765625, |
| "num_tokens": 82135277.0, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.6393544382371198, |
| "grad_norm": 0.498046875, |
| "learning_rate": 9.155391012292948e-06, |
| "loss": 0.2243804931640625, |
| "num_tokens": 82211020.0, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.6399751707014277, |
| "grad_norm": 0.470703125, |
| "learning_rate": 9.12761825103286e-06, |
| "loss": 0.23211669921875, |
| "num_tokens": 82285716.0, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.6405959031657356, |
| "grad_norm": 0.408203125, |
| "learning_rate": 9.099869244596938e-06, |
| "loss": 0.1846923828125, |
| "num_tokens": 82361940.0, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.6412166356300435, |
| "grad_norm": 0.44140625, |
| "learning_rate": 9.072144105234829e-06, |
| "loss": 0.219390869140625, |
| "num_tokens": 82440261.0, |
| "step": 1033 |
| }, |
| { |
| "epoch": 0.6418373680943513, |
| "grad_norm": 0.3359375, |
| "learning_rate": 9.044442945099634e-06, |
| "loss": 0.13309478759765625, |
| "num_tokens": 82518198.0, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.6424581005586593, |
| "grad_norm": 0.423828125, |
| "learning_rate": 9.016765876247445e-06, |
| "loss": 0.200439453125, |
| "num_tokens": 82602917.0, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.6430788330229671, |
| "grad_norm": 0.322265625, |
| "learning_rate": 8.989113010636918e-06, |
| "loss": 0.1237640380859375, |
| "num_tokens": 82689370.0, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.643699565487275, |
| "grad_norm": 0.490234375, |
| "learning_rate": 8.96148446012878e-06, |
| "loss": 0.240997314453125, |
| "num_tokens": 82762908.0, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.6443202979515829, |
| "grad_norm": 0.359375, |
| "learning_rate": 8.933880336485415e-06, |
| "loss": 0.1269073486328125, |
| "num_tokens": 82837460.0, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.6449410304158908, |
| "grad_norm": 0.388671875, |
| "learning_rate": 8.9063007513704e-06, |
| "loss": 0.16131591796875, |
| "num_tokens": 82916638.0, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.6455617628801986, |
| "grad_norm": 0.462890625, |
| "learning_rate": 8.878745816348025e-06, |
| "loss": 0.2454986572265625, |
| "num_tokens": 83001996.0, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.6461824953445066, |
| "grad_norm": 0.404296875, |
| "learning_rate": 8.8512156428829e-06, |
| "loss": 0.17291259765625, |
| "num_tokens": 83078821.0, |
| "step": 1041 |
| }, |
| { |
| "epoch": 0.6468032278088144, |
| "grad_norm": 0.4453125, |
| "learning_rate": 8.823710342339439e-06, |
| "loss": 0.22509765625, |
| "num_tokens": 83158631.0, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.6474239602731223, |
| "grad_norm": 0.36328125, |
| "learning_rate": 8.796230025981456e-06, |
| "loss": 0.15559005737304688, |
| "num_tokens": 83246743.0, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.6480446927374302, |
| "grad_norm": 0.39453125, |
| "learning_rate": 8.768774804971705e-06, |
| "loss": 0.16439056396484375, |
| "num_tokens": 83324797.0, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.6486654252017381, |
| "grad_norm": 0.478515625, |
| "learning_rate": 8.741344790371411e-06, |
| "loss": 0.2326507568359375, |
| "num_tokens": 83404222.0, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.6492861576660459, |
| "grad_norm": 0.41796875, |
| "learning_rate": 8.713940093139835e-06, |
| "loss": 0.201171875, |
| "num_tokens": 83481438.0, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.6499068901303539, |
| "grad_norm": 0.375, |
| "learning_rate": 8.686560824133845e-06, |
| "loss": 0.16651153564453125, |
| "num_tokens": 83567009.0, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.6505276225946617, |
| "grad_norm": 0.42578125, |
| "learning_rate": 8.659207094107421e-06, |
| "loss": 0.20192718505859375, |
| "num_tokens": 83644082.0, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.6511483550589696, |
| "grad_norm": 0.39453125, |
| "learning_rate": 8.631879013711255e-06, |
| "loss": 0.170074462890625, |
| "num_tokens": 83720729.0, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.6517690875232774, |
| "grad_norm": 0.451171875, |
| "learning_rate": 8.604576693492269e-06, |
| "loss": 0.23583984375, |
| "num_tokens": 83800561.0, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.6523898199875854, |
| "grad_norm": 0.427734375, |
| "learning_rate": 8.577300243893173e-06, |
| "loss": 0.18898773193359375, |
| "num_tokens": 83873584.0, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.6530105524518932, |
| "grad_norm": 0.3671875, |
| "learning_rate": 8.550049775252048e-06, |
| "loss": 0.15362548828125, |
| "num_tokens": 83955767.0, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.6536312849162011, |
| "grad_norm": 0.44921875, |
| "learning_rate": 8.522825397801864e-06, |
| "loss": 0.19380950927734375, |
| "num_tokens": 84032226.0, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.654252017380509, |
| "grad_norm": 0.41015625, |
| "learning_rate": 8.495627221670043e-06, |
| "loss": 0.19615936279296875, |
| "num_tokens": 84110483.0, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.6548727498448169, |
| "grad_norm": 0.3515625, |
| "learning_rate": 8.468455356878027e-06, |
| "loss": 0.1392669677734375, |
| "num_tokens": 84188975.0, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.6554934823091247, |
| "grad_norm": 0.4140625, |
| "learning_rate": 8.441309913340826e-06, |
| "loss": 0.211944580078125, |
| "num_tokens": 84274820.0, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.6561142147734327, |
| "grad_norm": 0.44140625, |
| "learning_rate": 8.414191000866566e-06, |
| "loss": 0.23101806640625, |
| "num_tokens": 84355077.0, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.6567349472377405, |
| "grad_norm": 0.408203125, |
| "learning_rate": 8.387098729156049e-06, |
| "loss": 0.169464111328125, |
| "num_tokens": 84433760.0, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.6573556797020484, |
| "grad_norm": 0.484375, |
| "learning_rate": 8.360033207802303e-06, |
| "loss": 0.256561279296875, |
| "num_tokens": 84513581.0, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.6579764121663563, |
| "grad_norm": 0.392578125, |
| "learning_rate": 8.332994546290172e-06, |
| "loss": 0.1609954833984375, |
| "num_tokens": 84590465.0, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.6585971446306642, |
| "grad_norm": 0.435546875, |
| "learning_rate": 8.305982853995821e-06, |
| "loss": 0.188507080078125, |
| "num_tokens": 84669091.0, |
| "step": 1061 |
| }, |
| { |
| "epoch": 0.659217877094972, |
| "grad_norm": 0.416015625, |
| "learning_rate": 8.278998240186322e-06, |
| "loss": 0.1766510009765625, |
| "num_tokens": 84742945.0, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.65983860955928, |
| "grad_norm": 0.412109375, |
| "learning_rate": 8.252040814019234e-06, |
| "loss": 0.1820068359375, |
| "num_tokens": 84821324.0, |
| "step": 1063 |
| }, |
| { |
| "epoch": 0.6604593420235878, |
| "grad_norm": 0.3984375, |
| "learning_rate": 8.225110684542102e-06, |
| "loss": 0.194549560546875, |
| "num_tokens": 84906422.0, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.6610800744878957, |
| "grad_norm": 0.38671875, |
| "learning_rate": 8.198207960692083e-06, |
| "loss": 0.14788818359375, |
| "num_tokens": 84983658.0, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.6617008069522036, |
| "grad_norm": 0.392578125, |
| "learning_rate": 8.171332751295451e-06, |
| "loss": 0.170166015625, |
| "num_tokens": 85066430.0, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.6623215394165115, |
| "grad_norm": 0.333984375, |
| "learning_rate": 8.144485165067187e-06, |
| "loss": 0.13824462890625, |
| "num_tokens": 85149566.0, |
| "step": 1067 |
| }, |
| { |
| "epoch": 0.6629422718808193, |
| "grad_norm": 0.41015625, |
| "learning_rate": 8.117665310610544e-06, |
| "loss": 0.17536163330078125, |
| "num_tokens": 85232817.0, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.6635630043451273, |
| "grad_norm": 0.384765625, |
| "learning_rate": 8.090873296416573e-06, |
| "loss": 0.177276611328125, |
| "num_tokens": 85319206.0, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.6641837368094351, |
| "grad_norm": 0.4140625, |
| "learning_rate": 8.064109230863711e-06, |
| "loss": 0.2116546630859375, |
| "num_tokens": 85403774.0, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.664804469273743, |
| "grad_norm": 0.408203125, |
| "learning_rate": 8.037373222217359e-06, |
| "loss": 0.2097015380859375, |
| "num_tokens": 85489559.0, |
| "step": 1071 |
| }, |
| { |
| "epoch": 0.6654252017380509, |
| "grad_norm": 0.416015625, |
| "learning_rate": 8.010665378629394e-06, |
| "loss": 0.1838836669921875, |
| "num_tokens": 85573410.0, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.6660459342023588, |
| "grad_norm": 0.44140625, |
| "learning_rate": 7.983985808137774e-06, |
| "loss": 0.2295379638671875, |
| "num_tokens": 85647620.0, |
| "step": 1073 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 0.423828125, |
| "learning_rate": 7.957334618666096e-06, |
| "loss": 0.16323089599609375, |
| "num_tokens": 85728277.0, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.6672873991309746, |
| "grad_norm": 0.41796875, |
| "learning_rate": 7.930711918023126e-06, |
| "loss": 0.1671905517578125, |
| "num_tokens": 85802882.0, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.6679081315952824, |
| "grad_norm": 0.41015625, |
| "learning_rate": 7.904117813902413e-06, |
| "loss": 0.17621612548828125, |
| "num_tokens": 85882884.0, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.6685288640595903, |
| "grad_norm": 0.38671875, |
| "learning_rate": 7.877552413881824e-06, |
| "loss": 0.18146896362304688, |
| "num_tokens": 85962973.0, |
| "step": 1077 |
| }, |
| { |
| "epoch": 0.6691495965238982, |
| "grad_norm": 0.412109375, |
| "learning_rate": 7.851015825423093e-06, |
| "loss": 0.18597412109375, |
| "num_tokens": 86043933.0, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.6697703289882061, |
| "grad_norm": 0.38671875, |
| "learning_rate": 7.824508155871431e-06, |
| "loss": 0.15042877197265625, |
| "num_tokens": 86118306.0, |
| "step": 1079 |
| }, |
| { |
| "epoch": 0.6703910614525139, |
| "grad_norm": 0.439453125, |
| "learning_rate": 7.798029512455053e-06, |
| "loss": 0.19622802734375, |
| "num_tokens": 86191715.0, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.6710117939168219, |
| "grad_norm": 0.427734375, |
| "learning_rate": 7.771580002284752e-06, |
| "loss": 0.1968994140625, |
| "num_tokens": 86277394.0, |
| "step": 1081 |
| }, |
| { |
| "epoch": 0.6716325263811297, |
| "grad_norm": 0.44921875, |
| "learning_rate": 7.745159732353493e-06, |
| "loss": 0.2355499267578125, |
| "num_tokens": 86358424.0, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.6722532588454376, |
| "grad_norm": 0.45703125, |
| "learning_rate": 7.71876880953594e-06, |
| "loss": 0.211517333984375, |
| "num_tokens": 86434095.0, |
| "step": 1083 |
| }, |
| { |
| "epoch": 0.6728739913097455, |
| "grad_norm": 0.4375, |
| "learning_rate": 7.692407340588055e-06, |
| "loss": 0.2068023681640625, |
| "num_tokens": 86510150.0, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.6734947237740534, |
| "grad_norm": 0.408203125, |
| "learning_rate": 7.666075432146644e-06, |
| "loss": 0.1804656982421875, |
| "num_tokens": 86595432.0, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.6741154562383612, |
| "grad_norm": 0.40234375, |
| "learning_rate": 7.639773190728937e-06, |
| "loss": 0.1710968017578125, |
| "num_tokens": 86669354.0, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.6747361887026692, |
| "grad_norm": 0.4296875, |
| "learning_rate": 7.613500722732162e-06, |
| "loss": 0.2032012939453125, |
| "num_tokens": 86749701.0, |
| "step": 1087 |
| }, |
| { |
| "epoch": 0.675356921166977, |
| "grad_norm": 0.357421875, |
| "learning_rate": 7.587258134433113e-06, |
| "loss": 0.183990478515625, |
| "num_tokens": 86833740.0, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.6759776536312849, |
| "grad_norm": 0.388671875, |
| "learning_rate": 7.561045531987692e-06, |
| "loss": 0.158416748046875, |
| "num_tokens": 86912900.0, |
| "step": 1089 |
| }, |
| { |
| "epoch": 0.6765983860955928, |
| "grad_norm": 0.373046875, |
| "learning_rate": 7.5348630214305325e-06, |
| "loss": 0.16168212890625, |
| "num_tokens": 86997532.0, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.6772191185599007, |
| "grad_norm": 0.4140625, |
| "learning_rate": 7.5087107086745144e-06, |
| "loss": 0.18426513671875, |
| "num_tokens": 87072421.0, |
| "step": 1091 |
| }, |
| { |
| "epoch": 0.6778398510242085, |
| "grad_norm": 0.357421875, |
| "learning_rate": 7.4825886995103856e-06, |
| "loss": 0.15811920166015625, |
| "num_tokens": 87152464.0, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.6784605834885165, |
| "grad_norm": 0.44921875, |
| "learning_rate": 7.456497099606293e-06, |
| "loss": 0.2179718017578125, |
| "num_tokens": 87234284.0, |
| "step": 1093 |
| }, |
| { |
| "epoch": 0.6790813159528243, |
| "grad_norm": 0.39453125, |
| "learning_rate": 7.430436014507373e-06, |
| "loss": 0.1925048828125, |
| "num_tokens": 87321567.0, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.6797020484171322, |
| "grad_norm": 0.3984375, |
| "learning_rate": 7.4044055496353445e-06, |
| "loss": 0.19403839111328125, |
| "num_tokens": 87398585.0, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.68032278088144, |
| "grad_norm": 0.4140625, |
| "learning_rate": 7.378405810288035e-06, |
| "loss": 0.15915679931640625, |
| "num_tokens": 87478434.0, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.680943513345748, |
| "grad_norm": 0.390625, |
| "learning_rate": 7.352436901639005e-06, |
| "loss": 0.147705078125, |
| "num_tokens": 87557281.0, |
| "step": 1097 |
| }, |
| { |
| "epoch": 0.6815642458100558, |
| "grad_norm": 0.41796875, |
| "learning_rate": 7.3264989287370775e-06, |
| "loss": 0.19734954833984375, |
| "num_tokens": 87639054.0, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.6821849782743638, |
| "grad_norm": 0.43359375, |
| "learning_rate": 7.300591996505955e-06, |
| "loss": 0.2100677490234375, |
| "num_tokens": 87721938.0, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.6828057107386716, |
| "grad_norm": 0.3984375, |
| "learning_rate": 7.2747162097437684e-06, |
| "loss": 0.1786956787109375, |
| "num_tokens": 87808007.0, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.6834264432029795, |
| "grad_norm": 0.447265625, |
| "learning_rate": 7.248871673122655e-06, |
| "loss": 0.1780242919921875, |
| "num_tokens": 87878005.0, |
| "step": 1101 |
| }, |
| { |
| "epoch": 0.6840471756672873, |
| "grad_norm": 0.419921875, |
| "learning_rate": 7.223058491188335e-06, |
| "loss": 0.18431854248046875, |
| "num_tokens": 87955080.0, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.6846679081315953, |
| "grad_norm": 0.3828125, |
| "learning_rate": 7.197276768359712e-06, |
| "loss": 0.16007232666015625, |
| "num_tokens": 88032406.0, |
| "step": 1103 |
| }, |
| { |
| "epoch": 0.6852886405959032, |
| "grad_norm": 0.416015625, |
| "learning_rate": 7.171526608928417e-06, |
| "loss": 0.1771697998046875, |
| "num_tokens": 88110736.0, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.685909373060211, |
| "grad_norm": 0.40625, |
| "learning_rate": 7.145808117058399e-06, |
| "loss": 0.1876220703125, |
| "num_tokens": 88190107.0, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.686530105524519, |
| "grad_norm": 0.42578125, |
| "learning_rate": 7.120121396785521e-06, |
| "loss": 0.20516204833984375, |
| "num_tokens": 88266901.0, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.6871508379888268, |
| "grad_norm": 0.41015625, |
| "learning_rate": 7.094466552017108e-06, |
| "loss": 0.1861114501953125, |
| "num_tokens": 88352106.0, |
| "step": 1107 |
| }, |
| { |
| "epoch": 0.6877715704531348, |
| "grad_norm": 0.421875, |
| "learning_rate": 7.068843686531555e-06, |
| "loss": 0.18357086181640625, |
| "num_tokens": 88426358.0, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.6883923029174426, |
| "grad_norm": 0.4375, |
| "learning_rate": 7.043252903977895e-06, |
| "loss": 0.1861419677734375, |
| "num_tokens": 88503481.0, |
| "step": 1109 |
| }, |
| { |
| "epoch": 0.6890130353817505, |
| "grad_norm": 0.439453125, |
| "learning_rate": 7.017694307875365e-06, |
| "loss": 0.1988372802734375, |
| "num_tokens": 88578451.0, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.6896337678460583, |
| "grad_norm": 0.419921875, |
| "learning_rate": 6.992168001613024e-06, |
| "loss": 0.15110015869140625, |
| "num_tokens": 88650293.0, |
| "step": 1111 |
| }, |
| { |
| "epoch": 0.6902545003103663, |
| "grad_norm": 0.40234375, |
| "learning_rate": 6.9666740884492965e-06, |
| "loss": 0.1735382080078125, |
| "num_tokens": 88726764.0, |
| "step": 1112 |
| }, |
| { |
| "epoch": 0.6908752327746741, |
| "grad_norm": 0.42578125, |
| "learning_rate": 6.941212671511569e-06, |
| "loss": 0.1799774169921875, |
| "num_tokens": 88806096.0, |
| "step": 1113 |
| }, |
| { |
| "epoch": 0.691495965238982, |
| "grad_norm": 0.431640625, |
| "learning_rate": 6.9157838537957965e-06, |
| "loss": 0.18682098388671875, |
| "num_tokens": 88877547.0, |
| "step": 1114 |
| }, |
| { |
| "epoch": 0.6921166977032899, |
| "grad_norm": 0.427734375, |
| "learning_rate": 6.890387738166042e-06, |
| "loss": 0.2119140625, |
| "num_tokens": 88956193.0, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.6927374301675978, |
| "grad_norm": 0.404296875, |
| "learning_rate": 6.8650244273540845e-06, |
| "loss": 0.1871337890625, |
| "num_tokens": 89040341.0, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.6933581626319056, |
| "grad_norm": 0.427734375, |
| "learning_rate": 6.839694023959016e-06, |
| "loss": 0.2122344970703125, |
| "num_tokens": 89117484.0, |
| "step": 1117 |
| }, |
| { |
| "epoch": 0.6939788950962136, |
| "grad_norm": 0.392578125, |
| "learning_rate": 6.814396630446807e-06, |
| "loss": 0.183502197265625, |
| "num_tokens": 89195644.0, |
| "step": 1118 |
| }, |
| { |
| "epoch": 0.6945996275605214, |
| "grad_norm": 0.353515625, |
| "learning_rate": 6.789132349149886e-06, |
| "loss": 0.13417816162109375, |
| "num_tokens": 89273466.0, |
| "step": 1119 |
| }, |
| { |
| "epoch": 0.6952203600248293, |
| "grad_norm": 0.376953125, |
| "learning_rate": 6.763901282266755e-06, |
| "loss": 0.1656036376953125, |
| "num_tokens": 89358256.0, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.6958410924891372, |
| "grad_norm": 0.392578125, |
| "learning_rate": 6.738703531861537e-06, |
| "loss": 0.201568603515625, |
| "num_tokens": 89441520.0, |
| "step": 1121 |
| }, |
| { |
| "epoch": 0.6964618249534451, |
| "grad_norm": 0.38671875, |
| "learning_rate": 6.713539199863605e-06, |
| "loss": 0.15439605712890625, |
| "num_tokens": 89521371.0, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.6970825574177529, |
| "grad_norm": 0.412109375, |
| "learning_rate": 6.688408388067135e-06, |
| "loss": 0.2021331787109375, |
| "num_tokens": 89606207.0, |
| "step": 1123 |
| }, |
| { |
| "epoch": 0.6977032898820609, |
| "grad_norm": 0.439453125, |
| "learning_rate": 6.663311198130705e-06, |
| "loss": 0.21379852294921875, |
| "num_tokens": 89686029.0, |
| "step": 1124 |
| }, |
| { |
| "epoch": 0.6983240223463687, |
| "grad_norm": 0.439453125, |
| "learning_rate": 6.638247731576902e-06, |
| "loss": 0.2183380126953125, |
| "num_tokens": 89772289.0, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.6989447548106766, |
| "grad_norm": 0.419921875, |
| "learning_rate": 6.6132180897918804e-06, |
| "loss": 0.1571502685546875, |
| "num_tokens": 89842683.0, |
| "step": 1126 |
| }, |
| { |
| "epoch": 0.6995654872749845, |
| "grad_norm": 0.408203125, |
| "learning_rate": 6.5882223740249815e-06, |
| "loss": 0.19301605224609375, |
| "num_tokens": 89921966.0, |
| "step": 1127 |
| }, |
| { |
| "epoch": 0.7001862197392924, |
| "grad_norm": 0.3671875, |
| "learning_rate": 6.563260685388291e-06, |
| "loss": 0.1480255126953125, |
| "num_tokens": 89998974.0, |
| "step": 1128 |
| }, |
| { |
| "epoch": 0.7008069522036002, |
| "grad_norm": 0.392578125, |
| "learning_rate": 6.5383331248562665e-06, |
| "loss": 0.1881103515625, |
| "num_tokens": 90085169.0, |
| "step": 1129 |
| }, |
| { |
| "epoch": 0.7014276846679082, |
| "grad_norm": 0.4375, |
| "learning_rate": 6.513439793265311e-06, |
| "loss": 0.22406005859375, |
| "num_tokens": 90166745.0, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.702048417132216, |
| "grad_norm": 0.375, |
| "learning_rate": 6.488580791313354e-06, |
| "loss": 0.122772216796875, |
| "num_tokens": 90249785.0, |
| "step": 1131 |
| }, |
| { |
| "epoch": 0.7026691495965239, |
| "grad_norm": 0.396484375, |
| "learning_rate": 6.4637562195594555e-06, |
| "loss": 0.19120025634765625, |
| "num_tokens": 90336741.0, |
| "step": 1132 |
| }, |
| { |
| "epoch": 0.7032898820608318, |
| "grad_norm": 0.4765625, |
| "learning_rate": 6.438966178423413e-06, |
| "loss": 0.23110198974609375, |
| "num_tokens": 90416540.0, |
| "step": 1133 |
| }, |
| { |
| "epoch": 0.7039106145251397, |
| "grad_norm": 0.40234375, |
| "learning_rate": 6.414210768185326e-06, |
| "loss": 0.18560791015625, |
| "num_tokens": 90498123.0, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.7045313469894475, |
| "grad_norm": 0.455078125, |
| "learning_rate": 6.389490088985222e-06, |
| "loss": 0.1893463134765625, |
| "num_tokens": 90576624.0, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.7051520794537555, |
| "grad_norm": 0.41796875, |
| "learning_rate": 6.36480424082262e-06, |
| "loss": 0.190521240234375, |
| "num_tokens": 90654821.0, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.7057728119180633, |
| "grad_norm": 0.4296875, |
| "learning_rate": 6.340153323556144e-06, |
| "loss": 0.18609619140625, |
| "num_tokens": 90727103.0, |
| "step": 1137 |
| }, |
| { |
| "epoch": 0.7063935443823712, |
| "grad_norm": 0.3984375, |
| "learning_rate": 6.315537436903132e-06, |
| "loss": 0.16522216796875, |
| "num_tokens": 90805695.0, |
| "step": 1138 |
| }, |
| { |
| "epoch": 0.7070142768466791, |
| "grad_norm": 0.400390625, |
| "learning_rate": 6.29095668043919e-06, |
| "loss": 0.164306640625, |
| "num_tokens": 90882306.0, |
| "step": 1139 |
| }, |
| { |
| "epoch": 0.707635009310987, |
| "grad_norm": 0.376953125, |
| "learning_rate": 6.266411153597841e-06, |
| "loss": 0.1566009521484375, |
| "num_tokens": 90956916.0, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.7082557417752948, |
| "grad_norm": 0.345703125, |
| "learning_rate": 6.24190095567009e-06, |
| "loss": 0.15454864501953125, |
| "num_tokens": 91047860.0, |
| "step": 1141 |
| }, |
| { |
| "epoch": 0.7088764742396028, |
| "grad_norm": 0.33203125, |
| "learning_rate": 6.217426185804024e-06, |
| "loss": 0.1237030029296875, |
| "num_tokens": 91131173.0, |
| "step": 1142 |
| }, |
| { |
| "epoch": 0.7094972067039106, |
| "grad_norm": 0.42578125, |
| "learning_rate": 6.192986943004418e-06, |
| "loss": 0.1826324462890625, |
| "num_tokens": 91209571.0, |
| "step": 1143 |
| }, |
| { |
| "epoch": 0.7101179391682185, |
| "grad_norm": 0.404296875, |
| "learning_rate": 6.168583326132346e-06, |
| "loss": 0.1811981201171875, |
| "num_tokens": 91285031.0, |
| "step": 1144 |
| }, |
| { |
| "epoch": 0.7107386716325264, |
| "grad_norm": 0.451171875, |
| "learning_rate": 6.1442154339047506e-06, |
| "loss": 0.231231689453125, |
| "num_tokens": 91371948.0, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.7113594040968343, |
| "grad_norm": 0.443359375, |
| "learning_rate": 6.11988336489408e-06, |
| "loss": 0.2124786376953125, |
| "num_tokens": 91448538.0, |
| "step": 1146 |
| }, |
| { |
| "epoch": 0.7119801365611421, |
| "grad_norm": 0.330078125, |
| "learning_rate": 6.09558721752786e-06, |
| "loss": 0.1241607666015625, |
| "num_tokens": 91525226.0, |
| "step": 1147 |
| }, |
| { |
| "epoch": 0.7126008690254501, |
| "grad_norm": 0.451171875, |
| "learning_rate": 6.071327090088302e-06, |
| "loss": 0.22678375244140625, |
| "num_tokens": 91601221.0, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.7132216014897579, |
| "grad_norm": 0.453125, |
| "learning_rate": 6.047103080711925e-06, |
| "loss": 0.224884033203125, |
| "num_tokens": 91682422.0, |
| "step": 1149 |
| }, |
| { |
| "epoch": 0.7138423339540658, |
| "grad_norm": 0.42578125, |
| "learning_rate": 6.02291528738914e-06, |
| "loss": 0.17754364013671875, |
| "num_tokens": 91755328.0, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.7144630664183736, |
| "grad_norm": 0.412109375, |
| "learning_rate": 5.998763807963846e-06, |
| "loss": 0.1867523193359375, |
| "num_tokens": 91833868.0, |
| "step": 1151 |
| }, |
| { |
| "epoch": 0.7150837988826816, |
| "grad_norm": 0.416015625, |
| "learning_rate": 5.974648740133065e-06, |
| "loss": 0.208709716796875, |
| "num_tokens": 91916714.0, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.7157045313469894, |
| "grad_norm": 0.408203125, |
| "learning_rate": 5.950570181446507e-06, |
| "loss": 0.1851043701171875, |
| "num_tokens": 91991942.0, |
| "step": 1153 |
| }, |
| { |
| "epoch": 0.7163252638112974, |
| "grad_norm": 0.439453125, |
| "learning_rate": 5.926528229306215e-06, |
| "loss": 0.248809814453125, |
| "num_tokens": 92077182.0, |
| "step": 1154 |
| }, |
| { |
| "epoch": 0.7169459962756052, |
| "grad_norm": 0.408203125, |
| "learning_rate": 5.902522980966138e-06, |
| "loss": 0.18105316162109375, |
| "num_tokens": 92153100.0, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.7175667287399131, |
| "grad_norm": 0.39453125, |
| "learning_rate": 5.878554533531753e-06, |
| "loss": 0.16986083984375, |
| "num_tokens": 92237435.0, |
| "step": 1156 |
| }, |
| { |
| "epoch": 0.718187461204221, |
| "grad_norm": 0.396484375, |
| "learning_rate": 5.854622983959682e-06, |
| "loss": 0.16733551025390625, |
| "num_tokens": 92317774.0, |
| "step": 1157 |
| }, |
| { |
| "epoch": 0.7188081936685289, |
| "grad_norm": 0.419921875, |
| "learning_rate": 5.830728429057282e-06, |
| "loss": 0.208831787109375, |
| "num_tokens": 92405245.0, |
| "step": 1158 |
| }, |
| { |
| "epoch": 0.7194289261328367, |
| "grad_norm": 0.443359375, |
| "learning_rate": 5.80687096548225e-06, |
| "loss": 0.21135711669921875, |
| "num_tokens": 92488878.0, |
| "step": 1159 |
| }, |
| { |
| "epoch": 0.7200496585971446, |
| "grad_norm": 0.46875, |
| "learning_rate": 5.783050689742255e-06, |
| "loss": 0.2313690185546875, |
| "num_tokens": 92565304.0, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.7206703910614525, |
| "grad_norm": 0.412109375, |
| "learning_rate": 5.759267698194535e-06, |
| "loss": 0.18932342529296875, |
| "num_tokens": 92648605.0, |
| "step": 1161 |
| }, |
| { |
| "epoch": 0.7212911235257604, |
| "grad_norm": 0.47265625, |
| "learning_rate": 5.735522087045506e-06, |
| "loss": 0.212432861328125, |
| "num_tokens": 92722662.0, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.7219118559900682, |
| "grad_norm": 0.3828125, |
| "learning_rate": 5.711813952350362e-06, |
| "loss": 0.15892791748046875, |
| "num_tokens": 92798917.0, |
| "step": 1163 |
| }, |
| { |
| "epoch": 0.7225325884543762, |
| "grad_norm": 0.43359375, |
| "learning_rate": 5.688143390012709e-06, |
| "loss": 0.225738525390625, |
| "num_tokens": 92884483.0, |
| "step": 1164 |
| }, |
| { |
| "epoch": 0.723153320918684, |
| "grad_norm": 0.5, |
| "learning_rate": 5.6645104957841655e-06, |
| "loss": 0.2500152587890625, |
| "num_tokens": 92968061.0, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.7237740533829919, |
| "grad_norm": 0.349609375, |
| "learning_rate": 5.6409153652639755e-06, |
| "loss": 0.11112594604492188, |
| "num_tokens": 93041596.0, |
| "step": 1166 |
| }, |
| { |
| "epoch": 0.7243947858472998, |
| "grad_norm": 0.404296875, |
| "learning_rate": 5.617358093898613e-06, |
| "loss": 0.192626953125, |
| "num_tokens": 93118305.0, |
| "step": 1167 |
| }, |
| { |
| "epoch": 0.7250155183116077, |
| "grad_norm": 0.388671875, |
| "learning_rate": 5.593838776981421e-06, |
| "loss": 0.15001678466796875, |
| "num_tokens": 93195446.0, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.7256362507759155, |
| "grad_norm": 0.392578125, |
| "learning_rate": 5.570357509652195e-06, |
| "loss": 0.188720703125, |
| "num_tokens": 93278206.0, |
| "step": 1169 |
| }, |
| { |
| "epoch": 0.7262569832402235, |
| "grad_norm": 0.4140625, |
| "learning_rate": 5.546914386896822e-06, |
| "loss": 0.193389892578125, |
| "num_tokens": 93357263.0, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.7268777157045313, |
| "grad_norm": 0.400390625, |
| "learning_rate": 5.523509503546893e-06, |
| "loss": 0.1725616455078125, |
| "num_tokens": 93431226.0, |
| "step": 1171 |
| }, |
| { |
| "epoch": 0.7274984481688392, |
| "grad_norm": 0.4296875, |
| "learning_rate": 5.500142954279293e-06, |
| "loss": 0.1910552978515625, |
| "num_tokens": 93504707.0, |
| "step": 1172 |
| }, |
| { |
| "epoch": 0.7281191806331471, |
| "grad_norm": 0.421875, |
| "learning_rate": 5.4768148336158655e-06, |
| "loss": 0.1892547607421875, |
| "num_tokens": 93582811.0, |
| "step": 1173 |
| }, |
| { |
| "epoch": 0.728739913097455, |
| "grad_norm": 0.431640625, |
| "learning_rate": 5.453525235922982e-06, |
| "loss": 0.20801544189453125, |
| "num_tokens": 93666160.0, |
| "step": 1174 |
| }, |
| { |
| "epoch": 0.7293606455617628, |
| "grad_norm": 0.484375, |
| "learning_rate": 5.430274255411186e-06, |
| "loss": 0.249267578125, |
| "num_tokens": 93742826.0, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.7299813780260708, |
| "grad_norm": 0.412109375, |
| "learning_rate": 5.407061986134821e-06, |
| "loss": 0.199371337890625, |
| "num_tokens": 93826373.0, |
| "step": 1176 |
| }, |
| { |
| "epoch": 0.7306021104903786, |
| "grad_norm": 0.416015625, |
| "learning_rate": 5.383888521991622e-06, |
| "loss": 0.200042724609375, |
| "num_tokens": 93908597.0, |
| "step": 1177 |
| }, |
| { |
| "epoch": 0.7312228429546865, |
| "grad_norm": 0.416015625, |
| "learning_rate": 5.36075395672235e-06, |
| "loss": 0.214691162109375, |
| "num_tokens": 93991198.0, |
| "step": 1178 |
| }, |
| { |
| "epoch": 0.7318435754189944, |
| "grad_norm": 0.427734375, |
| "learning_rate": 5.337658383910432e-06, |
| "loss": 0.1941070556640625, |
| "num_tokens": 94069378.0, |
| "step": 1179 |
| }, |
| { |
| "epoch": 0.7324643078833023, |
| "grad_norm": 0.43359375, |
| "learning_rate": 5.314601896981535e-06, |
| "loss": 0.1955413818359375, |
| "num_tokens": 94146061.0, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.7330850403476101, |
| "grad_norm": 0.39453125, |
| "learning_rate": 5.291584589203242e-06, |
| "loss": 0.1679840087890625, |
| "num_tokens": 94226271.0, |
| "step": 1181 |
| }, |
| { |
| "epoch": 0.7337057728119181, |
| "grad_norm": 0.4609375, |
| "learning_rate": 5.26860655368464e-06, |
| "loss": 0.20273590087890625, |
| "num_tokens": 94302823.0, |
| "step": 1182 |
| }, |
| { |
| "epoch": 0.7343265052762259, |
| "grad_norm": 0.3984375, |
| "learning_rate": 5.245667883375945e-06, |
| "loss": 0.18761444091796875, |
| "num_tokens": 94385790.0, |
| "step": 1183 |
| }, |
| { |
| "epoch": 0.7349472377405338, |
| "grad_norm": 0.39453125, |
| "learning_rate": 5.2227686710681495e-06, |
| "loss": 0.186309814453125, |
| "num_tokens": 94468345.0, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.7355679702048417, |
| "grad_norm": 0.369140625, |
| "learning_rate": 5.199909009392618e-06, |
| "loss": 0.17969894409179688, |
| "num_tokens": 94560952.0, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.7361887026691496, |
| "grad_norm": 0.404296875, |
| "learning_rate": 5.177088990820725e-06, |
| "loss": 0.17291259765625, |
| "num_tokens": 94642925.0, |
| "step": 1186 |
| }, |
| { |
| "epoch": 0.7368094351334574, |
| "grad_norm": 0.404296875, |
| "learning_rate": 5.154308707663497e-06, |
| "loss": 0.1746978759765625, |
| "num_tokens": 94725017.0, |
| "step": 1187 |
| }, |
| { |
| "epoch": 0.7374301675977654, |
| "grad_norm": 0.353515625, |
| "learning_rate": 5.131568252071204e-06, |
| "loss": 0.1286163330078125, |
| "num_tokens": 94803603.0, |
| "step": 1188 |
| }, |
| { |
| "epoch": 0.7380509000620733, |
| "grad_norm": 0.4453125, |
| "learning_rate": 5.108867716033028e-06, |
| "loss": 0.1925506591796875, |
| "num_tokens": 94876440.0, |
| "step": 1189 |
| }, |
| { |
| "epoch": 0.7386716325263811, |
| "grad_norm": 0.447265625, |
| "learning_rate": 5.086207191376645e-06, |
| "loss": 0.223907470703125, |
| "num_tokens": 94952624.0, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.7392923649906891, |
| "grad_norm": 0.421875, |
| "learning_rate": 5.063586769767896e-06, |
| "loss": 0.18677520751953125, |
| "num_tokens": 95028937.0, |
| "step": 1191 |
| }, |
| { |
| "epoch": 0.7399130974549969, |
| "grad_norm": 0.3515625, |
| "learning_rate": 5.041006542710396e-06, |
| "loss": 0.12854766845703125, |
| "num_tokens": 95109395.0, |
| "step": 1192 |
| }, |
| { |
| "epoch": 0.7405338299193048, |
| "grad_norm": 0.369140625, |
| "learning_rate": 5.018466601545162e-06, |
| "loss": 0.145904541015625, |
| "num_tokens": 95186310.0, |
| "step": 1193 |
| }, |
| { |
| "epoch": 0.7411545623836127, |
| "grad_norm": 0.423828125, |
| "learning_rate": 4.995967037450238e-06, |
| "loss": 0.22533035278320312, |
| "num_tokens": 95271733.0, |
| "step": 1194 |
| }, |
| { |
| "epoch": 0.7417752948479206, |
| "grad_norm": 0.4609375, |
| "learning_rate": 4.973507941440357e-06, |
| "loss": 0.2373199462890625, |
| "num_tokens": 95349099.0, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.7423960273122284, |
| "grad_norm": 0.380859375, |
| "learning_rate": 4.9510894043665315e-06, |
| "loss": 0.140869140625, |
| "num_tokens": 95426779.0, |
| "step": 1196 |
| }, |
| { |
| "epoch": 0.7430167597765364, |
| "grad_norm": 0.4140625, |
| "learning_rate": 4.928711516915723e-06, |
| "loss": 0.19592666625976562, |
| "num_tokens": 95511284.0, |
| "step": 1197 |
| }, |
| { |
| "epoch": 0.7436374922408442, |
| "grad_norm": 0.421875, |
| "learning_rate": 4.906374369610443e-06, |
| "loss": 0.18447113037109375, |
| "num_tokens": 95587613.0, |
| "step": 1198 |
| }, |
| { |
| "epoch": 0.7442582247051521, |
| "grad_norm": 0.4140625, |
| "learning_rate": 4.884078052808405e-06, |
| "loss": 0.1688995361328125, |
| "num_tokens": 95664193.0, |
| "step": 1199 |
| }, |
| { |
| "epoch": 0.74487895716946, |
| "grad_norm": 0.353515625, |
| "learning_rate": 4.861822656702169e-06, |
| "loss": 0.13661956787109375, |
| "num_tokens": 95747007.0, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.7454996896337679, |
| "grad_norm": 0.47265625, |
| "learning_rate": 4.839608271318742e-06, |
| "loss": 0.2079925537109375, |
| "num_tokens": 95822933.0, |
| "step": 1201 |
| }, |
| { |
| "epoch": 0.7461204220980757, |
| "grad_norm": 0.36328125, |
| "learning_rate": 4.817434986519255e-06, |
| "loss": 0.14505767822265625, |
| "num_tokens": 95904324.0, |
| "step": 1202 |
| }, |
| { |
| "epoch": 0.7467411545623837, |
| "grad_norm": 0.4375, |
| "learning_rate": 4.795302891998574e-06, |
| "loss": 0.18990325927734375, |
| "num_tokens": 95982974.0, |
| "step": 1203 |
| }, |
| { |
| "epoch": 0.7473618870266915, |
| "grad_norm": 0.357421875, |
| "learning_rate": 4.7732120772849325e-06, |
| "loss": 0.1411285400390625, |
| "num_tokens": 96065625.0, |
| "step": 1204 |
| }, |
| { |
| "epoch": 0.7479826194909994, |
| "grad_norm": 0.390625, |
| "learning_rate": 4.751162631739599e-06, |
| "loss": 0.17435455322265625, |
| "num_tokens": 96141267.0, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.7486033519553073, |
| "grad_norm": 0.44140625, |
| "learning_rate": 4.7291546445564775e-06, |
| "loss": 0.238250732421875, |
| "num_tokens": 96222390.0, |
| "step": 1206 |
| }, |
| { |
| "epoch": 0.7492240844196152, |
| "grad_norm": 0.458984375, |
| "learning_rate": 4.707188204761772e-06, |
| "loss": 0.21509170532226562, |
| "num_tokens": 96302319.0, |
| "step": 1207 |
| }, |
| { |
| "epoch": 0.749844816883923, |
| "grad_norm": 0.42578125, |
| "learning_rate": 4.685263401213629e-06, |
| "loss": 0.217193603515625, |
| "num_tokens": 96383023.0, |
| "step": 1208 |
| }, |
| { |
| "epoch": 0.750465549348231, |
| "grad_norm": 0.458984375, |
| "learning_rate": 4.66338032260176e-06, |
| "loss": 0.2308349609375, |
| "num_tokens": 96462459.0, |
| "step": 1209 |
| }, |
| { |
| "epoch": 0.7510862818125388, |
| "grad_norm": 0.384765625, |
| "learning_rate": 4.641539057447085e-06, |
| "loss": 0.18218994140625, |
| "num_tokens": 96541548.0, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.7517070142768467, |
| "grad_norm": 0.408203125, |
| "learning_rate": 4.619739694101398e-06, |
| "loss": 0.1999664306640625, |
| "num_tokens": 96622820.0, |
| "step": 1211 |
| }, |
| { |
| "epoch": 0.7523277467411545, |
| "grad_norm": 0.365234375, |
| "learning_rate": 4.597982320746985e-06, |
| "loss": 0.16184234619140625, |
| "num_tokens": 96709512.0, |
| "step": 1212 |
| }, |
| { |
| "epoch": 0.7529484792054625, |
| "grad_norm": 0.3984375, |
| "learning_rate": 4.57626702539627e-06, |
| "loss": 0.168853759765625, |
| "num_tokens": 96787786.0, |
| "step": 1213 |
| }, |
| { |
| "epoch": 0.7535692116697703, |
| "grad_norm": 0.380859375, |
| "learning_rate": 4.554593895891473e-06, |
| "loss": 0.1801605224609375, |
| "num_tokens": 96877877.0, |
| "step": 1214 |
| }, |
| { |
| "epoch": 0.7541899441340782, |
| "grad_norm": 0.46875, |
| "learning_rate": 4.532963019904237e-06, |
| "loss": 0.2055816650390625, |
| "num_tokens": 96950419.0, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.7548106765983861, |
| "grad_norm": 0.412109375, |
| "learning_rate": 4.5113744849352894e-06, |
| "loss": 0.1885986328125, |
| "num_tokens": 97033679.0, |
| "step": 1216 |
| }, |
| { |
| "epoch": 0.755431409062694, |
| "grad_norm": 0.431640625, |
| "learning_rate": 4.489828378314077e-06, |
| "loss": 0.2092742919921875, |
| "num_tokens": 97117388.0, |
| "step": 1217 |
| }, |
| { |
| "epoch": 0.7560521415270018, |
| "grad_norm": 0.365234375, |
| "learning_rate": 4.468324787198412e-06, |
| "loss": 0.146820068359375, |
| "num_tokens": 97197805.0, |
| "step": 1218 |
| }, |
| { |
| "epoch": 0.7566728739913098, |
| "grad_norm": 0.3984375, |
| "learning_rate": 4.446863798574136e-06, |
| "loss": 0.18268585205078125, |
| "num_tokens": 97274498.0, |
| "step": 1219 |
| }, |
| { |
| "epoch": 0.7572936064556176, |
| "grad_norm": 0.328125, |
| "learning_rate": 4.425445499254745e-06, |
| "loss": 0.11944580078125, |
| "num_tokens": 97353682.0, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.7579143389199255, |
| "grad_norm": 0.4375, |
| "learning_rate": 4.40406997588105e-06, |
| "loss": 0.2086029052734375, |
| "num_tokens": 97437612.0, |
| "step": 1221 |
| }, |
| { |
| "epoch": 0.7585350713842334, |
| "grad_norm": 0.447265625, |
| "learning_rate": 4.38273731492083e-06, |
| "loss": 0.20543289184570312, |
| "num_tokens": 97511523.0, |
| "step": 1222 |
| }, |
| { |
| "epoch": 0.7591558038485413, |
| "grad_norm": 0.396484375, |
| "learning_rate": 4.361447602668479e-06, |
| "loss": 0.19820404052734375, |
| "num_tokens": 97592227.0, |
| "step": 1223 |
| }, |
| { |
| "epoch": 0.7597765363128491, |
| "grad_norm": 0.4140625, |
| "learning_rate": 4.340200925244659e-06, |
| "loss": 0.1703033447265625, |
| "num_tokens": 97666467.0, |
| "step": 1224 |
| }, |
| { |
| "epoch": 0.7603972687771571, |
| "grad_norm": 0.44921875, |
| "learning_rate": 4.3189973685959345e-06, |
| "loss": 0.2415924072265625, |
| "num_tokens": 97744282.0, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.7610180012414649, |
| "grad_norm": 0.369140625, |
| "learning_rate": 4.297837018494445e-06, |
| "loss": 0.1400146484375, |
| "num_tokens": 97821398.0, |
| "step": 1226 |
| }, |
| { |
| "epoch": 0.7616387337057728, |
| "grad_norm": 0.41796875, |
| "learning_rate": 4.276719960537565e-06, |
| "loss": 0.1937255859375, |
| "num_tokens": 97900872.0, |
| "step": 1227 |
| }, |
| { |
| "epoch": 0.7622594661700807, |
| "grad_norm": 0.353515625, |
| "learning_rate": 4.255646280147526e-06, |
| "loss": 0.15023040771484375, |
| "num_tokens": 97982087.0, |
| "step": 1228 |
| }, |
| { |
| "epoch": 0.7628801986343886, |
| "grad_norm": 0.419921875, |
| "learning_rate": 4.234616062571094e-06, |
| "loss": 0.2020111083984375, |
| "num_tokens": 98064001.0, |
| "step": 1229 |
| }, |
| { |
| "epoch": 0.7635009310986964, |
| "grad_norm": 0.42578125, |
| "learning_rate": 4.213629392879233e-06, |
| "loss": 0.19549560546875, |
| "num_tokens": 98144451.0, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.7641216635630044, |
| "grad_norm": 0.40234375, |
| "learning_rate": 4.192686355966729e-06, |
| "loss": 0.1761016845703125, |
| "num_tokens": 98226114.0, |
| "step": 1231 |
| }, |
| { |
| "epoch": 0.7647423960273122, |
| "grad_norm": 0.421875, |
| "learning_rate": 4.171787036551886e-06, |
| "loss": 0.19824981689453125, |
| "num_tokens": 98302072.0, |
| "step": 1232 |
| }, |
| { |
| "epoch": 0.7653631284916201, |
| "grad_norm": 0.408203125, |
| "learning_rate": 4.150931519176141e-06, |
| "loss": 0.205413818359375, |
| "num_tokens": 98386088.0, |
| "step": 1233 |
| }, |
| { |
| "epoch": 0.765983860955928, |
| "grad_norm": 0.400390625, |
| "learning_rate": 4.1301198882037595e-06, |
| "loss": 0.1674957275390625, |
| "num_tokens": 98470647.0, |
| "step": 1234 |
| }, |
| { |
| "epoch": 0.7666045934202359, |
| "grad_norm": 0.400390625, |
| "learning_rate": 4.109352227821482e-06, |
| "loss": 0.1761627197265625, |
| "num_tokens": 98547612.0, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.7672253258845437, |
| "grad_norm": 0.408203125, |
| "learning_rate": 4.088628622038165e-06, |
| "loss": 0.18512725830078125, |
| "num_tokens": 98627105.0, |
| "step": 1236 |
| }, |
| { |
| "epoch": 0.7678460583488517, |
| "grad_norm": 0.439453125, |
| "learning_rate": 4.067949154684462e-06, |
| "loss": 0.2196044921875, |
| "num_tokens": 98706232.0, |
| "step": 1237 |
| }, |
| { |
| "epoch": 0.7684667908131595, |
| "grad_norm": 0.384765625, |
| "learning_rate": 4.047313909412488e-06, |
| "loss": 0.1661376953125, |
| "num_tokens": 98791923.0, |
| "step": 1238 |
| }, |
| { |
| "epoch": 0.7690875232774674, |
| "grad_norm": 0.431640625, |
| "learning_rate": 4.026722969695462e-06, |
| "loss": 0.2099761962890625, |
| "num_tokens": 98872645.0, |
| "step": 1239 |
| }, |
| { |
| "epoch": 0.7697082557417753, |
| "grad_norm": 0.3828125, |
| "learning_rate": 4.0061764188273755e-06, |
| "loss": 0.1783294677734375, |
| "num_tokens": 98954387.0, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.7703289882060832, |
| "grad_norm": 0.451171875, |
| "learning_rate": 3.985674339922674e-06, |
| "loss": 0.2251434326171875, |
| "num_tokens": 99033442.0, |
| "step": 1241 |
| }, |
| { |
| "epoch": 0.770949720670391, |
| "grad_norm": 0.421875, |
| "learning_rate": 3.965216815915891e-06, |
| "loss": 0.18013763427734375, |
| "num_tokens": 99111922.0, |
| "step": 1242 |
| }, |
| { |
| "epoch": 0.771570453134699, |
| "grad_norm": 0.44140625, |
| "learning_rate": 3.944803929561336e-06, |
| "loss": 0.1980133056640625, |
| "num_tokens": 99195722.0, |
| "step": 1243 |
| }, |
| { |
| "epoch": 0.7721911855990068, |
| "grad_norm": 0.396484375, |
| "learning_rate": 3.924435763432755e-06, |
| "loss": 0.174835205078125, |
| "num_tokens": 99281344.0, |
| "step": 1244 |
| }, |
| { |
| "epoch": 0.7728119180633147, |
| "grad_norm": 0.43359375, |
| "learning_rate": 3.904112399922981e-06, |
| "loss": 0.19427490234375, |
| "num_tokens": 99364276.0, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.7734326505276226, |
| "grad_norm": 0.3984375, |
| "learning_rate": 3.883833921243622e-06, |
| "loss": 0.16762542724609375, |
| "num_tokens": 99448144.0, |
| "step": 1246 |
| }, |
| { |
| "epoch": 0.7740533829919305, |
| "grad_norm": 0.39453125, |
| "learning_rate": 3.863600409424716e-06, |
| "loss": 0.1667022705078125, |
| "num_tokens": 99532641.0, |
| "step": 1247 |
| }, |
| { |
| "epoch": 0.7746741154562383, |
| "grad_norm": 0.416015625, |
| "learning_rate": 3.8434119463143935e-06, |
| "loss": 0.21457672119140625, |
| "num_tokens": 99617767.0, |
| "step": 1248 |
| }, |
| { |
| "epoch": 0.7752948479205463, |
| "grad_norm": 0.390625, |
| "learning_rate": 3.823268613578576e-06, |
| "loss": 0.17430877685546875, |
| "num_tokens": 99695368.0, |
| "step": 1249 |
| }, |
| { |
| "epoch": 0.7759155803848541, |
| "grad_norm": 0.36328125, |
| "learning_rate": 3.8031704927005983e-06, |
| "loss": 0.17668914794921875, |
| "num_tokens": 99778699.0, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.776536312849162, |
| "grad_norm": 0.4296875, |
| "learning_rate": 3.783117664980932e-06, |
| "loss": 0.18178558349609375, |
| "num_tokens": 99859725.0, |
| "step": 1251 |
| }, |
| { |
| "epoch": 0.7771570453134699, |
| "grad_norm": 0.3828125, |
| "learning_rate": 3.7631102115368087e-06, |
| "loss": 0.16890716552734375, |
| "num_tokens": 99944743.0, |
| "step": 1252 |
| }, |
| { |
| "epoch": 0.7777777777777778, |
| "grad_norm": 0.384765625, |
| "learning_rate": 3.743148213301919e-06, |
| "loss": 0.18173980712890625, |
| "num_tokens": 100033423.0, |
| "step": 1253 |
| }, |
| { |
| "epoch": 0.7783985102420856, |
| "grad_norm": 0.466796875, |
| "learning_rate": 3.7232317510260825e-06, |
| "loss": 0.24291229248046875, |
| "num_tokens": 100113246.0, |
| "step": 1254 |
| }, |
| { |
| "epoch": 0.7790192427063936, |
| "grad_norm": 0.421875, |
| "learning_rate": 3.703360905274924e-06, |
| "loss": 0.16143798828125, |
| "num_tokens": 100186995.0, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.7796399751707014, |
| "grad_norm": 0.443359375, |
| "learning_rate": 3.683535756429523e-06, |
| "loss": 0.221038818359375, |
| "num_tokens": 100271754.0, |
| "step": 1256 |
| }, |
| { |
| "epoch": 0.7802607076350093, |
| "grad_norm": 0.455078125, |
| "learning_rate": 3.6637563846861278e-06, |
| "loss": 0.1905517578125, |
| "num_tokens": 100345576.0, |
| "step": 1257 |
| }, |
| { |
| "epoch": 0.7808814400993171, |
| "grad_norm": 0.431640625, |
| "learning_rate": 3.6440228700557893e-06, |
| "loss": 0.206390380859375, |
| "num_tokens": 100426619.0, |
| "step": 1258 |
| }, |
| { |
| "epoch": 0.7815021725636251, |
| "grad_norm": 0.3828125, |
| "learning_rate": 3.624335292364077e-06, |
| "loss": 0.1775665283203125, |
| "num_tokens": 100514871.0, |
| "step": 1259 |
| }, |
| { |
| "epoch": 0.7821229050279329, |
| "grad_norm": 0.486328125, |
| "learning_rate": 3.6046937312507296e-06, |
| "loss": 0.2408599853515625, |
| "num_tokens": 100596087.0, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.7827436374922409, |
| "grad_norm": 0.41015625, |
| "learning_rate": 3.58509826616933e-06, |
| "loss": 0.19145965576171875, |
| "num_tokens": 100679129.0, |
| "step": 1261 |
| }, |
| { |
| "epoch": 0.7833643699565487, |
| "grad_norm": 0.34765625, |
| "learning_rate": 3.565548976387018e-06, |
| "loss": 0.15478515625, |
| "num_tokens": 100765813.0, |
| "step": 1262 |
| }, |
| { |
| "epoch": 0.7839851024208566, |
| "grad_norm": 0.41015625, |
| "learning_rate": 3.546045940984123e-06, |
| "loss": 0.1922149658203125, |
| "num_tokens": 100850232.0, |
| "step": 1263 |
| }, |
| { |
| "epoch": 0.7846058348851644, |
| "grad_norm": 0.40625, |
| "learning_rate": 3.5265892388538795e-06, |
| "loss": 0.17694091796875, |
| "num_tokens": 100921417.0, |
| "step": 1264 |
| }, |
| { |
| "epoch": 0.7852265673494724, |
| "grad_norm": 0.39453125, |
| "learning_rate": 3.5071789487021017e-06, |
| "loss": 0.17737579345703125, |
| "num_tokens": 101003910.0, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.7858472998137802, |
| "grad_norm": 0.45703125, |
| "learning_rate": 3.487815149046838e-06, |
| "loss": 0.22576904296875, |
| "num_tokens": 101087743.0, |
| "step": 1266 |
| }, |
| { |
| "epoch": 0.7864680322780881, |
| "grad_norm": 0.40625, |
| "learning_rate": 3.4684979182180976e-06, |
| "loss": 0.1746368408203125, |
| "num_tokens": 101172824.0, |
| "step": 1267 |
| }, |
| { |
| "epoch": 0.787088764742396, |
| "grad_norm": 0.462890625, |
| "learning_rate": 3.4492273343574972e-06, |
| "loss": 0.2132110595703125, |
| "num_tokens": 101251894.0, |
| "step": 1268 |
| }, |
| { |
| "epoch": 0.7877094972067039, |
| "grad_norm": 0.42578125, |
| "learning_rate": 3.4300034754179555e-06, |
| "loss": 0.2190704345703125, |
| "num_tokens": 101331559.0, |
| "step": 1269 |
| }, |
| { |
| "epoch": 0.7883302296710117, |
| "grad_norm": 0.4140625, |
| "learning_rate": 3.4108264191633946e-06, |
| "loss": 0.194610595703125, |
| "num_tokens": 101410420.0, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.7889509621353197, |
| "grad_norm": 0.490234375, |
| "learning_rate": 3.3916962431684017e-06, |
| "loss": 0.271148681640625, |
| "num_tokens": 101492994.0, |
| "step": 1271 |
| }, |
| { |
| "epoch": 0.7895716945996276, |
| "grad_norm": 0.43359375, |
| "learning_rate": 3.37261302481792e-06, |
| "loss": 0.18902587890625, |
| "num_tokens": 101570639.0, |
| "step": 1272 |
| }, |
| { |
| "epoch": 0.7901924270639354, |
| "grad_norm": 0.4609375, |
| "learning_rate": 3.3535768413069547e-06, |
| "loss": 0.198089599609375, |
| "num_tokens": 101642573.0, |
| "step": 1273 |
| }, |
| { |
| "epoch": 0.7908131595282434, |
| "grad_norm": 0.392578125, |
| "learning_rate": 3.3345877696402347e-06, |
| "loss": 0.165008544921875, |
| "num_tokens": 101723678.0, |
| "step": 1274 |
| }, |
| { |
| "epoch": 0.7914338919925512, |
| "grad_norm": 0.380859375, |
| "learning_rate": 3.3156458866319195e-06, |
| "loss": 0.1746673583984375, |
| "num_tokens": 101808849.0, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.7920546244568591, |
| "grad_norm": 0.39453125, |
| "learning_rate": 3.2967512689052844e-06, |
| "loss": 0.16193389892578125, |
| "num_tokens": 101882091.0, |
| "step": 1276 |
| }, |
| { |
| "epoch": 0.792675356921167, |
| "grad_norm": 0.41796875, |
| "learning_rate": 3.2779039928923967e-06, |
| "loss": 0.20468902587890625, |
| "num_tokens": 101961594.0, |
| "step": 1277 |
| }, |
| { |
| "epoch": 0.7932960893854749, |
| "grad_norm": 0.38671875, |
| "learning_rate": 3.2591041348338334e-06, |
| "loss": 0.1653900146484375, |
| "num_tokens": 102043847.0, |
| "step": 1278 |
| }, |
| { |
| "epoch": 0.7939168218497827, |
| "grad_norm": 0.375, |
| "learning_rate": 3.2403517707783476e-06, |
| "loss": 0.14620208740234375, |
| "num_tokens": 102120706.0, |
| "step": 1279 |
| }, |
| { |
| "epoch": 0.7945375543140907, |
| "grad_norm": 0.361328125, |
| "learning_rate": 3.221646976582568e-06, |
| "loss": 0.14786529541015625, |
| "num_tokens": 102207470.0, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.7951582867783985, |
| "grad_norm": 0.427734375, |
| "learning_rate": 3.2029898279107062e-06, |
| "loss": 0.20794677734375, |
| "num_tokens": 102291117.0, |
| "step": 1281 |
| }, |
| { |
| "epoch": 0.7957790192427064, |
| "grad_norm": 0.3828125, |
| "learning_rate": 3.1843804002342296e-06, |
| "loss": 0.1543731689453125, |
| "num_tokens": 102374864.0, |
| "step": 1282 |
| }, |
| { |
| "epoch": 0.7963997517070143, |
| "grad_norm": 0.3671875, |
| "learning_rate": 3.1658187688315654e-06, |
| "loss": 0.12654876708984375, |
| "num_tokens": 102451005.0, |
| "step": 1283 |
| }, |
| { |
| "epoch": 0.7970204841713222, |
| "grad_norm": 0.388671875, |
| "learning_rate": 3.147305008787805e-06, |
| "loss": 0.153411865234375, |
| "num_tokens": 102529847.0, |
| "step": 1284 |
| }, |
| { |
| "epoch": 0.79764121663563, |
| "grad_norm": 0.41015625, |
| "learning_rate": 3.1288391949943825e-06, |
| "loss": 0.1969146728515625, |
| "num_tokens": 102606936.0, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.798261949099938, |
| "grad_norm": 0.44921875, |
| "learning_rate": 3.110421402148793e-06, |
| "loss": 0.2253570556640625, |
| "num_tokens": 102693887.0, |
| "step": 1286 |
| }, |
| { |
| "epoch": 0.7988826815642458, |
| "grad_norm": 0.353515625, |
| "learning_rate": 3.0920517047542656e-06, |
| "loss": 0.14318084716796875, |
| "num_tokens": 102784300.0, |
| "step": 1287 |
| }, |
| { |
| "epoch": 0.7995034140285537, |
| "grad_norm": 0.4140625, |
| "learning_rate": 3.0737301771194744e-06, |
| "loss": 0.1566314697265625, |
| "num_tokens": 102855191.0, |
| "step": 1288 |
| }, |
| { |
| "epoch": 0.8001241464928616, |
| "grad_norm": 0.455078125, |
| "learning_rate": 3.0554568933582505e-06, |
| "loss": 0.226654052734375, |
| "num_tokens": 102934610.0, |
| "step": 1289 |
| }, |
| { |
| "epoch": 0.8007448789571695, |
| "grad_norm": 0.416015625, |
| "learning_rate": 3.0372319273892606e-06, |
| "loss": 0.20951080322265625, |
| "num_tokens": 103017237.0, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.8013656114214773, |
| "grad_norm": 0.416015625, |
| "learning_rate": 3.019055352935717e-06, |
| "loss": 0.206024169921875, |
| "num_tokens": 103102408.0, |
| "step": 1291 |
| }, |
| { |
| "epoch": 0.8019863438857853, |
| "grad_norm": 0.404296875, |
| "learning_rate": 3.0009272435250878e-06, |
| "loss": 0.1588134765625, |
| "num_tokens": 103174813.0, |
| "step": 1292 |
| }, |
| { |
| "epoch": 0.8026070763500931, |
| "grad_norm": 0.443359375, |
| "learning_rate": 2.982847672488778e-06, |
| "loss": 0.19913482666015625, |
| "num_tokens": 103253612.0, |
| "step": 1293 |
| }, |
| { |
| "epoch": 0.803227808814401, |
| "grad_norm": 0.41015625, |
| "learning_rate": 2.964816712961865e-06, |
| "loss": 0.18546295166015625, |
| "num_tokens": 103333110.0, |
| "step": 1294 |
| }, |
| { |
| "epoch": 0.8038485412787089, |
| "grad_norm": 0.40625, |
| "learning_rate": 2.9468344378827627e-06, |
| "loss": 0.19812774658203125, |
| "num_tokens": 103416087.0, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.8044692737430168, |
| "grad_norm": 0.384765625, |
| "learning_rate": 2.9289009199929633e-06, |
| "loss": 0.1905517578125, |
| "num_tokens": 103499651.0, |
| "step": 1296 |
| }, |
| { |
| "epoch": 0.8050900062073246, |
| "grad_norm": 0.443359375, |
| "learning_rate": 2.9110162318367274e-06, |
| "loss": 0.190582275390625, |
| "num_tokens": 103570923.0, |
| "step": 1297 |
| }, |
| { |
| "epoch": 0.8057107386716326, |
| "grad_norm": 0.427734375, |
| "learning_rate": 2.8931804457607815e-06, |
| "loss": 0.2168731689453125, |
| "num_tokens": 103652798.0, |
| "step": 1298 |
| }, |
| { |
| "epoch": 0.8063314711359404, |
| "grad_norm": 0.400390625, |
| "learning_rate": 2.8753936339140342e-06, |
| "loss": 0.1739959716796875, |
| "num_tokens": 103729881.0, |
| "step": 1299 |
| }, |
| { |
| "epoch": 0.8069522036002483, |
| "grad_norm": 0.380859375, |
| "learning_rate": 2.8576558682472974e-06, |
| "loss": 0.15575408935546875, |
| "num_tokens": 103807492.0, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.8075729360645562, |
| "grad_norm": 0.392578125, |
| "learning_rate": 2.8399672205129674e-06, |
| "loss": 0.1865997314453125, |
| "num_tokens": 103884164.0, |
| "step": 1301 |
| }, |
| { |
| "epoch": 0.8081936685288641, |
| "grad_norm": 0.474609375, |
| "learning_rate": 2.822327762264765e-06, |
| "loss": 0.2444305419921875, |
| "num_tokens": 103963985.0, |
| "step": 1302 |
| }, |
| { |
| "epoch": 0.8088144009931719, |
| "grad_norm": 0.390625, |
| "learning_rate": 2.8047375648574187e-06, |
| "loss": 0.17287445068359375, |
| "num_tokens": 104042041.0, |
| "step": 1303 |
| }, |
| { |
| "epoch": 0.8094351334574799, |
| "grad_norm": 0.494140625, |
| "learning_rate": 2.7871966994463887e-06, |
| "loss": 0.257781982421875, |
| "num_tokens": 104121681.0, |
| "step": 1304 |
| }, |
| { |
| "epoch": 0.8100558659217877, |
| "grad_norm": 0.38671875, |
| "learning_rate": 2.769705236987585e-06, |
| "loss": 0.15924835205078125, |
| "num_tokens": 104201431.0, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.8106765983860956, |
| "grad_norm": 0.423828125, |
| "learning_rate": 2.752263248237074e-06, |
| "loss": 0.21198272705078125, |
| "num_tokens": 104281573.0, |
| "step": 1306 |
| }, |
| { |
| "epoch": 0.8112973308504035, |
| "grad_norm": 0.38671875, |
| "learning_rate": 2.734870803750783e-06, |
| "loss": 0.17617034912109375, |
| "num_tokens": 104360318.0, |
| "step": 1307 |
| }, |
| { |
| "epoch": 0.8119180633147114, |
| "grad_norm": 0.44140625, |
| "learning_rate": 2.717527973884238e-06, |
| "loss": 0.2028656005859375, |
| "num_tokens": 104447102.0, |
| "step": 1308 |
| }, |
| { |
| "epoch": 0.8125387957790192, |
| "grad_norm": 0.416015625, |
| "learning_rate": 2.7002348287922525e-06, |
| "loss": 0.2050933837890625, |
| "num_tokens": 104533303.0, |
| "step": 1309 |
| }, |
| { |
| "epoch": 0.8131595282433272, |
| "grad_norm": 0.439453125, |
| "learning_rate": 2.6829914384286576e-06, |
| "loss": 0.2105712890625, |
| "num_tokens": 104612178.0, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.813780260707635, |
| "grad_norm": 0.359375, |
| "learning_rate": 2.6657978725460286e-06, |
| "loss": 0.1677093505859375, |
| "num_tokens": 104707343.0, |
| "step": 1311 |
| }, |
| { |
| "epoch": 0.8144009931719429, |
| "grad_norm": 0.43359375, |
| "learning_rate": 2.6486542006953772e-06, |
| "loss": 0.25018310546875, |
| "num_tokens": 104795928.0, |
| "step": 1312 |
| }, |
| { |
| "epoch": 0.8150217256362507, |
| "grad_norm": 0.45703125, |
| "learning_rate": 2.6315604922259e-06, |
| "loss": 0.2257232666015625, |
| "num_tokens": 104880807.0, |
| "step": 1313 |
| }, |
| { |
| "epoch": 0.8156424581005587, |
| "grad_norm": 0.4453125, |
| "learning_rate": 2.61451681628467e-06, |
| "loss": 0.236785888671875, |
| "num_tokens": 104962359.0, |
| "step": 1314 |
| }, |
| { |
| "epoch": 0.8162631905648665, |
| "grad_norm": 0.41796875, |
| "learning_rate": 2.5975232418163704e-06, |
| "loss": 0.19732666015625, |
| "num_tokens": 105045342.0, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.8168839230291745, |
| "grad_norm": 0.458984375, |
| "learning_rate": 2.580579837563026e-06, |
| "loss": 0.2263641357421875, |
| "num_tokens": 105126223.0, |
| "step": 1316 |
| }, |
| { |
| "epoch": 0.8175046554934823, |
| "grad_norm": 0.451171875, |
| "learning_rate": 2.563686672063711e-06, |
| "loss": 0.204345703125, |
| "num_tokens": 105196201.0, |
| "step": 1317 |
| }, |
| { |
| "epoch": 0.8181253879577902, |
| "grad_norm": 0.478515625, |
| "learning_rate": 2.546843813654261e-06, |
| "loss": 0.2430877685546875, |
| "num_tokens": 105272611.0, |
| "step": 1318 |
| }, |
| { |
| "epoch": 0.818746120422098, |
| "grad_norm": 0.443359375, |
| "learning_rate": 2.5300513304670315e-06, |
| "loss": 0.2425537109375, |
| "num_tokens": 105359767.0, |
| "step": 1319 |
| }, |
| { |
| "epoch": 0.819366852886406, |
| "grad_norm": 0.3671875, |
| "learning_rate": 2.513309290430583e-06, |
| "loss": 0.15370941162109375, |
| "num_tokens": 105445403.0, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.8199875853507138, |
| "grad_norm": 0.400390625, |
| "learning_rate": 2.496617761269439e-06, |
| "loss": 0.20330810546875, |
| "num_tokens": 105532166.0, |
| "step": 1321 |
| }, |
| { |
| "epoch": 0.8206083178150217, |
| "grad_norm": 0.44140625, |
| "learning_rate": 2.4799768105037867e-06, |
| "loss": 0.222412109375, |
| "num_tokens": 105611571.0, |
| "step": 1322 |
| }, |
| { |
| "epoch": 0.8212290502793296, |
| "grad_norm": 0.375, |
| "learning_rate": 2.4633865054492193e-06, |
| "loss": 0.14919281005859375, |
| "num_tokens": 105691792.0, |
| "step": 1323 |
| }, |
| { |
| "epoch": 0.8218497827436375, |
| "grad_norm": 0.3984375, |
| "learning_rate": 2.446846913216461e-06, |
| "loss": 0.1780853271484375, |
| "num_tokens": 105774938.0, |
| "step": 1324 |
| }, |
| { |
| "epoch": 0.8224705152079453, |
| "grad_norm": 0.4140625, |
| "learning_rate": 2.430358100711088e-06, |
| "loss": 0.14537811279296875, |
| "num_tokens": 105851722.0, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.8230912476722533, |
| "grad_norm": 0.427734375, |
| "learning_rate": 2.413920134633272e-06, |
| "loss": 0.207763671875, |
| "num_tokens": 105934661.0, |
| "step": 1326 |
| }, |
| { |
| "epoch": 0.8237119801365611, |
| "grad_norm": 0.390625, |
| "learning_rate": 2.3975330814774915e-06, |
| "loss": 0.1488037109375, |
| "num_tokens": 106007825.0, |
| "step": 1327 |
| }, |
| { |
| "epoch": 0.824332712600869, |
| "grad_norm": 0.412109375, |
| "learning_rate": 2.38119700753228e-06, |
| "loss": 0.19580078125, |
| "num_tokens": 106089985.0, |
| "step": 1328 |
| }, |
| { |
| "epoch": 0.8249534450651769, |
| "grad_norm": 0.416015625, |
| "learning_rate": 2.3649119788799567e-06, |
| "loss": 0.1855010986328125, |
| "num_tokens": 106170017.0, |
| "step": 1329 |
| }, |
| { |
| "epoch": 0.8255741775294848, |
| "grad_norm": 0.353515625, |
| "learning_rate": 2.3486780613963403e-06, |
| "loss": 0.1274261474609375, |
| "num_tokens": 106244244.0, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.8261949099937926, |
| "grad_norm": 0.376953125, |
| "learning_rate": 2.3324953207504984e-06, |
| "loss": 0.15641021728515625, |
| "num_tokens": 106318196.0, |
| "step": 1331 |
| }, |
| { |
| "epoch": 0.8268156424581006, |
| "grad_norm": 0.447265625, |
| "learning_rate": 2.3163638224044915e-06, |
| "loss": 0.2002410888671875, |
| "num_tokens": 106396636.0, |
| "step": 1332 |
| }, |
| { |
| "epoch": 0.8274363749224084, |
| "grad_norm": 0.3671875, |
| "learning_rate": 2.300283631613081e-06, |
| "loss": 0.15753936767578125, |
| "num_tokens": 106481491.0, |
| "step": 1333 |
| }, |
| { |
| "epoch": 0.8280571073867163, |
| "grad_norm": 0.412109375, |
| "learning_rate": 2.2842548134234866e-06, |
| "loss": 0.1749725341796875, |
| "num_tokens": 106560667.0, |
| "step": 1334 |
| }, |
| { |
| "epoch": 0.8286778398510242, |
| "grad_norm": 0.3828125, |
| "learning_rate": 2.2682774326751215e-06, |
| "loss": 0.178192138671875, |
| "num_tokens": 106640126.0, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.8292985723153321, |
| "grad_norm": 0.408203125, |
| "learning_rate": 2.2523515539993152e-06, |
| "loss": 0.1955413818359375, |
| "num_tokens": 106726158.0, |
| "step": 1336 |
| }, |
| { |
| "epoch": 0.8299193047796399, |
| "grad_norm": 0.3828125, |
| "learning_rate": 2.236477241819067e-06, |
| "loss": 0.17472076416015625, |
| "num_tokens": 106814251.0, |
| "step": 1337 |
| }, |
| { |
| "epoch": 0.8305400372439479, |
| "grad_norm": 0.45703125, |
| "learning_rate": 2.2206545603487884e-06, |
| "loss": 0.2115325927734375, |
| "num_tokens": 106888763.0, |
| "step": 1338 |
| }, |
| { |
| "epoch": 0.8311607697082557, |
| "grad_norm": 0.3984375, |
| "learning_rate": 2.2048835735940194e-06, |
| "loss": 0.14669036865234375, |
| "num_tokens": 106963719.0, |
| "step": 1339 |
| }, |
| { |
| "epoch": 0.8317815021725636, |
| "grad_norm": 0.37890625, |
| "learning_rate": 2.1891643453512018e-06, |
| "loss": 0.15201568603515625, |
| "num_tokens": 107047767.0, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.8324022346368715, |
| "grad_norm": 0.38671875, |
| "learning_rate": 2.1734969392073944e-06, |
| "loss": 0.15398788452148438, |
| "num_tokens": 107123697.0, |
| "step": 1341 |
| }, |
| { |
| "epoch": 0.8330229671011794, |
| "grad_norm": 0.412109375, |
| "learning_rate": 2.157881418540024e-06, |
| "loss": 0.172332763671875, |
| "num_tokens": 107197505.0, |
| "step": 1342 |
| }, |
| { |
| "epoch": 0.8336436995654872, |
| "grad_norm": 0.408203125, |
| "learning_rate": 2.1423178465166485e-06, |
| "loss": 0.16192626953125, |
| "num_tokens": 107276412.0, |
| "step": 1343 |
| }, |
| { |
| "epoch": 0.8342644320297952, |
| "grad_norm": 0.396484375, |
| "learning_rate": 2.126806286094666e-06, |
| "loss": 0.18810272216796875, |
| "num_tokens": 107356981.0, |
| "step": 1344 |
| }, |
| { |
| "epoch": 0.834885164494103, |
| "grad_norm": 0.4140625, |
| "learning_rate": 2.1113468000210843e-06, |
| "loss": 0.17772674560546875, |
| "num_tokens": 107434241.0, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.8355058969584109, |
| "grad_norm": 0.369140625, |
| "learning_rate": 2.0959394508322644e-06, |
| "loss": 0.1571807861328125, |
| "num_tokens": 107518350.0, |
| "step": 1346 |
| }, |
| { |
| "epoch": 0.8361266294227188, |
| "grad_norm": 0.408203125, |
| "learning_rate": 2.080584300853665e-06, |
| "loss": 0.1822357177734375, |
| "num_tokens": 107596504.0, |
| "step": 1347 |
| }, |
| { |
| "epoch": 0.8367473618870267, |
| "grad_norm": 0.419921875, |
| "learning_rate": 2.0652814121995824e-06, |
| "loss": 0.1939239501953125, |
| "num_tokens": 107673001.0, |
| "step": 1348 |
| }, |
| { |
| "epoch": 0.8373680943513345, |
| "grad_norm": 0.44921875, |
| "learning_rate": 2.0500308467729186e-06, |
| "loss": 0.20574951171875, |
| "num_tokens": 107746256.0, |
| "step": 1349 |
| }, |
| { |
| "epoch": 0.8379888268156425, |
| "grad_norm": 0.408203125, |
| "learning_rate": 2.0348326662649008e-06, |
| "loss": 0.175872802734375, |
| "num_tokens": 107827993.0, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.8386095592799503, |
| "grad_norm": 0.46875, |
| "learning_rate": 2.0196869321548727e-06, |
| "loss": 0.22869873046875, |
| "num_tokens": 107907923.0, |
| "step": 1351 |
| }, |
| { |
| "epoch": 0.8392302917442582, |
| "grad_norm": 0.451171875, |
| "learning_rate": 2.004593705710004e-06, |
| "loss": 0.20977783203125, |
| "num_tokens": 107987178.0, |
| "step": 1352 |
| }, |
| { |
| "epoch": 0.839851024208566, |
| "grad_norm": 0.416015625, |
| "learning_rate": 1.9895530479850654e-06, |
| "loss": 0.2040557861328125, |
| "num_tokens": 108068616.0, |
| "step": 1353 |
| }, |
| { |
| "epoch": 0.840471756672874, |
| "grad_norm": 0.462890625, |
| "learning_rate": 1.9745650198221903e-06, |
| "loss": 0.2293243408203125, |
| "num_tokens": 108152086.0, |
| "step": 1354 |
| }, |
| { |
| "epoch": 0.8410924891371818, |
| "grad_norm": 0.408203125, |
| "learning_rate": 1.9596296818505988e-06, |
| "loss": 0.194061279296875, |
| "num_tokens": 108241091.0, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.8417132216014898, |
| "grad_norm": 0.43359375, |
| "learning_rate": 1.944747094486386e-06, |
| "loss": 0.22503662109375, |
| "num_tokens": 108320387.0, |
| "step": 1356 |
| }, |
| { |
| "epoch": 0.8423339540657977, |
| "grad_norm": 0.42578125, |
| "learning_rate": 1.929917317932243e-06, |
| "loss": 0.1920166015625, |
| "num_tokens": 108405576.0, |
| "step": 1357 |
| }, |
| { |
| "epoch": 0.8429546865301055, |
| "grad_norm": 0.388671875, |
| "learning_rate": 1.9151404121772507e-06, |
| "loss": 0.1645660400390625, |
| "num_tokens": 108485839.0, |
| "step": 1358 |
| }, |
| { |
| "epoch": 0.8435754189944135, |
| "grad_norm": 0.361328125, |
| "learning_rate": 1.9004164369966088e-06, |
| "loss": 0.12830352783203125, |
| "num_tokens": 108563228.0, |
| "step": 1359 |
| }, |
| { |
| "epoch": 0.8441961514587213, |
| "grad_norm": 0.4296875, |
| "learning_rate": 1.8857454519514044e-06, |
| "loss": 0.2244873046875, |
| "num_tokens": 108645284.0, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.8448168839230292, |
| "grad_norm": 0.3984375, |
| "learning_rate": 1.871127516388364e-06, |
| "loss": 0.1541900634765625, |
| "num_tokens": 108725903.0, |
| "step": 1361 |
| }, |
| { |
| "epoch": 0.845437616387337, |
| "grad_norm": 0.462890625, |
| "learning_rate": 1.8565626894396366e-06, |
| "loss": 0.2313995361328125, |
| "num_tokens": 108803257.0, |
| "step": 1362 |
| }, |
| { |
| "epoch": 0.846058348851645, |
| "grad_norm": 0.353515625, |
| "learning_rate": 1.8420510300225147e-06, |
| "loss": 0.13166046142578125, |
| "num_tokens": 108884045.0, |
| "step": 1363 |
| }, |
| { |
| "epoch": 0.8466790813159528, |
| "grad_norm": 0.345703125, |
| "learning_rate": 1.8275925968392414e-06, |
| "loss": 0.14595794677734375, |
| "num_tokens": 108969431.0, |
| "step": 1364 |
| }, |
| { |
| "epoch": 0.8472998137802608, |
| "grad_norm": 0.359375, |
| "learning_rate": 1.813187448376734e-06, |
| "loss": 0.12152862548828125, |
| "num_tokens": 109049313.0, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.8479205462445686, |
| "grad_norm": 0.419921875, |
| "learning_rate": 1.7988356429063684e-06, |
| "loss": 0.1735076904296875, |
| "num_tokens": 109122274.0, |
| "step": 1366 |
| }, |
| { |
| "epoch": 0.8485412787088765, |
| "grad_norm": 0.408203125, |
| "learning_rate": 1.7845372384837433e-06, |
| "loss": 0.1820220947265625, |
| "num_tokens": 109203209.0, |
| "step": 1367 |
| }, |
| { |
| "epoch": 0.8491620111731844, |
| "grad_norm": 0.4140625, |
| "learning_rate": 1.77029229294843e-06, |
| "loss": 0.19683837890625, |
| "num_tokens": 109290660.0, |
| "step": 1368 |
| }, |
| { |
| "epoch": 0.8497827436374923, |
| "grad_norm": 0.345703125, |
| "learning_rate": 1.7561008639237613e-06, |
| "loss": 0.15297698974609375, |
| "num_tokens": 109375535.0, |
| "step": 1369 |
| }, |
| { |
| "epoch": 0.8504034761018001, |
| "grad_norm": 0.38671875, |
| "learning_rate": 1.7419630088165832e-06, |
| "loss": 0.174041748046875, |
| "num_tokens": 109459125.0, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.851024208566108, |
| "grad_norm": 0.3828125, |
| "learning_rate": 1.7278787848170185e-06, |
| "loss": 0.1533050537109375, |
| "num_tokens": 109538042.0, |
| "step": 1371 |
| }, |
| { |
| "epoch": 0.8516449410304159, |
| "grad_norm": 0.376953125, |
| "learning_rate": 1.71384824889825e-06, |
| "loss": 0.15036773681640625, |
| "num_tokens": 109621213.0, |
| "step": 1372 |
| }, |
| { |
| "epoch": 0.8522656734947238, |
| "grad_norm": 0.37890625, |
| "learning_rate": 1.6998714578162822e-06, |
| "loss": 0.17192840576171875, |
| "num_tokens": 109700895.0, |
| "step": 1373 |
| }, |
| { |
| "epoch": 0.8528864059590316, |
| "grad_norm": 0.435546875, |
| "learning_rate": 1.6859484681097025e-06, |
| "loss": 0.1910400390625, |
| "num_tokens": 109779614.0, |
| "step": 1374 |
| }, |
| { |
| "epoch": 0.8535071384233396, |
| "grad_norm": 0.421875, |
| "learning_rate": 1.672079336099479e-06, |
| "loss": 0.20361328125, |
| "num_tokens": 109867597.0, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.8541278708876474, |
| "grad_norm": 0.361328125, |
| "learning_rate": 1.6582641178887036e-06, |
| "loss": 0.13359832763671875, |
| "num_tokens": 109947167.0, |
| "step": 1376 |
| }, |
| { |
| "epoch": 0.8547486033519553, |
| "grad_norm": 0.412109375, |
| "learning_rate": 1.6445028693623764e-06, |
| "loss": 0.16276931762695312, |
| "num_tokens": 110019199.0, |
| "step": 1377 |
| }, |
| { |
| "epoch": 0.8553693358162632, |
| "grad_norm": 0.44921875, |
| "learning_rate": 1.6307956461871888e-06, |
| "loss": 0.2106475830078125, |
| "num_tokens": 110098896.0, |
| "step": 1378 |
| }, |
| { |
| "epoch": 0.8559900682805711, |
| "grad_norm": 0.390625, |
| "learning_rate": 1.617142503811288e-06, |
| "loss": 0.1520233154296875, |
| "num_tokens": 110171551.0, |
| "step": 1379 |
| }, |
| { |
| "epoch": 0.8566108007448789, |
| "grad_norm": 0.45703125, |
| "learning_rate": 1.603543497464049e-06, |
| "loss": 0.230438232421875, |
| "num_tokens": 110249019.0, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.8572315332091869, |
| "grad_norm": 0.400390625, |
| "learning_rate": 1.589998682155866e-06, |
| "loss": 0.174896240234375, |
| "num_tokens": 110330713.0, |
| "step": 1381 |
| }, |
| { |
| "epoch": 0.8578522656734947, |
| "grad_norm": 0.439453125, |
| "learning_rate": 1.5765081126779074e-06, |
| "loss": 0.20772552490234375, |
| "num_tokens": 110410310.0, |
| "step": 1382 |
| }, |
| { |
| "epoch": 0.8584729981378026, |
| "grad_norm": 0.390625, |
| "learning_rate": 1.5630718436019253e-06, |
| "loss": 0.15108489990234375, |
| "num_tokens": 110484902.0, |
| "step": 1383 |
| }, |
| { |
| "epoch": 0.8590937306021105, |
| "grad_norm": 0.400390625, |
| "learning_rate": 1.5496899292800009e-06, |
| "loss": 0.17327880859375, |
| "num_tokens": 110565591.0, |
| "step": 1384 |
| }, |
| { |
| "epoch": 0.8597144630664184, |
| "grad_norm": 0.37109375, |
| "learning_rate": 1.5363624238443497e-06, |
| "loss": 0.1597137451171875, |
| "num_tokens": 110653753.0, |
| "step": 1385 |
| }, |
| { |
| "epoch": 0.8603351955307262, |
| "grad_norm": 0.47265625, |
| "learning_rate": 1.5230893812070928e-06, |
| "loss": 0.2253265380859375, |
| "num_tokens": 110732240.0, |
| "step": 1386 |
| }, |
| { |
| "epoch": 0.8609559279950342, |
| "grad_norm": 0.36328125, |
| "learning_rate": 1.5098708550600366e-06, |
| "loss": 0.13427734375, |
| "num_tokens": 110815081.0, |
| "step": 1387 |
| }, |
| { |
| "epoch": 0.861576660459342, |
| "grad_norm": 0.4453125, |
| "learning_rate": 1.496706898874458e-06, |
| "loss": 0.213287353515625, |
| "num_tokens": 110894665.0, |
| "step": 1388 |
| }, |
| { |
| "epoch": 0.8621973929236499, |
| "grad_norm": 0.365234375, |
| "learning_rate": 1.483597565900895e-06, |
| "loss": 0.15041351318359375, |
| "num_tokens": 110974767.0, |
| "step": 1389 |
| }, |
| { |
| "epoch": 0.8628181253879578, |
| "grad_norm": 0.4453125, |
| "learning_rate": 1.4705429091689194e-06, |
| "loss": 0.197418212890625, |
| "num_tokens": 111049554.0, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.8634388578522657, |
| "grad_norm": 0.41015625, |
| "learning_rate": 1.457542981486934e-06, |
| "loss": 0.181884765625, |
| "num_tokens": 111129832.0, |
| "step": 1391 |
| }, |
| { |
| "epoch": 0.8640595903165735, |
| "grad_norm": 0.384765625, |
| "learning_rate": 1.4445978354419437e-06, |
| "loss": 0.15564727783203125, |
| "num_tokens": 111209554.0, |
| "step": 1392 |
| }, |
| { |
| "epoch": 0.8646803227808815, |
| "grad_norm": 0.3515625, |
| "learning_rate": 1.4317075233993553e-06, |
| "loss": 0.126068115234375, |
| "num_tokens": 111289762.0, |
| "step": 1393 |
| }, |
| { |
| "epoch": 0.8653010552451893, |
| "grad_norm": 0.4453125, |
| "learning_rate": 1.4188720975027687e-06, |
| "loss": 0.1878814697265625, |
| "num_tokens": 111368478.0, |
| "step": 1394 |
| }, |
| { |
| "epoch": 0.8659217877094972, |
| "grad_norm": 0.380859375, |
| "learning_rate": 1.4060916096737536e-06, |
| "loss": 0.1748046875, |
| "num_tokens": 111449441.0, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.8665425201738051, |
| "grad_norm": 0.37890625, |
| "learning_rate": 1.393366111611642e-06, |
| "loss": 0.15207672119140625, |
| "num_tokens": 111524565.0, |
| "step": 1396 |
| }, |
| { |
| "epoch": 0.867163252638113, |
| "grad_norm": 0.431640625, |
| "learning_rate": 1.3806956547933347e-06, |
| "loss": 0.1956939697265625, |
| "num_tokens": 111604313.0, |
| "step": 1397 |
| }, |
| { |
| "epoch": 0.8677839851024208, |
| "grad_norm": 0.37890625, |
| "learning_rate": 1.368080290473066e-06, |
| "loss": 0.16059112548828125, |
| "num_tokens": 111683673.0, |
| "step": 1398 |
| }, |
| { |
| "epoch": 0.8684047175667288, |
| "grad_norm": 0.462890625, |
| "learning_rate": 1.3555200696822235e-06, |
| "loss": 0.20253753662109375, |
| "num_tokens": 111762330.0, |
| "step": 1399 |
| }, |
| { |
| "epoch": 0.8690254500310366, |
| "grad_norm": 0.423828125, |
| "learning_rate": 1.3430150432291262e-06, |
| "loss": 0.1954803466796875, |
| "num_tokens": 111840676.0, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.8696461824953445, |
| "grad_norm": 0.3828125, |
| "learning_rate": 1.330565261698815e-06, |
| "loss": 0.1727294921875, |
| "num_tokens": 111919229.0, |
| "step": 1401 |
| }, |
| { |
| "epoch": 0.8702669149596524, |
| "grad_norm": 0.43359375, |
| "learning_rate": 1.3181707754528671e-06, |
| "loss": 0.194854736328125, |
| "num_tokens": 111995077.0, |
| "step": 1402 |
| }, |
| { |
| "epoch": 0.8708876474239603, |
| "grad_norm": 0.4296875, |
| "learning_rate": 1.305831634629171e-06, |
| "loss": 0.21188735961914062, |
| "num_tokens": 112071865.0, |
| "step": 1403 |
| }, |
| { |
| "epoch": 0.8715083798882681, |
| "grad_norm": 0.44921875, |
| "learning_rate": 1.2935478891417346e-06, |
| "loss": 0.1915283203125, |
| "num_tokens": 112143523.0, |
| "step": 1404 |
| }, |
| { |
| "epoch": 0.8721291123525761, |
| "grad_norm": 0.41796875, |
| "learning_rate": 1.2813195886804885e-06, |
| "loss": 0.19721221923828125, |
| "num_tokens": 112228377.0, |
| "step": 1405 |
| }, |
| { |
| "epoch": 0.8727498448168839, |
| "grad_norm": 0.41796875, |
| "learning_rate": 1.2691467827110693e-06, |
| "loss": 0.1800537109375, |
| "num_tokens": 112303546.0, |
| "step": 1406 |
| }, |
| { |
| "epoch": 0.8733705772811918, |
| "grad_norm": 0.498046875, |
| "learning_rate": 1.257029520474638e-06, |
| "loss": 0.27400970458984375, |
| "num_tokens": 112377192.0, |
| "step": 1407 |
| }, |
| { |
| "epoch": 0.8739913097454997, |
| "grad_norm": 0.423828125, |
| "learning_rate": 1.244967850987664e-06, |
| "loss": 0.2031402587890625, |
| "num_tokens": 112456630.0, |
| "step": 1408 |
| }, |
| { |
| "epoch": 0.8746120422098076, |
| "grad_norm": 0.353515625, |
| "learning_rate": 1.2329618230417327e-06, |
| "loss": 0.12925338745117188, |
| "num_tokens": 112536338.0, |
| "step": 1409 |
| }, |
| { |
| "epoch": 0.8752327746741154, |
| "grad_norm": 0.388671875, |
| "learning_rate": 1.2210114852033572e-06, |
| "loss": 0.15638351440429688, |
| "num_tokens": 112617701.0, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.8758535071384234, |
| "grad_norm": 0.404296875, |
| "learning_rate": 1.2091168858137714e-06, |
| "loss": 0.19182205200195312, |
| "num_tokens": 112698716.0, |
| "step": 1411 |
| }, |
| { |
| "epoch": 0.8764742396027312, |
| "grad_norm": 0.400390625, |
| "learning_rate": 1.1972780729887333e-06, |
| "loss": 0.19015884399414062, |
| "num_tokens": 112784460.0, |
| "step": 1412 |
| }, |
| { |
| "epoch": 0.8770949720670391, |
| "grad_norm": 0.462890625, |
| "learning_rate": 1.18549509461834e-06, |
| "loss": 0.2064971923828125, |
| "num_tokens": 112862225.0, |
| "step": 1413 |
| }, |
| { |
| "epoch": 0.877715704531347, |
| "grad_norm": 0.41015625, |
| "learning_rate": 1.1737679983668259e-06, |
| "loss": 0.15874481201171875, |
| "num_tokens": 112938792.0, |
| "step": 1414 |
| }, |
| { |
| "epoch": 0.8783364369956549, |
| "grad_norm": 0.3984375, |
| "learning_rate": 1.162096831672369e-06, |
| "loss": 0.1752777099609375, |
| "num_tokens": 113023192.0, |
| "step": 1415 |
| }, |
| { |
| "epoch": 0.8789571694599627, |
| "grad_norm": 0.37890625, |
| "learning_rate": 1.1504816417469088e-06, |
| "loss": 0.14960479736328125, |
| "num_tokens": 113106520.0, |
| "step": 1416 |
| }, |
| { |
| "epoch": 0.8795779019242707, |
| "grad_norm": 0.416015625, |
| "learning_rate": 1.1389224755759425e-06, |
| "loss": 0.1729583740234375, |
| "num_tokens": 113190980.0, |
| "step": 1417 |
| }, |
| { |
| "epoch": 0.8801986343885785, |
| "grad_norm": 0.396484375, |
| "learning_rate": 1.1274193799183486e-06, |
| "loss": 0.177276611328125, |
| "num_tokens": 113267794.0, |
| "step": 1418 |
| }, |
| { |
| "epoch": 0.8808193668528864, |
| "grad_norm": 0.3671875, |
| "learning_rate": 1.1159724013061818e-06, |
| "loss": 0.1397705078125, |
| "num_tokens": 113342885.0, |
| "step": 1419 |
| }, |
| { |
| "epoch": 0.8814400993171942, |
| "grad_norm": 0.353515625, |
| "learning_rate": 1.104581586044502e-06, |
| "loss": 0.129364013671875, |
| "num_tokens": 113423145.0, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.8820608317815022, |
| "grad_norm": 0.45703125, |
| "learning_rate": 1.0932469802111688e-06, |
| "loss": 0.226531982421875, |
| "num_tokens": 113505500.0, |
| "step": 1421 |
| }, |
| { |
| "epoch": 0.88268156424581, |
| "grad_norm": 0.50390625, |
| "learning_rate": 1.081968629656676e-06, |
| "loss": 0.253570556640625, |
| "num_tokens": 113581240.0, |
| "step": 1422 |
| }, |
| { |
| "epoch": 0.883302296710118, |
| "grad_norm": 0.421875, |
| "learning_rate": 1.0707465800039456e-06, |
| "loss": 0.19501495361328125, |
| "num_tokens": 113666314.0, |
| "step": 1423 |
| }, |
| { |
| "epoch": 0.8839230291744258, |
| "grad_norm": 0.54296875, |
| "learning_rate": 1.0595808766481569e-06, |
| "loss": 0.252655029296875, |
| "num_tokens": 113743878.0, |
| "step": 1424 |
| }, |
| { |
| "epoch": 0.8845437616387337, |
| "grad_norm": 0.390625, |
| "learning_rate": 1.0484715647565562e-06, |
| "loss": 0.1773681640625, |
| "num_tokens": 113820248.0, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.8851644941030415, |
| "grad_norm": 0.443359375, |
| "learning_rate": 1.0374186892682813e-06, |
| "loss": 0.2051239013671875, |
| "num_tokens": 113895970.0, |
| "step": 1426 |
| }, |
| { |
| "epoch": 0.8857852265673495, |
| "grad_norm": 0.44140625, |
| "learning_rate": 1.026422294894167e-06, |
| "loss": 0.1966552734375, |
| "num_tokens": 113975890.0, |
| "step": 1427 |
| }, |
| { |
| "epoch": 0.8864059590316573, |
| "grad_norm": 0.408203125, |
| "learning_rate": 1.0154824261165763e-06, |
| "loss": 0.1671295166015625, |
| "num_tokens": 114047999.0, |
| "step": 1428 |
| }, |
| { |
| "epoch": 0.8870266914959652, |
| "grad_norm": 0.390625, |
| "learning_rate": 1.004599127189218e-06, |
| "loss": 0.156585693359375, |
| "num_tokens": 114128129.0, |
| "step": 1429 |
| }, |
| { |
| "epoch": 0.8876474239602731, |
| "grad_norm": 0.388671875, |
| "learning_rate": 9.937724421369609e-07, |
| "loss": 0.1729278564453125, |
| "num_tokens": 114207622.0, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.888268156424581, |
| "grad_norm": 0.39453125, |
| "learning_rate": 9.830024147556637e-07, |
| "loss": 0.1910552978515625, |
| "num_tokens": 114294842.0, |
| "step": 1431 |
| }, |
| { |
| "epoch": 0.8888888888888888, |
| "grad_norm": 0.39453125, |
| "learning_rate": 9.722890886120002e-07, |
| "loss": 0.15358734130859375, |
| "num_tokens": 114369904.0, |
| "step": 1432 |
| }, |
| { |
| "epoch": 0.8895096213531968, |
| "grad_norm": 0.365234375, |
| "learning_rate": 9.616325070432636e-07, |
| "loss": 0.1573638916015625, |
| "num_tokens": 114451390.0, |
| "step": 1433 |
| }, |
| { |
| "epoch": 0.8901303538175046, |
| "grad_norm": 0.4453125, |
| "learning_rate": 9.5103271315722e-07, |
| "loss": 0.22116851806640625, |
| "num_tokens": 114527301.0, |
| "step": 1434 |
| }, |
| { |
| "epoch": 0.8907510862818125, |
| "grad_norm": 0.384765625, |
| "learning_rate": 9.40489749831912e-07, |
| "loss": 0.154052734375, |
| "num_tokens": 114600377.0, |
| "step": 1435 |
| }, |
| { |
| "epoch": 0.8913718187461204, |
| "grad_norm": 0.37109375, |
| "learning_rate": 9.300036597154881e-07, |
| "loss": 0.165008544921875, |
| "num_tokens": 114684385.0, |
| "step": 1436 |
| }, |
| { |
| "epoch": 0.8919925512104283, |
| "grad_norm": 0.431640625, |
| "learning_rate": 9.195744852260457e-07, |
| "loss": 0.1820068359375, |
| "num_tokens": 114762718.0, |
| "step": 1437 |
| }, |
| { |
| "epoch": 0.8926132836747361, |
| "grad_norm": 0.369140625, |
| "learning_rate": 9.092022685514429e-07, |
| "loss": 0.1756591796875, |
| "num_tokens": 114850757.0, |
| "step": 1438 |
| }, |
| { |
| "epoch": 0.8932340161390441, |
| "grad_norm": 0.455078125, |
| "learning_rate": 8.988870516491254e-07, |
| "loss": 0.18399810791015625, |
| "num_tokens": 114924794.0, |
| "step": 1439 |
| }, |
| { |
| "epoch": 0.8938547486033519, |
| "grad_norm": 0.4921875, |
| "learning_rate": 8.886288762459805e-07, |
| "loss": 0.24407958984375, |
| "num_tokens": 114995720.0, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.8944754810676598, |
| "grad_norm": 0.361328125, |
| "learning_rate": 8.784277838381427e-07, |
| "loss": 0.1392669677734375, |
| "num_tokens": 115074423.0, |
| "step": 1441 |
| }, |
| { |
| "epoch": 0.8950962135319678, |
| "grad_norm": 0.408203125, |
| "learning_rate": 8.682838156908335e-07, |
| "loss": 0.1748046875, |
| "num_tokens": 115157851.0, |
| "step": 1442 |
| }, |
| { |
| "epoch": 0.8957169459962756, |
| "grad_norm": 0.40625, |
| "learning_rate": 8.581970128382055e-07, |
| "loss": 0.19258880615234375, |
| "num_tokens": 115239612.0, |
| "step": 1443 |
| }, |
| { |
| "epoch": 0.8963376784605835, |
| "grad_norm": 0.439453125, |
| "learning_rate": 8.481674160831598e-07, |
| "loss": 0.1905975341796875, |
| "num_tokens": 115318775.0, |
| "step": 1444 |
| }, |
| { |
| "epoch": 0.8969584109248914, |
| "grad_norm": 0.388671875, |
| "learning_rate": 8.381950659971943e-07, |
| "loss": 0.17224884033203125, |
| "num_tokens": 115403229.0, |
| "step": 1445 |
| }, |
| { |
| "epoch": 0.8975791433891993, |
| "grad_norm": 0.41015625, |
| "learning_rate": 8.28280002920227e-07, |
| "loss": 0.18399810791015625, |
| "num_tokens": 115484047.0, |
| "step": 1446 |
| }, |
| { |
| "epoch": 0.8981998758535071, |
| "grad_norm": 0.451171875, |
| "learning_rate": 8.184222669604463e-07, |
| "loss": 0.215240478515625, |
| "num_tokens": 115565971.0, |
| "step": 1447 |
| }, |
| { |
| "epoch": 0.8988206083178151, |
| "grad_norm": 0.39453125, |
| "learning_rate": 8.086218979941412e-07, |
| "loss": 0.155853271484375, |
| "num_tokens": 115644591.0, |
| "step": 1448 |
| }, |
| { |
| "epoch": 0.8994413407821229, |
| "grad_norm": 0.443359375, |
| "learning_rate": 7.988789356655379e-07, |
| "loss": 0.21747589111328125, |
| "num_tokens": 115726768.0, |
| "step": 1449 |
| }, |
| { |
| "epoch": 0.9000620732464308, |
| "grad_norm": 0.37890625, |
| "learning_rate": 7.891934193866418e-07, |
| "loss": 0.15863037109375, |
| "num_tokens": 115806911.0, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.9006828057107387, |
| "grad_norm": 0.451171875, |
| "learning_rate": 7.795653883370857e-07, |
| "loss": 0.2614288330078125, |
| "num_tokens": 115888858.0, |
| "step": 1451 |
| }, |
| { |
| "epoch": 0.9013035381750466, |
| "grad_norm": 0.376953125, |
| "learning_rate": 7.699948814639602e-07, |
| "loss": 0.14519500732421875, |
| "num_tokens": 115964929.0, |
| "step": 1452 |
| }, |
| { |
| "epoch": 0.9019242706393544, |
| "grad_norm": 0.40234375, |
| "learning_rate": 7.604819374816602e-07, |
| "loss": 0.14528656005859375, |
| "num_tokens": 116036534.0, |
| "step": 1453 |
| }, |
| { |
| "epoch": 0.9025450031036624, |
| "grad_norm": 0.357421875, |
| "learning_rate": 7.510265948717304e-07, |
| "loss": 0.16790771484375, |
| "num_tokens": 116121277.0, |
| "step": 1454 |
| }, |
| { |
| "epoch": 0.9031657355679702, |
| "grad_norm": 0.384765625, |
| "learning_rate": 7.416288918827002e-07, |
| "loss": 0.1535186767578125, |
| "num_tokens": 116202371.0, |
| "step": 1455 |
| }, |
| { |
| "epoch": 0.9037864680322781, |
| "grad_norm": 0.40234375, |
| "learning_rate": 7.322888665299487e-07, |
| "loss": 0.1700439453125, |
| "num_tokens": 116276724.0, |
| "step": 1456 |
| }, |
| { |
| "epoch": 0.904407200496586, |
| "grad_norm": 0.419921875, |
| "learning_rate": 7.230065565955302e-07, |
| "loss": 0.18024444580078125, |
| "num_tokens": 116353533.0, |
| "step": 1457 |
| }, |
| { |
| "epoch": 0.9050279329608939, |
| "grad_norm": 0.365234375, |
| "learning_rate": 7.137819996280303e-07, |
| "loss": 0.1444854736328125, |
| "num_tokens": 116440136.0, |
| "step": 1458 |
| }, |
| { |
| "epoch": 0.9056486654252017, |
| "grad_norm": 0.349609375, |
| "learning_rate": 7.046152329424205e-07, |
| "loss": 0.145660400390625, |
| "num_tokens": 116521838.0, |
| "step": 1459 |
| }, |
| { |
| "epoch": 0.9062693978895097, |
| "grad_norm": 0.3828125, |
| "learning_rate": 6.955062936198886e-07, |
| "loss": 0.18415451049804688, |
| "num_tokens": 116603356.0, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.9068901303538175, |
| "grad_norm": 0.4140625, |
| "learning_rate": 6.86455218507715e-07, |
| "loss": 0.22351837158203125, |
| "num_tokens": 116690757.0, |
| "step": 1461 |
| }, |
| { |
| "epoch": 0.9075108628181254, |
| "grad_norm": 0.40625, |
| "learning_rate": 6.774620442190954e-07, |
| "loss": 0.1795196533203125, |
| "num_tokens": 116776859.0, |
| "step": 1462 |
| }, |
| { |
| "epoch": 0.9081315952824333, |
| "grad_norm": 0.4140625, |
| "learning_rate": 6.685268071330147e-07, |
| "loss": 0.15985107421875, |
| "num_tokens": 116853475.0, |
| "step": 1463 |
| }, |
| { |
| "epoch": 0.9087523277467412, |
| "grad_norm": 0.416015625, |
| "learning_rate": 6.596495433940919e-07, |
| "loss": 0.2061004638671875, |
| "num_tokens": 116939042.0, |
| "step": 1464 |
| }, |
| { |
| "epoch": 0.909373060211049, |
| "grad_norm": 0.404296875, |
| "learning_rate": 6.508302889124268e-07, |
| "loss": 0.1634521484375, |
| "num_tokens": 117024209.0, |
| "step": 1465 |
| }, |
| { |
| "epoch": 0.909993792675357, |
| "grad_norm": 0.400390625, |
| "learning_rate": 6.42069079363467e-07, |
| "loss": 0.1707916259765625, |
| "num_tokens": 117102733.0, |
| "step": 1466 |
| }, |
| { |
| "epoch": 0.9106145251396648, |
| "grad_norm": 0.3515625, |
| "learning_rate": 6.333659501878597e-07, |
| "loss": 0.1476593017578125, |
| "num_tokens": 117187854.0, |
| "step": 1467 |
| }, |
| { |
| "epoch": 0.9112352576039727, |
| "grad_norm": 0.43359375, |
| "learning_rate": 6.247209365912982e-07, |
| "loss": 0.18548583984375, |
| "num_tokens": 117269571.0, |
| "step": 1468 |
| }, |
| { |
| "epoch": 0.9118559900682806, |
| "grad_norm": 0.44921875, |
| "learning_rate": 6.161340735443987e-07, |
| "loss": 0.24283599853515625, |
| "num_tokens": 117354410.0, |
| "step": 1469 |
| }, |
| { |
| "epoch": 0.9124767225325885, |
| "grad_norm": 0.373046875, |
| "learning_rate": 6.076053957825411e-07, |
| "loss": 0.17169189453125, |
| "num_tokens": 117435287.0, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.9130974549968963, |
| "grad_norm": 0.447265625, |
| "learning_rate": 5.991349378057348e-07, |
| "loss": 0.2221527099609375, |
| "num_tokens": 117513198.0, |
| "step": 1471 |
| }, |
| { |
| "epoch": 0.9137181874612043, |
| "grad_norm": 0.345703125, |
| "learning_rate": 5.907227338784843e-07, |
| "loss": 0.147308349609375, |
| "num_tokens": 117596986.0, |
| "step": 1472 |
| }, |
| { |
| "epoch": 0.9143389199255121, |
| "grad_norm": 0.37890625, |
| "learning_rate": 5.823688180296477e-07, |
| "loss": 0.1605072021484375, |
| "num_tokens": 117675745.0, |
| "step": 1473 |
| }, |
| { |
| "epoch": 0.91495965238982, |
| "grad_norm": 0.43359375, |
| "learning_rate": 5.740732240522883e-07, |
| "loss": 0.2085418701171875, |
| "num_tokens": 117756575.0, |
| "step": 1474 |
| }, |
| { |
| "epoch": 0.9155803848541278, |
| "grad_norm": 0.373046875, |
| "learning_rate": 5.658359855035567e-07, |
| "loss": 0.1626129150390625, |
| "num_tokens": 117840473.0, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.9162011173184358, |
| "grad_norm": 0.43359375, |
| "learning_rate": 5.576571357045401e-07, |
| "loss": 0.190399169921875, |
| "num_tokens": 117916766.0, |
| "step": 1476 |
| }, |
| { |
| "epoch": 0.9168218497827436, |
| "grad_norm": 0.375, |
| "learning_rate": 5.495367077401353e-07, |
| "loss": 0.1690216064453125, |
| "num_tokens": 118002624.0, |
| "step": 1477 |
| }, |
| { |
| "epoch": 0.9174425822470516, |
| "grad_norm": 0.388671875, |
| "learning_rate": 5.414747344589111e-07, |
| "loss": 0.16597747802734375, |
| "num_tokens": 118083268.0, |
| "step": 1478 |
| }, |
| { |
| "epoch": 0.9180633147113594, |
| "grad_norm": 0.37109375, |
| "learning_rate": 5.33471248472977e-07, |
| "loss": 0.14627838134765625, |
| "num_tokens": 118160614.0, |
| "step": 1479 |
| }, |
| { |
| "epoch": 0.9186840471756673, |
| "grad_norm": 0.390625, |
| "learning_rate": 5.255262821578521e-07, |
| "loss": 0.17824935913085938, |
| "num_tokens": 118243952.0, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.9193047796399751, |
| "grad_norm": 0.455078125, |
| "learning_rate": 5.176398676523314e-07, |
| "loss": 0.231048583984375, |
| "num_tokens": 118322380.0, |
| "step": 1481 |
| }, |
| { |
| "epoch": 0.9199255121042831, |
| "grad_norm": 0.39453125, |
| "learning_rate": 5.098120368583559e-07, |
| "loss": 0.18029022216796875, |
| "num_tokens": 118403710.0, |
| "step": 1482 |
| }, |
| { |
| "epoch": 0.9205462445685909, |
| "grad_norm": 0.38671875, |
| "learning_rate": 5.020428214408912e-07, |
| "loss": 0.12561798095703125, |
| "num_tokens": 118476644.0, |
| "step": 1483 |
| }, |
| { |
| "epoch": 0.9211669770328988, |
| "grad_norm": 0.419921875, |
| "learning_rate": 4.943322528277894e-07, |
| "loss": 0.19466400146484375, |
| "num_tokens": 118559697.0, |
| "step": 1484 |
| }, |
| { |
| "epoch": 0.9217877094972067, |
| "grad_norm": 0.42578125, |
| "learning_rate": 4.866803622096638e-07, |
| "loss": 0.18054962158203125, |
| "num_tokens": 118633723.0, |
| "step": 1485 |
| }, |
| { |
| "epoch": 0.9224084419615146, |
| "grad_norm": 0.478515625, |
| "learning_rate": 4.790871805397695e-07, |
| "loss": 0.247344970703125, |
| "num_tokens": 118711044.0, |
| "step": 1486 |
| }, |
| { |
| "epoch": 0.9230291744258224, |
| "grad_norm": 0.361328125, |
| "learning_rate": 4.71552738533868e-07, |
| "loss": 0.167816162109375, |
| "num_tokens": 118799563.0, |
| "step": 1487 |
| }, |
| { |
| "epoch": 0.9236499068901304, |
| "grad_norm": 0.396484375, |
| "learning_rate": 4.6407706667011604e-07, |
| "loss": 0.18695068359375, |
| "num_tokens": 118881029.0, |
| "step": 1488 |
| }, |
| { |
| "epoch": 0.9242706393544382, |
| "grad_norm": 0.416015625, |
| "learning_rate": 4.5666019518892544e-07, |
| "loss": 0.218505859375, |
| "num_tokens": 118966788.0, |
| "step": 1489 |
| }, |
| { |
| "epoch": 0.9248913718187461, |
| "grad_norm": 0.4296875, |
| "learning_rate": 4.493021540928566e-07, |
| "loss": 0.1919097900390625, |
| "num_tokens": 119048862.0, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.925512104283054, |
| "grad_norm": 0.421875, |
| "learning_rate": 4.4200297314649196e-07, |
| "loss": 0.1617279052734375, |
| "num_tokens": 119120018.0, |
| "step": 1491 |
| }, |
| { |
| "epoch": 0.9261328367473619, |
| "grad_norm": 0.408203125, |
| "learning_rate": 4.347626818763062e-07, |
| "loss": 0.184967041015625, |
| "num_tokens": 119199971.0, |
| "step": 1492 |
| }, |
| { |
| "epoch": 0.9267535692116697, |
| "grad_norm": 0.4609375, |
| "learning_rate": 4.275813095705611e-07, |
| "loss": 0.2259674072265625, |
| "num_tokens": 119274715.0, |
| "step": 1493 |
| }, |
| { |
| "epoch": 0.9273743016759777, |
| "grad_norm": 0.3359375, |
| "learning_rate": 4.204588852791824e-07, |
| "loss": 0.131103515625, |
| "num_tokens": 119348041.0, |
| "step": 1494 |
| }, |
| { |
| "epoch": 0.9279950341402855, |
| "grad_norm": 0.40234375, |
| "learning_rate": 4.1339543781363343e-07, |
| "loss": 0.167022705078125, |
| "num_tokens": 119430405.0, |
| "step": 1495 |
| }, |
| { |
| "epoch": 0.9286157666045934, |
| "grad_norm": 0.421875, |
| "learning_rate": 4.063909957468148e-07, |
| "loss": 0.179901123046875, |
| "num_tokens": 119512610.0, |
| "step": 1496 |
| }, |
| { |
| "epoch": 0.9292364990689013, |
| "grad_norm": 0.419921875, |
| "learning_rate": 3.9944558741293325e-07, |
| "loss": 0.191131591796875, |
| "num_tokens": 119588713.0, |
| "step": 1497 |
| }, |
| { |
| "epoch": 0.9298572315332092, |
| "grad_norm": 0.412109375, |
| "learning_rate": 3.9255924090739306e-07, |
| "loss": 0.171417236328125, |
| "num_tokens": 119669482.0, |
| "step": 1498 |
| }, |
| { |
| "epoch": 0.930477963997517, |
| "grad_norm": 0.326171875, |
| "learning_rate": 3.85731984086688e-07, |
| "loss": 0.12468338012695312, |
| "num_tokens": 119749076.0, |
| "step": 1499 |
| }, |
| { |
| "epoch": 0.931098696461825, |
| "grad_norm": 0.41015625, |
| "learning_rate": 3.7896384456828136e-07, |
| "loss": 0.19616317749023438, |
| "num_tokens": 119828562.0, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.9317194289261328, |
| "grad_norm": 0.447265625, |
| "learning_rate": 3.7225484973049284e-07, |
| "loss": 0.203277587890625, |
| "num_tokens": 119904627.0, |
| "step": 1501 |
| }, |
| { |
| "epoch": 0.9323401613904407, |
| "grad_norm": 0.423828125, |
| "learning_rate": 3.656050267123984e-07, |
| "loss": 0.17144012451171875, |
| "num_tokens": 119980341.0, |
| "step": 1502 |
| }, |
| { |
| "epoch": 0.9329608938547486, |
| "grad_norm": 0.4375, |
| "learning_rate": 3.5901440241370875e-07, |
| "loss": 0.19316864013671875, |
| "num_tokens": 120053204.0, |
| "step": 1503 |
| }, |
| { |
| "epoch": 0.9335816263190565, |
| "grad_norm": 0.373046875, |
| "learning_rate": 3.524830034946647e-07, |
| "loss": 0.15491485595703125, |
| "num_tokens": 120135505.0, |
| "step": 1504 |
| }, |
| { |
| "epoch": 0.9342023587833643, |
| "grad_norm": 0.376953125, |
| "learning_rate": 3.4601085637593344e-07, |
| "loss": 0.132049560546875, |
| "num_tokens": 120215908.0, |
| "step": 1505 |
| }, |
| { |
| "epoch": 0.9348230912476723, |
| "grad_norm": 0.3359375, |
| "learning_rate": 3.395979872384958e-07, |
| "loss": 0.1197662353515625, |
| "num_tokens": 120303178.0, |
| "step": 1506 |
| }, |
| { |
| "epoch": 0.9354438237119801, |
| "grad_norm": 0.361328125, |
| "learning_rate": 3.332444220235442e-07, |
| "loss": 0.13343048095703125, |
| "num_tokens": 120384660.0, |
| "step": 1507 |
| }, |
| { |
| "epoch": 0.936064556176288, |
| "grad_norm": 0.427734375, |
| "learning_rate": 3.269501864323732e-07, |
| "loss": 0.1978912353515625, |
| "num_tokens": 120461966.0, |
| "step": 1508 |
| }, |
| { |
| "epoch": 0.9366852886405959, |
| "grad_norm": 0.380859375, |
| "learning_rate": 3.2071530592628076e-07, |
| "loss": 0.14560699462890625, |
| "num_tokens": 120538489.0, |
| "step": 1509 |
| }, |
| { |
| "epoch": 0.9373060211049038, |
| "grad_norm": 0.75390625, |
| "learning_rate": 3.145398057264637e-07, |
| "loss": 0.180206298828125, |
| "num_tokens": 120617202.0, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.9379267535692116, |
| "grad_norm": 0.453125, |
| "learning_rate": 3.084237108139143e-07, |
| "loss": 0.2194061279296875, |
| "num_tokens": 120698032.0, |
| "step": 1511 |
| }, |
| { |
| "epoch": 0.9385474860335196, |
| "grad_norm": 0.451171875, |
| "learning_rate": 3.0236704592931876e-07, |
| "loss": 0.20873260498046875, |
| "num_tokens": 120775036.0, |
| "step": 1512 |
| }, |
| { |
| "epoch": 0.9391682184978274, |
| "grad_norm": 0.408203125, |
| "learning_rate": 2.9636983557295716e-07, |
| "loss": 0.18021392822265625, |
| "num_tokens": 120854378.0, |
| "step": 1513 |
| }, |
| { |
| "epoch": 0.9397889509621353, |
| "grad_norm": 0.431640625, |
| "learning_rate": 2.904321040046104e-07, |
| "loss": 0.233978271484375, |
| "num_tokens": 120936978.0, |
| "step": 1514 |
| }, |
| { |
| "epoch": 0.9404096834264432, |
| "grad_norm": 0.37890625, |
| "learning_rate": 2.8455387524345513e-07, |
| "loss": 0.153228759765625, |
| "num_tokens": 121014313.0, |
| "step": 1515 |
| }, |
| { |
| "epoch": 0.9410304158907511, |
| "grad_norm": 0.404296875, |
| "learning_rate": 2.787351730679671e-07, |
| "loss": 0.200439453125, |
| "num_tokens": 121094212.0, |
| "step": 1516 |
| }, |
| { |
| "epoch": 0.9416511483550589, |
| "grad_norm": 0.46875, |
| "learning_rate": 2.729760210158233e-07, |
| "loss": 0.2059478759765625, |
| "num_tokens": 121168325.0, |
| "step": 1517 |
| }, |
| { |
| "epoch": 0.9422718808193669, |
| "grad_norm": 0.384765625, |
| "learning_rate": 2.672764423838181e-07, |
| "loss": 0.18526458740234375, |
| "num_tokens": 121255353.0, |
| "step": 1518 |
| }, |
| { |
| "epoch": 0.9428926132836747, |
| "grad_norm": 0.44921875, |
| "learning_rate": 2.6163646022775544e-07, |
| "loss": 0.188323974609375, |
| "num_tokens": 121328380.0, |
| "step": 1519 |
| }, |
| { |
| "epoch": 0.9435133457479826, |
| "grad_norm": 0.44140625, |
| "learning_rate": 2.560560973623588e-07, |
| "loss": 0.19647216796875, |
| "num_tokens": 121407339.0, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.9441340782122905, |
| "grad_norm": 0.40234375, |
| "learning_rate": 2.505353763611895e-07, |
| "loss": 0.17388153076171875, |
| "num_tokens": 121491153.0, |
| "step": 1521 |
| }, |
| { |
| "epoch": 0.9447548106765984, |
| "grad_norm": 0.427734375, |
| "learning_rate": 2.450743195565369e-07, |
| "loss": 0.218963623046875, |
| "num_tokens": 121572779.0, |
| "step": 1522 |
| }, |
| { |
| "epoch": 0.9453755431409062, |
| "grad_norm": 0.40234375, |
| "learning_rate": 2.396729490393468e-07, |
| "loss": 0.17543411254882812, |
| "num_tokens": 121649615.0, |
| "step": 1523 |
| }, |
| { |
| "epoch": 0.9459962756052142, |
| "grad_norm": 0.390625, |
| "learning_rate": 2.343312866591163e-07, |
| "loss": 0.175079345703125, |
| "num_tokens": 121731434.0, |
| "step": 1524 |
| }, |
| { |
| "epoch": 0.946617008069522, |
| "grad_norm": 0.400390625, |
| "learning_rate": 2.2904935402381755e-07, |
| "loss": 0.15227508544921875, |
| "num_tokens": 121807521.0, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.9472377405338299, |
| "grad_norm": 0.4609375, |
| "learning_rate": 2.2382717249980423e-07, |
| "loss": 0.2166595458984375, |
| "num_tokens": 121882228.0, |
| "step": 1526 |
| }, |
| { |
| "epoch": 0.9478584729981379, |
| "grad_norm": 0.49609375, |
| "learning_rate": 2.1866476321172334e-07, |
| "loss": 0.211517333984375, |
| "num_tokens": 121957461.0, |
| "step": 1527 |
| }, |
| { |
| "epoch": 0.9484792054624457, |
| "grad_norm": 0.4921875, |
| "learning_rate": 2.1356214704243537e-07, |
| "loss": 0.22247314453125, |
| "num_tokens": 122029914.0, |
| "step": 1528 |
| }, |
| { |
| "epoch": 0.9490999379267536, |
| "grad_norm": 0.3984375, |
| "learning_rate": 2.0851934463292922e-07, |
| "loss": 0.197418212890625, |
| "num_tokens": 122116350.0, |
| "step": 1529 |
| }, |
| { |
| "epoch": 0.9497206703910615, |
| "grad_norm": 0.3671875, |
| "learning_rate": 2.0353637638222898e-07, |
| "loss": 0.1572265625, |
| "num_tokens": 122196440.0, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.9503414028553694, |
| "grad_norm": 0.388671875, |
| "learning_rate": 1.986132624473258e-07, |
| "loss": 0.179290771484375, |
| "num_tokens": 122275699.0, |
| "step": 1531 |
| }, |
| { |
| "epoch": 0.9509621353196772, |
| "grad_norm": 0.42578125, |
| "learning_rate": 1.937500227430894e-07, |
| "loss": 0.190521240234375, |
| "num_tokens": 122360522.0, |
| "step": 1532 |
| }, |
| { |
| "epoch": 0.9515828677839852, |
| "grad_norm": 0.421875, |
| "learning_rate": 1.889466769421816e-07, |
| "loss": 0.1977996826171875, |
| "num_tokens": 122439298.0, |
| "step": 1533 |
| }, |
| { |
| "epoch": 0.952203600248293, |
| "grad_norm": 0.486328125, |
| "learning_rate": 1.842032444749897e-07, |
| "loss": 0.2313079833984375, |
| "num_tokens": 122509830.0, |
| "step": 1534 |
| }, |
| { |
| "epoch": 0.9528243327126009, |
| "grad_norm": 0.4375, |
| "learning_rate": 1.7951974452953989e-07, |
| "loss": 0.2130889892578125, |
| "num_tokens": 122589044.0, |
| "step": 1535 |
| }, |
| { |
| "epoch": 0.9534450651769087, |
| "grad_norm": 0.40234375, |
| "learning_rate": 1.7489619605141395e-07, |
| "loss": 0.19554901123046875, |
| "num_tokens": 122680765.0, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.9540657976412167, |
| "grad_norm": 0.33984375, |
| "learning_rate": 1.7033261774368758e-07, |
| "loss": 0.1403350830078125, |
| "num_tokens": 122760966.0, |
| "step": 1537 |
| }, |
| { |
| "epoch": 0.9546865301055245, |
| "grad_norm": 0.39453125, |
| "learning_rate": 1.658290280668423e-07, |
| "loss": 0.1837005615234375, |
| "num_tokens": 122847761.0, |
| "step": 1538 |
| }, |
| { |
| "epoch": 0.9553072625698324, |
| "grad_norm": 0.451171875, |
| "learning_rate": 1.6138544523869702e-07, |
| "loss": 0.20369720458984375, |
| "num_tokens": 122920265.0, |
| "step": 1539 |
| }, |
| { |
| "epoch": 0.9559279950341403, |
| "grad_norm": 0.384765625, |
| "learning_rate": 1.5700188723432984e-07, |
| "loss": 0.15799713134765625, |
| "num_tokens": 122999008.0, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.9565487274984482, |
| "grad_norm": 0.373046875, |
| "learning_rate": 1.5267837178600974e-07, |
| "loss": 0.16912841796875, |
| "num_tokens": 123091404.0, |
| "step": 1541 |
| }, |
| { |
| "epoch": 0.957169459962756, |
| "grad_norm": 0.357421875, |
| "learning_rate": 1.4841491638312167e-07, |
| "loss": 0.13803863525390625, |
| "num_tokens": 123171022.0, |
| "step": 1542 |
| }, |
| { |
| "epoch": 0.957790192427064, |
| "grad_norm": 0.42578125, |
| "learning_rate": 1.4421153827209987e-07, |
| "loss": 0.20819091796875, |
| "num_tokens": 123249917.0, |
| "step": 1543 |
| }, |
| { |
| "epoch": 0.9584109248913718, |
| "grad_norm": 0.419921875, |
| "learning_rate": 1.4006825445634975e-07, |
| "loss": 0.1942291259765625, |
| "num_tokens": 123334537.0, |
| "step": 1544 |
| }, |
| { |
| "epoch": 0.9590316573556797, |
| "grad_norm": 0.35546875, |
| "learning_rate": 1.3598508169619106e-07, |
| "loss": 0.13500213623046875, |
| "num_tokens": 123414558.0, |
| "step": 1545 |
| }, |
| { |
| "epoch": 0.9596523898199876, |
| "grad_norm": 0.431640625, |
| "learning_rate": 1.3196203650878148e-07, |
| "loss": 0.17513275146484375, |
| "num_tokens": 123487049.0, |
| "step": 1546 |
| }, |
| { |
| "epoch": 0.9602731222842955, |
| "grad_norm": 0.41796875, |
| "learning_rate": 1.2799913516804818e-07, |
| "loss": 0.208465576171875, |
| "num_tokens": 123577657.0, |
| "step": 1547 |
| }, |
| { |
| "epoch": 0.9608938547486033, |
| "grad_norm": 0.326171875, |
| "learning_rate": 1.2409639370463133e-07, |
| "loss": 0.12164306640625, |
| "num_tokens": 123656267.0, |
| "step": 1548 |
| }, |
| { |
| "epoch": 0.9615145872129113, |
| "grad_norm": 0.3984375, |
| "learning_rate": 1.2025382790580909e-07, |
| "loss": 0.1643524169921875, |
| "num_tokens": 123731282.0, |
| "step": 1549 |
| }, |
| { |
| "epoch": 0.9621353196772191, |
| "grad_norm": 0.408203125, |
| "learning_rate": 1.1647145331544263e-07, |
| "loss": 0.16950225830078125, |
| "num_tokens": 123803813.0, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.962756052141527, |
| "grad_norm": 0.375, |
| "learning_rate": 1.1274928523390626e-07, |
| "loss": 0.17138671875, |
| "num_tokens": 123886681.0, |
| "step": 1551 |
| }, |
| { |
| "epoch": 0.9633767846058349, |
| "grad_norm": 0.369140625, |
| "learning_rate": 1.0908733871802911e-07, |
| "loss": 0.12857818603515625, |
| "num_tokens": 123962326.0, |
| "step": 1552 |
| }, |
| { |
| "epoch": 0.9639975170701428, |
| "grad_norm": 0.427734375, |
| "learning_rate": 1.054856285810335e-07, |
| "loss": 0.19605255126953125, |
| "num_tokens": 124041304.0, |
| "step": 1553 |
| }, |
| { |
| "epoch": 0.9646182495344506, |
| "grad_norm": 0.458984375, |
| "learning_rate": 1.0194416939247497e-07, |
| "loss": 0.1844940185546875, |
| "num_tokens": 124114734.0, |
| "step": 1554 |
| }, |
| { |
| "epoch": 0.9652389819987586, |
| "grad_norm": 0.4375, |
| "learning_rate": 9.84629754781824e-08, |
| "loss": 0.227142333984375, |
| "num_tokens": 124198151.0, |
| "step": 1555 |
| }, |
| { |
| "epoch": 0.9658597144630664, |
| "grad_norm": 0.4140625, |
| "learning_rate": 9.504206092020129e-08, |
| "loss": 0.18299102783203125, |
| "num_tokens": 124277118.0, |
| "step": 1556 |
| }, |
| { |
| "epoch": 0.9664804469273743, |
| "grad_norm": 0.443359375, |
| "learning_rate": 9.168143955673892e-08, |
| "loss": 0.1861114501953125, |
| "num_tokens": 124351071.0, |
| "step": 1557 |
| }, |
| { |
| "epoch": 0.9671011793916822, |
| "grad_norm": 0.435546875, |
| "learning_rate": 8.83811249821026e-08, |
| "loss": 0.1986236572265625, |
| "num_tokens": 124429523.0, |
| "step": 1558 |
| }, |
| { |
| "epoch": 0.9677219118559901, |
| "grad_norm": 0.38671875, |
| "learning_rate": 8.514113054664984e-08, |
| "loss": 0.15753173828125, |
| "num_tokens": 124508850.0, |
| "step": 1559 |
| }, |
| { |
| "epoch": 0.9683426443202979, |
| "grad_norm": 0.421875, |
| "learning_rate": 8.196146935672999e-08, |
| "loss": 0.1880035400390625, |
| "num_tokens": 124583097.0, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.9689633767846059, |
| "grad_norm": 0.4765625, |
| "learning_rate": 7.884215427463758e-08, |
| "loss": 0.22265625, |
| "num_tokens": 124668189.0, |
| "step": 1561 |
| }, |
| { |
| "epoch": 0.9695841092489137, |
| "grad_norm": 0.416015625, |
| "learning_rate": 7.578319791855581e-08, |
| "loss": 0.17105865478515625, |
| "num_tokens": 124739966.0, |
| "step": 1562 |
| }, |
| { |
| "epoch": 0.9702048417132216, |
| "grad_norm": 0.443359375, |
| "learning_rate": 7.278461266250147e-08, |
| "loss": 0.209686279296875, |
| "num_tokens": 124822047.0, |
| "step": 1563 |
| }, |
| { |
| "epoch": 0.9708255741775295, |
| "grad_norm": 0.451171875, |
| "learning_rate": 6.984641063628506e-08, |
| "loss": 0.2391815185546875, |
| "num_tokens": 124904083.0, |
| "step": 1564 |
| }, |
| { |
| "epoch": 0.9714463066418374, |
| "grad_norm": 0.439453125, |
| "learning_rate": 6.696860372545244e-08, |
| "loss": 0.21942901611328125, |
| "num_tokens": 124984580.0, |
| "step": 1565 |
| }, |
| { |
| "epoch": 0.9720670391061452, |
| "grad_norm": 0.427734375, |
| "learning_rate": 6.415120357124493e-08, |
| "loss": 0.2217254638671875, |
| "num_tokens": 125063992.0, |
| "step": 1566 |
| }, |
| { |
| "epoch": 0.9726877715704532, |
| "grad_norm": 0.41015625, |
| "learning_rate": 6.139422157054265e-08, |
| "loss": 0.1790771484375, |
| "num_tokens": 125141703.0, |
| "step": 1567 |
| }, |
| { |
| "epoch": 0.973308504034761, |
| "grad_norm": 0.41796875, |
| "learning_rate": 5.869766887582784e-08, |
| "loss": 0.193359375, |
| "num_tokens": 125221017.0, |
| "step": 1568 |
| }, |
| { |
| "epoch": 0.9739292364990689, |
| "grad_norm": 0.369140625, |
| "learning_rate": 5.6061556395131665e-08, |
| "loss": 0.15718841552734375, |
| "num_tokens": 125304963.0, |
| "step": 1569 |
| }, |
| { |
| "epoch": 0.9745499689633768, |
| "grad_norm": 0.416015625, |
| "learning_rate": 5.348589479199917e-08, |
| "loss": 0.17581939697265625, |
| "num_tokens": 125382152.0, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.9751707014276847, |
| "grad_norm": 0.43359375, |
| "learning_rate": 5.0970694485434346e-08, |
| "loss": 0.209014892578125, |
| "num_tokens": 125466570.0, |
| "step": 1571 |
| }, |
| { |
| "epoch": 0.9757914338919925, |
| "grad_norm": 0.361328125, |
| "learning_rate": 4.8515965649870155e-08, |
| "loss": 0.1434173583984375, |
| "num_tokens": 125546768.0, |
| "step": 1572 |
| }, |
| { |
| "epoch": 0.9764121663563005, |
| "grad_norm": 0.419921875, |
| "learning_rate": 4.6121718215118566e-08, |
| "loss": 0.2110137939453125, |
| "num_tokens": 125631483.0, |
| "step": 1573 |
| }, |
| { |
| "epoch": 0.9770328988206083, |
| "grad_norm": 0.431640625, |
| "learning_rate": 4.3787961866333935e-08, |
| "loss": 0.19290924072265625, |
| "num_tokens": 125711555.0, |
| "step": 1574 |
| }, |
| { |
| "epoch": 0.9776536312849162, |
| "grad_norm": 0.41015625, |
| "learning_rate": 4.151470604397467e-08, |
| "loss": 0.168670654296875, |
| "num_tokens": 125786649.0, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.978274363749224, |
| "grad_norm": 0.419921875, |
| "learning_rate": 3.930195994376329e-08, |
| "loss": 0.190093994140625, |
| "num_tokens": 125869743.0, |
| "step": 1576 |
| }, |
| { |
| "epoch": 0.978895096213532, |
| "grad_norm": 0.38671875, |
| "learning_rate": 3.714973251664977e-08, |
| "loss": 0.1556854248046875, |
| "num_tokens": 125951316.0, |
| "step": 1577 |
| }, |
| { |
| "epoch": 0.9795158286778398, |
| "grad_norm": 0.365234375, |
| "learning_rate": 3.505803246877326e-08, |
| "loss": 0.13401031494140625, |
| "num_tokens": 126032193.0, |
| "step": 1578 |
| }, |
| { |
| "epoch": 0.9801365611421478, |
| "grad_norm": 0.458984375, |
| "learning_rate": 3.3026868261433754e-08, |
| "loss": 0.23333740234375, |
| "num_tokens": 126111506.0, |
| "step": 1579 |
| }, |
| { |
| "epoch": 0.9807572936064556, |
| "grad_norm": 0.431640625, |
| "learning_rate": 3.105624811104879e-08, |
| "loss": 0.2361297607421875, |
| "num_tokens": 126197466.0, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.9813780260707635, |
| "grad_norm": 0.490234375, |
| "learning_rate": 2.914617998912683e-08, |
| "loss": 0.19991302490234375, |
| "num_tokens": 126267247.0, |
| "step": 1581 |
| }, |
| { |
| "epoch": 0.9819987585350713, |
| "grad_norm": 0.44140625, |
| "learning_rate": 2.729667162222893e-08, |
| "loss": 0.200286865234375, |
| "num_tokens": 126347281.0, |
| "step": 1582 |
| }, |
| { |
| "epoch": 0.9826194909993793, |
| "grad_norm": 0.48046875, |
| "learning_rate": 2.55077304919471e-08, |
| "loss": 0.237945556640625, |
| "num_tokens": 126428027.0, |
| "step": 1583 |
| }, |
| { |
| "epoch": 0.9832402234636871, |
| "grad_norm": 0.396484375, |
| "learning_rate": 2.3779363834864344e-08, |
| "loss": 0.1669158935546875, |
| "num_tokens": 126510422.0, |
| "step": 1584 |
| }, |
| { |
| "epoch": 0.983860955927995, |
| "grad_norm": 0.390625, |
| "learning_rate": 2.2111578642527997e-08, |
| "loss": 0.164306640625, |
| "num_tokens": 126585863.0, |
| "step": 1585 |
| }, |
| { |
| "epoch": 0.9844816883923029, |
| "grad_norm": 0.39453125, |
| "learning_rate": 2.050438166142643e-08, |
| "loss": 0.180206298828125, |
| "num_tokens": 126667421.0, |
| "step": 1586 |
| }, |
| { |
| "epoch": 0.9851024208566108, |
| "grad_norm": 0.46875, |
| "learning_rate": 1.8957779392955732e-08, |
| "loss": 0.2297210693359375, |
| "num_tokens": 126751726.0, |
| "step": 1587 |
| }, |
| { |
| "epoch": 0.9857231533209186, |
| "grad_norm": 0.3203125, |
| "learning_rate": 1.7471778093396395e-08, |
| "loss": 0.10770416259765625, |
| "num_tokens": 126833265.0, |
| "step": 1588 |
| }, |
| { |
| "epoch": 0.9863438857852266, |
| "grad_norm": 0.4296875, |
| "learning_rate": 1.6046383773885008e-08, |
| "loss": 0.1649017333984375, |
| "num_tokens": 126907564.0, |
| "step": 1589 |
| }, |
| { |
| "epoch": 0.9869646182495344, |
| "grad_norm": 0.39453125, |
| "learning_rate": 1.4681602200395938e-08, |
| "loss": 0.20687103271484375, |
| "num_tokens": 126990184.0, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.9875853507138423, |
| "grad_norm": 0.421875, |
| "learning_rate": 1.3377438893711347e-08, |
| "loss": 0.194854736328125, |
| "num_tokens": 127065596.0, |
| "step": 1591 |
| }, |
| { |
| "epoch": 0.9882060831781502, |
| "grad_norm": 0.39453125, |
| "learning_rate": 1.2133899129402882e-08, |
| "loss": 0.17165374755859375, |
| "num_tokens": 127141734.0, |
| "step": 1592 |
| }, |
| { |
| "epoch": 0.9888268156424581, |
| "grad_norm": 0.41015625, |
| "learning_rate": 1.095098793781002e-08, |
| "loss": 0.1893463134765625, |
| "num_tokens": 127222708.0, |
| "step": 1593 |
| }, |
| { |
| "epoch": 0.9894475481067659, |
| "grad_norm": 0.36328125, |
| "learning_rate": 9.828710104018424e-09, |
| "loss": 0.1423492431640625, |
| "num_tokens": 127303578.0, |
| "step": 1594 |
| }, |
| { |
| "epoch": 0.9900682805710739, |
| "grad_norm": 0.451171875, |
| "learning_rate": 8.767070167838287e-09, |
| "loss": 0.2303009033203125, |
| "num_tokens": 127384163.0, |
| "step": 1595 |
| }, |
| { |
| "epoch": 0.9906890130353817, |
| "grad_norm": 0.404296875, |
| "learning_rate": 7.76607242379268e-09, |
| "loss": 0.16613006591796875, |
| "num_tokens": 127468046.0, |
| "step": 1596 |
| }, |
| { |
| "epoch": 0.9913097454996896, |
| "grad_norm": 0.43359375, |
| "learning_rate": 6.825720921094236e-09, |
| "loss": 0.210845947265625, |
| "num_tokens": 127551436.0, |
| "step": 1597 |
| }, |
| { |
| "epoch": 0.9919304779639975, |
| "grad_norm": 0.4375, |
| "learning_rate": 5.946019463631824e-09, |
| "loss": 0.176361083984375, |
| "num_tokens": 127628295.0, |
| "step": 1598 |
| }, |
| { |
| "epoch": 0.9925512104283054, |
| "grad_norm": 0.40625, |
| "learning_rate": 5.126971609952235e-09, |
| "loss": 0.1775970458984375, |
| "num_tokens": 127709813.0, |
| "step": 1599 |
| }, |
| { |
| "epoch": 0.9931719428926132, |
| "grad_norm": 0.404296875, |
| "learning_rate": 4.368580673251854e-09, |
| "loss": 0.1641998291015625, |
| "num_tokens": 127788013.0, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.9937926753569212, |
| "grad_norm": 0.451171875, |
| "learning_rate": 3.6708497213550074e-09, |
| "loss": 0.22100830078125, |
| "num_tokens": 127864301.0, |
| "step": 1601 |
| }, |
| { |
| "epoch": 0.994413407821229, |
| "grad_norm": 0.482421875, |
| "learning_rate": 3.033781576705641e-09, |
| "loss": 0.238128662109375, |
| "num_tokens": 127941392.0, |
| "step": 1602 |
| }, |
| { |
| "epoch": 0.9950341402855369, |
| "grad_norm": 0.46875, |
| "learning_rate": 2.4573788163589906e-09, |
| "loss": 0.24317169189453125, |
| "num_tokens": 128021185.0, |
| "step": 1603 |
| }, |
| { |
| "epoch": 0.9956548727498448, |
| "grad_norm": 0.431640625, |
| "learning_rate": 1.9416437719665946e-09, |
| "loss": 0.1907958984375, |
| "num_tokens": 128099875.0, |
| "step": 1604 |
| }, |
| { |
| "epoch": 0.9962756052141527, |
| "grad_norm": 0.43359375, |
| "learning_rate": 1.4865785297646373e-09, |
| "loss": 0.205413818359375, |
| "num_tokens": 128183898.0, |
| "step": 1605 |
| }, |
| { |
| "epoch": 0.9968963376784605, |
| "grad_norm": 0.388671875, |
| "learning_rate": 1.092184930577278e-09, |
| "loss": 0.161468505859375, |
| "num_tokens": 128262383.0, |
| "step": 1606 |
| }, |
| { |
| "epoch": 0.9975170701427685, |
| "grad_norm": 0.396484375, |
| "learning_rate": 7.584645697933379e-10, |
| "loss": 0.18444061279296875, |
| "num_tokens": 128344933.0, |
| "step": 1607 |
| }, |
| { |
| "epoch": 0.9981378026070763, |
| "grad_norm": 0.416015625, |
| "learning_rate": 4.854187973712954e-10, |
| "loss": 0.203826904296875, |
| "num_tokens": 128429729.0, |
| "step": 1608 |
| }, |
| { |
| "epoch": 0.9987585350713842, |
| "grad_norm": 0.48828125, |
| "learning_rate": 2.730487178309593e-10, |
| "loss": 0.22216796875, |
| "num_tokens": 128506793.0, |
| "step": 1609 |
| }, |
| { |
| "epoch": 0.9993792675356921, |
| "grad_norm": 0.416015625, |
| "learning_rate": 1.2135519024514264e-10, |
| "loss": 0.16802978515625, |
| "num_tokens": 128584548.0, |
| "step": 1610 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.33203125, |
| "learning_rate": 3.033882824299283e-11, |
| "loss": 0.10080718994140625, |
| "num_tokens": 128666899.0, |
| "step": 1611 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 1611, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.206218535574333e+19, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|