[
  {
    "loss": 7.8454,
    "grad_norm": 37.07211685180664,
    "learning_rate": 9.800000000000001e-07,
    "epoch": 0.061092019854906456,
    "step": 50
  },
  {
    "loss": 3.9827,
    "grad_norm": 20.092954635620117,
    "learning_rate": 1.98e-06,
    "epoch": 0.12218403970981291,
    "step": 100
  },
  {
    "loss": 2.3931,
    "grad_norm": 17.73457908630371,
    "learning_rate": 2.9800000000000003e-06,
    "epoch": 0.18327605956471935,
    "step": 150
  },
  {
    "loss": 1.8963,
    "grad_norm": 20.85625457763672,
    "learning_rate": 3.980000000000001e-06,
    "epoch": 0.24436807941962582,
    "step": 200
  },
  {
    "loss": 1.4987,
    "grad_norm": 16.645475387573242,
    "learning_rate": 4.980000000000001e-06,
    "epoch": 0.30546009927453227,
    "step": 250
  },
  {
    "loss": 0.6793,
    "grad_norm": 5.7492899894714355,
    "learning_rate": 5.98e-06,
    "epoch": 0.3665521191294387,
    "step": 300
  },
  {
    "loss": 0.4645,
    "grad_norm": 6.880336284637451,
    "learning_rate": 6.98e-06,
    "epoch": 0.42764413898434517,
    "step": 350
  },
  {
    "loss": 0.4268,
    "grad_norm": 5.579587936401367,
    "learning_rate": 7.980000000000002e-06,
    "epoch": 0.48873615883925164,
    "step": 400
  },
  {
    "loss": 0.3482,
    "grad_norm": 4.888024806976318,
    "learning_rate": 8.98e-06,
    "epoch": 0.5498281786941581,
    "step": 450
  },
  {
    "loss": 0.3344,
    "grad_norm": 5.0392961502075195,
    "learning_rate": 9.980000000000001e-06,
    "epoch": 0.6109201985490645,
    "step": 500
  },
  {
    "eval_loss": 0.34320297837257385,
    "eval_lev": 0.11722440908020909,
    "eval_runtime": 762.646,
    "eval_samples_per_second": 7.631,
    "eval_steps_per_second": 1.908,
    "epoch": 0.6109201985490645,
    "step": 500
  },
  {
    "loss": 0.3161,
    "grad_norm": 4.948662281036377,
    "learning_rate": 9.863699582753825e-06,
    "epoch": 0.672012218403971,
    "step": 550
  },
  {
    "loss": 0.3174,
    "grad_norm": 6.04939079284668,
    "learning_rate": 9.724617524339361e-06,
    "epoch": 0.7331042382588774,
    "step": 600
  },
  {
    "loss": 0.3027,
    "grad_norm": 4.422155380249023,
    "learning_rate": 9.585535465924897e-06,
    "epoch": 0.7941962581137839,
    "step": 650
  },
  {
    "loss": 0.2771,
    "grad_norm": 3.8613669872283936,
    "learning_rate": 9.446453407510432e-06,
    "epoch": 0.8552882779686903,
    "step": 700
  },
  {
    "loss": 0.2778,
    "grad_norm": 3.7996127605438232,
    "learning_rate": 9.307371349095966e-06,
    "epoch": 0.9163802978235968,
    "step": 750
  },
  {
    "loss": 0.2656,
    "grad_norm": 4.138099670410156,
    "learning_rate": 9.168289290681502e-06,
    "epoch": 0.9774723176785033,
    "step": 800
  },
  {
    "loss": 0.2132,
    "grad_norm": 3.6106605529785156,
    "learning_rate": 9.029207232267038e-06,
    "epoch": 1.037877052310042,
    "step": 850
  },
  {
    "loss": 0.2045,
    "grad_norm": 4.340276718139648,
    "learning_rate": 8.890125173852574e-06,
    "epoch": 1.0989690721649485,
    "step": 900
  },
  {
    "loss": 0.1889,
    "grad_norm": 3.8788809776306152,
    "learning_rate": 8.75104311543811e-06,
    "epoch": 1.1600610920198549,
    "step": 950
  },
  {
    "loss": 0.1977,
    "grad_norm": 3.841637372970581,
    "learning_rate": 8.611961057023645e-06,
    "epoch": 1.2211531118747614,
    "step": 1000
  },
  {
    "eval_loss": 0.231288880109787,
    "eval_lev": 0.1185600415981164,
    "eval_runtime": 746.701,
    "eval_samples_per_second": 7.794,
    "eval_steps_per_second": 1.949,
    "epoch": 1.2211531118747614,
    "step": 1000
  },
  {
    "loss": 0.1913,
    "grad_norm": 3.2860212326049805,
    "learning_rate": 8.47287899860918e-06,
    "epoch": 1.2822451317296677,
    "step": 1050
  },
  {
    "loss": 0.1623,
    "grad_norm": 3.720313549041748,
    "learning_rate": 8.333796940194717e-06,
    "epoch": 1.3433371515845742,
    "step": 1100
  },
  {
    "loss": 0.1703,
    "grad_norm": 2.716610908508301,
    "learning_rate": 8.194714881780251e-06,
    "epoch": 1.4044291714394808,
    "step": 1150
  },
  {
    "loss": 0.1848,
    "grad_norm": 3.862579822540283,
    "learning_rate": 8.055632823365787e-06,
    "epoch": 1.4655211912943873,
    "step": 1200
  },
  {
    "loss": 0.1774,
    "grad_norm": 4.334630012512207,
    "learning_rate": 7.916550764951322e-06,
    "epoch": 1.5266132111492936,
    "step": 1250
  },
  {
    "loss": 0.1658,
    "grad_norm": 3.6040408611297607,
    "learning_rate": 7.777468706536857e-06,
    "epoch": 1.5877052310042,
    "step": 1300
  },
  {
    "loss": 0.1634,
    "grad_norm": 3.428239345550537,
    "learning_rate": 7.638386648122394e-06,
    "epoch": 1.6487972508591064,
    "step": 1350
  },
  {
    "loss": 0.1547,
    "grad_norm": 4.171792030334473,
    "learning_rate": 7.499304589707929e-06,
    "epoch": 1.709889270714013,
    "step": 1400
  },
  {
    "loss": 0.1807,
    "grad_norm": 3.5489614009857178,
    "learning_rate": 7.360222531293464e-06,
    "epoch": 1.7709812905689195,
    "step": 1450
  },
  {
    "loss": 0.1684,
    "grad_norm": 3.7410941123962402,
    "learning_rate": 7.221140472878999e-06,
    "epoch": 1.832073310423826,
    "step": 1500
  },
  {
    "eval_loss": 0.19100622832775116,
    "eval_lev": 0.11059288646312225,
    "eval_runtime": 743.8165,
    "eval_samples_per_second": 7.825,
    "eval_steps_per_second": 1.956,
    "epoch": 1.832073310423826,
    "step": 1500
  },
  {
    "loss": 0.1629,
    "grad_norm": 4.399950981140137,
    "learning_rate": 7.0820584144645345e-06,
    "epoch": 1.8931653302787324,
    "step": 1550
  },
  {
    "loss": 0.163,
    "grad_norm": 4.052116870880127,
    "learning_rate": 6.9429763560500694e-06,
    "epoch": 1.9542573501336387,
    "step": 1600
  },
  {
    "loss": 0.1384,
    "grad_norm": 2.347085952758789,
    "learning_rate": 6.803894297635606e-06,
    "epoch": 2.0146620847651775,
    "step": 1650
  },
  {
    "loss": 0.1002,
    "grad_norm": 3.4055838584899902,
    "learning_rate": 6.664812239221141e-06,
    "epoch": 2.075754104620084,
    "step": 1700
  },
  {
    "loss": 0.098,
    "grad_norm": 3.1648952960968018,
    "learning_rate": 6.525730180806677e-06,
    "epoch": 2.1368461244749906,
    "step": 1750
  },
  {
    "loss": 0.0914,
    "grad_norm": 2.8259644508361816,
    "learning_rate": 6.386648122392212e-06,
    "epoch": 2.197938144329897,
    "step": 1800
  },
  {
    "loss": 0.0978,
    "grad_norm": 2.8769214153289795,
    "learning_rate": 6.247566063977747e-06,
    "epoch": 2.259030164184803,
    "step": 1850
  },
  {
    "loss": 0.1013,
    "grad_norm": 3.907423257827759,
    "learning_rate": 6.108484005563283e-06,
    "epoch": 2.3201221840397097,
    "step": 1900
  },
  {
    "loss": 0.0975,
    "grad_norm": 3.2260284423828125,
    "learning_rate": 5.969401947148818e-06,
    "epoch": 2.3812142038946162,
    "step": 1950
  },
  {
    "loss": 0.0857,
    "grad_norm": 3.05881929397583,
    "learning_rate": 5.830319888734354e-06,
    "epoch": 2.4423062237495228,
    "step": 2000
  },
  {
    "eval_loss": 0.1767120063304901,
    "eval_lev": 0.10026850043947817,
    "eval_runtime": 736.5694,
    "eval_samples_per_second": 7.901,
    "eval_steps_per_second": 1.975,
    "epoch": 2.4423062237495228,
    "step": 2000
  },
  {
    "loss": 0.0925,
    "grad_norm": 3.2194902896881104,
    "learning_rate": 5.691237830319889e-06,
    "epoch": 2.5033982436044293,
    "step": 2050
  },
  {
    "loss": 0.0982,
    "grad_norm": 3.6402034759521484,
    "learning_rate": 5.552155771905425e-06,
    "epoch": 2.5644902634593354,
    "step": 2100
  },
  {
    "loss": 0.099,
    "grad_norm": 3.429462432861328,
    "learning_rate": 5.41307371349096e-06,
    "epoch": 2.625582283314242,
    "step": 2150
  },
  {
    "loss": 0.0988,
    "grad_norm": 2.946981906890869,
    "learning_rate": 5.273991655076496e-06,
    "epoch": 2.6866743031691485,
    "step": 2200
  },
  {
    "loss": 0.1095,
    "grad_norm": 2.541302442550659,
    "learning_rate": 5.134909596662031e-06,
    "epoch": 2.747766323024055,
    "step": 2250
  },
  {
    "loss": 0.0848,
    "grad_norm": 2.9033353328704834,
    "learning_rate": 4.995827538247566e-06,
    "epoch": 2.8088583428789615,
    "step": 2300
  },
  {
    "loss": 0.0905,
    "grad_norm": 3.764165163040161,
    "learning_rate": 4.856745479833102e-06,
    "epoch": 2.869950362733868,
    "step": 2350
  },
  {
    "loss": 0.0929,
    "grad_norm": 2.633665084838867,
    "learning_rate": 4.7176634214186375e-06,
    "epoch": 2.9310423825887746,
    "step": 2400
  },
  {
    "loss": 0.0922,
    "grad_norm": 5.649708271026611,
    "learning_rate": 4.578581363004173e-06,
    "epoch": 2.9921344024436807,
    "step": 2450
  },
  {
    "loss": 0.0598,
    "grad_norm": 3.696406602859497,
    "learning_rate": 4.439499304589708e-06,
    "epoch": 3.0525391370752195,
    "step": 2500
  },
  {
    "eval_loss": 0.16838780045509338,
    "eval_lev": 0.0839466685620889,
    "eval_runtime": 752.8171,
    "eval_samples_per_second": 7.731,
    "eval_steps_per_second": 1.933,
    "epoch": 3.0525391370752195,
    "step": 2500
  },
  {
    "loss": 0.0645,
    "grad_norm": 1.515001654624939,
    "learning_rate": 4.300417246175244e-06,
    "epoch": 3.113631156930126,
    "step": 2550
  },
  {
    "loss": 0.0615,
    "grad_norm": 1.3002523183822632,
    "learning_rate": 4.16133518776078e-06,
    "epoch": 3.1747231767850326,
    "step": 2600
  },
  {
    "loss": 0.0517,
    "grad_norm": 3.700875997543335,
    "learning_rate": 4.022253129346315e-06,
    "epoch": 3.235815196639939,
    "step": 2650
  },
  {
    "loss": 0.0508,
    "grad_norm": 2.631333827972412,
    "learning_rate": 3.8831710709318496e-06,
    "epoch": 3.296907216494845,
    "step": 2700
  },
  {
    "loss": 0.0475,
    "grad_norm": 2.8750274181365967,
    "learning_rate": 3.7440890125173858e-06,
    "epoch": 3.3579992363497517,
    "step": 2750
  },
  {
    "loss": 0.0649,
    "grad_norm": 2.2158966064453125,
    "learning_rate": 3.605006954102921e-06,
    "epoch": 3.4190912562046583,
    "step": 2800
  },
  {
    "loss": 0.0548,
    "grad_norm": 2.202899694442749,
    "learning_rate": 3.465924895688457e-06,
    "epoch": 3.480183276059565,
    "step": 2850
  },
  {
    "loss": 0.0485,
    "grad_norm": 3.5741360187530518,
    "learning_rate": 3.3268428372739918e-06,
    "epoch": 3.5412752959144713,
    "step": 2900
  },
  {
    "loss": 0.0453,
    "grad_norm": 2.79834246635437,
    "learning_rate": 3.187760778859527e-06,
    "epoch": 3.602367315769378,
    "step": 2950
  },
  {
    "loss": 0.0611,
    "grad_norm": 1.3616344928741455,
    "learning_rate": 3.048678720445063e-06,
    "epoch": 3.663459335624284,
    "step": 3000
  },
  {
    "eval_loss": 0.1635599136352539,
    "eval_lev": 0.08673565709996436,
    "eval_runtime": 742.0161,
    "eval_samples_per_second": 7.843,
    "eval_steps_per_second": 1.961,
    "epoch": 3.663459335624284,
    "step": 3000
  },
  {
    "loss": 0.0413,
    "grad_norm": 2.2232677936553955,
    "learning_rate": 2.9095966620305982e-06,
    "epoch": 3.7245513554791905,
    "step": 3050
  },
  {
    "loss": 0.0531,
    "grad_norm": 2.548612117767334,
    "learning_rate": 2.770514603616134e-06,
    "epoch": 3.785643375334097,
    "step": 3100
  },
  {
    "loss": 0.0531,
    "grad_norm": 3.896488666534424,
    "learning_rate": 2.6314325452016694e-06,
    "epoch": 3.8467353951890035,
    "step": 3150
  },
  {
    "loss": 0.0557,
    "grad_norm": 1.2170665264129639,
    "learning_rate": 2.4923504867872047e-06,
    "epoch": 3.9078274150439096,
    "step": 3200
  },
  {
    "loss": 0.0531,
    "grad_norm": 1.4885497093200684,
    "learning_rate": 2.35326842837274e-06,
    "epoch": 3.968919434898816,
    "step": 3250
  },
  {
    "loss": 0.0429,
    "grad_norm": 1.6385103464126587,
    "learning_rate": 2.2141863699582754e-06,
    "epoch": 4.029324169530355,
    "step": 3300
  },
  {
    "loss": 0.0309,
    "grad_norm": 1.5109846591949463,
    "learning_rate": 2.075104311543811e-06,
    "epoch": 4.0904161893852615,
    "step": 3350
  },
  {
    "loss": 0.0413,
    "grad_norm": 1.5601056814193726,
    "learning_rate": 1.9360222531293465e-06,
    "epoch": 4.151508209240168,
    "step": 3400
  },
  {
    "loss": 0.0269,
    "grad_norm": 1.4139606952667236,
    "learning_rate": 1.7969401947148818e-06,
    "epoch": 4.212600229095075,
    "step": 3450
  },
  {
    "loss": 0.0363,
    "grad_norm": 1.725199818611145,
    "learning_rate": 1.6578581363004174e-06,
    "epoch": 4.273692248949981,
    "step": 3500
  },
  {
    "eval_loss": 0.16628701984882355,
    "eval_lev": 0.06235171194318398,
    "eval_runtime": 748.3873,
    "eval_samples_per_second": 7.777,
    "eval_steps_per_second": 1.944,
    "epoch": 4.273692248949981,
    "step": 3500
  },
  {
    "loss": 0.0247,
    "grad_norm": 1.4835779666900635,
    "learning_rate": 1.5187760778859527e-06,
    "epoch": 4.334784268804888,
    "step": 3550
  },
  {
    "loss": 0.0291,
    "grad_norm": 1.5756815671920776,
    "learning_rate": 1.3796940194714883e-06,
    "epoch": 4.395876288659794,
    "step": 3600
  },
  {
    "loss": 0.0247,
    "grad_norm": 1.7207695245742798,
    "learning_rate": 1.2406119610570236e-06,
    "epoch": 4.4569683085147,
    "step": 3650
  },
  {
    "loss": 0.0341,
    "grad_norm": 1.29410982131958,
    "learning_rate": 1.1015299026425592e-06,
    "epoch": 4.518060328369606,
    "step": 3700
  },
  {
    "loss": 0.0248,
    "grad_norm": 2.7500200271606445,
    "learning_rate": 9.624478442280945e-07,
    "epoch": 4.579152348224513,
    "step": 3750
  },
  {
    "loss": 0.0301,
    "grad_norm": 1.767777442932129,
    "learning_rate": 8.233657858136301e-07,
    "epoch": 4.640244368079419,
    "step": 3800
  },
  {
    "loss": 0.027,
    "grad_norm": 1.7285575866699219,
    "learning_rate": 6.842837273991656e-07,
    "epoch": 4.701336387934326,
    "step": 3850
  },
  {
    "loss": 0.037,
    "grad_norm": 1.4113770723342896,
    "learning_rate": 5.45201668984701e-07,
    "epoch": 4.7624284077892325,
    "step": 3900
  },
  {
    "loss": 0.0245,
    "grad_norm": 1.7481070756912231,
    "learning_rate": 4.061196105702365e-07,
    "epoch": 4.823520427644139,
    "step": 3950
  },
  {
    "loss": 0.0281,
    "grad_norm": 2.5071568489074707,
    "learning_rate": 2.6703755215577195e-07,
    "epoch": 4.8846124474990456,
    "step": 4000
  },
  {
    "eval_loss": 0.16462065279483795,
    "eval_lev": 0.06492746862423085,
    "eval_runtime": 747.1834,
    "eval_samples_per_second": 7.789,
    "eval_steps_per_second": 1.947,
    "epoch": 4.8846124474990456,
    "step": 4000
  },
  {
    "loss": 0.0347,
    "grad_norm": 1.3820921182632446,
    "learning_rate": 1.2795549374130738e-07,
    "epoch": 4.945704467353952,
    "step": 4050
  },
  {
    "train_runtime": 31790.5788,
    "train_samples_per_second": 8.238,
    "train_steps_per_second": 0.129,
    "total_flos": 7.55790734168064e+19,
    "train_loss": 0.33621121611496174,
    "epoch": 5.0,
    "step": 4095
  }
]