| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9997538764459759, |
| "eval_steps": 500, |
| "global_step": 2708, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00036918533103618014, |
| "grad_norm": 2.4412319660186768, |
| "learning_rate": 5.0000000000000004e-08, |
| "loss": 1.1388, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0007383706620723603, |
| "grad_norm": 2.275918483734131, |
| "learning_rate": 1.0000000000000001e-07, |
| "loss": 1.1206, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0011075559931085406, |
| "grad_norm": 2.2667036056518555, |
| "learning_rate": 1.5000000000000002e-07, |
| "loss": 1.0982, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0014767413241447206, |
| "grad_norm": 2.4527230262756348, |
| "learning_rate": 2.0000000000000002e-07, |
| "loss": 1.1072, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0018459266551809008, |
| "grad_norm": 2.393638849258423, |
| "learning_rate": 2.5000000000000004e-07, |
| "loss": 1.0996, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.002215111986217081, |
| "grad_norm": 2.389622449874878, |
| "learning_rate": 3.0000000000000004e-07, |
| "loss": 1.0901, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.002584297317253261, |
| "grad_norm": 2.34468150138855, |
| "learning_rate": 3.5000000000000004e-07, |
| "loss": 1.1209, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.002953482648289441, |
| "grad_norm": 2.48166561126709, |
| "learning_rate": 4.0000000000000003e-07, |
| "loss": 1.1034, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.0033226679793256215, |
| "grad_norm": 2.1924567222595215, |
| "learning_rate": 4.5000000000000003e-07, |
| "loss": 1.1516, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.0036918533103618015, |
| "grad_norm": 2.408474922180176, |
| "learning_rate": 5.000000000000001e-07, |
| "loss": 1.1405, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.004061038641397982, |
| "grad_norm": 2.1132235527038574, |
| "learning_rate": 5.5e-07, |
| "loss": 1.1031, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.004430223972434162, |
| "grad_norm": 2.1916306018829346, |
| "learning_rate": 6.000000000000001e-07, |
| "loss": 1.1238, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.004799409303470342, |
| "grad_norm": 2.040755033493042, |
| "learning_rate": 6.5e-07, |
| "loss": 1.1197, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.005168594634506522, |
| "grad_norm": 1.9937278032302856, |
| "learning_rate": 7.000000000000001e-07, |
| "loss": 1.0578, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.005537779965542703, |
| "grad_norm": 2.0224194526672363, |
| "learning_rate": 7.5e-07, |
| "loss": 1.0846, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.005906965296578882, |
| "grad_norm": 1.80266273021698, |
| "learning_rate": 8.000000000000001e-07, |
| "loss": 1.0908, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.006276150627615063, |
| "grad_norm": 1.8800767660140991, |
| "learning_rate": 8.500000000000001e-07, |
| "loss": 1.0928, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.006645335958651243, |
| "grad_norm": 1.6004233360290527, |
| "learning_rate": 9.000000000000001e-07, |
| "loss": 1.0828, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.0070145212896874235, |
| "grad_norm": 1.653378963470459, |
| "learning_rate": 9.500000000000001e-07, |
| "loss": 1.1113, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.007383706620723603, |
| "grad_norm": 1.6406723260879517, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 1.0682, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.0077528919517597834, |
| "grad_norm": 1.6181585788726807, |
| "learning_rate": 1.0500000000000001e-06, |
| "loss": 1.1055, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.008122077282795964, |
| "grad_norm": 1.551200270652771, |
| "learning_rate": 1.1e-06, |
| "loss": 1.0795, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.008491262613832144, |
| "grad_norm": 1.3663794994354248, |
| "learning_rate": 1.1500000000000002e-06, |
| "loss": 1.0283, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.008860447944868325, |
| "grad_norm": 1.3748595714569092, |
| "learning_rate": 1.2000000000000002e-06, |
| "loss": 1.0784, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.009229633275904503, |
| "grad_norm": 1.2911611795425415, |
| "learning_rate": 1.25e-06, |
| "loss": 0.9854, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.009598818606940684, |
| "grad_norm": 1.3047049045562744, |
| "learning_rate": 1.3e-06, |
| "loss": 1.0503, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.009968003937976864, |
| "grad_norm": 1.17142653465271, |
| "learning_rate": 1.3500000000000002e-06, |
| "loss": 1.0588, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.010337189269013045, |
| "grad_norm": 1.2030054330825806, |
| "learning_rate": 1.4000000000000001e-06, |
| "loss": 1.0328, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.010706374600049225, |
| "grad_norm": 1.131135106086731, |
| "learning_rate": 1.45e-06, |
| "loss": 1.0273, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.011075559931085405, |
| "grad_norm": 1.0866118669509888, |
| "learning_rate": 1.5e-06, |
| "loss": 0.9883, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.011444745262121586, |
| "grad_norm": 1.0986360311508179, |
| "learning_rate": 1.5500000000000002e-06, |
| "loss": 1.0138, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.011813930593157764, |
| "grad_norm": 0.9595009088516235, |
| "learning_rate": 1.6000000000000001e-06, |
| "loss": 0.988, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.012183115924193945, |
| "grad_norm": 1.054680347442627, |
| "learning_rate": 1.6500000000000003e-06, |
| "loss": 1.0522, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.012552301255230125, |
| "grad_norm": 0.9745041131973267, |
| "learning_rate": 1.7000000000000002e-06, |
| "loss": 0.9978, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.012921486586266306, |
| "grad_norm": 0.9892019629478455, |
| "learning_rate": 1.75e-06, |
| "loss": 1.046, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.013290671917302486, |
| "grad_norm": 0.9731583595275879, |
| "learning_rate": 1.8000000000000001e-06, |
| "loss": 0.9907, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.013659857248338667, |
| "grad_norm": 0.9275212287902832, |
| "learning_rate": 1.85e-06, |
| "loss": 0.9293, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.014029042579374847, |
| "grad_norm": 0.9423267245292664, |
| "learning_rate": 1.9000000000000002e-06, |
| "loss": 0.9377, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.014398227910411026, |
| "grad_norm": 0.9474686980247498, |
| "learning_rate": 1.9500000000000004e-06, |
| "loss": 0.9594, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.014767413241447206, |
| "grad_norm": 0.9592716693878174, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 0.9871, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.015136598572483386, |
| "grad_norm": 0.9387710094451904, |
| "learning_rate": 2.05e-06, |
| "loss": 0.9823, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.015505783903519567, |
| "grad_norm": 0.9290558695793152, |
| "learning_rate": 2.1000000000000002e-06, |
| "loss": 0.9354, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.015874969234555746, |
| "grad_norm": 0.9229576587677002, |
| "learning_rate": 2.15e-06, |
| "loss": 0.9828, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.016244154565591928, |
| "grad_norm": 0.9243917465209961, |
| "learning_rate": 2.2e-06, |
| "loss": 1.0032, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.016613339896628106, |
| "grad_norm": 0.9277251958847046, |
| "learning_rate": 2.25e-06, |
| "loss": 0.9702, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.01698252522766429, |
| "grad_norm": 0.9161118865013123, |
| "learning_rate": 2.3000000000000004e-06, |
| "loss": 0.9846, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.017351710558700467, |
| "grad_norm": 0.8675879240036011, |
| "learning_rate": 2.35e-06, |
| "loss": 0.9333, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.01772089588973665, |
| "grad_norm": 0.8792003393173218, |
| "learning_rate": 2.4000000000000003e-06, |
| "loss": 1.0106, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.018090081220772828, |
| "grad_norm": 0.894873857498169, |
| "learning_rate": 2.4500000000000003e-06, |
| "loss": 0.9514, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.018459266551809007, |
| "grad_norm": 0.8866007328033447, |
| "learning_rate": 2.5e-06, |
| "loss": 1.0057, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.01882845188284519, |
| "grad_norm": 0.8863010406494141, |
| "learning_rate": 2.55e-06, |
| "loss": 0.9586, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.019197637213881368, |
| "grad_norm": 0.912958025932312, |
| "learning_rate": 2.6e-06, |
| "loss": 0.9121, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.01956682254491755, |
| "grad_norm": 0.8827121257781982, |
| "learning_rate": 2.6500000000000005e-06, |
| "loss": 0.9517, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.01993600787595373, |
| "grad_norm": 0.8558551669120789, |
| "learning_rate": 2.7000000000000004e-06, |
| "loss": 0.9365, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.02030519320698991, |
| "grad_norm": 0.8515662550926208, |
| "learning_rate": 2.7500000000000004e-06, |
| "loss": 0.9204, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.02067437853802609, |
| "grad_norm": 0.844958484172821, |
| "learning_rate": 2.8000000000000003e-06, |
| "loss": 0.9801, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.021043563869062268, |
| "grad_norm": 0.8569675087928772, |
| "learning_rate": 2.85e-06, |
| "loss": 0.9336, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.02141274920009845, |
| "grad_norm": 0.8839316964149475, |
| "learning_rate": 2.9e-06, |
| "loss": 0.9296, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.02178193453113463, |
| "grad_norm": 1.0445549488067627, |
| "learning_rate": 2.95e-06, |
| "loss": 0.9134, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.02215111986217081, |
| "grad_norm": 0.8282931447029114, |
| "learning_rate": 3e-06, |
| "loss": 0.9421, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.02252030519320699, |
| "grad_norm": 0.8246078491210938, |
| "learning_rate": 3.05e-06, |
| "loss": 0.8743, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.02288949052424317, |
| "grad_norm": 0.8587180376052856, |
| "learning_rate": 3.1000000000000004e-06, |
| "loss": 0.9303, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.02325867585527935, |
| "grad_norm": 0.8575277924537659, |
| "learning_rate": 3.1500000000000003e-06, |
| "loss": 0.9503, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.02362786118631553, |
| "grad_norm": 0.8518301844596863, |
| "learning_rate": 3.2000000000000003e-06, |
| "loss": 0.9225, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.02399704651735171, |
| "grad_norm": 0.8324997425079346, |
| "learning_rate": 3.2500000000000002e-06, |
| "loss": 0.9158, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.02436623184838789, |
| "grad_norm": 0.8463263511657715, |
| "learning_rate": 3.3000000000000006e-06, |
| "loss": 0.9311, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.024735417179424072, |
| "grad_norm": 0.8261412978172302, |
| "learning_rate": 3.3500000000000005e-06, |
| "loss": 0.9121, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.02510460251046025, |
| "grad_norm": 0.8466128706932068, |
| "learning_rate": 3.4000000000000005e-06, |
| "loss": 0.9587, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.025473787841496433, |
| "grad_norm": 0.8264670372009277, |
| "learning_rate": 3.45e-06, |
| "loss": 0.9301, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.02584297317253261, |
| "grad_norm": 0.8121640086174011, |
| "learning_rate": 3.5e-06, |
| "loss": 0.9116, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.02621215850356879, |
| "grad_norm": 0.8689830303192139, |
| "learning_rate": 3.5500000000000003e-06, |
| "loss": 0.9224, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.026581343834604972, |
| "grad_norm": 0.8256193399429321, |
| "learning_rate": 3.6000000000000003e-06, |
| "loss": 0.8696, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.02695052916564115, |
| "grad_norm": 0.8338184952735901, |
| "learning_rate": 3.65e-06, |
| "loss": 0.934, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.027319714496677333, |
| "grad_norm": 0.8366256356239319, |
| "learning_rate": 3.7e-06, |
| "loss": 0.9109, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.027688899827713512, |
| "grad_norm": 0.7981867790222168, |
| "learning_rate": 3.7500000000000005e-06, |
| "loss": 0.8785, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.028058085158749694, |
| "grad_norm": 0.8150340914726257, |
| "learning_rate": 3.8000000000000005e-06, |
| "loss": 0.9343, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.028427270489785873, |
| "grad_norm": 0.820603609085083, |
| "learning_rate": 3.85e-06, |
| "loss": 0.9195, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.02879645582082205, |
| "grad_norm": 0.8418338894844055, |
| "learning_rate": 3.900000000000001e-06, |
| "loss": 0.9318, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.029165641151858233, |
| "grad_norm": 0.8316344022750854, |
| "learning_rate": 3.95e-06, |
| "loss": 0.9175, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.029534826482894412, |
| "grad_norm": 0.8164108991622925, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 0.914, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.029904011813930594, |
| "grad_norm": 0.7849715948104858, |
| "learning_rate": 4.05e-06, |
| "loss": 0.8593, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.030273197144966773, |
| "grad_norm": 0.859346866607666, |
| "learning_rate": 4.1e-06, |
| "loss": 0.8864, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.030642382476002955, |
| "grad_norm": 0.8121856451034546, |
| "learning_rate": 4.15e-06, |
| "loss": 0.8686, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.031011567807039134, |
| "grad_norm": 0.8445794582366943, |
| "learning_rate": 4.2000000000000004e-06, |
| "loss": 0.9037, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.03138075313807531, |
| "grad_norm": 0.8285534381866455, |
| "learning_rate": 4.25e-06, |
| "loss": 0.9309, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.03174993846911149, |
| "grad_norm": 0.794826090335846, |
| "learning_rate": 4.3e-06, |
| "loss": 0.8836, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.03211912380014768, |
| "grad_norm": 0.853547990322113, |
| "learning_rate": 4.350000000000001e-06, |
| "loss": 0.8828, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.032488309131183855, |
| "grad_norm": 0.8319276571273804, |
| "learning_rate": 4.4e-06, |
| "loss": 0.8678, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.032857494462220034, |
| "grad_norm": 0.8368034958839417, |
| "learning_rate": 4.450000000000001e-06, |
| "loss": 0.9213, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.03322667979325621, |
| "grad_norm": 0.7604875564575195, |
| "learning_rate": 4.5e-06, |
| "loss": 0.8462, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.03359586512429239, |
| "grad_norm": 0.8023838400840759, |
| "learning_rate": 4.5500000000000005e-06, |
| "loss": 0.8957, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.03396505045532858, |
| "grad_norm": 0.8296010494232178, |
| "learning_rate": 4.600000000000001e-06, |
| "loss": 0.8578, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.034334235786364756, |
| "grad_norm": 0.802651047706604, |
| "learning_rate": 4.65e-06, |
| "loss": 0.8946, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.034703421117400934, |
| "grad_norm": 0.8469492793083191, |
| "learning_rate": 4.7e-06, |
| "loss": 0.8709, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.03507260644843711, |
| "grad_norm": 0.8248879313468933, |
| "learning_rate": 4.75e-06, |
| "loss": 0.9127, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.0354417917794733, |
| "grad_norm": 0.8265485167503357, |
| "learning_rate": 4.800000000000001e-06, |
| "loss": 0.9046, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.03581097711050948, |
| "grad_norm": 0.9016802310943604, |
| "learning_rate": 4.85e-06, |
| "loss": 0.8448, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.036180162441545656, |
| "grad_norm": 0.8030735850334167, |
| "learning_rate": 4.9000000000000005e-06, |
| "loss": 0.8666, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.036549347772581835, |
| "grad_norm": 0.8260457515716553, |
| "learning_rate": 4.95e-06, |
| "loss": 0.87, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.03691853310361801, |
| "grad_norm": 0.8534119725227356, |
| "learning_rate": 5e-06, |
| "loss": 0.8781, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.0372877184346542, |
| "grad_norm": 0.820518970489502, |
| "learning_rate": 4.999999952687895e-06, |
| "loss": 0.8719, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.03765690376569038, |
| "grad_norm": 0.8288585543632507, |
| "learning_rate": 4.99999981075158e-06, |
| "loss": 0.8646, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.038026089096726556, |
| "grad_norm": 0.8551903963088989, |
| "learning_rate": 4.999999574191062e-06, |
| "loss": 0.8824, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.038395274427762735, |
| "grad_norm": 0.8003144860267639, |
| "learning_rate": 4.999999243006348e-06, |
| "loss": 0.8534, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.038764459758798914, |
| "grad_norm": 0.8019240498542786, |
| "learning_rate": 4.9999988171974525e-06, |
| "loss": 0.8696, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.0391336450898351, |
| "grad_norm": 0.8382614254951477, |
| "learning_rate": 4.999998296764391e-06, |
| "loss": 0.8746, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.03950283042087128, |
| "grad_norm": 0.8664233088493347, |
| "learning_rate": 4.999997681707182e-06, |
| "loss": 0.9053, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.03987201575190746, |
| "grad_norm": 0.8756380677223206, |
| "learning_rate": 4.99999697202585e-06, |
| "loss": 0.8714, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.040241201082943635, |
| "grad_norm": 1.0035847425460815, |
| "learning_rate": 4.9999961677204224e-06, |
| "loss": 0.8111, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.04061038641397982, |
| "grad_norm": 0.8640075922012329, |
| "learning_rate": 4.999995268790928e-06, |
| "loss": 0.9003, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.040979571745016, |
| "grad_norm": 0.8350078463554382, |
| "learning_rate": 4.999994275237402e-06, |
| "loss": 0.8816, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.04134875707605218, |
| "grad_norm": 0.8058568835258484, |
| "learning_rate": 4.999993187059882e-06, |
| "loss": 0.8243, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.04171794240708836, |
| "grad_norm": 0.8728750348091125, |
| "learning_rate": 4.999992004258409e-06, |
| "loss": 0.8457, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.042087127738124536, |
| "grad_norm": 0.8918057084083557, |
| "learning_rate": 4.999990726833027e-06, |
| "loss": 0.8412, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.04245631306916072, |
| "grad_norm": 0.8393918871879578, |
| "learning_rate": 4.9999893547837855e-06, |
| "loss": 0.8806, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.0428254984001969, |
| "grad_norm": 0.8215784430503845, |
| "learning_rate": 4.999987888110736e-06, |
| "loss": 0.8957, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.04319468373123308, |
| "grad_norm": 0.8378515243530273, |
| "learning_rate": 4.999986326813933e-06, |
| "loss": 0.816, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.04356386906226926, |
| "grad_norm": 0.8467821478843689, |
| "learning_rate": 4.999984670893438e-06, |
| "loss": 0.8879, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.043933054393305436, |
| "grad_norm": 0.8451763391494751, |
| "learning_rate": 4.999982920349311e-06, |
| "loss": 0.8752, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.04430223972434162, |
| "grad_norm": 0.8014891743659973, |
| "learning_rate": 4.99998107518162e-06, |
| "loss": 0.8279, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.0446714250553778, |
| "grad_norm": 0.8478080630302429, |
| "learning_rate": 4.999979135390434e-06, |
| "loss": 0.9071, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.04504061038641398, |
| "grad_norm": 0.8662890791893005, |
| "learning_rate": 4.999977100975827e-06, |
| "loss": 0.8752, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.04540979571745016, |
| "grad_norm": 0.8697710633277893, |
| "learning_rate": 4.999974971937875e-06, |
| "loss": 0.8386, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.04577898104848634, |
| "grad_norm": 0.8541619181632996, |
| "learning_rate": 4.99997274827666e-06, |
| "loss": 0.9063, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.04614816637952252, |
| "grad_norm": 0.8500178456306458, |
| "learning_rate": 4.999970429992266e-06, |
| "loss": 0.8862, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.0465173517105587, |
| "grad_norm": 0.8678516149520874, |
| "learning_rate": 4.9999680170847794e-06, |
| "loss": 0.8178, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.04688653704159488, |
| "grad_norm": 0.867600679397583, |
| "learning_rate": 4.999965509554293e-06, |
| "loss": 0.9158, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.04725572237263106, |
| "grad_norm": 0.8475435376167297, |
| "learning_rate": 4.9999629074009005e-06, |
| "loss": 0.8593, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.047624907703667244, |
| "grad_norm": 0.8186939358711243, |
| "learning_rate": 4.999960210624701e-06, |
| "loss": 0.8621, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.04799409303470342, |
| "grad_norm": 0.9259890913963318, |
| "learning_rate": 4.999957419225797e-06, |
| "loss": 0.8642, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.0483632783657396, |
| "grad_norm": 0.8416559100151062, |
| "learning_rate": 4.999954533204293e-06, |
| "loss": 0.8799, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.04873246369677578, |
| "grad_norm": 0.8305281400680542, |
| "learning_rate": 4.9999515525603e-06, |
| "loss": 0.8314, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.04910164902781196, |
| "grad_norm": 0.8432246446609497, |
| "learning_rate": 4.999948477293929e-06, |
| "loss": 0.8687, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.049470834358848144, |
| "grad_norm": 0.8665896058082581, |
| "learning_rate": 4.999945307405297e-06, |
| "loss": 0.861, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.04984001968988432, |
| "grad_norm": 0.8684259057044983, |
| "learning_rate": 4.9999420428945236e-06, |
| "loss": 0.9069, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.0502092050209205, |
| "grad_norm": 0.8950573801994324, |
| "learning_rate": 4.999938683761733e-06, |
| "loss": 0.8553, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.05057839035195668, |
| "grad_norm": 0.8906726837158203, |
| "learning_rate": 4.9999352300070535e-06, |
| "loss": 0.865, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.050947575682992866, |
| "grad_norm": 0.9318857789039612, |
| "learning_rate": 4.999931681630614e-06, |
| "loss": 0.8697, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.051316761014029044, |
| "grad_norm": 0.8434486389160156, |
| "learning_rate": 4.999928038632549e-06, |
| "loss": 0.8665, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.05168594634506522, |
| "grad_norm": 0.8642570376396179, |
| "learning_rate": 4.999924301012997e-06, |
| "loss": 0.8999, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.0520551316761014, |
| "grad_norm": 0.8331663012504578, |
| "learning_rate": 4.999920468772099e-06, |
| "loss": 0.8458, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.05242431700713758, |
| "grad_norm": 0.949670135974884, |
| "learning_rate": 4.9999165419100005e-06, |
| "loss": 0.8552, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.052793502338173766, |
| "grad_norm": 0.8622894287109375, |
| "learning_rate": 4.999912520426849e-06, |
| "loss": 0.867, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.053162687669209945, |
| "grad_norm": 0.8427858948707581, |
| "learning_rate": 4.999908404322799e-06, |
| "loss": 0.8842, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.05353187300024612, |
| "grad_norm": 0.8718158006668091, |
| "learning_rate": 4.999904193598003e-06, |
| "loss": 0.8591, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.0539010583312823, |
| "grad_norm": 0.8681777715682983, |
| "learning_rate": 4.999899888252624e-06, |
| "loss": 0.8984, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.05427024366231848, |
| "grad_norm": 0.8601359128952026, |
| "learning_rate": 4.999895488286822e-06, |
| "loss": 0.8386, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.054639428993354666, |
| "grad_norm": 0.8220537304878235, |
| "learning_rate": 4.999890993700766e-06, |
| "loss": 0.8362, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.055008614324390845, |
| "grad_norm": 0.8281165361404419, |
| "learning_rate": 4.999886404494624e-06, |
| "loss": 0.8587, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.055377799655427024, |
| "grad_norm": 0.8356446623802185, |
| "learning_rate": 4.999881720668571e-06, |
| "loss": 0.8988, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.0557469849864632, |
| "grad_norm": 0.8825479745864868, |
| "learning_rate": 4.999876942222783e-06, |
| "loss": 0.8419, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.05611617031749939, |
| "grad_norm": 0.8695886135101318, |
| "learning_rate": 4.999872069157443e-06, |
| "loss": 0.852, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.056485355648535567, |
| "grad_norm": 0.8561027646064758, |
| "learning_rate": 4.999867101472733e-06, |
| "loss": 0.806, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.056854540979571745, |
| "grad_norm": 0.8631170988082886, |
| "learning_rate": 4.999862039168843e-06, |
| "loss": 0.8227, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.057223726310607924, |
| "grad_norm": 0.8831436634063721, |
| "learning_rate": 4.999856882245963e-06, |
| "loss": 0.8584, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.0575929116416441, |
| "grad_norm": 0.8398553133010864, |
| "learning_rate": 4.9998516307042895e-06, |
| "loss": 0.8675, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.05796209697268029, |
| "grad_norm": 0.9215529561042786, |
| "learning_rate": 4.999846284544021e-06, |
| "loss": 0.844, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.05833128230371647, |
| "grad_norm": 0.8526574373245239, |
| "learning_rate": 4.999840843765359e-06, |
| "loss": 0.825, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.058700467634752646, |
| "grad_norm": 0.8621156811714172, |
| "learning_rate": 4.99983530836851e-06, |
| "loss": 0.8762, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.059069652965788824, |
| "grad_norm": 0.86864173412323, |
| "learning_rate": 4.999829678353684e-06, |
| "loss": 0.8886, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.059438838296825, |
| "grad_norm": 0.8976534008979797, |
| "learning_rate": 4.9998239537210935e-06, |
| "loss": 0.8452, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.05980802362786119, |
| "grad_norm": 0.8699798583984375, |
| "learning_rate": 4.999818134470955e-06, |
| "loss": 0.8482, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.06017720895889737, |
| "grad_norm": 0.8608055710792542, |
| "learning_rate": 4.99981222060349e-06, |
| "loss": 0.8432, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.060546394289933546, |
| "grad_norm": 0.8606122732162476, |
| "learning_rate": 4.999806212118921e-06, |
| "loss": 0.8591, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.060915579620969725, |
| "grad_norm": 0.8934593200683594, |
| "learning_rate": 4.9998001090174745e-06, |
| "loss": 0.8672, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.06128476495200591, |
| "grad_norm": 0.8507917523384094, |
| "learning_rate": 4.999793911299384e-06, |
| "loss": 0.8604, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.06165395028304209, |
| "grad_norm": 0.8685324192047119, |
| "learning_rate": 4.999787618964883e-06, |
| "loss": 0.8796, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.06202313561407827, |
| "grad_norm": 0.9141797423362732, |
| "learning_rate": 4.9997812320142095e-06, |
| "loss": 0.8473, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.062392320945114446, |
| "grad_norm": 1.0198613405227661, |
| "learning_rate": 4.9997747504476045e-06, |
| "loss": 0.8412, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.06276150627615062, |
| "grad_norm": 0.8494629263877869, |
| "learning_rate": 4.999768174265315e-06, |
| "loss": 0.8515, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.0631306916071868, |
| "grad_norm": 0.8553645610809326, |
| "learning_rate": 4.999761503467589e-06, |
| "loss": 0.8504, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.06349987693822298, |
| "grad_norm": 0.846227765083313, |
| "learning_rate": 4.999754738054678e-06, |
| "loss": 0.837, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.06386906226925917, |
| "grad_norm": 0.8288367986679077, |
| "learning_rate": 4.999747878026841e-06, |
| "loss": 0.8366, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.06423824760029535, |
| "grad_norm": 0.8543452620506287, |
| "learning_rate": 4.9997409233843345e-06, |
| "loss": 0.8545, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.06460743293133153, |
| "grad_norm": 0.8958789110183716, |
| "learning_rate": 4.999733874127423e-06, |
| "loss": 0.8324, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.06497661826236771, |
| "grad_norm": 0.8512812852859497, |
| "learning_rate": 4.999726730256373e-06, |
| "loss": 0.8172, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.06534580359340389, |
| "grad_norm": 0.8509172797203064, |
| "learning_rate": 4.999719491771457e-06, |
| "loss": 0.8648, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.06571498892444007, |
| "grad_norm": 0.8630576133728027, |
| "learning_rate": 4.999712158672945e-06, |
| "loss": 0.8518, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.06608417425547625, |
| "grad_norm": 0.8487321138381958, |
| "learning_rate": 4.999704730961118e-06, |
| "loss": 0.8454, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.06645335958651243, |
| "grad_norm": 0.8370772004127502, |
| "learning_rate": 4.999697208636255e-06, |
| "loss": 0.7777, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.0668225449175486, |
| "grad_norm": 0.867073655128479, |
| "learning_rate": 4.999689591698642e-06, |
| "loss": 0.8204, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.06719173024858478, |
| "grad_norm": 0.8590624332427979, |
| "learning_rate": 4.999681880148567e-06, |
| "loss": 0.885, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.06756091557962098, |
| "grad_norm": 0.8786302804946899, |
| "learning_rate": 4.999674073986322e-06, |
| "loss": 0.8107, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.06793010091065715, |
| "grad_norm": 0.8392751216888428, |
| "learning_rate": 4.999666173212201e-06, |
| "loss": 0.8198, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.06829928624169333, |
| "grad_norm": 0.8693823218345642, |
| "learning_rate": 4.999658177826505e-06, |
| "loss": 0.8278, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.06866847157272951, |
| "grad_norm": 0.9009088277816772, |
| "learning_rate": 4.999650087829536e-06, |
| "loss": 0.8554, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.06903765690376569, |
| "grad_norm": 0.8450184464454651, |
| "learning_rate": 4.9996419032216e-06, |
| "loss": 0.7952, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.06940684223480187, |
| "grad_norm": 0.851325511932373, |
| "learning_rate": 4.9996336240030065e-06, |
| "loss": 0.866, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.06977602756583805, |
| "grad_norm": 0.846808135509491, |
| "learning_rate": 4.99962525017407e-06, |
| "loss": 0.8526, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.07014521289687423, |
| "grad_norm": 0.8484999537467957, |
| "learning_rate": 4.999616781735106e-06, |
| "loss": 0.8134, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.0705143982279104, |
| "grad_norm": 0.8739628195762634, |
| "learning_rate": 4.999608218686436e-06, |
| "loss": 0.7972, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.0708835835589466, |
| "grad_norm": 0.8760291934013367, |
| "learning_rate": 4.999599561028384e-06, |
| "loss": 0.846, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.07125276888998278, |
| "grad_norm": 0.8653873801231384, |
| "learning_rate": 4.999590808761277e-06, |
| "loss": 0.8571, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.07162195422101895, |
| "grad_norm": 0.89369797706604, |
| "learning_rate": 4.999581961885447e-06, |
| "loss": 0.8503, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.07199113955205513, |
| "grad_norm": 0.8840173482894897, |
| "learning_rate": 4.999573020401229e-06, |
| "loss": 0.8419, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.07236032488309131, |
| "grad_norm": 0.8561863899230957, |
| "learning_rate": 4.9995639843089605e-06, |
| "loss": 0.7862, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.07272951021412749, |
| "grad_norm": 0.8365263342857361, |
| "learning_rate": 4.9995548536089845e-06, |
| "loss": 0.8587, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.07309869554516367, |
| "grad_norm": 0.9029537439346313, |
| "learning_rate": 4.9995456283016455e-06, |
| "loss": 0.8483, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.07346788087619985, |
| "grad_norm": 0.8581278324127197, |
| "learning_rate": 4.999536308387294e-06, |
| "loss": 0.847, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.07383706620723603, |
| "grad_norm": 0.8499622344970703, |
| "learning_rate": 4.999526893866282e-06, |
| "loss": 0.8161, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.07420625153827222, |
| "grad_norm": 0.8650686740875244, |
| "learning_rate": 4.999517384738966e-06, |
| "loss": 0.8218, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.0745754368693084, |
| "grad_norm": 0.9009180665016174, |
| "learning_rate": 4.999507781005705e-06, |
| "loss": 0.8505, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.07494462220034458, |
| "grad_norm": 0.897419273853302, |
| "learning_rate": 4.9994980826668646e-06, |
| "loss": 0.7851, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.07531380753138076, |
| "grad_norm": 0.8872283697128296, |
| "learning_rate": 4.99948828972281e-06, |
| "loss": 0.789, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.07568299286241693, |
| "grad_norm": 0.8454247713088989, |
| "learning_rate": 4.9994784021739115e-06, |
| "loss": 0.8373, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.07605217819345311, |
| "grad_norm": 0.869134247303009, |
| "learning_rate": 4.999468420020546e-06, |
| "loss": 0.837, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.07642136352448929, |
| "grad_norm": 0.8689702153205872, |
| "learning_rate": 4.999458343263089e-06, |
| "loss": 0.8106, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.07679054885552547, |
| "grad_norm": 0.8920548558235168, |
| "learning_rate": 4.999448171901923e-06, |
| "loss": 0.8608, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.07715973418656165, |
| "grad_norm": 0.8637383580207825, |
| "learning_rate": 4.999437905937431e-06, |
| "loss": 0.8396, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.07752891951759783, |
| "grad_norm": 0.9378350973129272, |
| "learning_rate": 4.9994275453700045e-06, |
| "loss": 0.8381, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.07789810484863402, |
| "grad_norm": 0.8814995288848877, |
| "learning_rate": 4.9994170902000335e-06, |
| "loss": 0.8495, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.0782672901796702, |
| "grad_norm": 0.8853291869163513, |
| "learning_rate": 4.9994065404279155e-06, |
| "loss": 0.8429, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.07863647551070638, |
| "grad_norm": 0.8535809516906738, |
| "learning_rate": 4.999395896054048e-06, |
| "loss": 0.8198, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.07900566084174256, |
| "grad_norm": 0.889284074306488, |
| "learning_rate": 4.999385157078835e-06, |
| "loss": 0.8836, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.07937484617277873, |
| "grad_norm": 0.8783283829689026, |
| "learning_rate": 4.999374323502683e-06, |
| "loss": 0.8611, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.07974403150381491, |
| "grad_norm": 0.851722240447998, |
| "learning_rate": 4.999363395326e-06, |
| "loss": 0.828, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.08011321683485109, |
| "grad_norm": 0.8967769145965576, |
| "learning_rate": 4.999352372549203e-06, |
| "loss": 0.7991, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.08048240216588727, |
| "grad_norm": 0.9296314716339111, |
| "learning_rate": 4.999341255172707e-06, |
| "loss": 0.893, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.08085158749692345, |
| "grad_norm": 0.9072420001029968, |
| "learning_rate": 4.999330043196933e-06, |
| "loss": 0.8014, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.08122077282795964, |
| "grad_norm": 0.9063705205917358, |
| "learning_rate": 4.999318736622306e-06, |
| "loss": 0.8206, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.08158995815899582, |
| "grad_norm": 0.8354766964912415, |
| "learning_rate": 4.9993073354492525e-06, |
| "loss": 0.8148, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.081959143490032, |
| "grad_norm": 0.878901481628418, |
| "learning_rate": 4.999295839678206e-06, |
| "loss": 0.8758, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.08232832882106818, |
| "grad_norm": 0.8754085302352905, |
| "learning_rate": 4.999284249309602e-06, |
| "loss": 0.8447, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.08269751415210436, |
| "grad_norm": 0.8729782104492188, |
| "learning_rate": 4.9992725643438765e-06, |
| "loss": 0.7938, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.08306669948314054, |
| "grad_norm": 0.8772115111351013, |
| "learning_rate": 4.999260784781473e-06, |
| "loss": 0.8683, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.08343588481417671, |
| "grad_norm": 0.8647124767303467, |
| "learning_rate": 4.999248910622838e-06, |
| "loss": 0.8111, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.08380507014521289, |
| "grad_norm": 0.85676509141922, |
| "learning_rate": 4.999236941868421e-06, |
| "loss": 0.8005, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.08417425547624907, |
| "grad_norm": 0.8511557579040527, |
| "learning_rate": 4.999224878518674e-06, |
| "loss": 0.8074, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.08454344080728526, |
| "grad_norm": 0.8975043892860413, |
| "learning_rate": 4.9992127205740545e-06, |
| "loss": 0.8327, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.08491262613832144, |
| "grad_norm": 0.863868772983551, |
| "learning_rate": 4.999200468035021e-06, |
| "loss": 0.8226, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.08528181146935762, |
| "grad_norm": 0.8852335214614868, |
| "learning_rate": 4.9991881209020406e-06, |
| "loss": 0.8353, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.0856509968003938, |
| "grad_norm": 0.8811020851135254, |
| "learning_rate": 4.999175679175577e-06, |
| "loss": 0.8028, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.08602018213142998, |
| "grad_norm": 0.8707005381584167, |
| "learning_rate": 4.999163142856104e-06, |
| "loss": 0.7983, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.08638936746246616, |
| "grad_norm": 0.857227087020874, |
| "learning_rate": 4.999150511944094e-06, |
| "loss": 0.7777, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.08675855279350234, |
| "grad_norm": 0.8872169256210327, |
| "learning_rate": 4.999137786440026e-06, |
| "loss": 0.8692, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.08712773812453851, |
| "grad_norm": 0.8813910484313965, |
| "learning_rate": 4.999124966344381e-06, |
| "loss": 0.8101, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.0874969234555747, |
| "grad_norm": 0.8994487524032593, |
| "learning_rate": 4.999112051657646e-06, |
| "loss": 0.8646, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.08786610878661087, |
| "grad_norm": 0.9409844875335693, |
| "learning_rate": 4.999099042380307e-06, |
| "loss": 0.8218, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.08823529411764706, |
| "grad_norm": 0.8659898042678833, |
| "learning_rate": 4.999085938512859e-06, |
| "loss": 0.8347, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.08860447944868324, |
| "grad_norm": 0.8241569995880127, |
| "learning_rate": 4.9990727400557965e-06, |
| "loss": 0.8112, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.08897366477971942, |
| "grad_norm": 0.8582605123519897, |
| "learning_rate": 4.99905944700962e-06, |
| "loss": 0.8115, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.0893428501107556, |
| "grad_norm": 0.8941754698753357, |
| "learning_rate": 4.999046059374831e-06, |
| "loss": 0.8304, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.08971203544179178, |
| "grad_norm": 0.8905880451202393, |
| "learning_rate": 4.999032577151939e-06, |
| "loss": 0.8168, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.09008122077282796, |
| "grad_norm": 0.8816720247268677, |
| "learning_rate": 4.999019000341452e-06, |
| "loss": 0.8422, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.09045040610386414, |
| "grad_norm": 1.047232747077942, |
| "learning_rate": 4.999005328943884e-06, |
| "loss": 0.8136, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.09081959143490032, |
| "grad_norm": 0.9141537547111511, |
| "learning_rate": 4.998991562959753e-06, |
| "loss": 0.8415, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.0911887767659365, |
| "grad_norm": 0.8919954895973206, |
| "learning_rate": 4.998977702389581e-06, |
| "loss": 0.8224, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.09155796209697269, |
| "grad_norm": 0.8710380792617798, |
| "learning_rate": 4.998963747233891e-06, |
| "loss": 0.7944, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.09192714742800887, |
| "grad_norm": 0.8452226519584656, |
| "learning_rate": 4.998949697493212e-06, |
| "loss": 0.8128, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.09229633275904504, |
| "grad_norm": 0.8582141399383545, |
| "learning_rate": 4.998935553168075e-06, |
| "loss": 0.8467, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.09266551809008122, |
| "grad_norm": 0.8774867057800293, |
| "learning_rate": 4.998921314259017e-06, |
| "loss": 0.8261, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.0930347034211174, |
| "grad_norm": 0.879334568977356, |
| "learning_rate": 4.998906980766576e-06, |
| "loss": 0.8346, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.09340388875215358, |
| "grad_norm": 0.886013925075531, |
| "learning_rate": 4.998892552691294e-06, |
| "loss": 0.8562, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.09377307408318976, |
| "grad_norm": 0.8755276203155518, |
| "learning_rate": 4.998878030033717e-06, |
| "loss": 0.8005, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.09414225941422594, |
| "grad_norm": 0.9009412527084351, |
| "learning_rate": 4.998863412794396e-06, |
| "loss": 0.8057, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.09451144474526212, |
| "grad_norm": 0.9637260437011719, |
| "learning_rate": 4.998848700973883e-06, |
| "loss": 0.8285, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.09488063007629831, |
| "grad_norm": 0.8925495743751526, |
| "learning_rate": 4.9988338945727355e-06, |
| "loss": 0.8466, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.09524981540733449, |
| "grad_norm": 0.88019198179245, |
| "learning_rate": 4.998818993591513e-06, |
| "loss": 0.828, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.09561900073837067, |
| "grad_norm": 0.8707719445228577, |
| "learning_rate": 4.998803998030781e-06, |
| "loss": 0.7912, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.09598818606940684, |
| "grad_norm": 0.9469668865203857, |
| "learning_rate": 4.998788907891107e-06, |
| "loss": 0.8255, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.09635737140044302, |
| "grad_norm": 0.8590503931045532, |
| "learning_rate": 4.998773723173061e-06, |
| "loss": 0.834, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.0967265567314792, |
| "grad_norm": 0.8726522922515869, |
| "learning_rate": 4.998758443877217e-06, |
| "loss": 0.8434, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.09709574206251538, |
| "grad_norm": 0.8915356397628784, |
| "learning_rate": 4.998743070004156e-06, |
| "loss": 0.8455, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.09746492739355156, |
| "grad_norm": 0.8678056597709656, |
| "learning_rate": 4.998727601554458e-06, |
| "loss": 0.7851, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.09783411272458774, |
| "grad_norm": 0.8804232478141785, |
| "learning_rate": 4.998712038528709e-06, |
| "loss": 0.8163, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.09820329805562392, |
| "grad_norm": 0.8934099674224854, |
| "learning_rate": 4.998696380927497e-06, |
| "loss": 0.8479, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.09857248338666011, |
| "grad_norm": 0.9582729339599609, |
| "learning_rate": 4.998680628751417e-06, |
| "loss": 0.8351, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.09894166871769629, |
| "grad_norm": 0.8772808909416199, |
| "learning_rate": 4.998664782001063e-06, |
| "loss": 0.8135, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.09931085404873247, |
| "grad_norm": 0.8562557101249695, |
| "learning_rate": 4.998648840677035e-06, |
| "loss": 0.7817, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.09968003937976865, |
| "grad_norm": 0.9073139429092407, |
| "learning_rate": 4.9986328047799385e-06, |
| "loss": 0.7951, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.10004922471080482, |
| "grad_norm": 0.9205917119979858, |
| "learning_rate": 4.9986166743103774e-06, |
| "loss": 0.8265, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.100418410041841, |
| "grad_norm": 0.8653632998466492, |
| "learning_rate": 4.9986004492689644e-06, |
| "loss": 0.8301, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.10078759537287718, |
| "grad_norm": 0.9200085997581482, |
| "learning_rate": 4.9985841296563135e-06, |
| "loss": 0.8534, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.10115678070391336, |
| "grad_norm": 0.8734180331230164, |
| "learning_rate": 4.998567715473041e-06, |
| "loss": 0.8183, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.10152596603494954, |
| "grad_norm": 0.866165816783905, |
| "learning_rate": 4.99855120671977e-06, |
| "loss": 0.827, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.10189515136598573, |
| "grad_norm": 0.9260159134864807, |
| "learning_rate": 4.998534603397123e-06, |
| "loss": 0.7965, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.10226433669702191, |
| "grad_norm": 0.8854061365127563, |
| "learning_rate": 4.998517905505731e-06, |
| "loss": 0.808, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.10263352202805809, |
| "grad_norm": 0.8753185272216797, |
| "learning_rate": 4.998501113046224e-06, |
| "loss": 0.8205, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.10300270735909427, |
| "grad_norm": 0.8909716606140137, |
| "learning_rate": 4.998484226019239e-06, |
| "loss": 0.8521, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.10337189269013045, |
| "grad_norm": 0.8754370212554932, |
| "learning_rate": 4.9984672444254145e-06, |
| "loss": 0.7891, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.10374107802116662, |
| "grad_norm": 0.8687075972557068, |
| "learning_rate": 4.998450168265393e-06, |
| "loss": 0.7974, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.1041102633522028, |
| "grad_norm": 0.8216086030006409, |
| "learning_rate": 4.998432997539821e-06, |
| "loss": 0.7793, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.10447944868323898, |
| "grad_norm": 0.899731457233429, |
| "learning_rate": 4.998415732249349e-06, |
| "loss": 0.8075, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.10484863401427516, |
| "grad_norm": 0.8399525880813599, |
| "learning_rate": 4.998398372394631e-06, |
| "loss": 0.8167, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.10521781934531135, |
| "grad_norm": 0.8872588276863098, |
| "learning_rate": 4.998380917976321e-06, |
| "loss": 0.7981, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.10558700467634753, |
| "grad_norm": 0.8770443797111511, |
| "learning_rate": 4.998363368995083e-06, |
| "loss": 0.8156, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.10595619000738371, |
| "grad_norm": 0.9171691536903381, |
| "learning_rate": 4.99834572545158e-06, |
| "loss": 0.7958, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.10632537533841989, |
| "grad_norm": 0.8948536515235901, |
| "learning_rate": 4.99832798734648e-06, |
| "loss": 0.8092, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.10669456066945607, |
| "grad_norm": 0.8909181356430054, |
| "learning_rate": 4.998310154680453e-06, |
| "loss": 0.8001, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.10706374600049225, |
| "grad_norm": 0.9211814403533936, |
| "learning_rate": 4.9982922274541765e-06, |
| "loss": 0.8416, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.10743293133152843, |
| "grad_norm": 0.9189214706420898, |
| "learning_rate": 4.998274205668326e-06, |
| "loss": 0.7836, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.1078021166625646, |
| "grad_norm": 0.9062879085540771, |
| "learning_rate": 4.998256089323587e-06, |
| "loss": 0.81, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.10817130199360078, |
| "grad_norm": 0.8764585256576538, |
| "learning_rate": 4.998237878420643e-06, |
| "loss": 0.8161, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.10854048732463696, |
| "grad_norm": 0.8838504552841187, |
| "learning_rate": 4.998219572960183e-06, |
| "loss": 0.8339, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.10890967265567315, |
| "grad_norm": 0.847440779209137, |
| "learning_rate": 4.998201172942901e-06, |
| "loss": 0.8275, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.10927885798670933, |
| "grad_norm": 0.9169361591339111, |
| "learning_rate": 4.998182678369494e-06, |
| "loss": 0.8022, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.10964804331774551, |
| "grad_norm": 0.8472815752029419, |
| "learning_rate": 4.99816408924066e-06, |
| "loss": 0.8192, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.11001722864878169, |
| "grad_norm": 0.9293531775474548, |
| "learning_rate": 4.9981454055571045e-06, |
| "loss": 0.8554, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.11038641397981787, |
| "grad_norm": 0.8915771245956421, |
| "learning_rate": 4.998126627319533e-06, |
| "loss": 0.8023, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.11075559931085405, |
| "grad_norm": 0.9370061755180359, |
| "learning_rate": 4.998107754528657e-06, |
| "loss": 0.8097, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.11112478464189023, |
| "grad_norm": 0.937268853187561, |
| "learning_rate": 4.998088787185192e-06, |
| "loss": 0.8048, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.1114939699729264, |
| "grad_norm": 0.8639885187149048, |
| "learning_rate": 4.998069725289854e-06, |
| "loss": 0.7987, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.11186315530396258, |
| "grad_norm": 0.9094707369804382, |
| "learning_rate": 4.998050568843364e-06, |
| "loss": 0.8551, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.11223234063499878, |
| "grad_norm": 0.8595545291900635, |
| "learning_rate": 4.9980313178464504e-06, |
| "loss": 0.7923, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.11260152596603495, |
| "grad_norm": 0.9705724120140076, |
| "learning_rate": 4.9980119722998396e-06, |
| "loss": 0.8356, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.11297071129707113, |
| "grad_norm": 0.8938621282577515, |
| "learning_rate": 4.9979925322042635e-06, |
| "loss": 0.7797, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.11333989662810731, |
| "grad_norm": 0.8729509711265564, |
| "learning_rate": 4.9979729975604584e-06, |
| "loss": 0.799, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.11370908195914349, |
| "grad_norm": 0.8773247599601746, |
| "learning_rate": 4.997953368369164e-06, |
| "loss": 0.8094, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.11407826729017967, |
| "grad_norm": 0.8874313831329346, |
| "learning_rate": 4.997933644631122e-06, |
| "loss": 0.7785, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.11444745262121585, |
| "grad_norm": 0.9289500713348389, |
| "learning_rate": 4.997913826347082e-06, |
| "loss": 0.8127, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.11481663795225203, |
| "grad_norm": 0.901099681854248, |
| "learning_rate": 4.99789391351779e-06, |
| "loss": 0.8038, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.1151858232832882, |
| "grad_norm": 0.8916109204292297, |
| "learning_rate": 4.997873906144002e-06, |
| "loss": 0.8115, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.1155550086143244, |
| "grad_norm": 0.9170032143592834, |
| "learning_rate": 4.997853804226476e-06, |
| "loss": 0.8181, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.11592419394536058, |
| "grad_norm": 0.9100698828697205, |
| "learning_rate": 4.997833607765971e-06, |
| "loss": 0.7888, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.11629337927639675, |
| "grad_norm": 0.897360622882843, |
| "learning_rate": 4.997813316763252e-06, |
| "loss": 0.8152, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.11666256460743293, |
| "grad_norm": 0.893099308013916, |
| "learning_rate": 4.997792931219089e-06, |
| "loss": 0.7683, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.11703174993846911, |
| "grad_norm": 0.9319385290145874, |
| "learning_rate": 4.9977724511342504e-06, |
| "loss": 0.7882, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.11740093526950529, |
| "grad_norm": 0.9166727662086487, |
| "learning_rate": 4.997751876509513e-06, |
| "loss": 0.7975, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.11777012060054147, |
| "grad_norm": 0.9373429417610168, |
| "learning_rate": 4.997731207345655e-06, |
| "loss": 0.8274, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.11813930593157765, |
| "grad_norm": 0.8399270176887512, |
| "learning_rate": 4.997710443643461e-06, |
| "loss": 0.7732, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.11850849126261383, |
| "grad_norm": 0.8919075727462769, |
| "learning_rate": 4.997689585403713e-06, |
| "loss": 0.7933, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.11887767659365, |
| "grad_norm": 0.9245322942733765, |
| "learning_rate": 4.997668632627203e-06, |
| "loss": 0.8306, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.1192468619246862, |
| "grad_norm": 0.89954674243927, |
| "learning_rate": 4.997647585314723e-06, |
| "loss": 0.8254, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.11961604725572238, |
| "grad_norm": 0.9208563566207886, |
| "learning_rate": 4.9976264434670714e-06, |
| "loss": 0.8275, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.11998523258675856, |
| "grad_norm": 1.0518693923950195, |
| "learning_rate": 4.9976052070850465e-06, |
| "loss": 0.8041, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.12035441791779473, |
| "grad_norm": 0.8718807697296143, |
| "learning_rate": 4.997583876169453e-06, |
| "loss": 0.8171, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.12072360324883091, |
| "grad_norm": 0.8952045440673828, |
| "learning_rate": 4.997562450721098e-06, |
| "loss": 0.8005, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.12109278857986709, |
| "grad_norm": 0.8763337135314941, |
| "learning_rate": 4.997540930740792e-06, |
| "loss": 0.7838, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.12146197391090327, |
| "grad_norm": 0.8835309147834778, |
| "learning_rate": 4.9975193162293505e-06, |
| "loss": 0.793, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.12183115924193945, |
| "grad_norm": 0.9191171526908875, |
| "learning_rate": 4.997497607187591e-06, |
| "loss": 0.8317, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.12220034457297563, |
| "grad_norm": 0.8889843225479126, |
| "learning_rate": 4.9974758036163355e-06, |
| "loss": 0.7937, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.12256952990401182, |
| "grad_norm": 0.9089657664299011, |
| "learning_rate": 4.997453905516408e-06, |
| "loss": 0.8223, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.122938715235048, |
| "grad_norm": 0.92867112159729, |
| "learning_rate": 4.9974319128886396e-06, |
| "loss": 0.8092, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.12330790056608418, |
| "grad_norm": 0.8917028903961182, |
| "learning_rate": 4.997409825733861e-06, |
| "loss": 0.7728, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.12367708589712036, |
| "grad_norm": 0.8886356949806213, |
| "learning_rate": 4.997387644052909e-06, |
| "loss": 0.8593, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.12404627122815653, |
| "grad_norm": 0.8747639060020447, |
| "learning_rate": 4.997365367846623e-06, |
| "loss": 0.7963, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.12441545655919271, |
| "grad_norm": 0.8846672177314758, |
| "learning_rate": 4.997342997115846e-06, |
| "loss": 0.774, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.12478464189022889, |
| "grad_norm": 0.8727664947509766, |
| "learning_rate": 4.997320531861424e-06, |
| "loss": 0.7894, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.12515382722126508, |
| "grad_norm": 0.9146867394447327, |
| "learning_rate": 4.997297972084209e-06, |
| "loss": 0.788, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.12552301255230125, |
| "grad_norm": 0.8793301582336426, |
| "learning_rate": 4.997275317785053e-06, |
| "loss": 0.7873, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.12589219788333744, |
| "grad_norm": 0.899469792842865, |
| "learning_rate": 4.997252568964814e-06, |
| "loss": 0.8054, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.1262613832143736, |
| "grad_norm": 0.8407626152038574, |
| "learning_rate": 4.997229725624354e-06, |
| "loss": 0.7782, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.1266305685454098, |
| "grad_norm": 0.9121686220169067, |
| "learning_rate": 4.997206787764537e-06, |
| "loss": 0.8135, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.12699975387644596, |
| "grad_norm": 0.8709003925323486, |
| "learning_rate": 4.9971837553862324e-06, |
| "loss": 0.805, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.12736893920748216, |
| "grad_norm": 0.8822476267814636, |
| "learning_rate": 4.997160628490309e-06, |
| "loss": 0.7888, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.12773812453851835, |
| "grad_norm": 0.8969622254371643, |
| "learning_rate": 4.997137407077645e-06, |
| "loss": 0.8076, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.12810730986955451, |
| "grad_norm": 0.9052038192749023, |
| "learning_rate": 4.997114091149118e-06, |
| "loss": 0.8207, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.1284764952005907, |
| "grad_norm": 0.860755205154419, |
| "learning_rate": 4.997090680705611e-06, |
| "loss": 0.795, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.12884568053162687, |
| "grad_norm": 0.8602296710014343, |
| "learning_rate": 4.99706717574801e-06, |
| "loss": 0.8034, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.12921486586266306, |
| "grad_norm": 0.8612799644470215, |
| "learning_rate": 4.997043576277203e-06, |
| "loss": 0.7905, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.12958405119369923, |
| "grad_norm": 0.9908099174499512, |
| "learning_rate": 4.997019882294086e-06, |
| "loss": 0.8259, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.12995323652473542, |
| "grad_norm": 0.8944092988967896, |
| "learning_rate": 4.996996093799554e-06, |
| "loss": 0.7836, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.1303224218557716, |
| "grad_norm": 0.9142276644706726, |
| "learning_rate": 4.996972210794509e-06, |
| "loss": 0.8118, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.13069160718680778, |
| "grad_norm": 0.9402908682823181, |
| "learning_rate": 4.996948233279852e-06, |
| "loss": 0.8101, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.13106079251784397, |
| "grad_norm": 0.9145587682723999, |
| "learning_rate": 4.996924161256494e-06, |
| "loss": 0.8238, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.13142997784888014, |
| "grad_norm": 0.9261123538017273, |
| "learning_rate": 4.996899994725344e-06, |
| "loss": 0.8304, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.13179916317991633, |
| "grad_norm": 0.8930022120475769, |
| "learning_rate": 4.996875733687317e-06, |
| "loss": 0.7747, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.1321683485109525, |
| "grad_norm": 0.8823668360710144, |
| "learning_rate": 4.9968513781433315e-06, |
| "loss": 0.8134, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.1325375338419887, |
| "grad_norm": 0.8473939299583435, |
| "learning_rate": 4.996826928094309e-06, |
| "loss": 0.7833, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.13290671917302485, |
| "grad_norm": 0.8742851614952087, |
| "learning_rate": 4.996802383541176e-06, |
| "loss": 0.7532, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.13327590450406104, |
| "grad_norm": 0.8971150517463684, |
| "learning_rate": 4.996777744484861e-06, |
| "loss": 0.7914, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.1336450898350972, |
| "grad_norm": 0.8893861174583435, |
| "learning_rate": 4.996753010926296e-06, |
| "loss": 0.8268, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.1340142751661334, |
| "grad_norm": 0.8671346306800842, |
| "learning_rate": 4.996728182866418e-06, |
| "loss": 0.7804, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.13438346049716957, |
| "grad_norm": 0.8901523351669312, |
| "learning_rate": 4.9967032603061655e-06, |
| "loss": 0.7814, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.13475264582820576, |
| "grad_norm": 0.9003875255584717, |
| "learning_rate": 4.996678243246483e-06, |
| "loss": 0.8193, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.13512183115924195, |
| "grad_norm": 0.8973804116249084, |
| "learning_rate": 4.996653131688316e-06, |
| "loss": 0.8228, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.13549101649027812, |
| "grad_norm": 0.8611428737640381, |
| "learning_rate": 4.996627925632617e-06, |
| "loss": 0.7733, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.1358602018213143, |
| "grad_norm": 0.8708634376525879, |
| "learning_rate": 4.996602625080339e-06, |
| "loss": 0.7709, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.13622938715235047, |
| "grad_norm": 0.930029571056366, |
| "learning_rate": 4.996577230032439e-06, |
| "loss": 0.7984, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.13659857248338667, |
| "grad_norm": 0.8698320984840393, |
| "learning_rate": 4.996551740489879e-06, |
| "loss": 0.7694, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.13696775781442283, |
| "grad_norm": 0.8765986561775208, |
| "learning_rate": 4.996526156453624e-06, |
| "loss": 0.7992, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.13733694314545902, |
| "grad_norm": 0.9100019335746765, |
| "learning_rate": 4.996500477924642e-06, |
| "loss": 0.8417, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.1377061284764952, |
| "grad_norm": 0.8693497180938721, |
| "learning_rate": 4.996474704903904e-06, |
| "loss": 0.7603, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.13807531380753138, |
| "grad_norm": 0.874281644821167, |
| "learning_rate": 4.9964488373923865e-06, |
| "loss": 0.792, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.13844449913856757, |
| "grad_norm": 0.9282156825065613, |
| "learning_rate": 4.9964228753910685e-06, |
| "loss": 0.8703, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.13881368446960374, |
| "grad_norm": 0.8873798251152039, |
| "learning_rate": 4.9963968189009324e-06, |
| "loss": 0.786, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.13918286980063993, |
| "grad_norm": 0.868864119052887, |
| "learning_rate": 4.996370667922965e-06, |
| "loss": 0.7815, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.1395520551316761, |
| "grad_norm": 0.9150336980819702, |
| "learning_rate": 4.996344422458155e-06, |
| "loss": 0.7949, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.1399212404627123, |
| "grad_norm": 0.8794249892234802, |
| "learning_rate": 4.996318082507497e-06, |
| "loss": 0.7503, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.14029042579374845, |
| "grad_norm": 0.9185070395469666, |
| "learning_rate": 4.996291648071988e-06, |
| "loss": 0.8043, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.14065961112478464, |
| "grad_norm": 0.8789845108985901, |
| "learning_rate": 4.996265119152627e-06, |
| "loss": 0.7647, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.1410287964558208, |
| "grad_norm": 0.8894780278205872, |
| "learning_rate": 4.99623849575042e-06, |
| "loss": 0.788, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.141397981786857, |
| "grad_norm": 0.9412915110588074, |
| "learning_rate": 4.996211777866372e-06, |
| "loss": 0.7974, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.1417671671178932, |
| "grad_norm": 0.8994157910346985, |
| "learning_rate": 4.996184965501497e-06, |
| "loss": 0.78, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.14213635244892936, |
| "grad_norm": 0.9516313076019287, |
| "learning_rate": 4.9961580586568095e-06, |
| "loss": 0.8062, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.14250553777996555, |
| "grad_norm": 0.8979402184486389, |
| "learning_rate": 4.996131057333327e-06, |
| "loss": 0.8339, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.14287472311100172, |
| "grad_norm": 0.8623480200767517, |
| "learning_rate": 4.996103961532072e-06, |
| "loss": 0.8066, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.1432439084420379, |
| "grad_norm": 0.8892715573310852, |
| "learning_rate": 4.996076771254068e-06, |
| "loss": 0.7618, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.14361309377307407, |
| "grad_norm": 0.9217121005058289, |
| "learning_rate": 4.9960494865003486e-06, |
| "loss": 0.8128, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.14398227910411027, |
| "grad_norm": 0.9867552518844604, |
| "learning_rate": 4.996022107271942e-06, |
| "loss": 0.7973, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.14435146443514643, |
| "grad_norm": 0.903056263923645, |
| "learning_rate": 4.995994633569888e-06, |
| "loss": 0.804, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.14472064976618262, |
| "grad_norm": 0.8795948028564453, |
| "learning_rate": 4.995967065395223e-06, |
| "loss": 0.7495, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.14508983509721882, |
| "grad_norm": 0.8725371956825256, |
| "learning_rate": 4.9959394027489934e-06, |
| "loss": 0.7933, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.14545902042825498, |
| "grad_norm": 0.8918120265007019, |
| "learning_rate": 4.995911645632245e-06, |
| "loss": 0.7678, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.14582820575929117, |
| "grad_norm": 0.934451162815094, |
| "learning_rate": 4.995883794046029e-06, |
| "loss": 0.8161, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.14619739109032734, |
| "grad_norm": 0.8813429474830627, |
| "learning_rate": 4.995855847991398e-06, |
| "loss": 0.7993, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.14656657642136353, |
| "grad_norm": 0.8930681943893433, |
| "learning_rate": 4.995827807469412e-06, |
| "loss": 0.7737, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.1469357617523997, |
| "grad_norm": 0.8840509057044983, |
| "learning_rate": 4.995799672481131e-06, |
| "loss": 0.7996, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.1473049470834359, |
| "grad_norm": 0.887534499168396, |
| "learning_rate": 4.9957714430276196e-06, |
| "loss": 0.8072, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.14767413241447205, |
| "grad_norm": 0.9916796684265137, |
| "learning_rate": 4.995743119109947e-06, |
| "loss": 0.847, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.14804331774550825, |
| "grad_norm": 0.9485662579536438, |
| "learning_rate": 4.995714700729184e-06, |
| "loss": 0.8371, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.14841250307654444, |
| "grad_norm": 0.9004511833190918, |
| "learning_rate": 4.995686187886408e-06, |
| "loss": 0.7994, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.1487816884075806, |
| "grad_norm": 0.9183670282363892, |
| "learning_rate": 4.995657580582699e-06, |
| "loss": 0.7913, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.1491508737386168, |
| "grad_norm": 0.8997277617454529, |
| "learning_rate": 4.995628878819137e-06, |
| "loss": 0.7709, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.14952005906965296, |
| "grad_norm": 0.9115433096885681, |
| "learning_rate": 4.9956000825968086e-06, |
| "loss": 0.8312, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.14988924440068915, |
| "grad_norm": 0.9109799861907959, |
| "learning_rate": 4.995571191916805e-06, |
| "loss": 0.8222, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.15025842973172532, |
| "grad_norm": 0.8981993794441223, |
| "learning_rate": 4.9955422067802205e-06, |
| "loss": 0.8333, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.1506276150627615, |
| "grad_norm": 1.7644963264465332, |
| "learning_rate": 4.995513127188151e-06, |
| "loss": 0.7885, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.15099680039379768, |
| "grad_norm": 0.9482977986335754, |
| "learning_rate": 4.995483953141696e-06, |
| "loss": 0.8211, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.15136598572483387, |
| "grad_norm": 0.9331035017967224, |
| "learning_rate": 4.995454684641961e-06, |
| "loss": 0.7764, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.15173517105587006, |
| "grad_norm": 0.890426516532898, |
| "learning_rate": 4.995425321690055e-06, |
| "loss": 0.7796, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.15210435638690623, |
| "grad_norm": 0.9329193234443665, |
| "learning_rate": 4.995395864287088e-06, |
| "loss": 0.7815, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.15247354171794242, |
| "grad_norm": 0.8611469864845276, |
| "learning_rate": 4.995366312434174e-06, |
| "loss": 0.7848, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.15284272704897858, |
| "grad_norm": 0.8654336333274841, |
| "learning_rate": 4.995336666132434e-06, |
| "loss": 0.7821, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.15321191238001478, |
| "grad_norm": 0.904015064239502, |
| "learning_rate": 4.9953069253829875e-06, |
| "loss": 0.8034, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.15358109771105094, |
| "grad_norm": 0.9867867827415466, |
| "learning_rate": 4.995277090186962e-06, |
| "loss": 0.7989, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.15395028304208713, |
| "grad_norm": 0.899686336517334, |
| "learning_rate": 4.995247160545487e-06, |
| "loss": 0.7969, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.1543194683731233, |
| "grad_norm": 0.883914589881897, |
| "learning_rate": 4.995217136459693e-06, |
| "loss": 0.8133, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.1546886537041595, |
| "grad_norm": 0.8943600654602051, |
| "learning_rate": 4.995187017930718e-06, |
| "loss": 0.7676, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.15505783903519565, |
| "grad_norm": 0.8856242299079895, |
| "learning_rate": 4.995156804959702e-06, |
| "loss": 0.7129, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.15542702436623185, |
| "grad_norm": 0.9339002966880798, |
| "learning_rate": 4.9951264975477895e-06, |
| "loss": 0.7945, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.15579620969726804, |
| "grad_norm": 1.004530429840088, |
| "learning_rate": 4.995096095696126e-06, |
| "loss": 0.8219, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.1561653950283042, |
| "grad_norm": 0.9293914437294006, |
| "learning_rate": 4.995065599405862e-06, |
| "loss": 0.8059, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.1565345803593404, |
| "grad_norm": 0.897552490234375, |
| "learning_rate": 4.995035008678153e-06, |
| "loss": 0.7482, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.15690376569037656, |
| "grad_norm": 0.9051419496536255, |
| "learning_rate": 4.995004323514157e-06, |
| "loss": 0.8244, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.15727295102141275, |
| "grad_norm": 0.8879945278167725, |
| "learning_rate": 4.9949735439150335e-06, |
| "loss": 0.7653, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.15764213635244892, |
| "grad_norm": 0.901598334312439, |
| "learning_rate": 4.99494266988195e-06, |
| "loss": 0.7852, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.1580113216834851, |
| "grad_norm": 0.9374473690986633, |
| "learning_rate": 4.994911701416073e-06, |
| "loss": 0.7544, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.15838050701452128, |
| "grad_norm": 0.8810298442840576, |
| "learning_rate": 4.994880638518575e-06, |
| "loss": 0.7623, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.15874969234555747, |
| "grad_norm": 0.9226915836334229, |
| "learning_rate": 4.994849481190634e-06, |
| "loss": 0.7503, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.15911887767659366, |
| "grad_norm": 0.923017144203186, |
| "learning_rate": 4.994818229433427e-06, |
| "loss": 0.8182, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.15948806300762983, |
| "grad_norm": 0.9048720002174377, |
| "learning_rate": 4.994786883248137e-06, |
| "loss": 0.7976, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.15985724833866602, |
| "grad_norm": 0.8946258425712585, |
| "learning_rate": 4.99475544263595e-06, |
| "loss": 0.7349, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.16022643366970218, |
| "grad_norm": 0.903343915939331, |
| "learning_rate": 4.994723907598058e-06, |
| "loss": 0.7816, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.16059561900073838, |
| "grad_norm": 0.907153844833374, |
| "learning_rate": 4.994692278135653e-06, |
| "loss": 0.7753, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.16096480433177454, |
| "grad_norm": 0.950080931186676, |
| "learning_rate": 4.994660554249933e-06, |
| "loss": 0.7821, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.16133398966281073, |
| "grad_norm": 0.8720895648002625, |
| "learning_rate": 4.994628735942098e-06, |
| "loss": 0.8067, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.1617031749938469, |
| "grad_norm": 0.9267756938934326, |
| "learning_rate": 4.994596823213353e-06, |
| "loss": 0.8121, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.1620723603248831, |
| "grad_norm": 0.9312788844108582, |
| "learning_rate": 4.9945648160649054e-06, |
| "loss": 0.8059, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.16244154565591928, |
| "grad_norm": 0.8700461983680725, |
| "learning_rate": 4.994532714497966e-06, |
| "loss": 0.7626, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.16281073098695545, |
| "grad_norm": 0.8887227773666382, |
| "learning_rate": 4.9945005185137515e-06, |
| "loss": 0.7747, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.16317991631799164, |
| "grad_norm": 0.9699864983558655, |
| "learning_rate": 4.99446822811348e-06, |
| "loss": 0.7935, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.1635491016490278, |
| "grad_norm": 0.9109600782394409, |
| "learning_rate": 4.994435843298372e-06, |
| "loss": 0.7375, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.163918286980064, |
| "grad_norm": 0.8919878005981445, |
| "learning_rate": 4.994403364069656e-06, |
| "loss": 0.7691, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.16428747231110016, |
| "grad_norm": 0.9388747215270996, |
| "learning_rate": 4.994370790428559e-06, |
| "loss": 0.7982, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.16465665764213636, |
| "grad_norm": 0.9099019169807434, |
| "learning_rate": 4.994338122376315e-06, |
| "loss": 0.7594, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.16502584297317252, |
| "grad_norm": 0.9286575317382812, |
| "learning_rate": 4.994305359914161e-06, |
| "loss": 0.8153, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.1653950283042087, |
| "grad_norm": 0.896110475063324, |
| "learning_rate": 4.9942725030433356e-06, |
| "loss": 0.8013, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.1657642136352449, |
| "grad_norm": 0.8885270953178406, |
| "learning_rate": 4.994239551765083e-06, |
| "loss": 0.7642, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.16613339896628107, |
| "grad_norm": 0.8727377653121948, |
| "learning_rate": 4.994206506080651e-06, |
| "loss": 0.7358, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.16650258429731726, |
| "grad_norm": 0.9173669219017029, |
| "learning_rate": 4.9941733659912905e-06, |
| "loss": 0.8196, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.16687176962835343, |
| "grad_norm": 0.9255698323249817, |
| "learning_rate": 4.994140131498254e-06, |
| "loss": 0.8184, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.16724095495938962, |
| "grad_norm": 0.9053508639335632, |
| "learning_rate": 4.994106802602802e-06, |
| "loss": 0.7938, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.16761014029042579, |
| "grad_norm": 0.9308109283447266, |
| "learning_rate": 4.994073379306193e-06, |
| "loss": 0.8067, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.16797932562146198, |
| "grad_norm": 0.8316235542297363, |
| "learning_rate": 4.994039861609696e-06, |
| "loss": 0.7799, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.16834851095249814, |
| "grad_norm": 0.8959935903549194, |
| "learning_rate": 4.994006249514575e-06, |
| "loss": 0.797, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.16871769628353434, |
| "grad_norm": 0.8984429240226746, |
| "learning_rate": 4.993972543022106e-06, |
| "loss": 0.7763, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.16908688161457053, |
| "grad_norm": 0.8788191676139832, |
| "learning_rate": 4.9939387421335626e-06, |
| "loss": 0.811, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.1694560669456067, |
| "grad_norm": 0.921341061592102, |
| "learning_rate": 4.993904846850226e-06, |
| "loss": 0.8022, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.16982525227664289, |
| "grad_norm": 0.9294300675392151, |
| "learning_rate": 4.993870857173378e-06, |
| "loss": 0.7544, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.17019443760767905, |
| "grad_norm": 0.9679121375083923, |
| "learning_rate": 4.9938367731043035e-06, |
| "loss": 0.793, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.17056362293871524, |
| "grad_norm": 0.9177256226539612, |
| "learning_rate": 4.993802594644295e-06, |
| "loss": 0.7825, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.1709328082697514, |
| "grad_norm": 0.8937193155288696, |
| "learning_rate": 4.993768321794645e-06, |
| "loss": 0.7613, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.1713019936007876, |
| "grad_norm": 0.9800901412963867, |
| "learning_rate": 4.993733954556652e-06, |
| "loss": 0.8105, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.17167117893182376, |
| "grad_norm": 0.8884421586990356, |
| "learning_rate": 4.9936994929316155e-06, |
| "loss": 0.759, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.17204036426285996, |
| "grad_norm": 0.980379045009613, |
| "learning_rate": 4.99366493692084e-06, |
| "loss": 0.8023, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.17240954959389612, |
| "grad_norm": 0.8764387369155884, |
| "learning_rate": 4.993630286525634e-06, |
| "loss": 0.7574, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.17277873492493231, |
| "grad_norm": 0.913542628288269, |
| "learning_rate": 4.993595541747309e-06, |
| "loss": 0.8313, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.1731479202559685, |
| "grad_norm": 0.9282394647598267, |
| "learning_rate": 4.993560702587179e-06, |
| "loss": 0.7718, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.17351710558700467, |
| "grad_norm": 0.9678360223770142, |
| "learning_rate": 4.9935257690465634e-06, |
| "loss": 0.7783, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.17388629091804086, |
| "grad_norm": 0.949858546257019, |
| "learning_rate": 4.993490741126785e-06, |
| "loss": 0.814, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.17425547624907703, |
| "grad_norm": 0.8861430883407593, |
| "learning_rate": 4.9934556188291685e-06, |
| "loss": 0.8069, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.17462466158011322, |
| "grad_norm": 0.8986914157867432, |
| "learning_rate": 4.993420402155044e-06, |
| "loss": 0.7622, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.1749938469111494, |
| "grad_norm": 0.8991053104400635, |
| "learning_rate": 4.993385091105743e-06, |
| "loss": 0.7721, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.17536303224218558, |
| "grad_norm": 0.9329451322555542, |
| "learning_rate": 4.993349685682605e-06, |
| "loss": 0.7656, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.17573221757322174, |
| "grad_norm": 0.876977801322937, |
| "learning_rate": 4.993314185886967e-06, |
| "loss": 0.8482, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.17610140290425794, |
| "grad_norm": 0.9570649266242981, |
| "learning_rate": 4.9932785917201754e-06, |
| "loss": 0.8036, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.17647058823529413, |
| "grad_norm": 0.8990687727928162, |
| "learning_rate": 4.993242903183575e-06, |
| "loss": 0.7766, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.1768397735663303, |
| "grad_norm": 0.9275680780410767, |
| "learning_rate": 4.993207120278518e-06, |
| "loss": 0.8204, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.1772089588973665, |
| "grad_norm": 0.9137722253799438, |
| "learning_rate": 4.9931712430063585e-06, |
| "loss": 0.7201, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.17757814422840265, |
| "grad_norm": 0.938310980796814, |
| "learning_rate": 4.993135271368454e-06, |
| "loss": 0.7859, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.17794732955943884, |
| "grad_norm": 0.941462516784668, |
| "learning_rate": 4.993099205366166e-06, |
| "loss": 0.8033, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.178316514890475, |
| "grad_norm": 0.9050039649009705, |
| "learning_rate": 4.99306304500086e-06, |
| "loss": 0.8441, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.1786857002215112, |
| "grad_norm": 0.9100262522697449, |
| "learning_rate": 4.993026790273905e-06, |
| "loss": 0.7973, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.17905488555254737, |
| "grad_norm": 0.8839780688285828, |
| "learning_rate": 4.992990441186672e-06, |
| "loss": 0.8029, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.17942407088358356, |
| "grad_norm": 0.8999461531639099, |
| "learning_rate": 4.992953997740538e-06, |
| "loss": 0.7783, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.17979325621461975, |
| "grad_norm": 0.9286245107650757, |
| "learning_rate": 4.992917459936882e-06, |
| "loss": 0.7623, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.18016244154565592, |
| "grad_norm": 0.9243547320365906, |
| "learning_rate": 4.992880827777088e-06, |
| "loss": 0.7888, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.1805316268766921, |
| "grad_norm": 0.8967995643615723, |
| "learning_rate": 4.992844101262541e-06, |
| "loss": 0.7806, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.18090081220772827, |
| "grad_norm": 0.9205966591835022, |
| "learning_rate": 4.99280728039463e-06, |
| "loss": 0.7851, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.18126999753876447, |
| "grad_norm": 0.9394510388374329, |
| "learning_rate": 4.992770365174752e-06, |
| "loss": 0.8065, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.18163918286980063, |
| "grad_norm": 0.911313533782959, |
| "learning_rate": 4.992733355604301e-06, |
| "loss": 0.8055, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.18200836820083682, |
| "grad_norm": 0.9121047854423523, |
| "learning_rate": 4.99269625168468e-06, |
| "loss": 0.757, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.182377553531873, |
| "grad_norm": 0.8973436951637268, |
| "learning_rate": 4.9926590534172926e-06, |
| "loss": 0.7526, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.18274673886290918, |
| "grad_norm": 0.9058072566986084, |
| "learning_rate": 4.992621760803547e-06, |
| "loss": 0.8152, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.18311592419394537, |
| "grad_norm": 0.8761561512947083, |
| "learning_rate": 4.992584373844853e-06, |
| "loss": 0.7594, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.18348510952498154, |
| "grad_norm": 0.9290655851364136, |
| "learning_rate": 4.992546892542628e-06, |
| "loss": 0.801, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.18385429485601773, |
| "grad_norm": 0.9174765944480896, |
| "learning_rate": 4.99250931689829e-06, |
| "loss": 0.7823, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.1842234801870539, |
| "grad_norm": 0.9156612157821655, |
| "learning_rate": 4.992471646913261e-06, |
| "loss": 0.8129, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.1845926655180901, |
| "grad_norm": 0.9134384989738464, |
| "learning_rate": 4.992433882588967e-06, |
| "loss": 0.7436, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.18496185084912625, |
| "grad_norm": 0.9095898866653442, |
| "learning_rate": 4.9923960239268365e-06, |
| "loss": 0.7668, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.18533103618016245, |
| "grad_norm": 0.9294151663780212, |
| "learning_rate": 4.992358070928304e-06, |
| "loss": 0.8083, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.1857002215111986, |
| "grad_norm": 0.9146490693092346, |
| "learning_rate": 4.992320023594803e-06, |
| "loss": 0.7678, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.1860694068422348, |
| "grad_norm": 0.9225884079933167, |
| "learning_rate": 4.992281881927778e-06, |
| "loss": 0.7853, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.186438592173271, |
| "grad_norm": 0.9287518262863159, |
| "learning_rate": 4.992243645928669e-06, |
| "loss": 0.8154, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.18680777750430716, |
| "grad_norm": 0.9329172372817993, |
| "learning_rate": 4.992205315598926e-06, |
| "loss": 0.7989, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.18717696283534335, |
| "grad_norm": 0.9103102087974548, |
| "learning_rate": 4.9921668909399976e-06, |
| "loss": 0.7687, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.18754614816637952, |
| "grad_norm": 0.909018337726593, |
| "learning_rate": 4.992128371953339e-06, |
| "loss": 0.7794, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.1879153334974157, |
| "grad_norm": 0.8800110220909119, |
| "learning_rate": 4.992089758640407e-06, |
| "loss": 0.7365, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.18828451882845187, |
| "grad_norm": 0.9656134247779846, |
| "learning_rate": 4.992051051002665e-06, |
| "loss": 0.8176, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.18865370415948807, |
| "grad_norm": 0.9035509824752808, |
| "learning_rate": 4.992012249041578e-06, |
| "loss": 0.7532, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.18902288949052423, |
| "grad_norm": 0.9327494502067566, |
| "learning_rate": 4.9919733527586126e-06, |
| "loss": 0.7733, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.18939207482156042, |
| "grad_norm": 0.918406069278717, |
| "learning_rate": 4.991934362155243e-06, |
| "loss": 0.8047, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.18976126015259662, |
| "grad_norm": 0.9656594395637512, |
| "learning_rate": 4.991895277232944e-06, |
| "loss": 0.7923, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.19013044548363278, |
| "grad_norm": 0.9281619787216187, |
| "learning_rate": 4.991856097993195e-06, |
| "loss": 0.7801, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.19049963081466897, |
| "grad_norm": 0.8924593329429626, |
| "learning_rate": 4.99181682443748e-06, |
| "loss": 0.7822, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.19086881614570514, |
| "grad_norm": 0.9071043729782104, |
| "learning_rate": 4.991777456567284e-06, |
| "loss": 0.7975, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.19123800147674133, |
| "grad_norm": 0.9340181350708008, |
| "learning_rate": 4.991737994384097e-06, |
| "loss": 0.8108, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.1916071868077775, |
| "grad_norm": 0.8986587524414062, |
| "learning_rate": 4.991698437889414e-06, |
| "loss": 0.8141, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.1919763721388137, |
| "grad_norm": 0.8858151435852051, |
| "learning_rate": 4.991658787084732e-06, |
| "loss": 0.7736, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.19234555746984985, |
| "grad_norm": 0.9259651899337769, |
| "learning_rate": 4.991619041971551e-06, |
| "loss": 0.8044, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.19271474280088605, |
| "grad_norm": 0.8833301067352295, |
| "learning_rate": 4.991579202551376e-06, |
| "loss": 0.7753, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.1930839281319222, |
| "grad_norm": 0.8822750449180603, |
| "learning_rate": 4.991539268825713e-06, |
| "loss": 0.7369, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.1934531134629584, |
| "grad_norm": 0.900492787361145, |
| "learning_rate": 4.9914992407960765e-06, |
| "loss": 0.792, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.1938222987939946, |
| "grad_norm": 0.8839775323867798, |
| "learning_rate": 4.991459118463979e-06, |
| "loss": 0.7633, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.19419148412503076, |
| "grad_norm": 0.9306631088256836, |
| "learning_rate": 4.991418901830941e-06, |
| "loss": 0.7585, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.19456066945606695, |
| "grad_norm": 0.8982890248298645, |
| "learning_rate": 4.991378590898483e-06, |
| "loss": 0.7856, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.19492985478710312, |
| "grad_norm": 0.8740949034690857, |
| "learning_rate": 4.991338185668133e-06, |
| "loss": 0.8008, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.1952990401181393, |
| "grad_norm": 0.9367266297340393, |
| "learning_rate": 4.991297686141418e-06, |
| "loss": 0.8715, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.19566822544917548, |
| "grad_norm": 0.8908909559249878, |
| "learning_rate": 4.9912570923198724e-06, |
| "loss": 0.777, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.19603741078021167, |
| "grad_norm": 0.9466776251792908, |
| "learning_rate": 4.9912164042050315e-06, |
| "loss": 0.7857, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.19640659611124783, |
| "grad_norm": 0.9122392535209656, |
| "learning_rate": 4.991175621798436e-06, |
| "loss": 0.7454, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.19677578144228403, |
| "grad_norm": 0.9155495762825012, |
| "learning_rate": 4.99113474510163e-06, |
| "loss": 0.788, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.19714496677332022, |
| "grad_norm": 0.9131052494049072, |
| "learning_rate": 4.99109377411616e-06, |
| "loss": 0.7724, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.19751415210435638, |
| "grad_norm": 0.9260715246200562, |
| "learning_rate": 4.9910527088435766e-06, |
| "loss": 0.7539, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.19788333743539258, |
| "grad_norm": 0.9215927124023438, |
| "learning_rate": 4.991011549285434e-06, |
| "loss": 0.8061, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.19825252276642874, |
| "grad_norm": 0.9671293497085571, |
| "learning_rate": 4.990970295443291e-06, |
| "loss": 0.8186, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.19862170809746493, |
| "grad_norm": 0.8750205636024475, |
| "learning_rate": 4.990928947318708e-06, |
| "loss": 0.7434, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.1989908934285011, |
| "grad_norm": 0.9240961670875549, |
| "learning_rate": 4.990887504913251e-06, |
| "loss": 0.7365, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.1993600787595373, |
| "grad_norm": 0.9260193109512329, |
| "learning_rate": 4.990845968228488e-06, |
| "loss": 0.8067, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.19972926409057346, |
| "grad_norm": 0.9060749411582947, |
| "learning_rate": 4.990804337265991e-06, |
| "loss": 0.8146, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.20009844942160965, |
| "grad_norm": 0.9333279728889465, |
| "learning_rate": 4.9907626120273355e-06, |
| "loss": 0.7811, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.20046763475264584, |
| "grad_norm": 1.0027539730072021, |
| "learning_rate": 4.990720792514102e-06, |
| "loss": 0.7587, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.200836820083682, |
| "grad_norm": 0.9125142097473145, |
| "learning_rate": 4.9906788787278725e-06, |
| "loss": 0.8037, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.2012060054147182, |
| "grad_norm": 0.8967962861061096, |
| "learning_rate": 4.990636870670234e-06, |
| "loss": 0.7533, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.20157519074575436, |
| "grad_norm": 0.9158695936203003, |
| "learning_rate": 4.9905947683427745e-06, |
| "loss": 0.7654, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.20194437607679055, |
| "grad_norm": 0.9484332203865051, |
| "learning_rate": 4.99055257174709e-06, |
| "loss": 0.7419, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.20231356140782672, |
| "grad_norm": 0.9259840846061707, |
| "learning_rate": 4.990510280884777e-06, |
| "loss": 0.7781, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.2026827467388629, |
| "grad_norm": 0.8913872241973877, |
| "learning_rate": 4.990467895757435e-06, |
| "loss": 0.7545, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.20305193206989908, |
| "grad_norm": 0.9442530870437622, |
| "learning_rate": 4.99042541636667e-06, |
| "loss": 0.7402, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.20342111740093527, |
| "grad_norm": 0.9780921339988708, |
| "learning_rate": 4.9903828427140885e-06, |
| "loss": 0.7911, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.20379030273197146, |
| "grad_norm": 0.9168081283569336, |
| "learning_rate": 4.990340174801302e-06, |
| "loss": 0.7886, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.20415948806300763, |
| "grad_norm": 0.8821970224380493, |
| "learning_rate": 4.990297412629926e-06, |
| "loss": 0.8018, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.20452867339404382, |
| "grad_norm": 0.8984123468399048, |
| "learning_rate": 4.99025455620158e-06, |
| "loss": 0.7459, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.20489785872507998, |
| "grad_norm": 0.9142769575119019, |
| "learning_rate": 4.990211605517884e-06, |
| "loss": 0.7697, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.20526704405611618, |
| "grad_norm": 0.8822063207626343, |
| "learning_rate": 4.990168560580465e-06, |
| "loss": 0.801, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.20563622938715234, |
| "grad_norm": 0.9008304476737976, |
| "learning_rate": 4.990125421390952e-06, |
| "loss": 0.7478, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.20600541471818853, |
| "grad_norm": 0.9141461849212646, |
| "learning_rate": 4.990082187950977e-06, |
| "loss": 0.7356, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.2063746000492247, |
| "grad_norm": 0.9026126265525818, |
| "learning_rate": 4.9900388602621775e-06, |
| "loss": 0.7594, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.2067437853802609, |
| "grad_norm": 0.9227587580680847, |
| "learning_rate": 4.989995438326193e-06, |
| "loss": 0.7465, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.20711297071129708, |
| "grad_norm": 0.9264957904815674, |
| "learning_rate": 4.989951922144667e-06, |
| "loss": 0.799, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.20748215604233325, |
| "grad_norm": 0.9035301208496094, |
| "learning_rate": 4.989908311719247e-06, |
| "loss": 0.8013, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.20785134137336944, |
| "grad_norm": 0.9293225407600403, |
| "learning_rate": 4.989864607051583e-06, |
| "loss": 0.78, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.2082205267044056, |
| "grad_norm": 0.9383095502853394, |
| "learning_rate": 4.989820808143328e-06, |
| "loss": 0.7629, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.2085897120354418, |
| "grad_norm": 0.9269458055496216, |
| "learning_rate": 4.989776914996144e-06, |
| "loss": 0.7698, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.20895889736647796, |
| "grad_norm": 0.8632141351699829, |
| "learning_rate": 4.989732927611688e-06, |
| "loss": 0.7084, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.20932808269751416, |
| "grad_norm": 0.9031556844711304, |
| "learning_rate": 4.989688845991626e-06, |
| "loss": 0.7425, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.20969726802855032, |
| "grad_norm": 0.9389427304267883, |
| "learning_rate": 4.989644670137627e-06, |
| "loss": 0.847, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.2100664533595865, |
| "grad_norm": 0.8885316252708435, |
| "learning_rate": 4.9896004000513635e-06, |
| "loss": 0.768, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.2104356386906227, |
| "grad_norm": 0.9279949069023132, |
| "learning_rate": 4.989556035734511e-06, |
| "loss": 0.8118, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.21080482402165887, |
| "grad_norm": 0.9301499724388123, |
| "learning_rate": 4.989511577188748e-06, |
| "loss": 0.7743, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.21117400935269506, |
| "grad_norm": 0.9175605773925781, |
| "learning_rate": 4.989467024415757e-06, |
| "loss": 0.7543, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.21154319468373123, |
| "grad_norm": 0.8823210597038269, |
| "learning_rate": 4.989422377417225e-06, |
| "loss": 0.7554, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.21191238001476742, |
| "grad_norm": 0.9052088260650635, |
| "learning_rate": 4.989377636194842e-06, |
| "loss": 0.7853, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.21228156534580359, |
| "grad_norm": 0.9024859666824341, |
| "learning_rate": 4.9893328007503e-06, |
| "loss": 0.7822, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.21265075067683978, |
| "grad_norm": 0.8866460919380188, |
| "learning_rate": 4.989287871085299e-06, |
| "loss": 0.781, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.21301993600787594, |
| "grad_norm": 0.9139004349708557, |
| "learning_rate": 4.989242847201537e-06, |
| "loss": 0.7486, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.21338912133891214, |
| "grad_norm": 0.912174642086029, |
| "learning_rate": 4.9891977291007174e-06, |
| "loss": 0.7911, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.2137583066699483, |
| "grad_norm": 0.9372491240501404, |
| "learning_rate": 4.989152516784551e-06, |
| "loss": 0.7751, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.2141274920009845, |
| "grad_norm": 0.9477733373641968, |
| "learning_rate": 4.989107210254748e-06, |
| "loss": 0.7798, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.21449667733202069, |
| "grad_norm": 0.9927518367767334, |
| "learning_rate": 4.989061809513021e-06, |
| "loss": 0.7844, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.21486586266305685, |
| "grad_norm": 0.9412055015563965, |
| "learning_rate": 4.98901631456109e-06, |
| "loss": 0.8244, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.21523504799409304, |
| "grad_norm": 0.8675752282142639, |
| "learning_rate": 4.988970725400678e-06, |
| "loss": 0.7988, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.2156042333251292, |
| "grad_norm": 0.9113022089004517, |
| "learning_rate": 4.98892504203351e-06, |
| "loss": 0.7368, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.2159734186561654, |
| "grad_norm": 0.949691116809845, |
| "learning_rate": 4.988879264461314e-06, |
| "loss": 0.7842, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.21634260398720156, |
| "grad_norm": 0.9027278423309326, |
| "learning_rate": 4.9888333926858235e-06, |
| "loss": 0.7686, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.21671178931823776, |
| "grad_norm": 0.926006019115448, |
| "learning_rate": 4.988787426708775e-06, |
| "loss": 0.8051, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.21708097464927392, |
| "grad_norm": 0.9157929420471191, |
| "learning_rate": 4.988741366531906e-06, |
| "loss": 0.7682, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.21745015998031011, |
| "grad_norm": 0.9037911295890808, |
| "learning_rate": 4.988695212156963e-06, |
| "loss": 0.7853, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.2178193453113463, |
| "grad_norm": 0.927545964717865, |
| "learning_rate": 4.988648963585692e-06, |
| "loss": 0.8019, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.21818853064238247, |
| "grad_norm": 0.901202917098999, |
| "learning_rate": 4.988602620819843e-06, |
| "loss": 0.7602, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.21855771597341866, |
| "grad_norm": 0.92628413438797, |
| "learning_rate": 4.98855618386117e-06, |
| "loss": 0.7674, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.21892690130445483, |
| "grad_norm": 0.9130452871322632, |
| "learning_rate": 4.988509652711431e-06, |
| "loss": 0.7891, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.21929608663549102, |
| "grad_norm": 0.8740320801734924, |
| "learning_rate": 4.988463027372387e-06, |
| "loss": 0.7462, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.2196652719665272, |
| "grad_norm": 0.9068711400032043, |
| "learning_rate": 4.9884163078458026e-06, |
| "loss": 0.7934, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.22003445729756338, |
| "grad_norm": 0.8960202932357788, |
| "learning_rate": 4.988369494133447e-06, |
| "loss": 0.7855, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.22040364262859954, |
| "grad_norm": 0.8958917260169983, |
| "learning_rate": 4.98832258623709e-06, |
| "loss": 0.7641, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.22077282795963574, |
| "grad_norm": 0.9499006271362305, |
| "learning_rate": 4.988275584158509e-06, |
| "loss": 0.773, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.22114201329067193, |
| "grad_norm": 0.9037056565284729, |
| "learning_rate": 4.988228487899483e-06, |
| "loss": 0.7528, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.2215111986217081, |
| "grad_norm": 0.9397220611572266, |
| "learning_rate": 4.988181297461794e-06, |
| "loss": 0.7659, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.2218803839527443, |
| "grad_norm": 0.91253262758255, |
| "learning_rate": 4.988134012847228e-06, |
| "loss": 0.7959, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.22224956928378045, |
| "grad_norm": 0.9446122646331787, |
| "learning_rate": 4.988086634057575e-06, |
| "loss": 0.7876, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.22261875461481664, |
| "grad_norm": 0.9358010292053223, |
| "learning_rate": 4.9880391610946276e-06, |
| "loss": 0.7768, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.2229879399458528, |
| "grad_norm": 0.8857962489128113, |
| "learning_rate": 4.987991593960184e-06, |
| "loss": 0.7973, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.223357125276889, |
| "grad_norm": 0.8937807679176331, |
| "learning_rate": 4.987943932656043e-06, |
| "loss": 0.7557, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.22372631060792517, |
| "grad_norm": 0.9220109581947327, |
| "learning_rate": 4.9878961771840096e-06, |
| "loss": 0.8054, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.22409549593896136, |
| "grad_norm": 0.8997550010681152, |
| "learning_rate": 4.987848327545891e-06, |
| "loss": 0.7715, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.22446468126999755, |
| "grad_norm": 0.9468237161636353, |
| "learning_rate": 4.9878003837434986e-06, |
| "loss": 0.7738, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.22483386660103372, |
| "grad_norm": 0.9114269018173218, |
| "learning_rate": 4.987752345778647e-06, |
| "loss": 0.7656, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.2252030519320699, |
| "grad_norm": 0.9273460507392883, |
| "learning_rate": 4.987704213653154e-06, |
| "loss": 0.7221, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.22557223726310607, |
| "grad_norm": 1.0038539171218872, |
| "learning_rate": 4.987655987368842e-06, |
| "loss": 0.7798, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.22594142259414227, |
| "grad_norm": 0.9012103080749512, |
| "learning_rate": 4.987607666927535e-06, |
| "loss": 0.7526, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.22631060792517843, |
| "grad_norm": 0.9375380277633667, |
| "learning_rate": 4.987559252331064e-06, |
| "loss": 0.7864, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.22667979325621462, |
| "grad_norm": 0.9294722080230713, |
| "learning_rate": 4.98751074358126e-06, |
| "loss": 0.7441, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.2270489785872508, |
| "grad_norm": 0.9203857183456421, |
| "learning_rate": 4.9874621406799595e-06, |
| "loss": 0.7508, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.22741816391828698, |
| "grad_norm": 0.9149799346923828, |
| "learning_rate": 4.987413443629002e-06, |
| "loss": 0.764, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.22778734924932317, |
| "grad_norm": 0.9542253613471985, |
| "learning_rate": 4.987364652430231e-06, |
| "loss": 0.7515, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.22815653458035934, |
| "grad_norm": 0.9015132784843445, |
| "learning_rate": 4.9873157670854925e-06, |
| "loss": 0.7474, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.22852571991139553, |
| "grad_norm": 0.8520395159721375, |
| "learning_rate": 4.987266787596637e-06, |
| "loss": 0.7202, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.2288949052424317, |
| "grad_norm": 0.939619243144989, |
| "learning_rate": 4.987217713965519e-06, |
| "loss": 0.7998, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.2292640905734679, |
| "grad_norm": 0.9087517857551575, |
| "learning_rate": 4.9871685461939954e-06, |
| "loss": 0.7436, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.22963327590450405, |
| "grad_norm": 0.9153372049331665, |
| "learning_rate": 4.9871192842839264e-06, |
| "loss": 0.7439, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.23000246123554025, |
| "grad_norm": 0.9004572629928589, |
| "learning_rate": 4.98706992823718e-06, |
| "loss": 0.7366, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.2303716465665764, |
| "grad_norm": 0.8933371305465698, |
| "learning_rate": 4.9870204780556185e-06, |
| "loss": 0.7707, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.2307408318976126, |
| "grad_norm": 0.9173306822776794, |
| "learning_rate": 4.9869709337411184e-06, |
| "loss": 0.769, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.2311100172286488, |
| "grad_norm": 0.9018936157226562, |
| "learning_rate": 4.986921295295554e-06, |
| "loss": 0.7574, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.23147920255968496, |
| "grad_norm": 0.9065893888473511, |
| "learning_rate": 4.986871562720803e-06, |
| "loss": 0.7649, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.23184838789072115, |
| "grad_norm": 0.875457763671875, |
| "learning_rate": 4.986821736018748e-06, |
| "loss": 0.7606, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.23221757322175732, |
| "grad_norm": 0.9208522439002991, |
| "learning_rate": 4.986771815191275e-06, |
| "loss": 0.7883, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.2325867585527935, |
| "grad_norm": 0.9178383350372314, |
| "learning_rate": 4.986721800240273e-06, |
| "loss": 0.7694, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.23295594388382967, |
| "grad_norm": 0.9127592444419861, |
| "learning_rate": 4.986671691167637e-06, |
| "loss": 0.7405, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.23332512921486587, |
| "grad_norm": 0.9244971871376038, |
| "learning_rate": 4.986621487975261e-06, |
| "loss": 0.796, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.23369431454590203, |
| "grad_norm": 0.9289013147354126, |
| "learning_rate": 4.9865711906650485e-06, |
| "loss": 0.7398, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.23406349987693822, |
| "grad_norm": 0.8860224485397339, |
| "learning_rate": 4.9865207992389e-06, |
| "loss": 0.7518, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.2344326852079744, |
| "grad_norm": 0.983054518699646, |
| "learning_rate": 4.986470313698723e-06, |
| "loss": 0.772, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.23480187053901058, |
| "grad_norm": 0.896415650844574, |
| "learning_rate": 4.98641973404643e-06, |
| "loss": 0.783, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.23517105587004677, |
| "grad_norm": 0.9165555238723755, |
| "learning_rate": 4.986369060283935e-06, |
| "loss": 0.7902, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.23554024120108294, |
| "grad_norm": 0.928102970123291, |
| "learning_rate": 4.986318292413157e-06, |
| "loss": 0.7458, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.23590942653211913, |
| "grad_norm": 0.8924850821495056, |
| "learning_rate": 4.986267430436015e-06, |
| "loss": 0.7464, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.2362786118631553, |
| "grad_norm": 0.9051222801208496, |
| "learning_rate": 4.986216474354436e-06, |
| "loss": 0.7789, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.2366477971941915, |
| "grad_norm": 0.9151371717453003, |
| "learning_rate": 4.986165424170347e-06, |
| "loss": 0.7722, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.23701698252522765, |
| "grad_norm": 0.9557187557220459, |
| "learning_rate": 4.9861142798856824e-06, |
| "loss": 0.7606, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.23738616785626385, |
| "grad_norm": 0.9134449362754822, |
| "learning_rate": 4.986063041502377e-06, |
| "loss": 0.7482, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.2377553531873, |
| "grad_norm": 0.920628547668457, |
| "learning_rate": 4.9860117090223695e-06, |
| "loss": 0.7545, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.2381245385183362, |
| "grad_norm": 0.9102922081947327, |
| "learning_rate": 4.9859602824476035e-06, |
| "loss": 0.7575, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.2384937238493724, |
| "grad_norm": 0.9474995136260986, |
| "learning_rate": 4.985908761780025e-06, |
| "loss": 0.7511, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.23886290918040856, |
| "grad_norm": 0.9534257650375366, |
| "learning_rate": 4.9858571470215854e-06, |
| "loss": 0.7846, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.23923209451144475, |
| "grad_norm": 0.9420318007469177, |
| "learning_rate": 4.9858054381742374e-06, |
| "loss": 0.7846, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.23960127984248092, |
| "grad_norm": 0.9332766532897949, |
| "learning_rate": 4.9857536352399376e-06, |
| "loss": 0.7763, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.2399704651735171, |
| "grad_norm": 0.8975653648376465, |
| "learning_rate": 4.985701738220647e-06, |
| "loss": 0.7375, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.24033965050455328, |
| "grad_norm": 1.0437159538269043, |
| "learning_rate": 4.98564974711833e-06, |
| "loss": 0.7838, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.24070883583558947, |
| "grad_norm": 0.9787809252738953, |
| "learning_rate": 4.985597661934955e-06, |
| "loss": 0.8084, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.24107802116662563, |
| "grad_norm": 0.9051234722137451, |
| "learning_rate": 4.985545482672493e-06, |
| "loss": 0.7251, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.24144720649766183, |
| "grad_norm": 0.9520812630653381, |
| "learning_rate": 4.985493209332918e-06, |
| "loss": 0.7966, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.24181639182869802, |
| "grad_norm": 0.9688146114349365, |
| "learning_rate": 4.985440841918211e-06, |
| "loss": 0.7541, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.24218557715973418, |
| "grad_norm": 0.9103109240531921, |
| "learning_rate": 4.9853883804303515e-06, |
| "loss": 0.7515, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.24255476249077038, |
| "grad_norm": 0.8874120712280273, |
| "learning_rate": 4.9853358248713266e-06, |
| "loss": 0.7416, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.24292394782180654, |
| "grad_norm": 0.8995088338851929, |
| "learning_rate": 4.9852831752431256e-06, |
| "loss": 0.7567, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.24329313315284273, |
| "grad_norm": 0.9225655198097229, |
| "learning_rate": 4.98523043154774e-06, |
| "loss": 0.754, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.2436623184838789, |
| "grad_norm": 0.9014883637428284, |
| "learning_rate": 4.985177593787167e-06, |
| "loss": 0.7289, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.2440315038149151, |
| "grad_norm": 1.0416333675384521, |
| "learning_rate": 4.9851246619634054e-06, |
| "loss": 0.7705, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.24440068914595126, |
| "grad_norm": 0.9075980186462402, |
| "learning_rate": 4.9850716360784615e-06, |
| "loss": 0.7608, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.24476987447698745, |
| "grad_norm": 0.8677279949188232, |
| "learning_rate": 4.98501851613434e-06, |
| "loss": 0.7988, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.24513905980802364, |
| "grad_norm": 0.9182518124580383, |
| "learning_rate": 4.984965302133051e-06, |
| "loss": 0.7687, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.2455082451390598, |
| "grad_norm": 0.9112989902496338, |
| "learning_rate": 4.98491199407661e-06, |
| "loss": 0.7842, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.245877430470096, |
| "grad_norm": 0.8956560492515564, |
| "learning_rate": 4.984858591967035e-06, |
| "loss": 0.7418, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.24624661580113216, |
| "grad_norm": 0.9098480343818665, |
| "learning_rate": 4.984805095806346e-06, |
| "loss": 0.7978, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.24661580113216836, |
| "grad_norm": 0.8892375230789185, |
| "learning_rate": 4.9847515055965685e-06, |
| "loss": 0.7563, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.24698498646320452, |
| "grad_norm": 0.9025793671607971, |
| "learning_rate": 4.984697821339731e-06, |
| "loss": 0.7302, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.2473541717942407, |
| "grad_norm": 0.9058409333229065, |
| "learning_rate": 4.984644043037864e-06, |
| "loss": 0.7938, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.24772335712527688, |
| "grad_norm": 0.9041579365730286, |
| "learning_rate": 4.984590170693005e-06, |
| "loss": 0.7788, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.24809254245631307, |
| "grad_norm": 0.9054029583930969, |
| "learning_rate": 4.9845362043071925e-06, |
| "loss": 0.7546, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.24846172778734926, |
| "grad_norm": 0.9551224708557129, |
| "learning_rate": 4.984482143882469e-06, |
| "loss": 0.7458, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.24883091311838543, |
| "grad_norm": 0.9546729922294617, |
| "learning_rate": 4.9844279894208795e-06, |
| "loss": 0.7808, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.24920009844942162, |
| "grad_norm": 0.9500798583030701, |
| "learning_rate": 4.984373740924475e-06, |
| "loss": 0.7773, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.24956928378045778, |
| "grad_norm": 0.9165859222412109, |
| "learning_rate": 4.984319398395308e-06, |
| "loss": 0.7596, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.24993846911149398, |
| "grad_norm": 0.9380325675010681, |
| "learning_rate": 4.984264961835436e-06, |
| "loss": 0.799, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.25030765444253017, |
| "grad_norm": 0.9391986131668091, |
| "learning_rate": 4.98421043124692e-06, |
| "loss": 0.7435, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.2506768397735663, |
| "grad_norm": 1.0003539323806763, |
| "learning_rate": 4.984155806631823e-06, |
| "loss": 0.7798, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.2510460251046025, |
| "grad_norm": 0.9141901731491089, |
| "learning_rate": 4.984101087992212e-06, |
| "loss": 0.7093, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.2514152104356387, |
| "grad_norm": 1.0558290481567383, |
| "learning_rate": 4.98404627533016e-06, |
| "loss": 0.7651, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.2517843957666749, |
| "grad_norm": 0.8751945495605469, |
| "learning_rate": 4.98399136864774e-06, |
| "loss": 0.7598, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.2521535810977111, |
| "grad_norm": 0.8984244465827942, |
| "learning_rate": 4.9839363679470296e-06, |
| "loss": 0.7921, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.2525227664287472, |
| "grad_norm": 0.9515509605407715, |
| "learning_rate": 4.9838812732301134e-06, |
| "loss": 0.7781, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.2528919517597834, |
| "grad_norm": 0.9040102958679199, |
| "learning_rate": 4.983826084499074e-06, |
| "loss": 0.7435, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.2532611370908196, |
| "grad_norm": 0.8813716769218445, |
| "learning_rate": 4.983770801756001e-06, |
| "loss": 0.7675, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.2536303224218558, |
| "grad_norm": 0.9750944375991821, |
| "learning_rate": 4.983715425002987e-06, |
| "loss": 0.7389, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.25399950775289193, |
| "grad_norm": 0.8693578839302063, |
| "learning_rate": 4.983659954242128e-06, |
| "loss": 0.7325, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.2543686930839281, |
| "grad_norm": 0.934140145778656, |
| "learning_rate": 4.983604389475525e-06, |
| "loss": 0.8121, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.2547378784149643, |
| "grad_norm": 0.9266787767410278, |
| "learning_rate": 4.983548730705278e-06, |
| "loss": 0.793, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.2551070637460005, |
| "grad_norm": 0.9268523454666138, |
| "learning_rate": 4.9834929779334964e-06, |
| "loss": 0.7605, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.2554762490770367, |
| "grad_norm": 0.9014139771461487, |
| "learning_rate": 4.983437131162289e-06, |
| "loss": 0.7518, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.25584543440807284, |
| "grad_norm": 0.8949803113937378, |
| "learning_rate": 4.98338119039377e-06, |
| "loss": 0.7179, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.25621461973910903, |
| "grad_norm": 0.9687701463699341, |
| "learning_rate": 4.983325155630056e-06, |
| "loss": 0.7602, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.2565838050701452, |
| "grad_norm": 0.9221265316009521, |
| "learning_rate": 4.983269026873269e-06, |
| "loss": 0.7374, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.2569529904011814, |
| "grad_norm": 0.9037137627601624, |
| "learning_rate": 4.983212804125533e-06, |
| "loss": 0.7424, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.25732217573221755, |
| "grad_norm": 0.9276369214057922, |
| "learning_rate": 4.983156487388977e-06, |
| "loss": 0.8033, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.25769136106325374, |
| "grad_norm": 0.964596152305603, |
| "learning_rate": 4.983100076665731e-06, |
| "loss": 0.8284, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.25806054639428994, |
| "grad_norm": 0.9751665592193604, |
| "learning_rate": 4.983043571957931e-06, |
| "loss": 0.7919, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.25842973172532613, |
| "grad_norm": 0.9727154970169067, |
| "learning_rate": 4.9829869732677146e-06, |
| "loss": 0.8094, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.2587989170563623, |
| "grad_norm": 0.9635800719261169, |
| "learning_rate": 4.982930280597226e-06, |
| "loss": 0.7682, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.25916810238739846, |
| "grad_norm": 0.8952401280403137, |
| "learning_rate": 4.98287349394861e-06, |
| "loss": 0.812, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.25953728771843465, |
| "grad_norm": 0.905311107635498, |
| "learning_rate": 4.982816613324015e-06, |
| "loss": 0.7621, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.25990647304947084, |
| "grad_norm": 0.9060837030410767, |
| "learning_rate": 4.982759638725595e-06, |
| "loss": 0.7343, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.26027565838050704, |
| "grad_norm": 0.9590125679969788, |
| "learning_rate": 4.982702570155506e-06, |
| "loss": 0.7996, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.2606448437115432, |
| "grad_norm": 0.9153867959976196, |
| "learning_rate": 4.9826454076159094e-06, |
| "loss": 0.7566, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.26101402904257937, |
| "grad_norm": 0.8935747146606445, |
| "learning_rate": 4.982588151108966e-06, |
| "loss": 0.7675, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.26138321437361556, |
| "grad_norm": 0.9266985058784485, |
| "learning_rate": 4.982530800636845e-06, |
| "loss": 0.7712, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.26175239970465175, |
| "grad_norm": 0.9422087073326111, |
| "learning_rate": 4.982473356201718e-06, |
| "loss": 0.7768, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.26212158503568794, |
| "grad_norm": 0.9119973182678223, |
| "learning_rate": 4.982415817805757e-06, |
| "loss": 0.7693, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.2624907703667241, |
| "grad_norm": 0.8853102326393127, |
| "learning_rate": 4.982358185451141e-06, |
| "loss": 0.75, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.2628599556977603, |
| "grad_norm": 0.9192734360694885, |
| "learning_rate": 4.982300459140051e-06, |
| "loss": 0.7715, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.26322914102879647, |
| "grad_norm": 0.8763948082923889, |
| "learning_rate": 4.982242638874672e-06, |
| "loss": 0.7635, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.26359832635983266, |
| "grad_norm": 0.8886423707008362, |
| "learning_rate": 4.982184724657192e-06, |
| "loss": 0.753, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.2639675116908688, |
| "grad_norm": 0.9051578044891357, |
| "learning_rate": 4.9821267164898045e-06, |
| "loss": 0.7759, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.264336697021905, |
| "grad_norm": 0.9290419220924377, |
| "learning_rate": 4.9820686143747045e-06, |
| "loss": 0.8023, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.2647058823529412, |
| "grad_norm": 0.9183179140090942, |
| "learning_rate": 4.982010418314089e-06, |
| "loss": 0.7909, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.2650750676839774, |
| "grad_norm": 0.9106544256210327, |
| "learning_rate": 4.981952128310165e-06, |
| "loss": 0.7534, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.2654442530150135, |
| "grad_norm": 0.8992197513580322, |
| "learning_rate": 4.981893744365134e-06, |
| "loss": 0.7368, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.2658134383460497, |
| "grad_norm": 0.8842350840568542, |
| "learning_rate": 4.981835266481209e-06, |
| "loss": 0.762, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.2661826236770859, |
| "grad_norm": 0.9051290154457092, |
| "learning_rate": 4.9817766946606025e-06, |
| "loss": 0.7509, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.2665518090081221, |
| "grad_norm": 0.9140417575836182, |
| "learning_rate": 4.9817180289055314e-06, |
| "loss": 0.7554, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.2669209943391583, |
| "grad_norm": 0.8826530575752258, |
| "learning_rate": 4.981659269218216e-06, |
| "loss": 0.7498, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.2672901796701944, |
| "grad_norm": 0.8926510214805603, |
| "learning_rate": 4.98160041560088e-06, |
| "loss": 0.7614, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.2676593650012306, |
| "grad_norm": 0.9062178134918213, |
| "learning_rate": 4.9815414680557514e-06, |
| "loss": 0.7644, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.2680285503322668, |
| "grad_norm": 0.9631175994873047, |
| "learning_rate": 4.981482426585063e-06, |
| "loss": 0.7816, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.268397735663303, |
| "grad_norm": 0.9016267657279968, |
| "learning_rate": 4.981423291191047e-06, |
| "loss": 0.7367, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.26876692099433913, |
| "grad_norm": 0.8919848799705505, |
| "learning_rate": 4.981364061875942e-06, |
| "loss": 0.7523, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.2691361063253753, |
| "grad_norm": 0.8961866497993469, |
| "learning_rate": 4.981304738641991e-06, |
| "loss": 0.7743, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.2695052916564115, |
| "grad_norm": 1.0036101341247559, |
| "learning_rate": 4.981245321491438e-06, |
| "loss": 0.7526, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.2698744769874477, |
| "grad_norm": 0.9046428203582764, |
| "learning_rate": 4.9811858104265334e-06, |
| "loss": 0.7522, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.2702436623184839, |
| "grad_norm": 0.9048503041267395, |
| "learning_rate": 4.981126205449529e-06, |
| "loss": 0.7233, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.27061284764952004, |
| "grad_norm": 1.0362783670425415, |
| "learning_rate": 4.9810665065626805e-06, |
| "loss": 0.751, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.27098203298055623, |
| "grad_norm": 0.9431849122047424, |
| "learning_rate": 4.981006713768248e-06, |
| "loss": 0.7831, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.2713512183115924, |
| "grad_norm": 0.9257729649543762, |
| "learning_rate": 4.980946827068494e-06, |
| "loss": 0.7801, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.2717204036426286, |
| "grad_norm": 0.8980259299278259, |
| "learning_rate": 4.980886846465686e-06, |
| "loss": 0.7452, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.27208958897366475, |
| "grad_norm": 0.895393967628479, |
| "learning_rate": 4.980826771962094e-06, |
| "loss": 0.72, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.27245877430470095, |
| "grad_norm": 0.8936492800712585, |
| "learning_rate": 4.980766603559991e-06, |
| "loss": 0.7668, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.27282795963573714, |
| "grad_norm": 0.8899438977241516, |
| "learning_rate": 4.980706341261655e-06, |
| "loss": 0.7224, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.27319714496677333, |
| "grad_norm": 0.9478532075881958, |
| "learning_rate": 4.980645985069367e-06, |
| "loss": 0.7859, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.2735663302978095, |
| "grad_norm": 0.9106540083885193, |
| "learning_rate": 4.980585534985412e-06, |
| "loss": 0.7769, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.27393551562884566, |
| "grad_norm": 0.9290282130241394, |
| "learning_rate": 4.9805249910120776e-06, |
| "loss": 0.7358, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.27430470095988185, |
| "grad_norm": 0.9177589416503906, |
| "learning_rate": 4.980464353151654e-06, |
| "loss": 0.7647, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.27467388629091805, |
| "grad_norm": 0.900225818157196, |
| "learning_rate": 4.980403621406439e-06, |
| "loss": 0.7416, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.27504307162195424, |
| "grad_norm": 0.905703604221344, |
| "learning_rate": 4.980342795778728e-06, |
| "loss": 0.8068, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.2754122569529904, |
| "grad_norm": 0.8948282599449158, |
| "learning_rate": 4.980281876270826e-06, |
| "loss": 0.7597, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.27578144228402657, |
| "grad_norm": 0.9245081543922424, |
| "learning_rate": 4.980220862885038e-06, |
| "loss": 0.7406, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.27615062761506276, |
| "grad_norm": 0.8864959478378296, |
| "learning_rate": 4.980159755623673e-06, |
| "loss": 0.7464, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.27651981294609895, |
| "grad_norm": 0.9063557386398315, |
| "learning_rate": 4.9800985544890425e-06, |
| "loss": 0.77, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.27688899827713515, |
| "grad_norm": 0.8840070962905884, |
| "learning_rate": 4.9800372594834656e-06, |
| "loss": 0.7356, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.2772581836081713, |
| "grad_norm": 0.8699694275856018, |
| "learning_rate": 4.979975870609261e-06, |
| "loss": 0.747, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.2776273689392075, |
| "grad_norm": 0.9077945351600647, |
| "learning_rate": 4.979914387868753e-06, |
| "loss": 0.7428, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.27799655427024367, |
| "grad_norm": 0.9290851354598999, |
| "learning_rate": 4.979852811264267e-06, |
| "loss": 0.7509, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.27836573960127986, |
| "grad_norm": 0.928439199924469, |
| "learning_rate": 4.979791140798136e-06, |
| "loss": 0.7949, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.278734924932316, |
| "grad_norm": 0.8840503692626953, |
| "learning_rate": 4.9797293764726924e-06, |
| "loss": 0.7167, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.2791041102633522, |
| "grad_norm": 0.9051108360290527, |
| "learning_rate": 4.979667518290274e-06, |
| "loss": 0.7636, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.2794732955943884, |
| "grad_norm": 0.9400845766067505, |
| "learning_rate": 4.979605566253224e-06, |
| "loss": 0.7595, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.2798424809254246, |
| "grad_norm": 0.9284423589706421, |
| "learning_rate": 4.979543520363884e-06, |
| "loss": 0.728, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.28021166625646077, |
| "grad_norm": 0.8976213932037354, |
| "learning_rate": 4.979481380624606e-06, |
| "loss": 0.7399, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.2805808515874969, |
| "grad_norm": 0.8708903193473816, |
| "learning_rate": 4.97941914703774e-06, |
| "loss": 0.7534, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.2809500369185331, |
| "grad_norm": 0.9106154441833496, |
| "learning_rate": 4.979356819605641e-06, |
| "loss": 0.7552, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.2813192222495693, |
| "grad_norm": 0.9194098711013794, |
| "learning_rate": 4.979294398330668e-06, |
| "loss": 0.7365, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.2816884075806055, |
| "grad_norm": 0.9231695532798767, |
| "learning_rate": 4.9792318832151864e-06, |
| "loss": 0.7607, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.2820575929116416, |
| "grad_norm": 0.9608139991760254, |
| "learning_rate": 4.97916927426156e-06, |
| "loss": 0.7909, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.2824267782426778, |
| "grad_norm": 0.9300723075866699, |
| "learning_rate": 4.979106571472159e-06, |
| "loss": 0.7453, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.282795963573714, |
| "grad_norm": 0.9103767275810242, |
| "learning_rate": 4.979043774849356e-06, |
| "loss": 0.7816, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.2831651489047502, |
| "grad_norm": 0.9026121497154236, |
| "learning_rate": 4.9789808843955294e-06, |
| "loss": 0.7656, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.2835343342357864, |
| "grad_norm": 0.8946061730384827, |
| "learning_rate": 4.978917900113059e-06, |
| "loss": 0.7399, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.2839035195668225, |
| "grad_norm": 0.8899773955345154, |
| "learning_rate": 4.978854822004327e-06, |
| "loss": 0.7666, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.2842727048978587, |
| "grad_norm": 0.9117376804351807, |
| "learning_rate": 4.978791650071723e-06, |
| "loss": 0.7552, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.2846418902288949, |
| "grad_norm": 0.8872507810592651, |
| "learning_rate": 4.978728384317637e-06, |
| "loss": 0.7367, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.2850110755599311, |
| "grad_norm": 0.9358695149421692, |
| "learning_rate": 4.978665024744465e-06, |
| "loss": 0.7751, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.28538026089096724, |
| "grad_norm": 0.879411518573761, |
| "learning_rate": 4.9786015713546035e-06, |
| "loss": 0.7681, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.28574944622200343, |
| "grad_norm": 0.9180899262428284, |
| "learning_rate": 4.978538024150455e-06, |
| "loss": 0.7481, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.2861186315530396, |
| "grad_norm": 0.8577932119369507, |
| "learning_rate": 4.978474383134424e-06, |
| "loss": 0.7133, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.2864878168840758, |
| "grad_norm": 0.8630688190460205, |
| "learning_rate": 4.97841064830892e-06, |
| "loss": 0.7232, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.286857002215112, |
| "grad_norm": 0.902454137802124, |
| "learning_rate": 4.978346819676355e-06, |
| "loss": 0.7438, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.28722618754614815, |
| "grad_norm": 0.9192384481430054, |
| "learning_rate": 4.9782828972391466e-06, |
| "loss": 0.8082, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.28759537287718434, |
| "grad_norm": 0.9017093777656555, |
| "learning_rate": 4.9782188809997106e-06, |
| "loss": 0.7713, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.28796455820822053, |
| "grad_norm": 0.8741960525512695, |
| "learning_rate": 4.978154770960473e-06, |
| "loss": 0.7601, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.2883337435392567, |
| "grad_norm": 0.8619513511657715, |
| "learning_rate": 4.978090567123859e-06, |
| "loss": 0.7267, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.28870292887029286, |
| "grad_norm": 0.9079142808914185, |
| "learning_rate": 4.9780262694923e-06, |
| "loss": 0.7804, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.28907211420132906, |
| "grad_norm": 0.8758432865142822, |
| "learning_rate": 4.977961878068228e-06, |
| "loss": 0.7271, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.28944129953236525, |
| "grad_norm": 0.8986886143684387, |
| "learning_rate": 4.977897392854081e-06, |
| "loss": 0.7885, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.28981048486340144, |
| "grad_norm": 0.9059154391288757, |
| "learning_rate": 4.9778328138523e-06, |
| "loss": 0.7367, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.29017967019443763, |
| "grad_norm": 0.9500458240509033, |
| "learning_rate": 4.9777681410653295e-06, |
| "loss": 0.7784, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.29054885552547377, |
| "grad_norm": 0.9947640299797058, |
| "learning_rate": 4.977703374495616e-06, |
| "loss": 0.7704, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.29091804085650996, |
| "grad_norm": 0.9205284118652344, |
| "learning_rate": 4.977638514145612e-06, |
| "loss": 0.7906, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.29128722618754616, |
| "grad_norm": 0.925631582736969, |
| "learning_rate": 4.977573560017772e-06, |
| "loss": 0.7847, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.29165641151858235, |
| "grad_norm": 0.96160888671875, |
| "learning_rate": 4.977508512114556e-06, |
| "loss": 0.7993, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.2920255968496185, |
| "grad_norm": 0.9323878884315491, |
| "learning_rate": 4.977443370438423e-06, |
| "loss": 0.7811, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.2923947821806547, |
| "grad_norm": 0.9564676880836487, |
| "learning_rate": 4.977378134991841e-06, |
| "loss": 0.7759, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.29276396751169087, |
| "grad_norm": 0.8873472213745117, |
| "learning_rate": 4.977312805777279e-06, |
| "loss": 0.7757, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.29313315284272706, |
| "grad_norm": 0.9213703274726868, |
| "learning_rate": 4.977247382797208e-06, |
| "loss": 0.7134, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.29350233817376326, |
| "grad_norm": 0.9845170974731445, |
| "learning_rate": 4.977181866054106e-06, |
| "loss": 0.7865, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.2938715235047994, |
| "grad_norm": 0.9223014116287231, |
| "learning_rate": 4.977116255550452e-06, |
| "loss": 0.7784, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.2942407088358356, |
| "grad_norm": 0.8821209073066711, |
| "learning_rate": 4.97705055128873e-06, |
| "loss": 0.7164, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.2946098941668718, |
| "grad_norm": 0.8854329586029053, |
| "learning_rate": 4.976984753271427e-06, |
| "loss": 0.7593, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.29497907949790797, |
| "grad_norm": 0.9611573219299316, |
| "learning_rate": 4.976918861501031e-06, |
| "loss": 0.7343, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.2953482648289441, |
| "grad_norm": 0.9474232792854309, |
| "learning_rate": 4.976852875980039e-06, |
| "loss": 0.7501, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.2957174501599803, |
| "grad_norm": 0.9252268671989441, |
| "learning_rate": 4.976786796710947e-06, |
| "loss": 0.7608, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.2960866354910165, |
| "grad_norm": 0.897132396697998, |
| "learning_rate": 4.976720623696257e-06, |
| "loss": 0.7549, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.2964558208220527, |
| "grad_norm": 0.8893219828605652, |
| "learning_rate": 4.976654356938472e-06, |
| "loss": 0.7268, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.2968250061530889, |
| "grad_norm": 0.9587628841400146, |
| "learning_rate": 4.976587996440102e-06, |
| "loss": 0.7845, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.297194191484125, |
| "grad_norm": 0.8750391006469727, |
| "learning_rate": 4.976521542203658e-06, |
| "loss": 0.7305, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.2975633768151612, |
| "grad_norm": 0.9181191921234131, |
| "learning_rate": 4.976454994231656e-06, |
| "loss": 0.7969, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.2979325621461974, |
| "grad_norm": 0.934037983417511, |
| "learning_rate": 4.976388352526612e-06, |
| "loss": 0.7438, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.2983017474772336, |
| "grad_norm": 0.9424565434455872, |
| "learning_rate": 4.976321617091052e-06, |
| "loss": 0.7631, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.29867093280826973, |
| "grad_norm": 0.9459184408187866, |
| "learning_rate": 4.976254787927499e-06, |
| "loss": 0.7616, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.2990401181393059, |
| "grad_norm": 0.912196934223175, |
| "learning_rate": 4.976187865038485e-06, |
| "loss": 0.7814, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.2994093034703421, |
| "grad_norm": 0.8629269003868103, |
| "learning_rate": 4.976120848426542e-06, |
| "loss": 0.7365, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.2997784888013783, |
| "grad_norm": 0.9021220207214355, |
| "learning_rate": 4.9760537380942055e-06, |
| "loss": 0.7271, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.3001476741324145, |
| "grad_norm": 0.8790842294692993, |
| "learning_rate": 4.975986534044017e-06, |
| "loss": 0.7211, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.30051685946345064, |
| "grad_norm": 0.9232593178749084, |
| "learning_rate": 4.975919236278519e-06, |
| "loss": 0.7557, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.30088604479448683, |
| "grad_norm": 0.9208589792251587, |
| "learning_rate": 4.975851844800259e-06, |
| "loss": 0.7585, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.301255230125523, |
| "grad_norm": 0.8960398435592651, |
| "learning_rate": 4.9757843596117894e-06, |
| "loss": 0.81, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.3016244154565592, |
| "grad_norm": 0.9213392734527588, |
| "learning_rate": 4.975716780715662e-06, |
| "loss": 0.7421, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.30199360078759535, |
| "grad_norm": 0.882556140422821, |
| "learning_rate": 4.975649108114437e-06, |
| "loss": 0.7653, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.30236278611863154, |
| "grad_norm": 0.8911692500114441, |
| "learning_rate": 4.9755813418106735e-06, |
| "loss": 0.7357, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.30273197144966774, |
| "grad_norm": 0.9037520885467529, |
| "learning_rate": 4.975513481806939e-06, |
| "loss": 0.7503, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.30310115678070393, |
| "grad_norm": 0.9327360391616821, |
| "learning_rate": 4.975445528105799e-06, |
| "loss": 0.7269, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.3034703421117401, |
| "grad_norm": 0.8998024463653564, |
| "learning_rate": 4.9753774807098275e-06, |
| "loss": 0.7384, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.30383952744277626, |
| "grad_norm": 0.9088361859321594, |
| "learning_rate": 4.975309339621599e-06, |
| "loss": 0.7571, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.30420871277381245, |
| "grad_norm": 0.8943783640861511, |
| "learning_rate": 4.975241104843694e-06, |
| "loss": 0.7007, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.30457789810484864, |
| "grad_norm": 0.9370293021202087, |
| "learning_rate": 4.975172776378694e-06, |
| "loss": 0.746, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.30494708343588484, |
| "grad_norm": 0.946474552154541, |
| "learning_rate": 4.9751043542291854e-06, |
| "loss": 0.7987, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.305316268766921, |
| "grad_norm": 0.9123954772949219, |
| "learning_rate": 4.975035838397759e-06, |
| "loss": 0.7611, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.30568545409795717, |
| "grad_norm": 0.9167425632476807, |
| "learning_rate": 4.974967228887007e-06, |
| "loss": 0.7794, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.30605463942899336, |
| "grad_norm": 0.9110398292541504, |
| "learning_rate": 4.974898525699526e-06, |
| "loss": 0.763, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.30642382476002955, |
| "grad_norm": 0.9574660062789917, |
| "learning_rate": 4.974829728837917e-06, |
| "loss": 0.7552, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.3067930100910657, |
| "grad_norm": 0.9426335096359253, |
| "learning_rate": 4.974760838304784e-06, |
| "loss": 0.7404, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.3071621954221019, |
| "grad_norm": 0.9049092531204224, |
| "learning_rate": 4.974691854102734e-06, |
| "loss": 0.7394, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.3075313807531381, |
| "grad_norm": 0.8891183733940125, |
| "learning_rate": 4.974622776234379e-06, |
| "loss": 0.734, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.30790056608417427, |
| "grad_norm": 0.8763934373855591, |
| "learning_rate": 4.974553604702332e-06, |
| "loss": 0.7341, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.30826975141521046, |
| "grad_norm": 0.9248993396759033, |
| "learning_rate": 4.974484339509213e-06, |
| "loss": 0.7694, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.3086389367462466, |
| "grad_norm": 0.8956395983695984, |
| "learning_rate": 4.974414980657642e-06, |
| "loss": 0.7665, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.3090081220772828, |
| "grad_norm": 0.8790796399116516, |
| "learning_rate": 4.974345528150245e-06, |
| "loss": 0.7492, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.309377307408319, |
| "grad_norm": 0.9157189726829529, |
| "learning_rate": 4.974275981989651e-06, |
| "loss": 0.7643, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.3097464927393552, |
| "grad_norm": 0.9237195253372192, |
| "learning_rate": 4.974206342178492e-06, |
| "loss": 0.755, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.3101156780703913, |
| "grad_norm": 0.8979294896125793, |
| "learning_rate": 4.974136608719404e-06, |
| "loss": 0.8043, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.3104848634014275, |
| "grad_norm": 0.9376009702682495, |
| "learning_rate": 4.974066781615026e-06, |
| "loss": 0.7899, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.3108540487324637, |
| "grad_norm": 0.926584780216217, |
| "learning_rate": 4.973996860868001e-06, |
| "loss": 0.7436, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.3112232340634999, |
| "grad_norm": 0.8795569539070129, |
| "learning_rate": 4.973926846480975e-06, |
| "loss": 0.7399, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.3115924193945361, |
| "grad_norm": 0.9180058836936951, |
| "learning_rate": 4.973856738456599e-06, |
| "loss": 0.7332, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.3119616047255722, |
| "grad_norm": 0.9014691710472107, |
| "learning_rate": 4.973786536797527e-06, |
| "loss": 0.719, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.3123307900566084, |
| "grad_norm": 0.9873002767562866, |
| "learning_rate": 4.973716241506415e-06, |
| "loss": 0.7921, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.3126999753876446, |
| "grad_norm": 0.8903076648712158, |
| "learning_rate": 4.973645852585923e-06, |
| "loss": 0.7645, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.3130691607186808, |
| "grad_norm": 0.8644299507141113, |
| "learning_rate": 4.973575370038718e-06, |
| "loss": 0.7022, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.31343834604971693, |
| "grad_norm": 0.9160073399543762, |
| "learning_rate": 4.973504793867465e-06, |
| "loss": 0.7261, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.3138075313807531, |
| "grad_norm": 0.937968373298645, |
| "learning_rate": 4.973434124074836e-06, |
| "loss": 0.7851, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.3141767167117893, |
| "grad_norm": 0.95966637134552, |
| "learning_rate": 4.973363360663506e-06, |
| "loss": 0.7395, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.3145459020428255, |
| "grad_norm": 0.8805307745933533, |
| "learning_rate": 4.973292503636154e-06, |
| "loss": 0.7323, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.3149150873738617, |
| "grad_norm": 0.9085668921470642, |
| "learning_rate": 4.97322155299546e-06, |
| "loss": 0.6853, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.31528427270489784, |
| "grad_norm": 0.9099245071411133, |
| "learning_rate": 4.973150508744111e-06, |
| "loss": 0.7732, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.31565345803593403, |
| "grad_norm": 0.8982505202293396, |
| "learning_rate": 4.973079370884797e-06, |
| "loss": 0.7127, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.3160226433669702, |
| "grad_norm": 0.9104130864143372, |
| "learning_rate": 4.973008139420209e-06, |
| "loss": 0.7405, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.3163918286980064, |
| "grad_norm": 0.8904514908790588, |
| "learning_rate": 4.9729368143530435e-06, |
| "loss": 0.7901, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.31676101402904255, |
| "grad_norm": 0.9432110786437988, |
| "learning_rate": 4.9728653956859995e-06, |
| "loss": 0.7735, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.31713019936007875, |
| "grad_norm": 0.9377402067184448, |
| "learning_rate": 4.97279388342178e-06, |
| "loss": 0.7634, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.31749938469111494, |
| "grad_norm": 0.8846672177314758, |
| "learning_rate": 4.972722277563094e-06, |
| "loss": 0.7373, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.31786857002215113, |
| "grad_norm": 0.907082200050354, |
| "learning_rate": 4.97265057811265e-06, |
| "loss": 0.746, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.3182377553531873, |
| "grad_norm": 0.92213374376297, |
| "learning_rate": 4.972578785073161e-06, |
| "loss": 0.7361, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.31860694068422346, |
| "grad_norm": 0.950560986995697, |
| "learning_rate": 4.972506898447346e-06, |
| "loss": 0.8017, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.31897612601525965, |
| "grad_norm": 0.880368709564209, |
| "learning_rate": 4.972434918237925e-06, |
| "loss": 0.7593, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.31934531134629585, |
| "grad_norm": 1.2160859107971191, |
| "learning_rate": 4.972362844447623e-06, |
| "loss": 0.7608, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.31971449667733204, |
| "grad_norm": 0.9254541993141174, |
| "learning_rate": 4.972290677079168e-06, |
| "loss": 0.6959, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.3200836820083682, |
| "grad_norm": 0.8902239203453064, |
| "learning_rate": 4.97221841613529e-06, |
| "loss": 0.7166, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.32045286733940437, |
| "grad_norm": 0.9283355474472046, |
| "learning_rate": 4.972146061618726e-06, |
| "loss": 0.8209, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.32082205267044056, |
| "grad_norm": 0.906304657459259, |
| "learning_rate": 4.972073613532214e-06, |
| "loss": 0.7639, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.32119123800147675, |
| "grad_norm": 0.9373779892921448, |
| "learning_rate": 4.972001071878495e-06, |
| "loss": 0.7908, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.32156042333251295, |
| "grad_norm": 0.9056022763252258, |
| "learning_rate": 4.971928436660316e-06, |
| "loss": 0.768, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.3219296086635491, |
| "grad_norm": 0.8719453811645508, |
| "learning_rate": 4.971855707880426e-06, |
| "loss": 0.7401, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.3222987939945853, |
| "grad_norm": 0.9360424876213074, |
| "learning_rate": 4.971782885541578e-06, |
| "loss": 0.777, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.32266797932562147, |
| "grad_norm": 0.8848892450332642, |
| "learning_rate": 4.971709969646527e-06, |
| "loss": 0.6993, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.32303716465665766, |
| "grad_norm": 0.8988775610923767, |
| "learning_rate": 4.971636960198033e-06, |
| "loss": 0.7392, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.3234063499876938, |
| "grad_norm": 0.901785135269165, |
| "learning_rate": 4.971563857198862e-06, |
| "loss": 0.7618, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.32377553531873, |
| "grad_norm": 0.8972448110580444, |
| "learning_rate": 4.971490660651778e-06, |
| "loss": 0.7159, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.3241447206497662, |
| "grad_norm": 0.9601827263832092, |
| "learning_rate": 4.971417370559552e-06, |
| "loss": 0.748, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.3245139059808024, |
| "grad_norm": 0.8916758894920349, |
| "learning_rate": 4.97134398692496e-06, |
| "loss": 0.7598, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.32488309131183857, |
| "grad_norm": 0.8835681080818176, |
| "learning_rate": 4.971270509750778e-06, |
| "loss": 0.6888, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.3252522766428747, |
| "grad_norm": 0.9150434732437134, |
| "learning_rate": 4.971196939039786e-06, |
| "loss": 0.7612, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.3256214619739109, |
| "grad_norm": 0.8889224529266357, |
| "learning_rate": 4.97112327479477e-06, |
| "loss": 0.7409, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.3259906473049471, |
| "grad_norm": 0.8889238834381104, |
| "learning_rate": 4.971049517018518e-06, |
| "loss": 0.7537, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.3263598326359833, |
| "grad_norm": 0.9054772853851318, |
| "learning_rate": 4.970975665713822e-06, |
| "loss": 0.7535, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.3267290179670194, |
| "grad_norm": 0.9268242716789246, |
| "learning_rate": 4.970901720883477e-06, |
| "loss": 0.7688, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.3270982032980556, |
| "grad_norm": 0.9592145681381226, |
| "learning_rate": 4.970827682530282e-06, |
| "loss": 0.751, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.3274673886290918, |
| "grad_norm": 0.9229305386543274, |
| "learning_rate": 4.970753550657038e-06, |
| "loss": 0.755, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.327836573960128, |
| "grad_norm": 0.9249312281608582, |
| "learning_rate": 4.970679325266552e-06, |
| "loss": 0.7411, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.3282057592911642, |
| "grad_norm": 0.8924015164375305, |
| "learning_rate": 4.970605006361634e-06, |
| "loss": 0.7259, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.3285749446222003, |
| "grad_norm": 0.8923146724700928, |
| "learning_rate": 4.970530593945096e-06, |
| "loss": 0.75, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.3289441299532365, |
| "grad_norm": 0.9257709980010986, |
| "learning_rate": 4.9704560880197546e-06, |
| "loss": 0.7579, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.3293133152842727, |
| "grad_norm": 0.8968879580497742, |
| "learning_rate": 4.97038148858843e-06, |
| "loss": 0.7173, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.3296825006153089, |
| "grad_norm": 0.9248254299163818, |
| "learning_rate": 4.970306795653946e-06, |
| "loss": 0.7422, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.33005168594634504, |
| "grad_norm": 0.8645913600921631, |
| "learning_rate": 4.970232009219129e-06, |
| "loss": 0.7232, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.33042087127738123, |
| "grad_norm": 0.8754826188087463, |
| "learning_rate": 4.97015712928681e-06, |
| "loss": 0.7279, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.3307900566084174, |
| "grad_norm": 0.8828722238540649, |
| "learning_rate": 4.970082155859823e-06, |
| "loss": 0.7185, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.3311592419394536, |
| "grad_norm": 0.8933262825012207, |
| "learning_rate": 4.970007088941007e-06, |
| "loss": 0.7293, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.3315284272704898, |
| "grad_norm": 0.908781886100769, |
| "learning_rate": 4.9699319285332016e-06, |
| "loss": 0.7549, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.33189761260152595, |
| "grad_norm": 0.8825910091400146, |
| "learning_rate": 4.969856674639252e-06, |
| "loss": 0.767, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.33226679793256214, |
| "grad_norm": 0.9013200402259827, |
| "learning_rate": 4.969781327262008e-06, |
| "loss": 0.7292, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.33263598326359833, |
| "grad_norm": 0.944476306438446, |
| "learning_rate": 4.969705886404319e-06, |
| "loss": 0.7586, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.3330051685946345, |
| "grad_norm": 0.9089605808258057, |
| "learning_rate": 4.9696303520690415e-06, |
| "loss": 0.7238, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.33337435392567066, |
| "grad_norm": 0.88468998670578, |
| "learning_rate": 4.969554724259036e-06, |
| "loss": 0.7392, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.33374353925670686, |
| "grad_norm": 0.9044007062911987, |
| "learning_rate": 4.969479002977162e-06, |
| "loss": 0.7202, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.33411272458774305, |
| "grad_norm": 0.907281756401062, |
| "learning_rate": 4.969403188226288e-06, |
| "loss": 0.7362, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.33448190991877924, |
| "grad_norm": 0.9365758895874023, |
| "learning_rate": 4.969327280009282e-06, |
| "loss": 0.7881, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.33485109524981543, |
| "grad_norm": 0.9953451752662659, |
| "learning_rate": 4.969251278329018e-06, |
| "loss": 0.7516, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.33522028058085157, |
| "grad_norm": 0.9089345932006836, |
| "learning_rate": 4.969175183188373e-06, |
| "loss": 0.7508, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.33558946591188776, |
| "grad_norm": 0.9271081686019897, |
| "learning_rate": 4.969098994590226e-06, |
| "loss": 0.7696, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.33595865124292396, |
| "grad_norm": 0.9360471367835999, |
| "learning_rate": 4.9690227125374615e-06, |
| "loss": 0.7608, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.33632783657396015, |
| "grad_norm": 0.9032567739486694, |
| "learning_rate": 4.968946337032967e-06, |
| "loss": 0.7514, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.3366970219049963, |
| "grad_norm": 0.9877912998199463, |
| "learning_rate": 4.9688698680796325e-06, |
| "loss": 0.765, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.3370662072360325, |
| "grad_norm": 0.9205021858215332, |
| "learning_rate": 4.9687933056803525e-06, |
| "loss": 0.7753, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.33743539256706867, |
| "grad_norm": 0.8936699032783508, |
| "learning_rate": 4.968716649838025e-06, |
| "loss": 0.745, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.33780457789810486, |
| "grad_norm": 0.9072567820549011, |
| "learning_rate": 4.968639900555552e-06, |
| "loss": 0.7801, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.33817376322914106, |
| "grad_norm": 0.8919604420661926, |
| "learning_rate": 4.968563057835837e-06, |
| "loss": 0.7493, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.3385429485601772, |
| "grad_norm": 0.9033501148223877, |
| "learning_rate": 4.96848612168179e-06, |
| "loss": 0.7606, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.3389121338912134, |
| "grad_norm": 0.916556715965271, |
| "learning_rate": 4.968409092096322e-06, |
| "loss": 0.719, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.3392813192222496, |
| "grad_norm": 0.9027720093727112, |
| "learning_rate": 4.968331969082349e-06, |
| "loss": 0.7027, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.33965050455328577, |
| "grad_norm": 1.002319574356079, |
| "learning_rate": 4.96825475264279e-06, |
| "loss": 0.7999, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.3400196898843219, |
| "grad_norm": 0.9688315391540527, |
| "learning_rate": 4.968177442780568e-06, |
| "loss": 0.8079, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.3403888752153581, |
| "grad_norm": 0.9085078239440918, |
| "learning_rate": 4.968100039498609e-06, |
| "loss": 0.7369, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.3407580605463943, |
| "grad_norm": 0.9328852295875549, |
| "learning_rate": 4.968022542799842e-06, |
| "loss": 0.7864, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.3411272458774305, |
| "grad_norm": 0.9257019758224487, |
| "learning_rate": 4.9679449526872e-06, |
| "loss": 0.7838, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.3414964312084667, |
| "grad_norm": 0.9182167053222656, |
| "learning_rate": 4.9678672691636214e-06, |
| "loss": 0.7348, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.3418656165395028, |
| "grad_norm": 0.928878664970398, |
| "learning_rate": 4.967789492232046e-06, |
| "loss": 0.7564, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.342234801870539, |
| "grad_norm": 0.8896941542625427, |
| "learning_rate": 4.967711621895416e-06, |
| "loss": 0.7435, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.3426039872015752, |
| "grad_norm": 0.9109853506088257, |
| "learning_rate": 4.9676336581566795e-06, |
| "loss": 0.7818, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.3429731725326114, |
| "grad_norm": 0.899246096611023, |
| "learning_rate": 4.96755560101879e-06, |
| "loss": 0.7404, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.34334235786364753, |
| "grad_norm": 0.9120781421661377, |
| "learning_rate": 4.967477450484698e-06, |
| "loss": 0.7928, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.3437115431946837, |
| "grad_norm": 0.9362387657165527, |
| "learning_rate": 4.967399206557363e-06, |
| "loss": 0.8073, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.3440807285257199, |
| "grad_norm": 0.8933055996894836, |
| "learning_rate": 4.967320869239748e-06, |
| "loss": 0.756, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.3444499138567561, |
| "grad_norm": 0.9109773635864258, |
| "learning_rate": 4.967242438534816e-06, |
| "loss": 0.7481, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.34481909918779224, |
| "grad_norm": 0.9119990468025208, |
| "learning_rate": 4.967163914445537e-06, |
| "loss": 0.7529, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.34518828451882844, |
| "grad_norm": 0.8959357142448425, |
| "learning_rate": 4.967085296974882e-06, |
| "loss": 0.7286, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.34555746984986463, |
| "grad_norm": 0.9190980195999146, |
| "learning_rate": 4.967006586125827e-06, |
| "loss": 0.7311, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.3459266551809008, |
| "grad_norm": 0.9075052738189697, |
| "learning_rate": 4.966927781901351e-06, |
| "loss": 0.7027, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.346295840511937, |
| "grad_norm": 0.8960855603218079, |
| "learning_rate": 4.9668488843044375e-06, |
| "loss": 0.7864, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.34666502584297315, |
| "grad_norm": 0.9005295038223267, |
| "learning_rate": 4.9667698933380724e-06, |
| "loss": 0.7036, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.34703421117400934, |
| "grad_norm": 0.9100990891456604, |
| "learning_rate": 4.966690809005246e-06, |
| "loss": 0.7641, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.34740339650504554, |
| "grad_norm": 0.8951278328895569, |
| "learning_rate": 4.96661163130895e-06, |
| "loss": 0.7563, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.34777258183608173, |
| "grad_norm": 0.942742109298706, |
| "learning_rate": 4.966532360252182e-06, |
| "loss": 0.7392, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.34814176716711787, |
| "grad_norm": 0.9036753177642822, |
| "learning_rate": 4.966452995837943e-06, |
| "loss": 0.7435, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.34851095249815406, |
| "grad_norm": 0.9136203527450562, |
| "learning_rate": 4.966373538069236e-06, |
| "loss": 0.751, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.34888013782919025, |
| "grad_norm": 0.9514570832252502, |
| "learning_rate": 4.96629398694907e-06, |
| "loss": 0.7368, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.34924932316022644, |
| "grad_norm": 0.870691180229187, |
| "learning_rate": 4.966214342480455e-06, |
| "loss": 0.6951, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.34961850849126264, |
| "grad_norm": 0.8581681847572327, |
| "learning_rate": 4.966134604666405e-06, |
| "loss": 0.6636, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.3499876938222988, |
| "grad_norm": 0.912087619304657, |
| "learning_rate": 4.966054773509938e-06, |
| "loss": 0.7349, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.35035687915333497, |
| "grad_norm": 0.9115909934043884, |
| "learning_rate": 4.965974849014078e-06, |
| "loss": 0.7449, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.35072606448437116, |
| "grad_norm": 0.8997576832771301, |
| "learning_rate": 4.965894831181847e-06, |
| "loss": 0.7326, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.35109524981540735, |
| "grad_norm": 0.8838664293289185, |
| "learning_rate": 4.965814720016274e-06, |
| "loss": 0.7337, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.3514644351464435, |
| "grad_norm": 0.9265721440315247, |
| "learning_rate": 4.965734515520393e-06, |
| "loss": 0.733, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.3518336204774797, |
| "grad_norm": 0.9667180776596069, |
| "learning_rate": 4.9656542176972386e-06, |
| "loss": 0.7494, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.3522028058085159, |
| "grad_norm": 0.9301754236221313, |
| "learning_rate": 4.965573826549851e-06, |
| "loss": 0.7519, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.35257199113955207, |
| "grad_norm": 0.9007225036621094, |
| "learning_rate": 4.965493342081271e-06, |
| "loss": 0.7306, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.35294117647058826, |
| "grad_norm": 0.89749675989151, |
| "learning_rate": 4.965412764294547e-06, |
| "loss": 0.7551, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.3533103618016244, |
| "grad_norm": 0.9183578491210938, |
| "learning_rate": 4.965332093192727e-06, |
| "loss": 0.7756, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.3536795471326606, |
| "grad_norm": 0.9291596412658691, |
| "learning_rate": 4.9652513287788665e-06, |
| "loss": 0.721, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.3540487324636968, |
| "grad_norm": 0.9414975047111511, |
| "learning_rate": 4.965170471056021e-06, |
| "loss": 0.7852, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.354417917794733, |
| "grad_norm": 0.9100828766822815, |
| "learning_rate": 4.965089520027251e-06, |
| "loss": 0.7166, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.3547871031257691, |
| "grad_norm": 0.8750141263008118, |
| "learning_rate": 4.96500847569562e-06, |
| "loss": 0.7535, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.3551562884568053, |
| "grad_norm": 0.9412403702735901, |
| "learning_rate": 4.964927338064197e-06, |
| "loss": 0.7289, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.3555254737878415, |
| "grad_norm": 0.9291063547134399, |
| "learning_rate": 4.964846107136052e-06, |
| "loss": 0.7615, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.3558946591188777, |
| "grad_norm": 0.9080612063407898, |
| "learning_rate": 4.96476478291426e-06, |
| "loss": 0.7425, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.3562638444499139, |
| "grad_norm": 0.9023758769035339, |
| "learning_rate": 4.9646833654018974e-06, |
| "loss": 0.761, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.35663302978095, |
| "grad_norm": 0.9533596038818359, |
| "learning_rate": 4.964601854602049e-06, |
| "loss": 0.726, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.3570022151119862, |
| "grad_norm": 0.9523007273674011, |
| "learning_rate": 4.964520250517798e-06, |
| "loss": 0.7443, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.3573714004430224, |
| "grad_norm": 0.9014210104942322, |
| "learning_rate": 4.964438553152233e-06, |
| "loss": 0.7348, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.3577405857740586, |
| "grad_norm": 0.9457989931106567, |
| "learning_rate": 4.964356762508447e-06, |
| "loss": 0.7742, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.35810977110509473, |
| "grad_norm": 0.8954206109046936, |
| "learning_rate": 4.964274878589535e-06, |
| "loss": 0.7083, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.3584789564361309, |
| "grad_norm": 0.9402279853820801, |
| "learning_rate": 4.964192901398595e-06, |
| "loss": 0.7566, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.3588481417671671, |
| "grad_norm": 0.8966061472892761, |
| "learning_rate": 4.964110830938734e-06, |
| "loss": 0.7132, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.3592173270982033, |
| "grad_norm": 0.8585612773895264, |
| "learning_rate": 4.964028667213054e-06, |
| "loss": 0.7365, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.3595865124292395, |
| "grad_norm": 0.8799294233322144, |
| "learning_rate": 4.9639464102246675e-06, |
| "loss": 0.7253, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.35995569776027564, |
| "grad_norm": 0.9214800000190735, |
| "learning_rate": 4.963864059976686e-06, |
| "loss": 0.7317, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.36032488309131183, |
| "grad_norm": 0.9081399440765381, |
| "learning_rate": 4.9637816164722285e-06, |
| "loss": 0.746, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.360694068422348, |
| "grad_norm": 0.9382967948913574, |
| "learning_rate": 4.963699079714415e-06, |
| "loss": 0.7606, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.3610632537533842, |
| "grad_norm": 0.9069362282752991, |
| "learning_rate": 4.963616449706367e-06, |
| "loss": 0.7644, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.36143243908442035, |
| "grad_norm": 0.8491265773773193, |
| "learning_rate": 4.963533726451215e-06, |
| "loss": 0.738, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.36180162441545655, |
| "grad_norm": 0.9470365643501282, |
| "learning_rate": 4.963450909952089e-06, |
| "loss": 0.7655, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.36217080974649274, |
| "grad_norm": 0.8890638947486877, |
| "learning_rate": 4.963368000212123e-06, |
| "loss": 0.7089, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.36253999507752893, |
| "grad_norm": 0.9325974583625793, |
| "learning_rate": 4.963284997234456e-06, |
| "loss": 0.775, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.3629091804085651, |
| "grad_norm": 0.874653160572052, |
| "learning_rate": 4.96320190102223e-06, |
| "loss": 0.7289, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.36327836573960126, |
| "grad_norm": 0.9684560894966125, |
| "learning_rate": 4.9631187115785885e-06, |
| "loss": 0.7216, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.36364755107063745, |
| "grad_norm": 0.9118450284004211, |
| "learning_rate": 4.963035428906681e-06, |
| "loss": 0.7647, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.36401673640167365, |
| "grad_norm": 0.9047130346298218, |
| "learning_rate": 4.96295205300966e-06, |
| "loss": 0.748, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.36438592173270984, |
| "grad_norm": 0.9111741185188293, |
| "learning_rate": 4.962868583890682e-06, |
| "loss": 0.7079, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.364755107063746, |
| "grad_norm": 0.9440281391143799, |
| "learning_rate": 4.962785021552904e-06, |
| "loss": 0.7544, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.36512429239478217, |
| "grad_norm": 0.9372045397758484, |
| "learning_rate": 4.962701365999491e-06, |
| "loss": 0.7232, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.36549347772581836, |
| "grad_norm": 0.9209040999412537, |
| "learning_rate": 4.962617617233608e-06, |
| "loss": 0.7434, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.36586266305685455, |
| "grad_norm": 0.9633092284202576, |
| "learning_rate": 4.962533775258426e-06, |
| "loss": 0.7531, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.36623184838789075, |
| "grad_norm": 0.9297153353691101, |
| "learning_rate": 4.962449840077118e-06, |
| "loss": 0.7644, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.3666010337189269, |
| "grad_norm": 0.9207053184509277, |
| "learning_rate": 4.96236581169286e-06, |
| "loss": 0.7789, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.3669702190499631, |
| "grad_norm": 0.9333449602127075, |
| "learning_rate": 4.962281690108834e-06, |
| "loss": 0.8054, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.36733940438099927, |
| "grad_norm": 0.9394077658653259, |
| "learning_rate": 4.962197475328222e-06, |
| "loss": 0.7472, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.36770858971203546, |
| "grad_norm": 0.980050802230835, |
| "learning_rate": 4.962113167354213e-06, |
| "loss": 0.7505, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.3680777750430716, |
| "grad_norm": 0.9034879803657532, |
| "learning_rate": 4.962028766189999e-06, |
| "loss": 0.7232, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.3684469603741078, |
| "grad_norm": 0.9549526572227478, |
| "learning_rate": 4.961944271838772e-06, |
| "loss": 0.7566, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.368816145705144, |
| "grad_norm": 0.9430050253868103, |
| "learning_rate": 4.961859684303731e-06, |
| "loss": 0.7485, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.3691853310361802, |
| "grad_norm": 1.0592334270477295, |
| "learning_rate": 4.961775003588079e-06, |
| "loss": 0.7219, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.36955451636721637, |
| "grad_norm": 0.9190243482589722, |
| "learning_rate": 4.96169022969502e-06, |
| "loss": 0.7679, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.3699237016982525, |
| "grad_norm": 0.9574081301689148, |
| "learning_rate": 4.961605362627761e-06, |
| "loss": 0.7678, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.3702928870292887, |
| "grad_norm": 0.9283050298690796, |
| "learning_rate": 4.961520402389517e-06, |
| "loss": 0.7584, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.3706620723603249, |
| "grad_norm": 0.9419953227043152, |
| "learning_rate": 4.961435348983503e-06, |
| "loss": 0.7955, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.3710312576913611, |
| "grad_norm": 0.9116794466972351, |
| "learning_rate": 4.961350202412938e-06, |
| "loss": 0.7739, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.3714004430223972, |
| "grad_norm": 0.8843052387237549, |
| "learning_rate": 4.961264962681044e-06, |
| "loss": 0.6978, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.3717696283534334, |
| "grad_norm": 0.9120619297027588, |
| "learning_rate": 4.961179629791049e-06, |
| "loss": 0.7662, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.3721388136844696, |
| "grad_norm": 0.9376091361045837, |
| "learning_rate": 4.961094203746181e-06, |
| "loss": 0.7636, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.3725079990155058, |
| "grad_norm": 0.9145896434783936, |
| "learning_rate": 4.961008684549674e-06, |
| "loss": 0.7355, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.372877184346542, |
| "grad_norm": 0.8806540966033936, |
| "learning_rate": 4.960923072204765e-06, |
| "loss": 0.7709, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.3732463696775781, |
| "grad_norm": 0.8880794048309326, |
| "learning_rate": 4.9608373667146945e-06, |
| "loss": 0.7272, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.3736155550086143, |
| "grad_norm": 0.9063923358917236, |
| "learning_rate": 4.9607515680827065e-06, |
| "loss": 0.7184, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.3739847403396505, |
| "grad_norm": 0.8815094232559204, |
| "learning_rate": 4.960665676312047e-06, |
| "loss": 0.753, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.3743539256706867, |
| "grad_norm": 0.899018406867981, |
| "learning_rate": 4.96057969140597e-06, |
| "loss": 0.7388, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.37472311100172284, |
| "grad_norm": 0.9038127064704895, |
| "learning_rate": 4.960493613367728e-06, |
| "loss": 0.7131, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.37509229633275903, |
| "grad_norm": 0.890527069568634, |
| "learning_rate": 4.960407442200579e-06, |
| "loss": 0.7646, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.3754614816637952, |
| "grad_norm": 0.9003925919532776, |
| "learning_rate": 4.9603211779077845e-06, |
| "loss": 0.7285, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.3758306669948314, |
| "grad_norm": 0.9128808975219727, |
| "learning_rate": 4.96023482049261e-06, |
| "loss": 0.7608, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.3761998523258676, |
| "grad_norm": 0.9050713181495667, |
| "learning_rate": 4.960148369958324e-06, |
| "loss": 0.7375, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.37656903765690375, |
| "grad_norm": 0.9218745827674866, |
| "learning_rate": 4.960061826308199e-06, |
| "loss": 0.7222, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.37693822298793994, |
| "grad_norm": 0.9460575580596924, |
| "learning_rate": 4.95997518954551e-06, |
| "loss": 0.7733, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.37730740831897613, |
| "grad_norm": 0.9175562262535095, |
| "learning_rate": 4.959888459673536e-06, |
| "loss": 0.7418, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.3776765936500123, |
| "grad_norm": 0.9456244707107544, |
| "learning_rate": 4.959801636695561e-06, |
| "loss": 0.7552, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.37804577898104846, |
| "grad_norm": 0.8985305428504944, |
| "learning_rate": 4.959714720614871e-06, |
| "loss": 0.7366, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.37841496431208466, |
| "grad_norm": 0.9268773794174194, |
| "learning_rate": 4.959627711434753e-06, |
| "loss": 0.7408, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.37878414964312085, |
| "grad_norm": 0.9267814755439758, |
| "learning_rate": 4.959540609158504e-06, |
| "loss": 0.7589, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.37915333497415704, |
| "grad_norm": 0.905430018901825, |
| "learning_rate": 4.959453413789419e-06, |
| "loss": 0.7404, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.37952252030519323, |
| "grad_norm": 0.9021572470664978, |
| "learning_rate": 4.959366125330798e-06, |
| "loss": 0.7399, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.37989170563622937, |
| "grad_norm": 0.9395810961723328, |
| "learning_rate": 4.9592787437859455e-06, |
| "loss": 0.7259, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.38026089096726556, |
| "grad_norm": 0.8881012201309204, |
| "learning_rate": 4.959191269158169e-06, |
| "loss": 0.7365, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.38063007629830176, |
| "grad_norm": 0.9610297679901123, |
| "learning_rate": 4.959103701450779e-06, |
| "loss": 0.7141, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.38099926162933795, |
| "grad_norm": 0.8691598176956177, |
| "learning_rate": 4.959016040667089e-06, |
| "loss": 0.725, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.3813684469603741, |
| "grad_norm": 0.912972092628479, |
| "learning_rate": 4.9589282868104195e-06, |
| "loss": 0.6629, |
| "step": 1033 |
| }, |
| { |
| "epoch": 0.3817376322914103, |
| "grad_norm": 0.9238312840461731, |
| "learning_rate": 4.95884043988409e-06, |
| "loss": 0.7799, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.38210681762244647, |
| "grad_norm": 0.8956131935119629, |
| "learning_rate": 4.9587524998914255e-06, |
| "loss": 0.759, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.38247600295348266, |
| "grad_norm": 0.8826711177825928, |
| "learning_rate": 4.958664466835756e-06, |
| "loss": 0.7486, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.38284518828451886, |
| "grad_norm": 0.9134225845336914, |
| "learning_rate": 4.95857634072041e-06, |
| "loss": 0.7608, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.383214373615555, |
| "grad_norm": 0.9029486179351807, |
| "learning_rate": 4.958488121548727e-06, |
| "loss": 0.7254, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.3835835589465912, |
| "grad_norm": 0.8955227732658386, |
| "learning_rate": 4.958399809324045e-06, |
| "loss": 0.7928, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.3839527442776274, |
| "grad_norm": 0.8920780420303345, |
| "learning_rate": 4.958311404049705e-06, |
| "loss": 0.7654, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.38432192960866357, |
| "grad_norm": 0.9059119820594788, |
| "learning_rate": 4.958222905729055e-06, |
| "loss": 0.7263, |
| "step": 1041 |
| }, |
| { |
| "epoch": 0.3846911149396997, |
| "grad_norm": 0.8846084475517273, |
| "learning_rate": 4.958134314365443e-06, |
| "loss": 0.7119, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.3850603002707359, |
| "grad_norm": 0.8834053874015808, |
| "learning_rate": 4.9580456299622235e-06, |
| "loss": 0.7113, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.3854294856017721, |
| "grad_norm": 0.9049264192581177, |
| "learning_rate": 4.957956852522753e-06, |
| "loss": 0.7233, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.3857986709328083, |
| "grad_norm": 0.8891218900680542, |
| "learning_rate": 4.9578679820503905e-06, |
| "loss": 0.6903, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.3861678562638444, |
| "grad_norm": 0.8697208166122437, |
| "learning_rate": 4.957779018548501e-06, |
| "loss": 0.748, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.3865370415948806, |
| "grad_norm": 0.9173257946968079, |
| "learning_rate": 4.957689962020452e-06, |
| "loss": 0.762, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.3869062269259168, |
| "grad_norm": 0.9417243003845215, |
| "learning_rate": 4.957600812469613e-06, |
| "loss": 0.7513, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.387275412256953, |
| "grad_norm": 0.9135996103286743, |
| "learning_rate": 4.95751156989936e-06, |
| "loss": 0.7303, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.3876445975879892, |
| "grad_norm": 0.8908846378326416, |
| "learning_rate": 4.957422234313068e-06, |
| "loss": 0.7606, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.38801378291902533, |
| "grad_norm": 0.8868354558944702, |
| "learning_rate": 4.95733280571412e-06, |
| "loss": 0.7219, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.3883829682500615, |
| "grad_norm": 0.9057561755180359, |
| "learning_rate": 4.957243284105902e-06, |
| "loss": 0.7392, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.3887521535810977, |
| "grad_norm": 0.9165422916412354, |
| "learning_rate": 4.9571536694918e-06, |
| "loss": 0.7682, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.3891213389121339, |
| "grad_norm": 0.9119623899459839, |
| "learning_rate": 4.957063961875208e-06, |
| "loss": 0.714, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.38949052424317004, |
| "grad_norm": 0.9237239956855774, |
| "learning_rate": 4.95697416125952e-06, |
| "loss": 0.7394, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.38985970957420624, |
| "grad_norm": 0.8836085796356201, |
| "learning_rate": 4.956884267648136e-06, |
| "loss": 0.7101, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.39022889490524243, |
| "grad_norm": 0.8749224543571472, |
| "learning_rate": 4.956794281044458e-06, |
| "loss": 0.7181, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.3905980802362786, |
| "grad_norm": 0.9028134942054749, |
| "learning_rate": 4.956704201451891e-06, |
| "loss": 0.7268, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.3909672655673148, |
| "grad_norm": 0.8953654170036316, |
| "learning_rate": 4.956614028873846e-06, |
| "loss": 0.7691, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.39133645089835095, |
| "grad_norm": 0.881486713886261, |
| "learning_rate": 4.956523763313736e-06, |
| "loss": 0.7499, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.39170563622938714, |
| "grad_norm": 0.8811509609222412, |
| "learning_rate": 4.956433404774975e-06, |
| "loss": 0.718, |
| "step": 1061 |
| }, |
| { |
| "epoch": 0.39207482156042334, |
| "grad_norm": 0.9057355523109436, |
| "learning_rate": 4.956342953260986e-06, |
| "loss": 0.7533, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.39244400689145953, |
| "grad_norm": 0.9343195557594299, |
| "learning_rate": 4.956252408775191e-06, |
| "loss": 0.7808, |
| "step": 1063 |
| }, |
| { |
| "epoch": 0.39281319222249567, |
| "grad_norm": 0.8957801461219788, |
| "learning_rate": 4.9561617713210174e-06, |
| "loss": 0.7304, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.39318237755353186, |
| "grad_norm": 0.8987041115760803, |
| "learning_rate": 4.956071040901897e-06, |
| "loss": 0.745, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.39355156288456805, |
| "grad_norm": 0.9891381859779358, |
| "learning_rate": 4.955980217521263e-06, |
| "loss": 0.761, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.39392074821560424, |
| "grad_norm": 0.9239757061004639, |
| "learning_rate": 4.955889301182551e-06, |
| "loss": 0.7811, |
| "step": 1067 |
| }, |
| { |
| "epoch": 0.39428993354664044, |
| "grad_norm": 0.9178396463394165, |
| "learning_rate": 4.955798291889205e-06, |
| "loss": 0.7447, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.3946591188776766, |
| "grad_norm": 0.927836537361145, |
| "learning_rate": 4.955707189644669e-06, |
| "loss": 0.7527, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.39502830420871277, |
| "grad_norm": 0.9121710658073425, |
| "learning_rate": 4.955615994452391e-06, |
| "loss": 0.7283, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.39539748953974896, |
| "grad_norm": 0.8991080522537231, |
| "learning_rate": 4.955524706315822e-06, |
| "loss": 0.7521, |
| "step": 1071 |
| }, |
| { |
| "epoch": 0.39576667487078515, |
| "grad_norm": 0.9071163535118103, |
| "learning_rate": 4.955433325238418e-06, |
| "loss": 0.7227, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.3961358602018213, |
| "grad_norm": 0.9359886646270752, |
| "learning_rate": 4.955341851223639e-06, |
| "loss": 0.7888, |
| "step": 1073 |
| }, |
| { |
| "epoch": 0.3965050455328575, |
| "grad_norm": 0.9484068155288696, |
| "learning_rate": 4.955250284274944e-06, |
| "loss": 0.7609, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.3968742308638937, |
| "grad_norm": 0.9310367107391357, |
| "learning_rate": 4.9551586243958e-06, |
| "loss": 0.7698, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.39724341619492987, |
| "grad_norm": 0.9183504581451416, |
| "learning_rate": 4.955066871589679e-06, |
| "loss": 0.7442, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.39761260152596606, |
| "grad_norm": 0.9239982962608337, |
| "learning_rate": 4.954975025860051e-06, |
| "loss": 0.7485, |
| "step": 1077 |
| }, |
| { |
| "epoch": 0.3979817868570022, |
| "grad_norm": 0.9956908226013184, |
| "learning_rate": 4.954883087210393e-06, |
| "loss": 0.7817, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.3983509721880384, |
| "grad_norm": 0.897830069065094, |
| "learning_rate": 4.9547910556441845e-06, |
| "loss": 0.7241, |
| "step": 1079 |
| }, |
| { |
| "epoch": 0.3987201575190746, |
| "grad_norm": 0.8917036652565002, |
| "learning_rate": 4.95469893116491e-06, |
| "loss": 0.7047, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.3990893428501108, |
| "grad_norm": 0.8943704962730408, |
| "learning_rate": 4.954606713776056e-06, |
| "loss": 0.7071, |
| "step": 1081 |
| }, |
| { |
| "epoch": 0.3994585281811469, |
| "grad_norm": 0.8890754580497742, |
| "learning_rate": 4.954514403481112e-06, |
| "loss": 0.7295, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.3998277135121831, |
| "grad_norm": 0.9230964779853821, |
| "learning_rate": 4.954422000283572e-06, |
| "loss": 0.7491, |
| "step": 1083 |
| }, |
| { |
| "epoch": 0.4001968988432193, |
| "grad_norm": 0.9171218276023865, |
| "learning_rate": 4.954329504186935e-06, |
| "loss": 0.7463, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.4005660841742555, |
| "grad_norm": 0.8871411085128784, |
| "learning_rate": 4.954236915194699e-06, |
| "loss": 0.7312, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.4009352695052917, |
| "grad_norm": 0.9313485026359558, |
| "learning_rate": 4.954144233310372e-06, |
| "loss": 0.7129, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.4013044548363278, |
| "grad_norm": 0.9138079881668091, |
| "learning_rate": 4.95405145853746e-06, |
| "loss": 0.746, |
| "step": 1087 |
| }, |
| { |
| "epoch": 0.401673640167364, |
| "grad_norm": 0.9450967907905579, |
| "learning_rate": 4.9539585908794746e-06, |
| "loss": 0.7811, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.4020428254984002, |
| "grad_norm": 0.9284427762031555, |
| "learning_rate": 4.9538656303399314e-06, |
| "loss": 0.74, |
| "step": 1089 |
| }, |
| { |
| "epoch": 0.4024120108294364, |
| "grad_norm": 0.9271199107170105, |
| "learning_rate": 4.953772576922348e-06, |
| "loss": 0.7622, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.40278119616047253, |
| "grad_norm": 0.8856538534164429, |
| "learning_rate": 4.953679430630247e-06, |
| "loss": 0.6994, |
| "step": 1091 |
| }, |
| { |
| "epoch": 0.4031503814915087, |
| "grad_norm": 0.9314479231834412, |
| "learning_rate": 4.953586191467155e-06, |
| "loss": 0.7583, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.4035195668225449, |
| "grad_norm": 0.9417778253555298, |
| "learning_rate": 4.953492859436599e-06, |
| "loss": 0.7601, |
| "step": 1093 |
| }, |
| { |
| "epoch": 0.4038887521535811, |
| "grad_norm": 0.8887828588485718, |
| "learning_rate": 4.953399434542112e-06, |
| "loss": 0.744, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.4042579374846173, |
| "grad_norm": 0.926156759262085, |
| "learning_rate": 4.953305916787232e-06, |
| "loss": 0.7113, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.40462712281565344, |
| "grad_norm": 0.9664729833602905, |
| "learning_rate": 4.9532123061754966e-06, |
| "loss": 0.733, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.40499630814668963, |
| "grad_norm": 0.9112409353256226, |
| "learning_rate": 4.953118602710449e-06, |
| "loss": 0.764, |
| "step": 1097 |
| }, |
| { |
| "epoch": 0.4053654934777258, |
| "grad_norm": 0.9250743389129639, |
| "learning_rate": 4.9530248063956375e-06, |
| "loss": 0.7536, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.405734678808762, |
| "grad_norm": 0.8889137506484985, |
| "learning_rate": 4.952930917234612e-06, |
| "loss": 0.7208, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.40610386413979815, |
| "grad_norm": 0.9497808814048767, |
| "learning_rate": 4.952836935230924e-06, |
| "loss": 0.7263, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.40647304947083435, |
| "grad_norm": 0.9288194179534912, |
| "learning_rate": 4.952742860388133e-06, |
| "loss": 0.7321, |
| "step": 1101 |
| }, |
| { |
| "epoch": 0.40684223480187054, |
| "grad_norm": 0.9409608244895935, |
| "learning_rate": 4.952648692709798e-06, |
| "loss": 0.7375, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.40721142013290673, |
| "grad_norm": 0.8995351195335388, |
| "learning_rate": 4.952554432199485e-06, |
| "loss": 0.7626, |
| "step": 1103 |
| }, |
| { |
| "epoch": 0.4075806054639429, |
| "grad_norm": 0.9207445979118347, |
| "learning_rate": 4.95246007886076e-06, |
| "loss": 0.7324, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.40794979079497906, |
| "grad_norm": 0.9042755365371704, |
| "learning_rate": 4.9523656326971954e-06, |
| "loss": 0.7281, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.40831897612601525, |
| "grad_norm": 0.9243267178535461, |
| "learning_rate": 4.952271093712366e-06, |
| "loss": 0.7536, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.40868816145705145, |
| "grad_norm": 0.9243746399879456, |
| "learning_rate": 4.952176461909849e-06, |
| "loss": 0.7514, |
| "step": 1107 |
| }, |
| { |
| "epoch": 0.40905734678808764, |
| "grad_norm": 0.9338531494140625, |
| "learning_rate": 4.952081737293227e-06, |
| "loss": 0.7443, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.4094265321191238, |
| "grad_norm": 0.9238013625144958, |
| "learning_rate": 4.951986919866085e-06, |
| "loss": 0.6872, |
| "step": 1109 |
| }, |
| { |
| "epoch": 0.40979571745015997, |
| "grad_norm": 0.9439871907234192, |
| "learning_rate": 4.951892009632012e-06, |
| "loss": 0.7577, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.41016490278119616, |
| "grad_norm": 0.9026377201080322, |
| "learning_rate": 4.951797006594601e-06, |
| "loss": 0.7025, |
| "step": 1111 |
| }, |
| { |
| "epoch": 0.41053408811223235, |
| "grad_norm": 0.9088364839553833, |
| "learning_rate": 4.951701910757446e-06, |
| "loss": 0.7286, |
| "step": 1112 |
| }, |
| { |
| "epoch": 0.41090327344326855, |
| "grad_norm": 0.9279249906539917, |
| "learning_rate": 4.9516067221241485e-06, |
| "loss": 0.7533, |
| "step": 1113 |
| }, |
| { |
| "epoch": 0.4112724587743047, |
| "grad_norm": 0.8800785541534424, |
| "learning_rate": 4.951511440698309e-06, |
| "loss": 0.7352, |
| "step": 1114 |
| }, |
| { |
| "epoch": 0.4116416441053409, |
| "grad_norm": 0.8675339818000793, |
| "learning_rate": 4.9514160664835366e-06, |
| "loss": 0.7565, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.41201082943637707, |
| "grad_norm": 0.9367777109146118, |
| "learning_rate": 4.951320599483439e-06, |
| "loss": 0.7501, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.41238001476741326, |
| "grad_norm": 0.9239450693130493, |
| "learning_rate": 4.9512250397016304e-06, |
| "loss": 0.7774, |
| "step": 1117 |
| }, |
| { |
| "epoch": 0.4127492000984494, |
| "grad_norm": 0.8786678314208984, |
| "learning_rate": 4.951129387141728e-06, |
| "loss": 0.7385, |
| "step": 1118 |
| }, |
| { |
| "epoch": 0.4131183854294856, |
| "grad_norm": 0.9262030720710754, |
| "learning_rate": 4.951033641807351e-06, |
| "loss": 0.7653, |
| "step": 1119 |
| }, |
| { |
| "epoch": 0.4134875707605218, |
| "grad_norm": 0.901329755783081, |
| "learning_rate": 4.950937803702125e-06, |
| "loss": 0.7375, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.413856756091558, |
| "grad_norm": 0.9039179086685181, |
| "learning_rate": 4.950841872829676e-06, |
| "loss": 0.7644, |
| "step": 1121 |
| }, |
| { |
| "epoch": 0.41422594142259417, |
| "grad_norm": 0.9563358426094055, |
| "learning_rate": 4.9507458491936365e-06, |
| "loss": 0.7476, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.4145951267536303, |
| "grad_norm": 0.9279500842094421, |
| "learning_rate": 4.950649732797639e-06, |
| "loss": 0.723, |
| "step": 1123 |
| }, |
| { |
| "epoch": 0.4149643120846665, |
| "grad_norm": 0.9395243525505066, |
| "learning_rate": 4.950553523645324e-06, |
| "loss": 0.7341, |
| "step": 1124 |
| }, |
| { |
| "epoch": 0.4153334974157027, |
| "grad_norm": 0.9069976210594177, |
| "learning_rate": 4.9504572217403305e-06, |
| "loss": 0.7458, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.4157026827467389, |
| "grad_norm": 0.9429416656494141, |
| "learning_rate": 4.9503608270863046e-06, |
| "loss": 0.7534, |
| "step": 1126 |
| }, |
| { |
| "epoch": 0.416071868077775, |
| "grad_norm": 0.9167525172233582, |
| "learning_rate": 4.950264339686895e-06, |
| "loss": 0.7223, |
| "step": 1127 |
| }, |
| { |
| "epoch": 0.4164410534088112, |
| "grad_norm": 0.8981575965881348, |
| "learning_rate": 4.950167759545753e-06, |
| "loss": 0.7562, |
| "step": 1128 |
| }, |
| { |
| "epoch": 0.4168102387398474, |
| "grad_norm": 0.8772927522659302, |
| "learning_rate": 4.950071086666535e-06, |
| "loss": 0.7528, |
| "step": 1129 |
| }, |
| { |
| "epoch": 0.4171794240708836, |
| "grad_norm": 0.8946474194526672, |
| "learning_rate": 4.949974321052899e-06, |
| "loss": 0.7232, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.4175486094019198, |
| "grad_norm": 0.9215155839920044, |
| "learning_rate": 4.94987746270851e-06, |
| "loss": 0.7705, |
| "step": 1131 |
| }, |
| { |
| "epoch": 0.4179177947329559, |
| "grad_norm": 0.8860598802566528, |
| "learning_rate": 4.94978051163703e-06, |
| "loss": 0.7468, |
| "step": 1132 |
| }, |
| { |
| "epoch": 0.4182869800639921, |
| "grad_norm": 0.9385093450546265, |
| "learning_rate": 4.9496834678421325e-06, |
| "loss": 0.7881, |
| "step": 1133 |
| }, |
| { |
| "epoch": 0.4186561653950283, |
| "grad_norm": 0.9394497275352478, |
| "learning_rate": 4.949586331327488e-06, |
| "loss": 0.7543, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.4190253507260645, |
| "grad_norm": 0.9088026881217957, |
| "learning_rate": 4.949489102096774e-06, |
| "loss": 0.7598, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.41939453605710064, |
| "grad_norm": 0.9452118873596191, |
| "learning_rate": 4.94939178015367e-06, |
| "loss": 0.7462, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.41976372138813683, |
| "grad_norm": 0.9107792377471924, |
| "learning_rate": 4.949294365501862e-06, |
| "loss": 0.7423, |
| "step": 1137 |
| }, |
| { |
| "epoch": 0.420132906719173, |
| "grad_norm": 0.8913663625717163, |
| "learning_rate": 4.9491968581450334e-06, |
| "loss": 0.7334, |
| "step": 1138 |
| }, |
| { |
| "epoch": 0.4205020920502092, |
| "grad_norm": 0.902692973613739, |
| "learning_rate": 4.949099258086878e-06, |
| "loss": 0.701, |
| "step": 1139 |
| }, |
| { |
| "epoch": 0.4208712773812454, |
| "grad_norm": 0.8897360563278198, |
| "learning_rate": 4.949001565331087e-06, |
| "loss": 0.7425, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.42124046271228155, |
| "grad_norm": 0.9004592299461365, |
| "learning_rate": 4.948903779881361e-06, |
| "loss": 0.7052, |
| "step": 1141 |
| }, |
| { |
| "epoch": 0.42160964804331774, |
| "grad_norm": 0.8605636954307556, |
| "learning_rate": 4.9488059017413995e-06, |
| "loss": 0.7199, |
| "step": 1142 |
| }, |
| { |
| "epoch": 0.42197883337435393, |
| "grad_norm": 0.9023451805114746, |
| "learning_rate": 4.948707930914908e-06, |
| "loss": 0.7718, |
| "step": 1143 |
| }, |
| { |
| "epoch": 0.4223480187053901, |
| "grad_norm": 0.9158706068992615, |
| "learning_rate": 4.948609867405594e-06, |
| "loss": 0.759, |
| "step": 1144 |
| }, |
| { |
| "epoch": 0.42271720403642626, |
| "grad_norm": 0.914189338684082, |
| "learning_rate": 4.948511711217168e-06, |
| "loss": 0.7292, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.42308638936746246, |
| "grad_norm": 0.9431989789009094, |
| "learning_rate": 4.948413462353347e-06, |
| "loss": 0.7385, |
| "step": 1146 |
| }, |
| { |
| "epoch": 0.42345557469849865, |
| "grad_norm": 0.9301594495773315, |
| "learning_rate": 4.9483151208178505e-06, |
| "loss": 0.7613, |
| "step": 1147 |
| }, |
| { |
| "epoch": 0.42382476002953484, |
| "grad_norm": 0.9736144542694092, |
| "learning_rate": 4.948216686614398e-06, |
| "loss": 0.7694, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.42419394536057103, |
| "grad_norm": 0.9035144448280334, |
| "learning_rate": 4.948118159746718e-06, |
| "loss": 0.7107, |
| "step": 1149 |
| }, |
| { |
| "epoch": 0.42456313069160717, |
| "grad_norm": 0.9065275192260742, |
| "learning_rate": 4.948019540218536e-06, |
| "loss": 0.7772, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.42493231602264336, |
| "grad_norm": 0.920475959777832, |
| "learning_rate": 4.9479208280335885e-06, |
| "loss": 0.7342, |
| "step": 1151 |
| }, |
| { |
| "epoch": 0.42530150135367956, |
| "grad_norm": 0.9290496706962585, |
| "learning_rate": 4.947822023195611e-06, |
| "loss": 0.7331, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.42567068668471575, |
| "grad_norm": 0.8922300934791565, |
| "learning_rate": 4.9477231257083415e-06, |
| "loss": 0.7655, |
| "step": 1153 |
| }, |
| { |
| "epoch": 0.4260398720157519, |
| "grad_norm": 0.9539517164230347, |
| "learning_rate": 4.947624135575524e-06, |
| "loss": 0.7567, |
| "step": 1154 |
| }, |
| { |
| "epoch": 0.4264090573467881, |
| "grad_norm": 0.8572643995285034, |
| "learning_rate": 4.9475250528009055e-06, |
| "loss": 0.6853, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.42677824267782427, |
| "grad_norm": 1.0646028518676758, |
| "learning_rate": 4.947425877388237e-06, |
| "loss": 0.7308, |
| "step": 1156 |
| }, |
| { |
| "epoch": 0.42714742800886046, |
| "grad_norm": 0.9418565630912781, |
| "learning_rate": 4.947326609341271e-06, |
| "loss": 0.7319, |
| "step": 1157 |
| }, |
| { |
| "epoch": 0.4275166133398966, |
| "grad_norm": 0.9165734052658081, |
| "learning_rate": 4.947227248663764e-06, |
| "loss": 0.7168, |
| "step": 1158 |
| }, |
| { |
| "epoch": 0.4278857986709328, |
| "grad_norm": 0.9139310717582703, |
| "learning_rate": 4.94712779535948e-06, |
| "loss": 0.7387, |
| "step": 1159 |
| }, |
| { |
| "epoch": 0.428254984001969, |
| "grad_norm": 0.8762199282646179, |
| "learning_rate": 4.94702824943218e-06, |
| "loss": 0.6796, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.4286241693330052, |
| "grad_norm": 0.9274040460586548, |
| "learning_rate": 4.946928610885633e-06, |
| "loss": 0.7424, |
| "step": 1161 |
| }, |
| { |
| "epoch": 0.42899335466404137, |
| "grad_norm": 0.9182401299476624, |
| "learning_rate": 4.946828879723611e-06, |
| "loss": 0.7242, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.4293625399950775, |
| "grad_norm": 0.9233170747756958, |
| "learning_rate": 4.946729055949888e-06, |
| "loss": 0.7557, |
| "step": 1163 |
| }, |
| { |
| "epoch": 0.4297317253261137, |
| "grad_norm": 0.9127527475357056, |
| "learning_rate": 4.946629139568242e-06, |
| "loss": 0.754, |
| "step": 1164 |
| }, |
| { |
| "epoch": 0.4301009106571499, |
| "grad_norm": 0.9096380472183228, |
| "learning_rate": 4.946529130582456e-06, |
| "loss": 0.7747, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.4304700959881861, |
| "grad_norm": 0.927699089050293, |
| "learning_rate": 4.946429028996314e-06, |
| "loss": 0.7741, |
| "step": 1166 |
| }, |
| { |
| "epoch": 0.4308392813192222, |
| "grad_norm": 0.8996379971504211, |
| "learning_rate": 4.946328834813605e-06, |
| "loss": 0.7582, |
| "step": 1167 |
| }, |
| { |
| "epoch": 0.4312084666502584, |
| "grad_norm": 0.9378359317779541, |
| "learning_rate": 4.946228548038122e-06, |
| "loss": 0.7873, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.4315776519812946, |
| "grad_norm": 0.85906583070755, |
| "learning_rate": 4.946128168673662e-06, |
| "loss": 0.6732, |
| "step": 1169 |
| }, |
| { |
| "epoch": 0.4319468373123308, |
| "grad_norm": 0.8577884435653687, |
| "learning_rate": 4.94602769672402e-06, |
| "loss": 0.705, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.432316022643367, |
| "grad_norm": 0.8988800048828125, |
| "learning_rate": 4.945927132193003e-06, |
| "loss": 0.7255, |
| "step": 1171 |
| }, |
| { |
| "epoch": 0.43268520797440313, |
| "grad_norm": 0.8887507319450378, |
| "learning_rate": 4.945826475084417e-06, |
| "loss": 0.7122, |
| "step": 1172 |
| }, |
| { |
| "epoch": 0.4330543933054393, |
| "grad_norm": 0.9307096004486084, |
| "learning_rate": 4.9457257254020696e-06, |
| "loss": 0.7394, |
| "step": 1173 |
| }, |
| { |
| "epoch": 0.4334235786364755, |
| "grad_norm": 0.9346253871917725, |
| "learning_rate": 4.945624883149776e-06, |
| "loss": 0.7617, |
| "step": 1174 |
| }, |
| { |
| "epoch": 0.4337927639675117, |
| "grad_norm": 0.935279905796051, |
| "learning_rate": 4.945523948331352e-06, |
| "loss": 0.72, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.43416194929854784, |
| "grad_norm": 0.9695756435394287, |
| "learning_rate": 4.9454229209506186e-06, |
| "loss": 0.7499, |
| "step": 1176 |
| }, |
| { |
| "epoch": 0.43453113462958404, |
| "grad_norm": 0.8869019150733948, |
| "learning_rate": 4.9453218010114e-06, |
| "loss": 0.6932, |
| "step": 1177 |
| }, |
| { |
| "epoch": 0.43490031996062023, |
| "grad_norm": 0.9020052552223206, |
| "learning_rate": 4.945220588517522e-06, |
| "loss": 0.7313, |
| "step": 1178 |
| }, |
| { |
| "epoch": 0.4352695052916564, |
| "grad_norm": 0.9483025670051575, |
| "learning_rate": 4.945119283472816e-06, |
| "loss": 0.7311, |
| "step": 1179 |
| }, |
| { |
| "epoch": 0.4356386906226926, |
| "grad_norm": 0.8632071614265442, |
| "learning_rate": 4.945017885881118e-06, |
| "loss": 0.6701, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.43600787595372875, |
| "grad_norm": 0.8918522000312805, |
| "learning_rate": 4.944916395746264e-06, |
| "loss": 0.6994, |
| "step": 1181 |
| }, |
| { |
| "epoch": 0.43637706128476494, |
| "grad_norm": 0.9055469036102295, |
| "learning_rate": 4.944814813072097e-06, |
| "loss": 0.7532, |
| "step": 1182 |
| }, |
| { |
| "epoch": 0.43674624661580114, |
| "grad_norm": 0.9149122834205627, |
| "learning_rate": 4.94471313786246e-06, |
| "loss": 0.7713, |
| "step": 1183 |
| }, |
| { |
| "epoch": 0.43711543194683733, |
| "grad_norm": 0.9110752940177917, |
| "learning_rate": 4.944611370121203e-06, |
| "loss": 0.7157, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.43748461727787347, |
| "grad_norm": 0.9197525382041931, |
| "learning_rate": 4.9445095098521765e-06, |
| "loss": 0.7595, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.43785380260890966, |
| "grad_norm": 0.9142046570777893, |
| "learning_rate": 4.944407557059236e-06, |
| "loss": 0.6909, |
| "step": 1186 |
| }, |
| { |
| "epoch": 0.43822298793994585, |
| "grad_norm": 0.8837047219276428, |
| "learning_rate": 4.944305511746242e-06, |
| "loss": 0.7213, |
| "step": 1187 |
| }, |
| { |
| "epoch": 0.43859217327098204, |
| "grad_norm": 0.9123366475105286, |
| "learning_rate": 4.944203373917056e-06, |
| "loss": 0.7507, |
| "step": 1188 |
| }, |
| { |
| "epoch": 0.43896135860201824, |
| "grad_norm": 0.9120188355445862, |
| "learning_rate": 4.944101143575542e-06, |
| "loss": 0.7194, |
| "step": 1189 |
| }, |
| { |
| "epoch": 0.4393305439330544, |
| "grad_norm": 0.9454036951065063, |
| "learning_rate": 4.943998820725573e-06, |
| "loss": 0.7523, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.43969972926409057, |
| "grad_norm": 0.9568728804588318, |
| "learning_rate": 4.943896405371019e-06, |
| "loss": 0.7193, |
| "step": 1191 |
| }, |
| { |
| "epoch": 0.44006891459512676, |
| "grad_norm": 0.9208801984786987, |
| "learning_rate": 4.9437938975157586e-06, |
| "loss": 0.7172, |
| "step": 1192 |
| }, |
| { |
| "epoch": 0.44043809992616295, |
| "grad_norm": 0.9229491353034973, |
| "learning_rate": 4.9436912971636695e-06, |
| "loss": 0.738, |
| "step": 1193 |
| }, |
| { |
| "epoch": 0.4408072852571991, |
| "grad_norm": 0.9245941042900085, |
| "learning_rate": 4.943588604318635e-06, |
| "loss": 0.7437, |
| "step": 1194 |
| }, |
| { |
| "epoch": 0.4411764705882353, |
| "grad_norm": 0.8792277574539185, |
| "learning_rate": 4.943485818984545e-06, |
| "loss": 0.7363, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.4415456559192715, |
| "grad_norm": 0.9498505592346191, |
| "learning_rate": 4.9433829411652864e-06, |
| "loss": 0.7757, |
| "step": 1196 |
| }, |
| { |
| "epoch": 0.44191484125030767, |
| "grad_norm": 0.9258801341056824, |
| "learning_rate": 4.943279970864755e-06, |
| "loss": 0.7355, |
| "step": 1197 |
| }, |
| { |
| "epoch": 0.44228402658134386, |
| "grad_norm": 0.9117864370346069, |
| "learning_rate": 4.943176908086849e-06, |
| "loss": 0.7208, |
| "step": 1198 |
| }, |
| { |
| "epoch": 0.44265321191238, |
| "grad_norm": 0.8816313743591309, |
| "learning_rate": 4.9430737528354665e-06, |
| "loss": 0.6972, |
| "step": 1199 |
| }, |
| { |
| "epoch": 0.4430223972434162, |
| "grad_norm": 0.9166438579559326, |
| "learning_rate": 4.942970505114514e-06, |
| "loss": 0.76, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.4433915825744524, |
| "grad_norm": 0.8871222138404846, |
| "learning_rate": 4.942867164927899e-06, |
| "loss": 0.7348, |
| "step": 1201 |
| }, |
| { |
| "epoch": 0.4437607679054886, |
| "grad_norm": 0.9092727303504944, |
| "learning_rate": 4.942763732279533e-06, |
| "loss": 0.7114, |
| "step": 1202 |
| }, |
| { |
| "epoch": 0.4441299532365247, |
| "grad_norm": 0.9247320294380188, |
| "learning_rate": 4.94266020717333e-06, |
| "loss": 0.756, |
| "step": 1203 |
| }, |
| { |
| "epoch": 0.4444991385675609, |
| "grad_norm": 0.9279753565788269, |
| "learning_rate": 4.94255658961321e-06, |
| "loss": 0.7269, |
| "step": 1204 |
| }, |
| { |
| "epoch": 0.4448683238985971, |
| "grad_norm": 0.917389452457428, |
| "learning_rate": 4.942452879603094e-06, |
| "loss": 0.7285, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.4452375092296333, |
| "grad_norm": 0.8783107399940491, |
| "learning_rate": 4.942349077146906e-06, |
| "loss": 0.7487, |
| "step": 1206 |
| }, |
| { |
| "epoch": 0.4456066945606695, |
| "grad_norm": 0.9037907719612122, |
| "learning_rate": 4.9422451822485776e-06, |
| "loss": 0.7436, |
| "step": 1207 |
| }, |
| { |
| "epoch": 0.4459758798917056, |
| "grad_norm": 0.8458569049835205, |
| "learning_rate": 4.942141194912039e-06, |
| "loss": 0.6799, |
| "step": 1208 |
| }, |
| { |
| "epoch": 0.4463450652227418, |
| "grad_norm": 0.882938802242279, |
| "learning_rate": 4.942037115141228e-06, |
| "loss": 0.7216, |
| "step": 1209 |
| }, |
| { |
| "epoch": 0.446714250553778, |
| "grad_norm": 0.920384407043457, |
| "learning_rate": 4.9419329429400816e-06, |
| "loss": 0.7292, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.4470834358848142, |
| "grad_norm": 0.9709598422050476, |
| "learning_rate": 4.941828678312545e-06, |
| "loss": 0.7588, |
| "step": 1211 |
| }, |
| { |
| "epoch": 0.44745262121585033, |
| "grad_norm": 0.883941650390625, |
| "learning_rate": 4.941724321262563e-06, |
| "loss": 0.7007, |
| "step": 1212 |
| }, |
| { |
| "epoch": 0.4478218065468865, |
| "grad_norm": 0.9086169004440308, |
| "learning_rate": 4.941619871794087e-06, |
| "loss": 0.73, |
| "step": 1213 |
| }, |
| { |
| "epoch": 0.4481909918779227, |
| "grad_norm": 0.8642125129699707, |
| "learning_rate": 4.941515329911068e-06, |
| "loss": 0.6792, |
| "step": 1214 |
| }, |
| { |
| "epoch": 0.4485601772089589, |
| "grad_norm": 0.9487695693969727, |
| "learning_rate": 4.941410695617464e-06, |
| "loss": 0.7419, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.4489293625399951, |
| "grad_norm": 0.994884729385376, |
| "learning_rate": 4.941305968917238e-06, |
| "loss": 0.6888, |
| "step": 1216 |
| }, |
| { |
| "epoch": 0.44929854787103124, |
| "grad_norm": 0.9372398853302002, |
| "learning_rate": 4.941201149814349e-06, |
| "loss": 0.7325, |
| "step": 1217 |
| }, |
| { |
| "epoch": 0.44966773320206743, |
| "grad_norm": 0.9120617508888245, |
| "learning_rate": 4.94109623831277e-06, |
| "loss": 0.7697, |
| "step": 1218 |
| }, |
| { |
| "epoch": 0.4500369185331036, |
| "grad_norm": 0.9304324388504028, |
| "learning_rate": 4.940991234416466e-06, |
| "loss": 0.74, |
| "step": 1219 |
| }, |
| { |
| "epoch": 0.4504061038641398, |
| "grad_norm": 0.8959391713142395, |
| "learning_rate": 4.940886138129415e-06, |
| "loss": 0.7074, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.45077528919517595, |
| "grad_norm": 0.9347814917564392, |
| "learning_rate": 4.940780949455595e-06, |
| "loss": 0.7111, |
| "step": 1221 |
| }, |
| { |
| "epoch": 0.45114447452621215, |
| "grad_norm": 0.8901122212409973, |
| "learning_rate": 4.940675668398986e-06, |
| "loss": 0.74, |
| "step": 1222 |
| }, |
| { |
| "epoch": 0.45151365985724834, |
| "grad_norm": 0.9231247305870056, |
| "learning_rate": 4.940570294963572e-06, |
| "loss": 0.7486, |
| "step": 1223 |
| }, |
| { |
| "epoch": 0.45188284518828453, |
| "grad_norm": 0.9006732106208801, |
| "learning_rate": 4.940464829153343e-06, |
| "loss": 0.7592, |
| "step": 1224 |
| }, |
| { |
| "epoch": 0.4522520305193207, |
| "grad_norm": 0.8815886974334717, |
| "learning_rate": 4.940359270972291e-06, |
| "loss": 0.6919, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.45262121585035686, |
| "grad_norm": 0.8991600275039673, |
| "learning_rate": 4.940253620424411e-06, |
| "loss": 0.7178, |
| "step": 1226 |
| }, |
| { |
| "epoch": 0.45299040118139305, |
| "grad_norm": 0.8781217932701111, |
| "learning_rate": 4.940147877513701e-06, |
| "loss": 0.722, |
| "step": 1227 |
| }, |
| { |
| "epoch": 0.45335958651242925, |
| "grad_norm": 0.9302307367324829, |
| "learning_rate": 4.940042042244164e-06, |
| "loss": 0.7535, |
| "step": 1228 |
| }, |
| { |
| "epoch": 0.45372877184346544, |
| "grad_norm": 0.9426242113113403, |
| "learning_rate": 4.9399361146198065e-06, |
| "loss": 0.7451, |
| "step": 1229 |
| }, |
| { |
| "epoch": 0.4540979571745016, |
| "grad_norm": 0.9411885738372803, |
| "learning_rate": 4.939830094644637e-06, |
| "loss": 0.7625, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.45446714250553777, |
| "grad_norm": 0.9621394872665405, |
| "learning_rate": 4.939723982322667e-06, |
| "loss": 0.6874, |
| "step": 1231 |
| }, |
| { |
| "epoch": 0.45483632783657396, |
| "grad_norm": 0.928747296333313, |
| "learning_rate": 4.939617777657916e-06, |
| "loss": 0.761, |
| "step": 1232 |
| }, |
| { |
| "epoch": 0.45520551316761015, |
| "grad_norm": 0.8658984899520874, |
| "learning_rate": 4.939511480654401e-06, |
| "loss": 0.7426, |
| "step": 1233 |
| }, |
| { |
| "epoch": 0.45557469849864635, |
| "grad_norm": 0.901623010635376, |
| "learning_rate": 4.939405091316147e-06, |
| "loss": 0.7723, |
| "step": 1234 |
| }, |
| { |
| "epoch": 0.4559438838296825, |
| "grad_norm": 0.8813204765319824, |
| "learning_rate": 4.9392986096471796e-06, |
| "loss": 0.7268, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.4563130691607187, |
| "grad_norm": 0.9547492265701294, |
| "learning_rate": 4.93919203565153e-06, |
| "loss": 0.7687, |
| "step": 1236 |
| }, |
| { |
| "epoch": 0.45668225449175487, |
| "grad_norm": 0.9250112175941467, |
| "learning_rate": 4.939085369333232e-06, |
| "loss": 0.7749, |
| "step": 1237 |
| }, |
| { |
| "epoch": 0.45705143982279106, |
| "grad_norm": 0.9013698697090149, |
| "learning_rate": 4.938978610696322e-06, |
| "loss": 0.7553, |
| "step": 1238 |
| }, |
| { |
| "epoch": 0.4574206251538272, |
| "grad_norm": 0.8597696423530579, |
| "learning_rate": 4.938871759744842e-06, |
| "loss": 0.6982, |
| "step": 1239 |
| }, |
| { |
| "epoch": 0.4577898104848634, |
| "grad_norm": 0.9342607259750366, |
| "learning_rate": 4.938764816482835e-06, |
| "loss": 0.7266, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.4581589958158996, |
| "grad_norm": 0.9153682589530945, |
| "learning_rate": 4.93865778091435e-06, |
| "loss": 0.7474, |
| "step": 1241 |
| }, |
| { |
| "epoch": 0.4585281811469358, |
| "grad_norm": 0.9273695945739746, |
| "learning_rate": 4.938550653043437e-06, |
| "loss": 0.7094, |
| "step": 1242 |
| }, |
| { |
| "epoch": 0.45889736647797197, |
| "grad_norm": 0.9250311255455017, |
| "learning_rate": 4.938443432874151e-06, |
| "loss": 0.7576, |
| "step": 1243 |
| }, |
| { |
| "epoch": 0.4592665518090081, |
| "grad_norm": 0.9787151217460632, |
| "learning_rate": 4.938336120410551e-06, |
| "loss": 0.7324, |
| "step": 1244 |
| }, |
| { |
| "epoch": 0.4596357371400443, |
| "grad_norm": 0.8964402079582214, |
| "learning_rate": 4.938228715656699e-06, |
| "loss": 0.766, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.4600049224710805, |
| "grad_norm": 0.9307600259780884, |
| "learning_rate": 4.938121218616659e-06, |
| "loss": 0.7328, |
| "step": 1246 |
| }, |
| { |
| "epoch": 0.4603741078021167, |
| "grad_norm": 0.8917447328567505, |
| "learning_rate": 4.938013629294502e-06, |
| "loss": 0.7606, |
| "step": 1247 |
| }, |
| { |
| "epoch": 0.4607432931331528, |
| "grad_norm": 0.902154803276062, |
| "learning_rate": 4.937905947694296e-06, |
| "loss": 0.6913, |
| "step": 1248 |
| }, |
| { |
| "epoch": 0.461112478464189, |
| "grad_norm": 0.9088221192359924, |
| "learning_rate": 4.937798173820121e-06, |
| "loss": 0.7124, |
| "step": 1249 |
| }, |
| { |
| "epoch": 0.4614816637952252, |
| "grad_norm": 0.8867905735969543, |
| "learning_rate": 4.937690307676054e-06, |
| "loss": 0.741, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.4618508491262614, |
| "grad_norm": 0.9009400606155396, |
| "learning_rate": 4.937582349266178e-06, |
| "loss": 0.7393, |
| "step": 1251 |
| }, |
| { |
| "epoch": 0.4622200344572976, |
| "grad_norm": 0.9617549777030945, |
| "learning_rate": 4.937474298594579e-06, |
| "loss": 0.7683, |
| "step": 1252 |
| }, |
| { |
| "epoch": 0.4625892197883337, |
| "grad_norm": 0.9465776085853577, |
| "learning_rate": 4.937366155665348e-06, |
| "loss": 0.7684, |
| "step": 1253 |
| }, |
| { |
| "epoch": 0.4629584051193699, |
| "grad_norm": 0.9941141605377197, |
| "learning_rate": 4.9372579204825775e-06, |
| "loss": 0.7538, |
| "step": 1254 |
| }, |
| { |
| "epoch": 0.4633275904504061, |
| "grad_norm": 0.8829614520072937, |
| "learning_rate": 4.937149593050363e-06, |
| "loss": 0.7195, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.4636967757814423, |
| "grad_norm": 0.8967337608337402, |
| "learning_rate": 4.937041173372806e-06, |
| "loss": 0.7436, |
| "step": 1256 |
| }, |
| { |
| "epoch": 0.46406596111247844, |
| "grad_norm": 0.8753035664558411, |
| "learning_rate": 4.9369326614540096e-06, |
| "loss": 0.7431, |
| "step": 1257 |
| }, |
| { |
| "epoch": 0.46443514644351463, |
| "grad_norm": 0.9020246863365173, |
| "learning_rate": 4.936824057298081e-06, |
| "loss": 0.7362, |
| "step": 1258 |
| }, |
| { |
| "epoch": 0.4648043317745508, |
| "grad_norm": 0.8981832265853882, |
| "learning_rate": 4.936715360909131e-06, |
| "loss": 0.7306, |
| "step": 1259 |
| }, |
| { |
| "epoch": 0.465173517105587, |
| "grad_norm": 0.9331729412078857, |
| "learning_rate": 4.9366065722912735e-06, |
| "loss": 0.7471, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.4655427024366232, |
| "grad_norm": 0.9088561534881592, |
| "learning_rate": 4.936497691448627e-06, |
| "loss": 0.7693, |
| "step": 1261 |
| }, |
| { |
| "epoch": 0.46591188776765935, |
| "grad_norm": 0.9289308190345764, |
| "learning_rate": 4.936388718385311e-06, |
| "loss": 0.7338, |
| "step": 1262 |
| }, |
| { |
| "epoch": 0.46628107309869554, |
| "grad_norm": 0.9137311577796936, |
| "learning_rate": 4.936279653105452e-06, |
| "loss": 0.7147, |
| "step": 1263 |
| }, |
| { |
| "epoch": 0.46665025842973173, |
| "grad_norm": 0.8960225582122803, |
| "learning_rate": 4.936170495613175e-06, |
| "loss": 0.7154, |
| "step": 1264 |
| }, |
| { |
| "epoch": 0.4670194437607679, |
| "grad_norm": 0.8980303406715393, |
| "learning_rate": 4.936061245912615e-06, |
| "loss": 0.7259, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.46738862909180406, |
| "grad_norm": 0.9025071263313293, |
| "learning_rate": 4.935951904007906e-06, |
| "loss": 0.7525, |
| "step": 1266 |
| }, |
| { |
| "epoch": 0.46775781442284026, |
| "grad_norm": 0.8963534235954285, |
| "learning_rate": 4.935842469903186e-06, |
| "loss": 0.721, |
| "step": 1267 |
| }, |
| { |
| "epoch": 0.46812699975387645, |
| "grad_norm": 0.9002708196640015, |
| "learning_rate": 4.935732943602597e-06, |
| "loss": 0.7474, |
| "step": 1268 |
| }, |
| { |
| "epoch": 0.46849618508491264, |
| "grad_norm": 0.9217738509178162, |
| "learning_rate": 4.935623325110285e-06, |
| "loss": 0.7647, |
| "step": 1269 |
| }, |
| { |
| "epoch": 0.4688653704159488, |
| "grad_norm": 0.9251262545585632, |
| "learning_rate": 4.935513614430399e-06, |
| "loss": 0.732, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.46923455574698497, |
| "grad_norm": 0.8719298243522644, |
| "learning_rate": 4.935403811567091e-06, |
| "loss": 0.7526, |
| "step": 1271 |
| }, |
| { |
| "epoch": 0.46960374107802116, |
| "grad_norm": 0.893237292766571, |
| "learning_rate": 4.935293916524517e-06, |
| "loss": 0.7306, |
| "step": 1272 |
| }, |
| { |
| "epoch": 0.46997292640905736, |
| "grad_norm": 0.8918770551681519, |
| "learning_rate": 4.935183929306837e-06, |
| "loss": 0.7181, |
| "step": 1273 |
| }, |
| { |
| "epoch": 0.47034211174009355, |
| "grad_norm": 0.914319634437561, |
| "learning_rate": 4.935073849918214e-06, |
| "loss": 0.7679, |
| "step": 1274 |
| }, |
| { |
| "epoch": 0.4707112970711297, |
| "grad_norm": 0.895769476890564, |
| "learning_rate": 4.934963678362815e-06, |
| "loss": 0.7601, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.4710804824021659, |
| "grad_norm": 0.8876564502716064, |
| "learning_rate": 4.934853414644808e-06, |
| "loss": 0.733, |
| "step": 1276 |
| }, |
| { |
| "epoch": 0.47144966773320207, |
| "grad_norm": 0.9218546152114868, |
| "learning_rate": 4.934743058768369e-06, |
| "loss": 0.7578, |
| "step": 1277 |
| }, |
| { |
| "epoch": 0.47181885306423826, |
| "grad_norm": 0.9365533590316772, |
| "learning_rate": 4.934632610737673e-06, |
| "loss": 0.7511, |
| "step": 1278 |
| }, |
| { |
| "epoch": 0.4721880383952744, |
| "grad_norm": 0.8840087652206421, |
| "learning_rate": 4.934522070556901e-06, |
| "loss": 0.7085, |
| "step": 1279 |
| }, |
| { |
| "epoch": 0.4725572237263106, |
| "grad_norm": 0.8947144150733948, |
| "learning_rate": 4.934411438230237e-06, |
| "loss": 0.7547, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.4729264090573468, |
| "grad_norm": 0.9141665697097778, |
| "learning_rate": 4.934300713761868e-06, |
| "loss": 0.7278, |
| "step": 1281 |
| }, |
| { |
| "epoch": 0.473295594388383, |
| "grad_norm": 0.8770225048065186, |
| "learning_rate": 4.9341898971559856e-06, |
| "loss": 0.7245, |
| "step": 1282 |
| }, |
| { |
| "epoch": 0.47366477971941917, |
| "grad_norm": 0.8997796177864075, |
| "learning_rate": 4.934078988416784e-06, |
| "loss": 0.775, |
| "step": 1283 |
| }, |
| { |
| "epoch": 0.4740339650504553, |
| "grad_norm": 0.9298402667045593, |
| "learning_rate": 4.933967987548461e-06, |
| "loss": 0.7564, |
| "step": 1284 |
| }, |
| { |
| "epoch": 0.4744031503814915, |
| "grad_norm": 0.9105640053749084, |
| "learning_rate": 4.933856894555218e-06, |
| "loss": 0.7507, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.4747723357125277, |
| "grad_norm": 0.8863377571105957, |
| "learning_rate": 4.933745709441259e-06, |
| "loss": 0.7113, |
| "step": 1286 |
| }, |
| { |
| "epoch": 0.4751415210435639, |
| "grad_norm": 0.897258996963501, |
| "learning_rate": 4.9336344322107935e-06, |
| "loss": 0.7776, |
| "step": 1287 |
| }, |
| { |
| "epoch": 0.4755107063746, |
| "grad_norm": 0.8783311247825623, |
| "learning_rate": 4.933523062868033e-06, |
| "loss": 0.7136, |
| "step": 1288 |
| }, |
| { |
| "epoch": 0.4758798917056362, |
| "grad_norm": 0.8907281160354614, |
| "learning_rate": 4.933411601417192e-06, |
| "loss": 0.6871, |
| "step": 1289 |
| }, |
| { |
| "epoch": 0.4762490770366724, |
| "grad_norm": 0.9048116207122803, |
| "learning_rate": 4.93330004786249e-06, |
| "loss": 0.7545, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.4766182623677086, |
| "grad_norm": 0.8691855669021606, |
| "learning_rate": 4.933188402208149e-06, |
| "loss": 0.703, |
| "step": 1291 |
| }, |
| { |
| "epoch": 0.4769874476987448, |
| "grad_norm": 0.8851851224899292, |
| "learning_rate": 4.933076664458395e-06, |
| "loss": 0.7349, |
| "step": 1292 |
| }, |
| { |
| "epoch": 0.47735663302978093, |
| "grad_norm": 0.9410663843154907, |
| "learning_rate": 4.9329648346174575e-06, |
| "loss": 0.7593, |
| "step": 1293 |
| }, |
| { |
| "epoch": 0.4777258183608171, |
| "grad_norm": 0.9117968082427979, |
| "learning_rate": 4.932852912689569e-06, |
| "loss": 0.7231, |
| "step": 1294 |
| }, |
| { |
| "epoch": 0.4780950036918533, |
| "grad_norm": 0.9473034143447876, |
| "learning_rate": 4.932740898678965e-06, |
| "loss": 0.7467, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.4784641890228895, |
| "grad_norm": 0.9036644697189331, |
| "learning_rate": 4.932628792589887e-06, |
| "loss": 0.7426, |
| "step": 1296 |
| }, |
| { |
| "epoch": 0.47883337435392564, |
| "grad_norm": 0.9032191038131714, |
| "learning_rate": 4.932516594426575e-06, |
| "loss": 0.7258, |
| "step": 1297 |
| }, |
| { |
| "epoch": 0.47920255968496184, |
| "grad_norm": 0.864815354347229, |
| "learning_rate": 4.932404304193279e-06, |
| "loss": 0.7088, |
| "step": 1298 |
| }, |
| { |
| "epoch": 0.47957174501599803, |
| "grad_norm": 0.8944458961486816, |
| "learning_rate": 4.9322919218942466e-06, |
| "loss": 0.7706, |
| "step": 1299 |
| }, |
| { |
| "epoch": 0.4799409303470342, |
| "grad_norm": 0.9362423419952393, |
| "learning_rate": 4.932179447533734e-06, |
| "loss": 0.7286, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.4803101156780704, |
| "grad_norm": 0.9102439284324646, |
| "learning_rate": 4.9320668811159954e-06, |
| "loss": 0.7096, |
| "step": 1301 |
| }, |
| { |
| "epoch": 0.48067930100910655, |
| "grad_norm": 0.8899404406547546, |
| "learning_rate": 4.931954222645294e-06, |
| "loss": 0.7413, |
| "step": 1302 |
| }, |
| { |
| "epoch": 0.48104848634014274, |
| "grad_norm": 0.9097535610198975, |
| "learning_rate": 4.9318414721258924e-06, |
| "loss": 0.7417, |
| "step": 1303 |
| }, |
| { |
| "epoch": 0.48141767167117894, |
| "grad_norm": 0.9498276114463806, |
| "learning_rate": 4.931728629562059e-06, |
| "loss": 0.724, |
| "step": 1304 |
| }, |
| { |
| "epoch": 0.48178685700221513, |
| "grad_norm": 0.9295927882194519, |
| "learning_rate": 4.9316156949580645e-06, |
| "loss": 0.7388, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.48215604233325127, |
| "grad_norm": 0.907588541507721, |
| "learning_rate": 4.931502668318183e-06, |
| "loss": 0.7486, |
| "step": 1306 |
| }, |
| { |
| "epoch": 0.48252522766428746, |
| "grad_norm": 0.9081675410270691, |
| "learning_rate": 4.9313895496466936e-06, |
| "loss": 0.7562, |
| "step": 1307 |
| }, |
| { |
| "epoch": 0.48289441299532365, |
| "grad_norm": 0.9324904084205627, |
| "learning_rate": 4.931276338947876e-06, |
| "loss": 0.7588, |
| "step": 1308 |
| }, |
| { |
| "epoch": 0.48326359832635984, |
| "grad_norm": 0.9412097930908203, |
| "learning_rate": 4.931163036226017e-06, |
| "loss": 0.7773, |
| "step": 1309 |
| }, |
| { |
| "epoch": 0.48363278365739604, |
| "grad_norm": 0.9143854975700378, |
| "learning_rate": 4.931049641485404e-06, |
| "loss": 0.7591, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.4840019689884322, |
| "grad_norm": 0.8855016827583313, |
| "learning_rate": 4.930936154730329e-06, |
| "loss": 0.7749, |
| "step": 1311 |
| }, |
| { |
| "epoch": 0.48437115431946837, |
| "grad_norm": 0.9048222303390503, |
| "learning_rate": 4.930822575965089e-06, |
| "loss": 0.7268, |
| "step": 1312 |
| }, |
| { |
| "epoch": 0.48474033965050456, |
| "grad_norm": 0.9106447100639343, |
| "learning_rate": 4.93070890519398e-06, |
| "loss": 0.7537, |
| "step": 1313 |
| }, |
| { |
| "epoch": 0.48510952498154075, |
| "grad_norm": 0.9264537692070007, |
| "learning_rate": 4.930595142421307e-06, |
| "loss": 0.7544, |
| "step": 1314 |
| }, |
| { |
| "epoch": 0.4854787103125769, |
| "grad_norm": 0.9199881553649902, |
| "learning_rate": 4.930481287651375e-06, |
| "loss": 0.7032, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.4858478956436131, |
| "grad_norm": 0.9257407188415527, |
| "learning_rate": 4.930367340888494e-06, |
| "loss": 0.7343, |
| "step": 1316 |
| }, |
| { |
| "epoch": 0.4862170809746493, |
| "grad_norm": 0.9798755645751953, |
| "learning_rate": 4.930253302136976e-06, |
| "loss": 0.7448, |
| "step": 1317 |
| }, |
| { |
| "epoch": 0.48658626630568547, |
| "grad_norm": 0.919750988483429, |
| "learning_rate": 4.930139171401136e-06, |
| "loss": 0.7061, |
| "step": 1318 |
| }, |
| { |
| "epoch": 0.48695545163672166, |
| "grad_norm": 0.9479880332946777, |
| "learning_rate": 4.930024948685297e-06, |
| "loss": 0.7962, |
| "step": 1319 |
| }, |
| { |
| "epoch": 0.4873246369677578, |
| "grad_norm": 0.9518943428993225, |
| "learning_rate": 4.92991063399378e-06, |
| "loss": 0.7174, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.487693822298794, |
| "grad_norm": 0.908926784992218, |
| "learning_rate": 4.929796227330912e-06, |
| "loss": 0.7462, |
| "step": 1321 |
| }, |
| { |
| "epoch": 0.4880630076298302, |
| "grad_norm": 0.8598317503929138, |
| "learning_rate": 4.929681728701023e-06, |
| "loss": 0.7367, |
| "step": 1322 |
| }, |
| { |
| "epoch": 0.4884321929608664, |
| "grad_norm": 0.871263325214386, |
| "learning_rate": 4.929567138108449e-06, |
| "loss": 0.7283, |
| "step": 1323 |
| }, |
| { |
| "epoch": 0.4888013782919025, |
| "grad_norm": 0.8650959134101868, |
| "learning_rate": 4.9294524555575255e-06, |
| "loss": 0.703, |
| "step": 1324 |
| }, |
| { |
| "epoch": 0.4891705636229387, |
| "grad_norm": 0.9248819351196289, |
| "learning_rate": 4.9293376810525925e-06, |
| "loss": 0.7485, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.4895397489539749, |
| "grad_norm": 0.9359372854232788, |
| "learning_rate": 4.929222814597995e-06, |
| "loss": 0.7505, |
| "step": 1326 |
| }, |
| { |
| "epoch": 0.4899089342850111, |
| "grad_norm": 0.9581688046455383, |
| "learning_rate": 4.929107856198081e-06, |
| "loss": 0.747, |
| "step": 1327 |
| }, |
| { |
| "epoch": 0.4902781196160473, |
| "grad_norm": 0.9165839552879333, |
| "learning_rate": 4.928992805857201e-06, |
| "loss": 0.7406, |
| "step": 1328 |
| }, |
| { |
| "epoch": 0.4906473049470834, |
| "grad_norm": 0.9622183442115784, |
| "learning_rate": 4.9288776635797105e-06, |
| "loss": 0.7578, |
| "step": 1329 |
| }, |
| { |
| "epoch": 0.4910164902781196, |
| "grad_norm": 0.9130443930625916, |
| "learning_rate": 4.928762429369966e-06, |
| "loss": 0.7165, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.4913856756091558, |
| "grad_norm": 0.9140217304229736, |
| "learning_rate": 4.928647103232331e-06, |
| "loss": 0.7387, |
| "step": 1331 |
| }, |
| { |
| "epoch": 0.491754860940192, |
| "grad_norm": 0.9288978576660156, |
| "learning_rate": 4.928531685171169e-06, |
| "loss": 0.732, |
| "step": 1332 |
| }, |
| { |
| "epoch": 0.49212404627122813, |
| "grad_norm": 0.9056506156921387, |
| "learning_rate": 4.92841617519085e-06, |
| "loss": 0.6913, |
| "step": 1333 |
| }, |
| { |
| "epoch": 0.4924932316022643, |
| "grad_norm": 0.8599223494529724, |
| "learning_rate": 4.928300573295744e-06, |
| "loss": 0.7318, |
| "step": 1334 |
| }, |
| { |
| "epoch": 0.4928624169333005, |
| "grad_norm": 0.9114431142807007, |
| "learning_rate": 4.928184879490228e-06, |
| "loss": 0.6987, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.4932316022643367, |
| "grad_norm": 0.910118579864502, |
| "learning_rate": 4.9280690937786815e-06, |
| "loss": 0.7212, |
| "step": 1336 |
| }, |
| { |
| "epoch": 0.4936007875953729, |
| "grad_norm": 0.9045255780220032, |
| "learning_rate": 4.927953216165486e-06, |
| "loss": 0.692, |
| "step": 1337 |
| }, |
| { |
| "epoch": 0.49396997292640904, |
| "grad_norm": 0.9406313896179199, |
| "learning_rate": 4.927837246655027e-06, |
| "loss": 0.7439, |
| "step": 1338 |
| }, |
| { |
| "epoch": 0.49433915825744523, |
| "grad_norm": 0.9328321218490601, |
| "learning_rate": 4.9277211852516945e-06, |
| "loss": 0.7221, |
| "step": 1339 |
| }, |
| { |
| "epoch": 0.4947083435884814, |
| "grad_norm": 0.9344122409820557, |
| "learning_rate": 4.927605031959882e-06, |
| "loss": 0.7485, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.4950775289195176, |
| "grad_norm": 0.8839752674102783, |
| "learning_rate": 4.9274887867839845e-06, |
| "loss": 0.7085, |
| "step": 1341 |
| }, |
| { |
| "epoch": 0.49544671425055375, |
| "grad_norm": 0.9064518809318542, |
| "learning_rate": 4.9273724497284025e-06, |
| "loss": 0.7123, |
| "step": 1342 |
| }, |
| { |
| "epoch": 0.49581589958158995, |
| "grad_norm": 0.8806946873664856, |
| "learning_rate": 4.9272560207975395e-06, |
| "loss": 0.7362, |
| "step": 1343 |
| }, |
| { |
| "epoch": 0.49618508491262614, |
| "grad_norm": 0.9120421409606934, |
| "learning_rate": 4.9271394999958025e-06, |
| "loss": 0.7271, |
| "step": 1344 |
| }, |
| { |
| "epoch": 0.49655427024366233, |
| "grad_norm": 0.8838410377502441, |
| "learning_rate": 4.927022887327601e-06, |
| "loss": 0.7167, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.4969234555746985, |
| "grad_norm": 0.8578704595565796, |
| "learning_rate": 4.926906182797349e-06, |
| "loss": 0.6924, |
| "step": 1346 |
| }, |
| { |
| "epoch": 0.49729264090573466, |
| "grad_norm": 0.8794922828674316, |
| "learning_rate": 4.9267893864094644e-06, |
| "loss": 0.6941, |
| "step": 1347 |
| }, |
| { |
| "epoch": 0.49766182623677085, |
| "grad_norm": 0.902854323387146, |
| "learning_rate": 4.926672498168368e-06, |
| "loss": 0.7546, |
| "step": 1348 |
| }, |
| { |
| "epoch": 0.49803101156780705, |
| "grad_norm": 0.9118044376373291, |
| "learning_rate": 4.926555518078482e-06, |
| "loss": 0.7321, |
| "step": 1349 |
| }, |
| { |
| "epoch": 0.49840019689884324, |
| "grad_norm": 0.9261218905448914, |
| "learning_rate": 4.926438446144237e-06, |
| "loss": 0.7313, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.4987693822298794, |
| "grad_norm": 0.887624979019165, |
| "learning_rate": 4.9263212823700616e-06, |
| "loss": 0.7024, |
| "step": 1351 |
| }, |
| { |
| "epoch": 0.49913856756091557, |
| "grad_norm": 0.899479329586029, |
| "learning_rate": 4.926204026760392e-06, |
| "loss": 0.7183, |
| "step": 1352 |
| }, |
| { |
| "epoch": 0.49950775289195176, |
| "grad_norm": 0.8791046142578125, |
| "learning_rate": 4.926086679319665e-06, |
| "loss": 0.7311, |
| "step": 1353 |
| }, |
| { |
| "epoch": 0.49987693822298795, |
| "grad_norm": 0.9053353667259216, |
| "learning_rate": 4.925969240052323e-06, |
| "loss": 0.7332, |
| "step": 1354 |
| }, |
| { |
| "epoch": 0.5002461235540241, |
| "grad_norm": 0.8757712244987488, |
| "learning_rate": 4.925851708962811e-06, |
| "loss": 0.748, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.5006153088850603, |
| "grad_norm": 0.9180270433425903, |
| "learning_rate": 4.925734086055578e-06, |
| "loss": 0.7304, |
| "step": 1356 |
| }, |
| { |
| "epoch": 0.5009844942160965, |
| "grad_norm": 0.8982318043708801, |
| "learning_rate": 4.9256163713350745e-06, |
| "loss": 0.7179, |
| "step": 1357 |
| }, |
| { |
| "epoch": 0.5013536795471326, |
| "grad_norm": 0.8590686321258545, |
| "learning_rate": 4.925498564805757e-06, |
| "loss": 0.6921, |
| "step": 1358 |
| }, |
| { |
| "epoch": 0.5017228648781689, |
| "grad_norm": 0.9188492894172668, |
| "learning_rate": 4.925380666472085e-06, |
| "loss": 0.719, |
| "step": 1359 |
| }, |
| { |
| "epoch": 0.502092050209205, |
| "grad_norm": 0.8929955363273621, |
| "learning_rate": 4.92526267633852e-06, |
| "loss": 0.7179, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.5024612355402412, |
| "grad_norm": 0.9286133646965027, |
| "learning_rate": 4.925144594409528e-06, |
| "loss": 0.7686, |
| "step": 1361 |
| }, |
| { |
| "epoch": 0.5028304208712774, |
| "grad_norm": 0.869183361530304, |
| "learning_rate": 4.925026420689579e-06, |
| "loss": 0.6961, |
| "step": 1362 |
| }, |
| { |
| "epoch": 0.5031996062023135, |
| "grad_norm": 0.8674918413162231, |
| "learning_rate": 4.924908155183145e-06, |
| "loss": 0.7365, |
| "step": 1363 |
| }, |
| { |
| "epoch": 0.5035687915333498, |
| "grad_norm": 0.9061448574066162, |
| "learning_rate": 4.924789797894701e-06, |
| "loss": 0.7142, |
| "step": 1364 |
| }, |
| { |
| "epoch": 0.5039379768643859, |
| "grad_norm": 0.9563679695129395, |
| "learning_rate": 4.924671348828731e-06, |
| "loss": 0.7383, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.5043071621954222, |
| "grad_norm": 0.8675025701522827, |
| "learning_rate": 4.924552807989715e-06, |
| "loss": 0.7392, |
| "step": 1366 |
| }, |
| { |
| "epoch": 0.5046763475264583, |
| "grad_norm": 0.8793214559555054, |
| "learning_rate": 4.9244341753821396e-06, |
| "loss": 0.7712, |
| "step": 1367 |
| }, |
| { |
| "epoch": 0.5050455328574944, |
| "grad_norm": 0.9035436511039734, |
| "learning_rate": 4.924315451010496e-06, |
| "loss": 0.7131, |
| "step": 1368 |
| }, |
| { |
| "epoch": 0.5054147181885307, |
| "grad_norm": 0.8869012594223022, |
| "learning_rate": 4.924196634879278e-06, |
| "loss": 0.73, |
| "step": 1369 |
| }, |
| { |
| "epoch": 0.5057839035195668, |
| "grad_norm": 0.9123469591140747, |
| "learning_rate": 4.9240777269929825e-06, |
| "loss": 0.7164, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.506153088850603, |
| "grad_norm": 0.8680907487869263, |
| "learning_rate": 4.923958727356109e-06, |
| "loss": 0.7002, |
| "step": 1371 |
| }, |
| { |
| "epoch": 0.5065222741816392, |
| "grad_norm": 0.9459185004234314, |
| "learning_rate": 4.923839635973165e-06, |
| "loss": 0.7234, |
| "step": 1372 |
| }, |
| { |
| "epoch": 0.5068914595126753, |
| "grad_norm": 0.8962308168411255, |
| "learning_rate": 4.923720452848653e-06, |
| "loss": 0.7349, |
| "step": 1373 |
| }, |
| { |
| "epoch": 0.5072606448437116, |
| "grad_norm": 0.8820279836654663, |
| "learning_rate": 4.92360117798709e-06, |
| "loss": 0.7304, |
| "step": 1374 |
| }, |
| { |
| "epoch": 0.5076298301747477, |
| "grad_norm": 0.8836473226547241, |
| "learning_rate": 4.923481811392985e-06, |
| "loss": 0.749, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.5079990155057839, |
| "grad_norm": 0.8512134552001953, |
| "learning_rate": 4.923362353070859e-06, |
| "loss": 0.7129, |
| "step": 1376 |
| }, |
| { |
| "epoch": 0.5083682008368201, |
| "grad_norm": 0.8974927663803101, |
| "learning_rate": 4.923242803025232e-06, |
| "loss": 0.6806, |
| "step": 1377 |
| }, |
| { |
| "epoch": 0.5087373861678562, |
| "grad_norm": 0.8919023275375366, |
| "learning_rate": 4.92312316126063e-06, |
| "loss": 0.7334, |
| "step": 1378 |
| }, |
| { |
| "epoch": 0.5091065714988925, |
| "grad_norm": 0.9181932210922241, |
| "learning_rate": 4.923003427781582e-06, |
| "loss": 0.7337, |
| "step": 1379 |
| }, |
| { |
| "epoch": 0.5094757568299286, |
| "grad_norm": 0.8906370997428894, |
| "learning_rate": 4.9228836025926185e-06, |
| "loss": 0.7356, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.5098449421609648, |
| "grad_norm": 1.0170215368270874, |
| "learning_rate": 4.922763685698275e-06, |
| "loss": 0.7247, |
| "step": 1381 |
| }, |
| { |
| "epoch": 0.510214127492001, |
| "grad_norm": 0.898322343826294, |
| "learning_rate": 4.922643677103091e-06, |
| "loss": 0.7254, |
| "step": 1382 |
| }, |
| { |
| "epoch": 0.5105833128230371, |
| "grad_norm": 0.9089271426200867, |
| "learning_rate": 4.922523576811607e-06, |
| "loss": 0.7618, |
| "step": 1383 |
| }, |
| { |
| "epoch": 0.5109524981540734, |
| "grad_norm": 0.9560837745666504, |
| "learning_rate": 4.922403384828373e-06, |
| "loss": 0.7461, |
| "step": 1384 |
| }, |
| { |
| "epoch": 0.5113216834851095, |
| "grad_norm": 0.9104019403457642, |
| "learning_rate": 4.922283101157933e-06, |
| "loss": 0.7622, |
| "step": 1385 |
| }, |
| { |
| "epoch": 0.5116908688161457, |
| "grad_norm": 0.8937442898750305, |
| "learning_rate": 4.922162725804843e-06, |
| "loss": 0.6968, |
| "step": 1386 |
| }, |
| { |
| "epoch": 0.5120600541471819, |
| "grad_norm": 0.9203770160675049, |
| "learning_rate": 4.922042258773658e-06, |
| "loss": 0.7144, |
| "step": 1387 |
| }, |
| { |
| "epoch": 0.5124292394782181, |
| "grad_norm": 0.9218546748161316, |
| "learning_rate": 4.921921700068938e-06, |
| "loss": 0.7849, |
| "step": 1388 |
| }, |
| { |
| "epoch": 0.5127984248092542, |
| "grad_norm": 0.9261901378631592, |
| "learning_rate": 4.921801049695246e-06, |
| "loss": 0.744, |
| "step": 1389 |
| }, |
| { |
| "epoch": 0.5131676101402904, |
| "grad_norm": 0.8870830535888672, |
| "learning_rate": 4.92168030765715e-06, |
| "loss": 0.719, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.5135367954713266, |
| "grad_norm": 0.8306862711906433, |
| "learning_rate": 4.921559473959217e-06, |
| "loss": 0.6566, |
| "step": 1391 |
| }, |
| { |
| "epoch": 0.5139059808023628, |
| "grad_norm": 0.9043039083480835, |
| "learning_rate": 4.921438548606022e-06, |
| "loss": 0.7366, |
| "step": 1392 |
| }, |
| { |
| "epoch": 0.514275166133399, |
| "grad_norm": 0.914897084236145, |
| "learning_rate": 4.921317531602143e-06, |
| "loss": 0.7372, |
| "step": 1393 |
| }, |
| { |
| "epoch": 0.5146443514644351, |
| "grad_norm": 0.8624985814094543, |
| "learning_rate": 4.921196422952159e-06, |
| "loss": 0.771, |
| "step": 1394 |
| }, |
| { |
| "epoch": 0.5150135367954713, |
| "grad_norm": 0.8990124464035034, |
| "learning_rate": 4.921075222660655e-06, |
| "loss": 0.7263, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.5153827221265075, |
| "grad_norm": 0.906343400478363, |
| "learning_rate": 4.920953930732217e-06, |
| "loss": 0.732, |
| "step": 1396 |
| }, |
| { |
| "epoch": 0.5157519074575437, |
| "grad_norm": 0.8977361917495728, |
| "learning_rate": 4.920832547171438e-06, |
| "loss": 0.7035, |
| "step": 1397 |
| }, |
| { |
| "epoch": 0.5161210927885799, |
| "grad_norm": 0.8892781138420105, |
| "learning_rate": 4.920711071982911e-06, |
| "loss": 0.7441, |
| "step": 1398 |
| }, |
| { |
| "epoch": 0.516490278119616, |
| "grad_norm": 0.8844077587127686, |
| "learning_rate": 4.920589505171234e-06, |
| "loss": 0.7602, |
| "step": 1399 |
| }, |
| { |
| "epoch": 0.5168594634506523, |
| "grad_norm": 0.9034737348556519, |
| "learning_rate": 4.9204678467410075e-06, |
| "loss": 0.7123, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.5172286487816884, |
| "grad_norm": 0.9292963147163391, |
| "learning_rate": 4.920346096696837e-06, |
| "loss": 0.7266, |
| "step": 1401 |
| }, |
| { |
| "epoch": 0.5175978341127246, |
| "grad_norm": 0.9953920245170593, |
| "learning_rate": 4.920224255043331e-06, |
| "loss": 0.757, |
| "step": 1402 |
| }, |
| { |
| "epoch": 0.5179670194437608, |
| "grad_norm": 0.8745222091674805, |
| "learning_rate": 4.9201023217851e-06, |
| "loss": 0.7326, |
| "step": 1403 |
| }, |
| { |
| "epoch": 0.5183362047747969, |
| "grad_norm": 0.9055063724517822, |
| "learning_rate": 4.919980296926761e-06, |
| "loss": 0.7127, |
| "step": 1404 |
| }, |
| { |
| "epoch": 0.5187053901058332, |
| "grad_norm": 0.9204325079917908, |
| "learning_rate": 4.91985818047293e-06, |
| "loss": 0.7741, |
| "step": 1405 |
| }, |
| { |
| "epoch": 0.5190745754368693, |
| "grad_norm": 0.9215565919876099, |
| "learning_rate": 4.919735972428232e-06, |
| "loss": 0.7451, |
| "step": 1406 |
| }, |
| { |
| "epoch": 0.5194437607679054, |
| "grad_norm": 0.8616822361946106, |
| "learning_rate": 4.919613672797291e-06, |
| "loss": 0.6946, |
| "step": 1407 |
| }, |
| { |
| "epoch": 0.5198129460989417, |
| "grad_norm": 0.8958667516708374, |
| "learning_rate": 4.919491281584736e-06, |
| "loss": 0.7137, |
| "step": 1408 |
| }, |
| { |
| "epoch": 0.5201821314299778, |
| "grad_norm": 0.9338605999946594, |
| "learning_rate": 4.919368798795199e-06, |
| "loss": 0.7135, |
| "step": 1409 |
| }, |
| { |
| "epoch": 0.5205513167610141, |
| "grad_norm": 0.9158616662025452, |
| "learning_rate": 4.919246224433317e-06, |
| "loss": 0.7253, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.5209205020920502, |
| "grad_norm": 0.9109562635421753, |
| "learning_rate": 4.919123558503729e-06, |
| "loss": 0.7315, |
| "step": 1411 |
| }, |
| { |
| "epoch": 0.5212896874230863, |
| "grad_norm": 0.8872061967849731, |
| "learning_rate": 4.919000801011078e-06, |
| "loss": 0.7384, |
| "step": 1412 |
| }, |
| { |
| "epoch": 0.5216588727541226, |
| "grad_norm": 0.852870523929596, |
| "learning_rate": 4.918877951960009e-06, |
| "loss": 0.6982, |
| "step": 1413 |
| }, |
| { |
| "epoch": 0.5220280580851587, |
| "grad_norm": 0.8745459914207458, |
| "learning_rate": 4.918755011355174e-06, |
| "loss": 0.7137, |
| "step": 1414 |
| }, |
| { |
| "epoch": 0.522397243416195, |
| "grad_norm": 0.8926501274108887, |
| "learning_rate": 4.918631979201225e-06, |
| "loss": 0.7493, |
| "step": 1415 |
| }, |
| { |
| "epoch": 0.5227664287472311, |
| "grad_norm": 0.9324433207511902, |
| "learning_rate": 4.918508855502819e-06, |
| "loss": 0.6959, |
| "step": 1416 |
| }, |
| { |
| "epoch": 0.5231356140782673, |
| "grad_norm": 0.9326198697090149, |
| "learning_rate": 4.918385640264615e-06, |
| "loss": 0.7399, |
| "step": 1417 |
| }, |
| { |
| "epoch": 0.5235047994093035, |
| "grad_norm": 0.893012285232544, |
| "learning_rate": 4.9182623334912796e-06, |
| "loss": 0.7212, |
| "step": 1418 |
| }, |
| { |
| "epoch": 0.5238739847403396, |
| "grad_norm": 0.8747656941413879, |
| "learning_rate": 4.918138935187478e-06, |
| "loss": 0.7471, |
| "step": 1419 |
| }, |
| { |
| "epoch": 0.5242431700713759, |
| "grad_norm": 0.8935147523880005, |
| "learning_rate": 4.91801544535788e-06, |
| "loss": 0.7155, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.524612355402412, |
| "grad_norm": 0.9232352375984192, |
| "learning_rate": 4.91789186400716e-06, |
| "loss": 0.7442, |
| "step": 1421 |
| }, |
| { |
| "epoch": 0.5249815407334482, |
| "grad_norm": 0.8894848227500916, |
| "learning_rate": 4.917768191139997e-06, |
| "loss": 0.7587, |
| "step": 1422 |
| }, |
| { |
| "epoch": 0.5253507260644844, |
| "grad_norm": 0.8878769278526306, |
| "learning_rate": 4.91764442676107e-06, |
| "loss": 0.7335, |
| "step": 1423 |
| }, |
| { |
| "epoch": 0.5257199113955205, |
| "grad_norm": 1.0391231775283813, |
| "learning_rate": 4.917520570875065e-06, |
| "loss": 0.7053, |
| "step": 1424 |
| }, |
| { |
| "epoch": 0.5260890967265567, |
| "grad_norm": 0.9046094417572021, |
| "learning_rate": 4.91739662348667e-06, |
| "loss": 0.74, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.5264582820575929, |
| "grad_norm": 0.8990216851234436, |
| "learning_rate": 4.917272584600575e-06, |
| "loss": 0.7539, |
| "step": 1426 |
| }, |
| { |
| "epoch": 0.5268274673886291, |
| "grad_norm": 0.876078724861145, |
| "learning_rate": 4.917148454221477e-06, |
| "loss": 0.7024, |
| "step": 1427 |
| }, |
| { |
| "epoch": 0.5271966527196653, |
| "grad_norm": 0.9383296966552734, |
| "learning_rate": 4.917024232354071e-06, |
| "loss": 0.706, |
| "step": 1428 |
| }, |
| { |
| "epoch": 0.5275658380507015, |
| "grad_norm": 0.9111572504043579, |
| "learning_rate": 4.916899919003062e-06, |
| "loss": 0.7463, |
| "step": 1429 |
| }, |
| { |
| "epoch": 0.5279350233817376, |
| "grad_norm": 0.9223730564117432, |
| "learning_rate": 4.916775514173153e-06, |
| "loss": 0.759, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.5283042087127738, |
| "grad_norm": 1.1159535646438599, |
| "learning_rate": 4.916651017869054e-06, |
| "loss": 0.7213, |
| "step": 1431 |
| }, |
| { |
| "epoch": 0.52867339404381, |
| "grad_norm": 0.8813338279724121, |
| "learning_rate": 4.9165264300954765e-06, |
| "loss": 0.7215, |
| "step": 1432 |
| }, |
| { |
| "epoch": 0.5290425793748462, |
| "grad_norm": 0.90069180727005, |
| "learning_rate": 4.916401750857136e-06, |
| "loss": 0.738, |
| "step": 1433 |
| }, |
| { |
| "epoch": 0.5294117647058824, |
| "grad_norm": 0.8849632740020752, |
| "learning_rate": 4.9162769801587515e-06, |
| "loss": 0.6921, |
| "step": 1434 |
| }, |
| { |
| "epoch": 0.5297809500369185, |
| "grad_norm": 0.9068708419799805, |
| "learning_rate": 4.916152118005046e-06, |
| "loss": 0.7209, |
| "step": 1435 |
| }, |
| { |
| "epoch": 0.5301501353679547, |
| "grad_norm": 0.8788166046142578, |
| "learning_rate": 4.916027164400746e-06, |
| "loss": 0.6902, |
| "step": 1436 |
| }, |
| { |
| "epoch": 0.5305193206989909, |
| "grad_norm": 0.8957372903823853, |
| "learning_rate": 4.9159021193505806e-06, |
| "loss": 0.7313, |
| "step": 1437 |
| }, |
| { |
| "epoch": 0.530888506030027, |
| "grad_norm": 0.9063811898231506, |
| "learning_rate": 4.915776982859282e-06, |
| "loss": 0.7097, |
| "step": 1438 |
| }, |
| { |
| "epoch": 0.5312576913610633, |
| "grad_norm": 0.8811819553375244, |
| "learning_rate": 4.9156517549315875e-06, |
| "loss": 0.7085, |
| "step": 1439 |
| }, |
| { |
| "epoch": 0.5316268766920994, |
| "grad_norm": 0.9121610522270203, |
| "learning_rate": 4.915526435572235e-06, |
| "loss": 0.753, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.5319960620231357, |
| "grad_norm": 0.8891005516052246, |
| "learning_rate": 4.915401024785971e-06, |
| "loss": 0.7245, |
| "step": 1441 |
| }, |
| { |
| "epoch": 0.5323652473541718, |
| "grad_norm": 0.9231831431388855, |
| "learning_rate": 4.915275522577539e-06, |
| "loss": 0.7305, |
| "step": 1442 |
| }, |
| { |
| "epoch": 0.5327344326852079, |
| "grad_norm": 0.9318893551826477, |
| "learning_rate": 4.915149928951693e-06, |
| "loss": 0.7543, |
| "step": 1443 |
| }, |
| { |
| "epoch": 0.5331036180162442, |
| "grad_norm": 0.8818265199661255, |
| "learning_rate": 4.915024243913182e-06, |
| "loss": 0.6775, |
| "step": 1444 |
| }, |
| { |
| "epoch": 0.5334728033472803, |
| "grad_norm": 0.8918500542640686, |
| "learning_rate": 4.9148984674667675e-06, |
| "loss": 0.7264, |
| "step": 1445 |
| }, |
| { |
| "epoch": 0.5338419886783166, |
| "grad_norm": 0.90740966796875, |
| "learning_rate": 4.914772599617207e-06, |
| "loss": 0.6985, |
| "step": 1446 |
| }, |
| { |
| "epoch": 0.5342111740093527, |
| "grad_norm": 0.8859344720840454, |
| "learning_rate": 4.914646640369266e-06, |
| "loss": 0.7361, |
| "step": 1447 |
| }, |
| { |
| "epoch": 0.5345803593403888, |
| "grad_norm": 0.9221896529197693, |
| "learning_rate": 4.914520589727712e-06, |
| "loss": 0.7234, |
| "step": 1448 |
| }, |
| { |
| "epoch": 0.5349495446714251, |
| "grad_norm": 0.925670862197876, |
| "learning_rate": 4.9143944476973146e-06, |
| "loss": 0.7197, |
| "step": 1449 |
| }, |
| { |
| "epoch": 0.5353187300024612, |
| "grad_norm": 0.9094201326370239, |
| "learning_rate": 4.91426821428285e-06, |
| "loss": 0.7479, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.5356879153334975, |
| "grad_norm": 0.8817557096481323, |
| "learning_rate": 4.914141889489095e-06, |
| "loss": 0.757, |
| "step": 1451 |
| }, |
| { |
| "epoch": 0.5360571006645336, |
| "grad_norm": 0.8657225966453552, |
| "learning_rate": 4.914015473320833e-06, |
| "loss": 0.7251, |
| "step": 1452 |
| }, |
| { |
| "epoch": 0.5364262859955697, |
| "grad_norm": 0.8747798204421997, |
| "learning_rate": 4.913888965782846e-06, |
| "loss": 0.7471, |
| "step": 1453 |
| }, |
| { |
| "epoch": 0.536795471326606, |
| "grad_norm": 0.898120105266571, |
| "learning_rate": 4.913762366879924e-06, |
| "loss": 0.7839, |
| "step": 1454 |
| }, |
| { |
| "epoch": 0.5371646566576421, |
| "grad_norm": 0.9788367748260498, |
| "learning_rate": 4.913635676616858e-06, |
| "loss": 0.7449, |
| "step": 1455 |
| }, |
| { |
| "epoch": 0.5375338419886783, |
| "grad_norm": 0.8859315514564514, |
| "learning_rate": 4.9135088949984425e-06, |
| "loss": 0.7056, |
| "step": 1456 |
| }, |
| { |
| "epoch": 0.5379030273197145, |
| "grad_norm": 0.8957391977310181, |
| "learning_rate": 4.913382022029478e-06, |
| "loss": 0.7059, |
| "step": 1457 |
| }, |
| { |
| "epoch": 0.5382722126507506, |
| "grad_norm": 0.8686297535896301, |
| "learning_rate": 4.913255057714765e-06, |
| "loss": 0.7012, |
| "step": 1458 |
| }, |
| { |
| "epoch": 0.5386413979817869, |
| "grad_norm": 0.8956363797187805, |
| "learning_rate": 4.913128002059111e-06, |
| "loss": 0.728, |
| "step": 1459 |
| }, |
| { |
| "epoch": 0.539010583312823, |
| "grad_norm": 0.8763745427131653, |
| "learning_rate": 4.913000855067323e-06, |
| "loss": 0.7409, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.5393797686438592, |
| "grad_norm": 0.9013528823852539, |
| "learning_rate": 4.912873616744213e-06, |
| "loss": 0.7157, |
| "step": 1461 |
| }, |
| { |
| "epoch": 0.5397489539748954, |
| "grad_norm": 0.9581606984138489, |
| "learning_rate": 4.9127462870945995e-06, |
| "loss": 0.719, |
| "step": 1462 |
| }, |
| { |
| "epoch": 0.5401181393059316, |
| "grad_norm": 0.8720983266830444, |
| "learning_rate": 4.912618866123301e-06, |
| "loss": 0.6927, |
| "step": 1463 |
| }, |
| { |
| "epoch": 0.5404873246369678, |
| "grad_norm": 0.8839109539985657, |
| "learning_rate": 4.912491353835138e-06, |
| "loss": 0.7265, |
| "step": 1464 |
| }, |
| { |
| "epoch": 0.5408565099680039, |
| "grad_norm": 0.9057873487472534, |
| "learning_rate": 4.91236375023494e-06, |
| "loss": 0.7321, |
| "step": 1465 |
| }, |
| { |
| "epoch": 0.5412256952990401, |
| "grad_norm": 0.8765888214111328, |
| "learning_rate": 4.912236055327535e-06, |
| "loss": 0.7096, |
| "step": 1466 |
| }, |
| { |
| "epoch": 0.5415948806300763, |
| "grad_norm": 0.9088855981826782, |
| "learning_rate": 4.912108269117757e-06, |
| "loss": 0.7158, |
| "step": 1467 |
| }, |
| { |
| "epoch": 0.5419640659611125, |
| "grad_norm": 0.9135664105415344, |
| "learning_rate": 4.911980391610442e-06, |
| "loss": 0.7148, |
| "step": 1468 |
| }, |
| { |
| "epoch": 0.5423332512921487, |
| "grad_norm": 0.8987833857536316, |
| "learning_rate": 4.91185242281043e-06, |
| "loss": 0.6955, |
| "step": 1469 |
| }, |
| { |
| "epoch": 0.5427024366231848, |
| "grad_norm": 0.890201985836029, |
| "learning_rate": 4.911724362722566e-06, |
| "loss": 0.7131, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.543071621954221, |
| "grad_norm": 0.9337875247001648, |
| "learning_rate": 4.911596211351695e-06, |
| "loss": 0.7518, |
| "step": 1471 |
| }, |
| { |
| "epoch": 0.5434408072852572, |
| "grad_norm": 0.9150261282920837, |
| "learning_rate": 4.911467968702669e-06, |
| "loss": 0.7283, |
| "step": 1472 |
| }, |
| { |
| "epoch": 0.5438099926162934, |
| "grad_norm": 0.9131171107292175, |
| "learning_rate": 4.911339634780341e-06, |
| "loss": 0.7292, |
| "step": 1473 |
| }, |
| { |
| "epoch": 0.5441791779473295, |
| "grad_norm": 0.9012570977210999, |
| "learning_rate": 4.91121120958957e-06, |
| "loss": 0.7185, |
| "step": 1474 |
| }, |
| { |
| "epoch": 0.5445483632783658, |
| "grad_norm": 0.9188559651374817, |
| "learning_rate": 4.9110826931352145e-06, |
| "loss": 0.7277, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.5449175486094019, |
| "grad_norm": 0.9295513033866882, |
| "learning_rate": 4.91095408542214e-06, |
| "loss": 0.7768, |
| "step": 1476 |
| }, |
| { |
| "epoch": 0.5452867339404381, |
| "grad_norm": 0.895024836063385, |
| "learning_rate": 4.910825386455215e-06, |
| "loss": 0.7565, |
| "step": 1477 |
| }, |
| { |
| "epoch": 0.5456559192714743, |
| "grad_norm": 0.8578177094459534, |
| "learning_rate": 4.91069659623931e-06, |
| "loss": 0.6794, |
| "step": 1478 |
| }, |
| { |
| "epoch": 0.5460251046025104, |
| "grad_norm": 0.8829843401908875, |
| "learning_rate": 4.9105677147792996e-06, |
| "loss": 0.6603, |
| "step": 1479 |
| }, |
| { |
| "epoch": 0.5463942899335467, |
| "grad_norm": 0.8859395980834961, |
| "learning_rate": 4.910438742080061e-06, |
| "loss": 0.7196, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.5467634752645828, |
| "grad_norm": 0.8851184844970703, |
| "learning_rate": 4.910309678146478e-06, |
| "loss": 0.7091, |
| "step": 1481 |
| }, |
| { |
| "epoch": 0.547132660595619, |
| "grad_norm": 0.8768782615661621, |
| "learning_rate": 4.910180522983434e-06, |
| "loss": 0.7177, |
| "step": 1482 |
| }, |
| { |
| "epoch": 0.5475018459266552, |
| "grad_norm": 0.8919864892959595, |
| "learning_rate": 4.910051276595818e-06, |
| "loss": 0.7121, |
| "step": 1483 |
| }, |
| { |
| "epoch": 0.5478710312576913, |
| "grad_norm": 0.9079790115356445, |
| "learning_rate": 4.909921938988521e-06, |
| "loss": 0.7072, |
| "step": 1484 |
| }, |
| { |
| "epoch": 0.5482402165887276, |
| "grad_norm": 0.9404414892196655, |
| "learning_rate": 4.90979251016644e-06, |
| "loss": 0.7396, |
| "step": 1485 |
| }, |
| { |
| "epoch": 0.5486094019197637, |
| "grad_norm": 0.8725743293762207, |
| "learning_rate": 4.909662990134473e-06, |
| "loss": 0.6942, |
| "step": 1486 |
| }, |
| { |
| "epoch": 0.5489785872508, |
| "grad_norm": 0.90342777967453, |
| "learning_rate": 4.909533378897522e-06, |
| "loss": 0.7341, |
| "step": 1487 |
| }, |
| { |
| "epoch": 0.5493477725818361, |
| "grad_norm": 0.9160227179527283, |
| "learning_rate": 4.909403676460494e-06, |
| "loss": 0.7173, |
| "step": 1488 |
| }, |
| { |
| "epoch": 0.5497169579128722, |
| "grad_norm": 0.8947250843048096, |
| "learning_rate": 4.909273882828296e-06, |
| "loss": 0.7126, |
| "step": 1489 |
| }, |
| { |
| "epoch": 0.5500861432439085, |
| "grad_norm": 0.889103889465332, |
| "learning_rate": 4.909143998005842e-06, |
| "loss": 0.6949, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.5504553285749446, |
| "grad_norm": 0.8343292474746704, |
| "learning_rate": 4.909014021998049e-06, |
| "loss": 0.698, |
| "step": 1491 |
| }, |
| { |
| "epoch": 0.5508245139059808, |
| "grad_norm": 0.8784751892089844, |
| "learning_rate": 4.908883954809834e-06, |
| "loss": 0.6957, |
| "step": 1492 |
| }, |
| { |
| "epoch": 0.551193699237017, |
| "grad_norm": 0.9363612532615662, |
| "learning_rate": 4.908753796446123e-06, |
| "loss": 0.7385, |
| "step": 1493 |
| }, |
| { |
| "epoch": 0.5515628845680531, |
| "grad_norm": 0.899426281452179, |
| "learning_rate": 4.908623546911841e-06, |
| "loss": 0.7354, |
| "step": 1494 |
| }, |
| { |
| "epoch": 0.5519320698990894, |
| "grad_norm": 1.1256046295166016, |
| "learning_rate": 4.908493206211917e-06, |
| "loss": 0.7554, |
| "step": 1495 |
| }, |
| { |
| "epoch": 0.5523012552301255, |
| "grad_norm": 0.8998042941093445, |
| "learning_rate": 4.908362774351286e-06, |
| "loss": 0.718, |
| "step": 1496 |
| }, |
| { |
| "epoch": 0.5526704405611617, |
| "grad_norm": 0.9059179425239563, |
| "learning_rate": 4.908232251334884e-06, |
| "loss": 0.7193, |
| "step": 1497 |
| }, |
| { |
| "epoch": 0.5530396258921979, |
| "grad_norm": 0.8895880579948425, |
| "learning_rate": 4.90810163716765e-06, |
| "loss": 0.7431, |
| "step": 1498 |
| }, |
| { |
| "epoch": 0.553408811223234, |
| "grad_norm": 0.8827221393585205, |
| "learning_rate": 4.907970931854531e-06, |
| "loss": 0.7553, |
| "step": 1499 |
| }, |
| { |
| "epoch": 0.5537779965542703, |
| "grad_norm": 0.8968391418457031, |
| "learning_rate": 4.9078401354004715e-06, |
| "loss": 0.7487, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.5541471818853064, |
| "grad_norm": 0.8841264247894287, |
| "learning_rate": 4.907709247810422e-06, |
| "loss": 0.7482, |
| "step": 1501 |
| }, |
| { |
| "epoch": 0.5545163672163426, |
| "grad_norm": 0.8847429156303406, |
| "learning_rate": 4.907578269089338e-06, |
| "loss": 0.7099, |
| "step": 1502 |
| }, |
| { |
| "epoch": 0.5548855525473788, |
| "grad_norm": 0.892648458480835, |
| "learning_rate": 4.9074471992421765e-06, |
| "loss": 0.7092, |
| "step": 1503 |
| }, |
| { |
| "epoch": 0.555254737878415, |
| "grad_norm": 0.9089244604110718, |
| "learning_rate": 4.907316038273899e-06, |
| "loss": 0.7395, |
| "step": 1504 |
| }, |
| { |
| "epoch": 0.5556239232094512, |
| "grad_norm": 0.9008041620254517, |
| "learning_rate": 4.9071847861894684e-06, |
| "loss": 0.7522, |
| "step": 1505 |
| }, |
| { |
| "epoch": 0.5559931085404873, |
| "grad_norm": 1.0230878591537476, |
| "learning_rate": 4.907053442993853e-06, |
| "loss": 0.7571, |
| "step": 1506 |
| }, |
| { |
| "epoch": 0.5563622938715235, |
| "grad_norm": 0.9059250950813293, |
| "learning_rate": 4.906922008692025e-06, |
| "loss": 0.7478, |
| "step": 1507 |
| }, |
| { |
| "epoch": 0.5567314792025597, |
| "grad_norm": 0.8838487863540649, |
| "learning_rate": 4.906790483288958e-06, |
| "loss": 0.7608, |
| "step": 1508 |
| }, |
| { |
| "epoch": 0.5571006645335959, |
| "grad_norm": 0.879643440246582, |
| "learning_rate": 4.906658866789632e-06, |
| "loss": 0.7373, |
| "step": 1509 |
| }, |
| { |
| "epoch": 0.557469849864632, |
| "grad_norm": 0.9180140495300293, |
| "learning_rate": 4.906527159199027e-06, |
| "loss": 0.7187, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.5578390351956682, |
| "grad_norm": 0.8671844005584717, |
| "learning_rate": 4.906395360522128e-06, |
| "loss": 0.6626, |
| "step": 1511 |
| }, |
| { |
| "epoch": 0.5582082205267044, |
| "grad_norm": 0.9056374430656433, |
| "learning_rate": 4.9062634707639235e-06, |
| "loss": 0.7523, |
| "step": 1512 |
| }, |
| { |
| "epoch": 0.5585774058577406, |
| "grad_norm": 0.8730549812316895, |
| "learning_rate": 4.9061314899294074e-06, |
| "loss": 0.7356, |
| "step": 1513 |
| }, |
| { |
| "epoch": 0.5589465911887768, |
| "grad_norm": 0.8778785467147827, |
| "learning_rate": 4.905999418023574e-06, |
| "loss": 0.7071, |
| "step": 1514 |
| }, |
| { |
| "epoch": 0.5593157765198129, |
| "grad_norm": 0.8926696181297302, |
| "learning_rate": 4.905867255051421e-06, |
| "loss": 0.6818, |
| "step": 1515 |
| }, |
| { |
| "epoch": 0.5596849618508491, |
| "grad_norm": 0.8964526057243347, |
| "learning_rate": 4.905735001017952e-06, |
| "loss": 0.7376, |
| "step": 1516 |
| }, |
| { |
| "epoch": 0.5600541471818853, |
| "grad_norm": 0.8878997564315796, |
| "learning_rate": 4.905602655928172e-06, |
| "loss": 0.702, |
| "step": 1517 |
| }, |
| { |
| "epoch": 0.5604233325129215, |
| "grad_norm": 0.9055455327033997, |
| "learning_rate": 4.9054702197870905e-06, |
| "loss": 0.7591, |
| "step": 1518 |
| }, |
| { |
| "epoch": 0.5607925178439577, |
| "grad_norm": 0.8939942717552185, |
| "learning_rate": 4.9053376925997216e-06, |
| "loss": 0.7195, |
| "step": 1519 |
| }, |
| { |
| "epoch": 0.5611617031749938, |
| "grad_norm": 0.8912205696105957, |
| "learning_rate": 4.90520507437108e-06, |
| "loss": 0.7483, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.5615308885060301, |
| "grad_norm": 0.9732519388198853, |
| "learning_rate": 4.905072365106184e-06, |
| "loss": 0.7273, |
| "step": 1521 |
| }, |
| { |
| "epoch": 0.5619000738370662, |
| "grad_norm": 0.8627989888191223, |
| "learning_rate": 4.904939564810059e-06, |
| "loss": 0.7527, |
| "step": 1522 |
| }, |
| { |
| "epoch": 0.5622692591681024, |
| "grad_norm": 0.8785387277603149, |
| "learning_rate": 4.904806673487731e-06, |
| "loss": 0.7135, |
| "step": 1523 |
| }, |
| { |
| "epoch": 0.5626384444991386, |
| "grad_norm": 0.8822858929634094, |
| "learning_rate": 4.904673691144229e-06, |
| "loss": 0.725, |
| "step": 1524 |
| }, |
| { |
| "epoch": 0.5630076298301747, |
| "grad_norm": 0.8640886545181274, |
| "learning_rate": 4.904540617784587e-06, |
| "loss": 0.6923, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.563376815161211, |
| "grad_norm": 0.9395278096199036, |
| "learning_rate": 4.904407453413841e-06, |
| "loss": 0.7389, |
| "step": 1526 |
| }, |
| { |
| "epoch": 0.5637460004922471, |
| "grad_norm": 0.9424323439598083, |
| "learning_rate": 4.904274198037031e-06, |
| "loss": 0.7392, |
| "step": 1527 |
| }, |
| { |
| "epoch": 0.5641151858232832, |
| "grad_norm": 0.8736268281936646, |
| "learning_rate": 4.904140851659203e-06, |
| "loss": 0.689, |
| "step": 1528 |
| }, |
| { |
| "epoch": 0.5644843711543195, |
| "grad_norm": 0.9197478294372559, |
| "learning_rate": 4.904007414285401e-06, |
| "loss": 0.7496, |
| "step": 1529 |
| }, |
| { |
| "epoch": 0.5648535564853556, |
| "grad_norm": 0.8863821029663086, |
| "learning_rate": 4.903873885920678e-06, |
| "loss": 0.7162, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.5652227418163919, |
| "grad_norm": 0.9655389189720154, |
| "learning_rate": 4.903740266570087e-06, |
| "loss": 0.735, |
| "step": 1531 |
| }, |
| { |
| "epoch": 0.565591927147428, |
| "grad_norm": 0.8864624500274658, |
| "learning_rate": 4.903606556238686e-06, |
| "loss": 0.7066, |
| "step": 1532 |
| }, |
| { |
| "epoch": 0.5659611124784641, |
| "grad_norm": 0.8712696433067322, |
| "learning_rate": 4.9034727549315344e-06, |
| "loss": 0.7257, |
| "step": 1533 |
| }, |
| { |
| "epoch": 0.5663302978095004, |
| "grad_norm": 0.902729868888855, |
| "learning_rate": 4.903338862653698e-06, |
| "loss": 0.744, |
| "step": 1534 |
| }, |
| { |
| "epoch": 0.5666994831405365, |
| "grad_norm": 0.9051229953765869, |
| "learning_rate": 4.903204879410245e-06, |
| "loss": 0.6945, |
| "step": 1535 |
| }, |
| { |
| "epoch": 0.5670686684715728, |
| "grad_norm": 0.9622679352760315, |
| "learning_rate": 4.9030708052062445e-06, |
| "loss": 0.7472, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.5674378538026089, |
| "grad_norm": 0.9077664017677307, |
| "learning_rate": 4.902936640046772e-06, |
| "loss": 0.719, |
| "step": 1537 |
| }, |
| { |
| "epoch": 0.567807039133645, |
| "grad_norm": 0.8758202195167542, |
| "learning_rate": 4.902802383936908e-06, |
| "loss": 0.7191, |
| "step": 1538 |
| }, |
| { |
| "epoch": 0.5681762244646813, |
| "grad_norm": 0.8584937453269958, |
| "learning_rate": 4.902668036881731e-06, |
| "loss": 0.723, |
| "step": 1539 |
| }, |
| { |
| "epoch": 0.5685454097957174, |
| "grad_norm": 0.8474022746086121, |
| "learning_rate": 4.902533598886327e-06, |
| "loss": 0.6828, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.5689145951267536, |
| "grad_norm": 0.8970612287521362, |
| "learning_rate": 4.902399069955784e-06, |
| "loss": 0.7298, |
| "step": 1541 |
| }, |
| { |
| "epoch": 0.5692837804577898, |
| "grad_norm": 0.8726117014884949, |
| "learning_rate": 4.9022644500951956e-06, |
| "loss": 0.7043, |
| "step": 1542 |
| }, |
| { |
| "epoch": 0.569652965788826, |
| "grad_norm": 0.8593499660491943, |
| "learning_rate": 4.902129739309655e-06, |
| "loss": 0.7272, |
| "step": 1543 |
| }, |
| { |
| "epoch": 0.5700221511198622, |
| "grad_norm": 0.9041122794151306, |
| "learning_rate": 4.901994937604263e-06, |
| "loss": 0.7057, |
| "step": 1544 |
| }, |
| { |
| "epoch": 0.5703913364508983, |
| "grad_norm": 0.9077982306480408, |
| "learning_rate": 4.90186004498412e-06, |
| "loss": 0.7139, |
| "step": 1545 |
| }, |
| { |
| "epoch": 0.5707605217819345, |
| "grad_norm": 0.9217522740364075, |
| "learning_rate": 4.9017250614543326e-06, |
| "loss": 0.7491, |
| "step": 1546 |
| }, |
| { |
| "epoch": 0.5711297071129707, |
| "grad_norm": 0.95450758934021, |
| "learning_rate": 4.901589987020009e-06, |
| "loss": 0.7225, |
| "step": 1547 |
| }, |
| { |
| "epoch": 0.5714988924440069, |
| "grad_norm": 0.8575482368469238, |
| "learning_rate": 4.9014548216862635e-06, |
| "loss": 0.7052, |
| "step": 1548 |
| }, |
| { |
| "epoch": 0.5718680777750431, |
| "grad_norm": 0.8921974897384644, |
| "learning_rate": 4.90131956545821e-06, |
| "loss": 0.7423, |
| "step": 1549 |
| }, |
| { |
| "epoch": 0.5722372631060793, |
| "grad_norm": 0.9220647811889648, |
| "learning_rate": 4.901184218340969e-06, |
| "loss": 0.779, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.5726064484371154, |
| "grad_norm": 0.9192151427268982, |
| "learning_rate": 4.901048780339662e-06, |
| "loss": 0.7373, |
| "step": 1551 |
| }, |
| { |
| "epoch": 0.5729756337681516, |
| "grad_norm": 0.8914799690246582, |
| "learning_rate": 4.900913251459418e-06, |
| "loss": 0.7436, |
| "step": 1552 |
| }, |
| { |
| "epoch": 0.5733448190991878, |
| "grad_norm": 0.9210816621780396, |
| "learning_rate": 4.9007776317053654e-06, |
| "loss": 0.7246, |
| "step": 1553 |
| }, |
| { |
| "epoch": 0.573714004430224, |
| "grad_norm": 0.8826539516448975, |
| "learning_rate": 4.900641921082636e-06, |
| "loss": 0.6702, |
| "step": 1554 |
| }, |
| { |
| "epoch": 0.5740831897612602, |
| "grad_norm": 0.9123632907867432, |
| "learning_rate": 4.9005061195963686e-06, |
| "loss": 0.7505, |
| "step": 1555 |
| }, |
| { |
| "epoch": 0.5744523750922963, |
| "grad_norm": 0.8475764989852905, |
| "learning_rate": 4.900370227251702e-06, |
| "loss": 0.7349, |
| "step": 1556 |
| }, |
| { |
| "epoch": 0.5748215604233325, |
| "grad_norm": 0.9372847080230713, |
| "learning_rate": 4.900234244053778e-06, |
| "loss": 0.6943, |
| "step": 1557 |
| }, |
| { |
| "epoch": 0.5751907457543687, |
| "grad_norm": 0.8899771571159363, |
| "learning_rate": 4.900098170007748e-06, |
| "loss": 0.6975, |
| "step": 1558 |
| }, |
| { |
| "epoch": 0.5755599310854048, |
| "grad_norm": 0.9169413447380066, |
| "learning_rate": 4.899962005118759e-06, |
| "loss": 0.7258, |
| "step": 1559 |
| }, |
| { |
| "epoch": 0.5759291164164411, |
| "grad_norm": 0.9141312837600708, |
| "learning_rate": 4.899825749391965e-06, |
| "loss": 0.7496, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.5762983017474772, |
| "grad_norm": 0.8956618905067444, |
| "learning_rate": 4.8996894028325234e-06, |
| "loss": 0.7375, |
| "step": 1561 |
| }, |
| { |
| "epoch": 0.5766674870785135, |
| "grad_norm": 0.8596277236938477, |
| "learning_rate": 4.899552965445596e-06, |
| "loss": 0.7099, |
| "step": 1562 |
| }, |
| { |
| "epoch": 0.5770366724095496, |
| "grad_norm": 0.887212336063385, |
| "learning_rate": 4.899416437236346e-06, |
| "loss": 0.7221, |
| "step": 1563 |
| }, |
| { |
| "epoch": 0.5774058577405857, |
| "grad_norm": 0.9066047668457031, |
| "learning_rate": 4.8992798182099415e-06, |
| "loss": 0.7132, |
| "step": 1564 |
| }, |
| { |
| "epoch": 0.577775043071622, |
| "grad_norm": 0.8738695979118347, |
| "learning_rate": 4.899143108371552e-06, |
| "loss": 0.7055, |
| "step": 1565 |
| }, |
| { |
| "epoch": 0.5781442284026581, |
| "grad_norm": 0.87455153465271, |
| "learning_rate": 4.899006307726354e-06, |
| "loss": 0.7169, |
| "step": 1566 |
| }, |
| { |
| "epoch": 0.5785134137336944, |
| "grad_norm": 0.8736885786056519, |
| "learning_rate": 4.898869416279524e-06, |
| "loss": 0.7305, |
| "step": 1567 |
| }, |
| { |
| "epoch": 0.5788825990647305, |
| "grad_norm": 0.8731737732887268, |
| "learning_rate": 4.8987324340362445e-06, |
| "loss": 0.771, |
| "step": 1568 |
| }, |
| { |
| "epoch": 0.5792517843957666, |
| "grad_norm": 0.8779594898223877, |
| "learning_rate": 4.898595361001698e-06, |
| "loss": 0.7234, |
| "step": 1569 |
| }, |
| { |
| "epoch": 0.5796209697268029, |
| "grad_norm": 0.8702481985092163, |
| "learning_rate": 4.898458197181075e-06, |
| "loss": 0.728, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.579990155057839, |
| "grad_norm": 0.9149512052536011, |
| "learning_rate": 4.898320942579566e-06, |
| "loss": 0.7651, |
| "step": 1571 |
| }, |
| { |
| "epoch": 0.5803593403888753, |
| "grad_norm": 0.8694207072257996, |
| "learning_rate": 4.898183597202366e-06, |
| "loss": 0.7109, |
| "step": 1572 |
| }, |
| { |
| "epoch": 0.5807285257199114, |
| "grad_norm": 0.9014127850532532, |
| "learning_rate": 4.898046161054674e-06, |
| "loss": 0.7631, |
| "step": 1573 |
| }, |
| { |
| "epoch": 0.5810977110509475, |
| "grad_norm": 0.889441967010498, |
| "learning_rate": 4.897908634141692e-06, |
| "loss": 0.6963, |
| "step": 1574 |
| }, |
| { |
| "epoch": 0.5814668963819838, |
| "grad_norm": 0.8994700908660889, |
| "learning_rate": 4.897771016468624e-06, |
| "loss": 0.718, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.5818360817130199, |
| "grad_norm": 0.877490758895874, |
| "learning_rate": 4.897633308040681e-06, |
| "loss": 0.7188, |
| "step": 1576 |
| }, |
| { |
| "epoch": 0.5822052670440561, |
| "grad_norm": 0.8843386769294739, |
| "learning_rate": 4.8974955088630736e-06, |
| "loss": 0.7314, |
| "step": 1577 |
| }, |
| { |
| "epoch": 0.5825744523750923, |
| "grad_norm": 0.8549126386642456, |
| "learning_rate": 4.897357618941017e-06, |
| "loss": 0.7254, |
| "step": 1578 |
| }, |
| { |
| "epoch": 0.5829436377061284, |
| "grad_norm": 0.8546504378318787, |
| "learning_rate": 4.897219638279732e-06, |
| "loss": 0.737, |
| "step": 1579 |
| }, |
| { |
| "epoch": 0.5833128230371647, |
| "grad_norm": 0.8920966386795044, |
| "learning_rate": 4.89708156688444e-06, |
| "loss": 0.7096, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.5836820083682008, |
| "grad_norm": 0.8906827569007874, |
| "learning_rate": 4.896943404760368e-06, |
| "loss": 0.7314, |
| "step": 1581 |
| }, |
| { |
| "epoch": 0.584051193699237, |
| "grad_norm": 0.8761510252952576, |
| "learning_rate": 4.896805151912743e-06, |
| "loss": 0.6968, |
| "step": 1582 |
| }, |
| { |
| "epoch": 0.5844203790302732, |
| "grad_norm": 0.9379798173904419, |
| "learning_rate": 4.896666808346801e-06, |
| "loss": 0.7284, |
| "step": 1583 |
| }, |
| { |
| "epoch": 0.5847895643613094, |
| "grad_norm": 0.8771979212760925, |
| "learning_rate": 4.8965283740677765e-06, |
| "loss": 0.7416, |
| "step": 1584 |
| }, |
| { |
| "epoch": 0.5851587496923456, |
| "grad_norm": 0.8932775259017944, |
| "learning_rate": 4.896389849080908e-06, |
| "loss": 0.7203, |
| "step": 1585 |
| }, |
| { |
| "epoch": 0.5855279350233817, |
| "grad_norm": 0.9140869975090027, |
| "learning_rate": 4.8962512333914415e-06, |
| "loss": 0.7585, |
| "step": 1586 |
| }, |
| { |
| "epoch": 0.5858971203544179, |
| "grad_norm": 0.9230924844741821, |
| "learning_rate": 4.896112527004621e-06, |
| "loss": 0.7134, |
| "step": 1587 |
| }, |
| { |
| "epoch": 0.5862663056854541, |
| "grad_norm": 0.9974596500396729, |
| "learning_rate": 4.895973729925698e-06, |
| "loss": 0.6977, |
| "step": 1588 |
| }, |
| { |
| "epoch": 0.5866354910164903, |
| "grad_norm": 0.8764031529426575, |
| "learning_rate": 4.8958348421599255e-06, |
| "loss": 0.7449, |
| "step": 1589 |
| }, |
| { |
| "epoch": 0.5870046763475265, |
| "grad_norm": 0.9063859581947327, |
| "learning_rate": 4.895695863712561e-06, |
| "loss": 0.7296, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.5873738616785626, |
| "grad_norm": 0.9148220419883728, |
| "learning_rate": 4.895556794588864e-06, |
| "loss": 0.7396, |
| "step": 1591 |
| }, |
| { |
| "epoch": 0.5877430470095988, |
| "grad_norm": 0.8891001343727112, |
| "learning_rate": 4.895417634794098e-06, |
| "loss": 0.6993, |
| "step": 1592 |
| }, |
| { |
| "epoch": 0.588112232340635, |
| "grad_norm": 0.8979329466819763, |
| "learning_rate": 4.89527838433353e-06, |
| "loss": 0.7306, |
| "step": 1593 |
| }, |
| { |
| "epoch": 0.5884814176716712, |
| "grad_norm": 0.9050261974334717, |
| "learning_rate": 4.895139043212432e-06, |
| "loss": 0.7339, |
| "step": 1594 |
| }, |
| { |
| "epoch": 0.5888506030027073, |
| "grad_norm": 0.8708974719047546, |
| "learning_rate": 4.894999611436076e-06, |
| "loss": 0.7163, |
| "step": 1595 |
| }, |
| { |
| "epoch": 0.5892197883337436, |
| "grad_norm": 0.9276278018951416, |
| "learning_rate": 4.894860089009742e-06, |
| "loss": 0.728, |
| "step": 1596 |
| }, |
| { |
| "epoch": 0.5895889736647797, |
| "grad_norm": 1.0377492904663086, |
| "learning_rate": 4.894720475938709e-06, |
| "loss": 0.719, |
| "step": 1597 |
| }, |
| { |
| "epoch": 0.5899581589958159, |
| "grad_norm": 0.8987076282501221, |
| "learning_rate": 4.894580772228261e-06, |
| "loss": 0.7321, |
| "step": 1598 |
| }, |
| { |
| "epoch": 0.5903273443268521, |
| "grad_norm": 0.9341242909431458, |
| "learning_rate": 4.8944409778836874e-06, |
| "loss": 0.7379, |
| "step": 1599 |
| }, |
| { |
| "epoch": 0.5906965296578882, |
| "grad_norm": 0.9609735012054443, |
| "learning_rate": 4.894301092910278e-06, |
| "loss": 0.7361, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.5910657149889245, |
| "grad_norm": 0.8665148615837097, |
| "learning_rate": 4.8941611173133285e-06, |
| "loss": 0.7229, |
| "step": 1601 |
| }, |
| { |
| "epoch": 0.5914349003199606, |
| "grad_norm": 0.9139685034751892, |
| "learning_rate": 4.894021051098136e-06, |
| "loss": 0.7416, |
| "step": 1602 |
| }, |
| { |
| "epoch": 0.5918040856509968, |
| "grad_norm": 0.9231698513031006, |
| "learning_rate": 4.893880894270002e-06, |
| "loss": 0.7051, |
| "step": 1603 |
| }, |
| { |
| "epoch": 0.592173270982033, |
| "grad_norm": 0.8691010475158691, |
| "learning_rate": 4.893740646834232e-06, |
| "loss": 0.699, |
| "step": 1604 |
| }, |
| { |
| "epoch": 0.5925424563130691, |
| "grad_norm": 0.9221158623695374, |
| "learning_rate": 4.893600308796134e-06, |
| "loss": 0.745, |
| "step": 1605 |
| }, |
| { |
| "epoch": 0.5929116416441054, |
| "grad_norm": 0.86871337890625, |
| "learning_rate": 4.893459880161019e-06, |
| "loss": 0.7578, |
| "step": 1606 |
| }, |
| { |
| "epoch": 0.5932808269751415, |
| "grad_norm": 0.8679521679878235, |
| "learning_rate": 4.893319360934203e-06, |
| "loss": 0.728, |
| "step": 1607 |
| }, |
| { |
| "epoch": 0.5936500123061778, |
| "grad_norm": 0.9038932919502258, |
| "learning_rate": 4.893178751121006e-06, |
| "loss": 0.7258, |
| "step": 1608 |
| }, |
| { |
| "epoch": 0.5940191976372139, |
| "grad_norm": 0.890328049659729, |
| "learning_rate": 4.893038050726747e-06, |
| "loss": 0.7208, |
| "step": 1609 |
| }, |
| { |
| "epoch": 0.59438838296825, |
| "grad_norm": 0.9175987243652344, |
| "learning_rate": 4.892897259756753e-06, |
| "loss": 0.7468, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.5947575682992863, |
| "grad_norm": 0.9225278496742249, |
| "learning_rate": 4.892756378216354e-06, |
| "loss": 0.7379, |
| "step": 1611 |
| }, |
| { |
| "epoch": 0.5951267536303224, |
| "grad_norm": 0.8708120584487915, |
| "learning_rate": 4.8926154061108814e-06, |
| "loss": 0.7159, |
| "step": 1612 |
| }, |
| { |
| "epoch": 0.5954959389613586, |
| "grad_norm": 0.8939933776855469, |
| "learning_rate": 4.89247434344567e-06, |
| "loss": 0.7274, |
| "step": 1613 |
| }, |
| { |
| "epoch": 0.5958651242923948, |
| "grad_norm": 0.9112115502357483, |
| "learning_rate": 4.8923331902260604e-06, |
| "loss": 0.74, |
| "step": 1614 |
| }, |
| { |
| "epoch": 0.5962343096234309, |
| "grad_norm": 0.9555295705795288, |
| "learning_rate": 4.892191946457394e-06, |
| "loss": 0.7531, |
| "step": 1615 |
| }, |
| { |
| "epoch": 0.5966034949544672, |
| "grad_norm": 0.9047622680664062, |
| "learning_rate": 4.892050612145017e-06, |
| "loss": 0.763, |
| "step": 1616 |
| }, |
| { |
| "epoch": 0.5969726802855033, |
| "grad_norm": 0.9045296907424927, |
| "learning_rate": 4.8919091872942805e-06, |
| "loss": 0.6807, |
| "step": 1617 |
| }, |
| { |
| "epoch": 0.5973418656165395, |
| "grad_norm": 0.9008351564407349, |
| "learning_rate": 4.8917676719105355e-06, |
| "loss": 0.7378, |
| "step": 1618 |
| }, |
| { |
| "epoch": 0.5977110509475757, |
| "grad_norm": 0.8514592051506042, |
| "learning_rate": 4.891626065999139e-06, |
| "loss": 0.7158, |
| "step": 1619 |
| }, |
| { |
| "epoch": 0.5980802362786118, |
| "grad_norm": 0.9227151274681091, |
| "learning_rate": 4.8914843695654504e-06, |
| "loss": 0.7079, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.5984494216096481, |
| "grad_norm": 0.9016704559326172, |
| "learning_rate": 4.891342582614834e-06, |
| "loss": 0.7307, |
| "step": 1621 |
| }, |
| { |
| "epoch": 0.5988186069406842, |
| "grad_norm": 0.8876495957374573, |
| "learning_rate": 4.891200705152654e-06, |
| "loss": 0.7375, |
| "step": 1622 |
| }, |
| { |
| "epoch": 0.5991877922717204, |
| "grad_norm": 0.8613106608390808, |
| "learning_rate": 4.891058737184284e-06, |
| "loss": 0.7055, |
| "step": 1623 |
| }, |
| { |
| "epoch": 0.5995569776027566, |
| "grad_norm": 0.9127830862998962, |
| "learning_rate": 4.890916678715094e-06, |
| "loss": 0.7439, |
| "step": 1624 |
| }, |
| { |
| "epoch": 0.5999261629337928, |
| "grad_norm": 0.8781334161758423, |
| "learning_rate": 4.890774529750463e-06, |
| "loss": 0.7034, |
| "step": 1625 |
| }, |
| { |
| "epoch": 0.600295348264829, |
| "grad_norm": 0.8634480834007263, |
| "learning_rate": 4.890632290295771e-06, |
| "loss": 0.7129, |
| "step": 1626 |
| }, |
| { |
| "epoch": 0.6006645335958651, |
| "grad_norm": 0.8753401637077332, |
| "learning_rate": 4.8904899603564e-06, |
| "loss": 0.71, |
| "step": 1627 |
| }, |
| { |
| "epoch": 0.6010337189269013, |
| "grad_norm": 0.9049730896949768, |
| "learning_rate": 4.890347539937739e-06, |
| "loss": 0.7161, |
| "step": 1628 |
| }, |
| { |
| "epoch": 0.6014029042579375, |
| "grad_norm": 0.8901035785675049, |
| "learning_rate": 4.890205029045179e-06, |
| "loss": 0.7126, |
| "step": 1629 |
| }, |
| { |
| "epoch": 0.6017720895889737, |
| "grad_norm": 0.9000959396362305, |
| "learning_rate": 4.890062427684111e-06, |
| "loss": 0.6685, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.6021412749200098, |
| "grad_norm": 0.8935882449150085, |
| "learning_rate": 4.889919735859936e-06, |
| "loss": 0.693, |
| "step": 1631 |
| }, |
| { |
| "epoch": 0.602510460251046, |
| "grad_norm": 0.8670996427536011, |
| "learning_rate": 4.8897769535780525e-06, |
| "loss": 0.725, |
| "step": 1632 |
| }, |
| { |
| "epoch": 0.6028796455820822, |
| "grad_norm": 0.8828466534614563, |
| "learning_rate": 4.889634080843866e-06, |
| "loss": 0.7506, |
| "step": 1633 |
| }, |
| { |
| "epoch": 0.6032488309131184, |
| "grad_norm": 0.8849684596061707, |
| "learning_rate": 4.889491117662783e-06, |
| "loss": 0.7249, |
| "step": 1634 |
| }, |
| { |
| "epoch": 0.6036180162441546, |
| "grad_norm": 0.8891832828521729, |
| "learning_rate": 4.889348064040217e-06, |
| "loss": 0.7206, |
| "step": 1635 |
| }, |
| { |
| "epoch": 0.6039872015751907, |
| "grad_norm": 0.8497231602668762, |
| "learning_rate": 4.889204919981579e-06, |
| "loss": 0.6812, |
| "step": 1636 |
| }, |
| { |
| "epoch": 0.604356386906227, |
| "grad_norm": 0.872078537940979, |
| "learning_rate": 4.88906168549229e-06, |
| "loss": 0.7234, |
| "step": 1637 |
| }, |
| { |
| "epoch": 0.6047255722372631, |
| "grad_norm": 0.8662791848182678, |
| "learning_rate": 4.88891836057777e-06, |
| "loss": 0.7222, |
| "step": 1638 |
| }, |
| { |
| "epoch": 0.6050947575682993, |
| "grad_norm": 0.8954866528511047, |
| "learning_rate": 4.888774945243444e-06, |
| "loss": 0.7175, |
| "step": 1639 |
| }, |
| { |
| "epoch": 0.6054639428993355, |
| "grad_norm": 0.9128788709640503, |
| "learning_rate": 4.8886314394947396e-06, |
| "loss": 0.7185, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.6058331282303716, |
| "grad_norm": 0.9027896523475647, |
| "learning_rate": 4.888487843337089e-06, |
| "loss": 0.7559, |
| "step": 1641 |
| }, |
| { |
| "epoch": 0.6062023135614079, |
| "grad_norm": 0.868640124797821, |
| "learning_rate": 4.888344156775928e-06, |
| "loss": 0.7144, |
| "step": 1642 |
| }, |
| { |
| "epoch": 0.606571498892444, |
| "grad_norm": 0.9141109585762024, |
| "learning_rate": 4.888200379816695e-06, |
| "loss": 0.7381, |
| "step": 1643 |
| }, |
| { |
| "epoch": 0.6069406842234802, |
| "grad_norm": 0.875461757183075, |
| "learning_rate": 4.88805651246483e-06, |
| "loss": 0.7539, |
| "step": 1644 |
| }, |
| { |
| "epoch": 0.6073098695545164, |
| "grad_norm": 0.9165515303611755, |
| "learning_rate": 4.887912554725781e-06, |
| "loss": 0.7278, |
| "step": 1645 |
| }, |
| { |
| "epoch": 0.6076790548855525, |
| "grad_norm": 0.8719481825828552, |
| "learning_rate": 4.887768506604995e-06, |
| "loss": 0.7323, |
| "step": 1646 |
| }, |
| { |
| "epoch": 0.6080482402165888, |
| "grad_norm": 0.8756598830223083, |
| "learning_rate": 4.887624368107924e-06, |
| "loss": 0.7104, |
| "step": 1647 |
| }, |
| { |
| "epoch": 0.6084174255476249, |
| "grad_norm": 0.9028515219688416, |
| "learning_rate": 4.887480139240025e-06, |
| "loss": 0.731, |
| "step": 1648 |
| }, |
| { |
| "epoch": 0.608786610878661, |
| "grad_norm": 0.8773224353790283, |
| "learning_rate": 4.887335820006756e-06, |
| "loss": 0.7058, |
| "step": 1649 |
| }, |
| { |
| "epoch": 0.6091557962096973, |
| "grad_norm": 0.9026007652282715, |
| "learning_rate": 4.887191410413579e-06, |
| "loss": 0.6913, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.6095249815407334, |
| "grad_norm": 0.8898575305938721, |
| "learning_rate": 4.887046910465961e-06, |
| "loss": 0.7041, |
| "step": 1651 |
| }, |
| { |
| "epoch": 0.6098941668717697, |
| "grad_norm": 0.9111222624778748, |
| "learning_rate": 4.886902320169371e-06, |
| "loss": 0.7135, |
| "step": 1652 |
| }, |
| { |
| "epoch": 0.6102633522028058, |
| "grad_norm": 0.9058326482772827, |
| "learning_rate": 4.886757639529282e-06, |
| "loss": 0.6976, |
| "step": 1653 |
| }, |
| { |
| "epoch": 0.610632537533842, |
| "grad_norm": 0.8757637739181519, |
| "learning_rate": 4.886612868551168e-06, |
| "loss": 0.7507, |
| "step": 1654 |
| }, |
| { |
| "epoch": 0.6110017228648782, |
| "grad_norm": 0.910811722278595, |
| "learning_rate": 4.886468007240511e-06, |
| "loss": 0.757, |
| "step": 1655 |
| }, |
| { |
| "epoch": 0.6113709081959143, |
| "grad_norm": 0.897999107837677, |
| "learning_rate": 4.886323055602793e-06, |
| "loss": 0.7752, |
| "step": 1656 |
| }, |
| { |
| "epoch": 0.6117400935269506, |
| "grad_norm": 0.8987488746643066, |
| "learning_rate": 4.886178013643501e-06, |
| "loss": 0.7045, |
| "step": 1657 |
| }, |
| { |
| "epoch": 0.6121092788579867, |
| "grad_norm": 0.8889223337173462, |
| "learning_rate": 4.886032881368124e-06, |
| "loss": 0.6935, |
| "step": 1658 |
| }, |
| { |
| "epoch": 0.6124784641890229, |
| "grad_norm": 0.8837577700614929, |
| "learning_rate": 4.885887658782156e-06, |
| "loss": 0.6639, |
| "step": 1659 |
| }, |
| { |
| "epoch": 0.6128476495200591, |
| "grad_norm": 0.8779164552688599, |
| "learning_rate": 4.8857423458910925e-06, |
| "loss": 0.7181, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.6132168348510952, |
| "grad_norm": 0.9047713279724121, |
| "learning_rate": 4.885596942700434e-06, |
| "loss": 0.7417, |
| "step": 1661 |
| }, |
| { |
| "epoch": 0.6135860201821314, |
| "grad_norm": 0.8854183554649353, |
| "learning_rate": 4.885451449215685e-06, |
| "loss": 0.7511, |
| "step": 1662 |
| }, |
| { |
| "epoch": 0.6139552055131676, |
| "grad_norm": 0.9122277498245239, |
| "learning_rate": 4.88530586544235e-06, |
| "loss": 0.7472, |
| "step": 1663 |
| }, |
| { |
| "epoch": 0.6143243908442038, |
| "grad_norm": 0.8803077340126038, |
| "learning_rate": 4.885160191385942e-06, |
| "loss": 0.7052, |
| "step": 1664 |
| }, |
| { |
| "epoch": 0.61469357617524, |
| "grad_norm": 0.878976047039032, |
| "learning_rate": 4.885014427051973e-06, |
| "loss": 0.7416, |
| "step": 1665 |
| }, |
| { |
| "epoch": 0.6150627615062761, |
| "grad_norm": 0.8740445375442505, |
| "learning_rate": 4.884868572445961e-06, |
| "loss": 0.6892, |
| "step": 1666 |
| }, |
| { |
| "epoch": 0.6154319468373123, |
| "grad_norm": 0.8993122577667236, |
| "learning_rate": 4.884722627573426e-06, |
| "loss": 0.7153, |
| "step": 1667 |
| }, |
| { |
| "epoch": 0.6158011321683485, |
| "grad_norm": 0.8919605016708374, |
| "learning_rate": 4.884576592439893e-06, |
| "loss": 0.7189, |
| "step": 1668 |
| }, |
| { |
| "epoch": 0.6161703174993847, |
| "grad_norm": 0.8694654107093811, |
| "learning_rate": 4.884430467050887e-06, |
| "loss": 0.7068, |
| "step": 1669 |
| }, |
| { |
| "epoch": 0.6165395028304209, |
| "grad_norm": 0.8842293620109558, |
| "learning_rate": 4.884284251411941e-06, |
| "loss": 0.7464, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.616908688161457, |
| "grad_norm": 0.9038980007171631, |
| "learning_rate": 4.884137945528589e-06, |
| "loss": 0.7447, |
| "step": 1671 |
| }, |
| { |
| "epoch": 0.6172778734924932, |
| "grad_norm": 0.8944399356842041, |
| "learning_rate": 4.883991549406368e-06, |
| "loss": 0.7608, |
| "step": 1672 |
| }, |
| { |
| "epoch": 0.6176470588235294, |
| "grad_norm": 0.8438637256622314, |
| "learning_rate": 4.883845063050819e-06, |
| "loss": 0.7548, |
| "step": 1673 |
| }, |
| { |
| "epoch": 0.6180162441545656, |
| "grad_norm": 0.8841381669044495, |
| "learning_rate": 4.883698486467487e-06, |
| "loss": 0.7395, |
| "step": 1674 |
| }, |
| { |
| "epoch": 0.6183854294856018, |
| "grad_norm": 0.8783007860183716, |
| "learning_rate": 4.883551819661919e-06, |
| "loss": 0.7002, |
| "step": 1675 |
| }, |
| { |
| "epoch": 0.618754614816638, |
| "grad_norm": 0.8673411011695862, |
| "learning_rate": 4.883405062639668e-06, |
| "loss": 0.6778, |
| "step": 1676 |
| }, |
| { |
| "epoch": 0.6191238001476741, |
| "grad_norm": 0.9229005575180054, |
| "learning_rate": 4.883258215406287e-06, |
| "loss": 0.7444, |
| "step": 1677 |
| }, |
| { |
| "epoch": 0.6194929854787103, |
| "grad_norm": 0.8770948052406311, |
| "learning_rate": 4.883111277967334e-06, |
| "loss": 0.7174, |
| "step": 1678 |
| }, |
| { |
| "epoch": 0.6198621708097465, |
| "grad_norm": 0.8880107998847961, |
| "learning_rate": 4.882964250328373e-06, |
| "loss": 0.7353, |
| "step": 1679 |
| }, |
| { |
| "epoch": 0.6202313561407826, |
| "grad_norm": 0.8703299760818481, |
| "learning_rate": 4.882817132494966e-06, |
| "loss": 0.7469, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.6206005414718189, |
| "grad_norm": 0.8967667818069458, |
| "learning_rate": 4.882669924472682e-06, |
| "loss": 0.702, |
| "step": 1681 |
| }, |
| { |
| "epoch": 0.620969726802855, |
| "grad_norm": 0.9066647291183472, |
| "learning_rate": 4.882522626267094e-06, |
| "loss": 0.6851, |
| "step": 1682 |
| }, |
| { |
| "epoch": 0.6213389121338913, |
| "grad_norm": 0.8500033020973206, |
| "learning_rate": 4.882375237883777e-06, |
| "loss": 0.74, |
| "step": 1683 |
| }, |
| { |
| "epoch": 0.6217080974649274, |
| "grad_norm": 0.9138725996017456, |
| "learning_rate": 4.882227759328308e-06, |
| "loss": 0.7305, |
| "step": 1684 |
| }, |
| { |
| "epoch": 0.6220772827959635, |
| "grad_norm": 0.8820671439170837, |
| "learning_rate": 4.882080190606271e-06, |
| "loss": 0.6959, |
| "step": 1685 |
| }, |
| { |
| "epoch": 0.6224464681269998, |
| "grad_norm": 0.8790507316589355, |
| "learning_rate": 4.881932531723251e-06, |
| "loss": 0.7276, |
| "step": 1686 |
| }, |
| { |
| "epoch": 0.6228156534580359, |
| "grad_norm": 0.880133330821991, |
| "learning_rate": 4.881784782684835e-06, |
| "loss": 0.7213, |
| "step": 1687 |
| }, |
| { |
| "epoch": 0.6231848387890722, |
| "grad_norm": 0.9163568615913391, |
| "learning_rate": 4.881636943496618e-06, |
| "loss": 0.7214, |
| "step": 1688 |
| }, |
| { |
| "epoch": 0.6235540241201083, |
| "grad_norm": 0.8676769733428955, |
| "learning_rate": 4.881489014164194e-06, |
| "loss": 0.7168, |
| "step": 1689 |
| }, |
| { |
| "epoch": 0.6239232094511444, |
| "grad_norm": 0.9037620425224304, |
| "learning_rate": 4.881340994693162e-06, |
| "loss": 0.7092, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.6242923947821807, |
| "grad_norm": 0.8854486346244812, |
| "learning_rate": 4.881192885089125e-06, |
| "loss": 0.6896, |
| "step": 1691 |
| }, |
| { |
| "epoch": 0.6246615801132168, |
| "grad_norm": 0.904160737991333, |
| "learning_rate": 4.88104468535769e-06, |
| "loss": 0.7349, |
| "step": 1692 |
| }, |
| { |
| "epoch": 0.6250307654442531, |
| "grad_norm": 0.8994008898735046, |
| "learning_rate": 4.880896395504464e-06, |
| "loss": 0.7331, |
| "step": 1693 |
| }, |
| { |
| "epoch": 0.6253999507752892, |
| "grad_norm": 0.8491187691688538, |
| "learning_rate": 4.8807480155350605e-06, |
| "loss": 0.7185, |
| "step": 1694 |
| }, |
| { |
| "epoch": 0.6257691361063253, |
| "grad_norm": 0.8684033155441284, |
| "learning_rate": 4.880599545455097e-06, |
| "loss": 0.7089, |
| "step": 1695 |
| }, |
| { |
| "epoch": 0.6261383214373616, |
| "grad_norm": 0.9083407521247864, |
| "learning_rate": 4.880450985270191e-06, |
| "loss": 0.7018, |
| "step": 1696 |
| }, |
| { |
| "epoch": 0.6265075067683977, |
| "grad_norm": 0.9744650721549988, |
| "learning_rate": 4.880302334985967e-06, |
| "loss": 0.7175, |
| "step": 1697 |
| }, |
| { |
| "epoch": 0.6268766920994339, |
| "grad_norm": 0.9108169078826904, |
| "learning_rate": 4.880153594608051e-06, |
| "loss": 0.7127, |
| "step": 1698 |
| }, |
| { |
| "epoch": 0.6272458774304701, |
| "grad_norm": 0.8855581879615784, |
| "learning_rate": 4.880004764142073e-06, |
| "loss": 0.7121, |
| "step": 1699 |
| }, |
| { |
| "epoch": 0.6276150627615062, |
| "grad_norm": 0.8942852020263672, |
| "learning_rate": 4.879855843593665e-06, |
| "loss": 0.6918, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.6279842480925425, |
| "grad_norm": 0.9338670969009399, |
| "learning_rate": 4.879706832968465e-06, |
| "loss": 0.7297, |
| "step": 1701 |
| }, |
| { |
| "epoch": 0.6283534334235786, |
| "grad_norm": 0.8706702589988708, |
| "learning_rate": 4.879557732272112e-06, |
| "loss": 0.6909, |
| "step": 1702 |
| }, |
| { |
| "epoch": 0.6287226187546148, |
| "grad_norm": 0.8873736262321472, |
| "learning_rate": 4.87940854151025e-06, |
| "loss": 0.7814, |
| "step": 1703 |
| }, |
| { |
| "epoch": 0.629091804085651, |
| "grad_norm": 0.8286136984825134, |
| "learning_rate": 4.879259260688526e-06, |
| "loss": 0.6852, |
| "step": 1704 |
| }, |
| { |
| "epoch": 0.6294609894166872, |
| "grad_norm": 0.8823638558387756, |
| "learning_rate": 4.879109889812589e-06, |
| "loss": 0.7283, |
| "step": 1705 |
| }, |
| { |
| "epoch": 0.6298301747477234, |
| "grad_norm": 0.8702864646911621, |
| "learning_rate": 4.878960428888094e-06, |
| "loss": 0.7124, |
| "step": 1706 |
| }, |
| { |
| "epoch": 0.6301993600787595, |
| "grad_norm": 0.877116858959198, |
| "learning_rate": 4.878810877920698e-06, |
| "loss": 0.734, |
| "step": 1707 |
| }, |
| { |
| "epoch": 0.6305685454097957, |
| "grad_norm": 0.8908865451812744, |
| "learning_rate": 4.878661236916061e-06, |
| "loss": 0.7197, |
| "step": 1708 |
| }, |
| { |
| "epoch": 0.6309377307408319, |
| "grad_norm": 0.8861148357391357, |
| "learning_rate": 4.878511505879846e-06, |
| "loss": 0.6981, |
| "step": 1709 |
| }, |
| { |
| "epoch": 0.6313069160718681, |
| "grad_norm": 0.8963793516159058, |
| "learning_rate": 4.8783616848177215e-06, |
| "loss": 0.7136, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.6316761014029043, |
| "grad_norm": 0.9019988775253296, |
| "learning_rate": 4.878211773735359e-06, |
| "loss": 0.7063, |
| "step": 1711 |
| }, |
| { |
| "epoch": 0.6320452867339404, |
| "grad_norm": 0.9044898748397827, |
| "learning_rate": 4.8780617726384305e-06, |
| "loss": 0.7491, |
| "step": 1712 |
| }, |
| { |
| "epoch": 0.6324144720649766, |
| "grad_norm": 0.8985450863838196, |
| "learning_rate": 4.877911681532614e-06, |
| "loss": 0.6868, |
| "step": 1713 |
| }, |
| { |
| "epoch": 0.6327836573960128, |
| "grad_norm": 0.931446373462677, |
| "learning_rate": 4.877761500423591e-06, |
| "loss": 0.7363, |
| "step": 1714 |
| }, |
| { |
| "epoch": 0.633152842727049, |
| "grad_norm": 0.916556715965271, |
| "learning_rate": 4.877611229317047e-06, |
| "loss": 0.773, |
| "step": 1715 |
| }, |
| { |
| "epoch": 0.6335220280580851, |
| "grad_norm": 0.8990119695663452, |
| "learning_rate": 4.877460868218667e-06, |
| "loss": 0.6959, |
| "step": 1716 |
| }, |
| { |
| "epoch": 0.6338912133891214, |
| "grad_norm": 0.8884509205818176, |
| "learning_rate": 4.877310417134144e-06, |
| "loss": 0.7187, |
| "step": 1717 |
| }, |
| { |
| "epoch": 0.6342603987201575, |
| "grad_norm": 0.8993596434593201, |
| "learning_rate": 4.8771598760691715e-06, |
| "loss": 0.7331, |
| "step": 1718 |
| }, |
| { |
| "epoch": 0.6346295840511937, |
| "grad_norm": 0.8999956846237183, |
| "learning_rate": 4.877009245029448e-06, |
| "loss": 0.7349, |
| "step": 1719 |
| }, |
| { |
| "epoch": 0.6349987693822299, |
| "grad_norm": 0.8785694241523743, |
| "learning_rate": 4.876858524020675e-06, |
| "loss": 0.689, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.635367954713266, |
| "grad_norm": 0.8470606803894043, |
| "learning_rate": 4.876707713048558e-06, |
| "loss": 0.676, |
| "step": 1721 |
| }, |
| { |
| "epoch": 0.6357371400443023, |
| "grad_norm": 0.8418689966201782, |
| "learning_rate": 4.876556812118802e-06, |
| "loss": 0.709, |
| "step": 1722 |
| }, |
| { |
| "epoch": 0.6361063253753384, |
| "grad_norm": 0.8840335011482239, |
| "learning_rate": 4.876405821237122e-06, |
| "loss": 0.7133, |
| "step": 1723 |
| }, |
| { |
| "epoch": 0.6364755107063746, |
| "grad_norm": 0.8696883916854858, |
| "learning_rate": 4.876254740409232e-06, |
| "loss": 0.6845, |
| "step": 1724 |
| }, |
| { |
| "epoch": 0.6368446960374108, |
| "grad_norm": 0.8984381556510925, |
| "learning_rate": 4.876103569640849e-06, |
| "loss": 0.7111, |
| "step": 1725 |
| }, |
| { |
| "epoch": 0.6372138813684469, |
| "grad_norm": 0.8952850699424744, |
| "learning_rate": 4.875952308937697e-06, |
| "loss": 0.7383, |
| "step": 1726 |
| }, |
| { |
| "epoch": 0.6375830666994832, |
| "grad_norm": 0.8560416102409363, |
| "learning_rate": 4.875800958305499e-06, |
| "loss": 0.6988, |
| "step": 1727 |
| }, |
| { |
| "epoch": 0.6379522520305193, |
| "grad_norm": 0.8600884079933167, |
| "learning_rate": 4.875649517749985e-06, |
| "loss": 0.6895, |
| "step": 1728 |
| }, |
| { |
| "epoch": 0.6383214373615556, |
| "grad_norm": 0.9282815456390381, |
| "learning_rate": 4.875497987276886e-06, |
| "loss": 0.7253, |
| "step": 1729 |
| }, |
| { |
| "epoch": 0.6386906226925917, |
| "grad_norm": 0.8954489827156067, |
| "learning_rate": 4.875346366891939e-06, |
| "loss": 0.7638, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.6390598080236278, |
| "grad_norm": 0.8884443640708923, |
| "learning_rate": 4.875194656600881e-06, |
| "loss": 0.7284, |
| "step": 1731 |
| }, |
| { |
| "epoch": 0.6394289933546641, |
| "grad_norm": 0.8890431523323059, |
| "learning_rate": 4.875042856409454e-06, |
| "loss": 0.7393, |
| "step": 1732 |
| }, |
| { |
| "epoch": 0.6397981786857002, |
| "grad_norm": 0.8974068760871887, |
| "learning_rate": 4.874890966323406e-06, |
| "loss": 0.7329, |
| "step": 1733 |
| }, |
| { |
| "epoch": 0.6401673640167364, |
| "grad_norm": 0.8659381866455078, |
| "learning_rate": 4.874738986348484e-06, |
| "loss": 0.7035, |
| "step": 1734 |
| }, |
| { |
| "epoch": 0.6405365493477726, |
| "grad_norm": 0.8336740732192993, |
| "learning_rate": 4.87458691649044e-06, |
| "loss": 0.6696, |
| "step": 1735 |
| }, |
| { |
| "epoch": 0.6409057346788087, |
| "grad_norm": 0.8795167207717896, |
| "learning_rate": 4.874434756755032e-06, |
| "loss": 0.7289, |
| "step": 1736 |
| }, |
| { |
| "epoch": 0.641274920009845, |
| "grad_norm": 0.8558307886123657, |
| "learning_rate": 4.874282507148017e-06, |
| "loss": 0.7214, |
| "step": 1737 |
| }, |
| { |
| "epoch": 0.6416441053408811, |
| "grad_norm": 0.8652957677841187, |
| "learning_rate": 4.8741301676751584e-06, |
| "loss": 0.7629, |
| "step": 1738 |
| }, |
| { |
| "epoch": 0.6420132906719173, |
| "grad_norm": 0.8892576098442078, |
| "learning_rate": 4.873977738342222e-06, |
| "loss": 0.7168, |
| "step": 1739 |
| }, |
| { |
| "epoch": 0.6423824760029535, |
| "grad_norm": 0.8710838556289673, |
| "learning_rate": 4.873825219154978e-06, |
| "loss": 0.6837, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.6427516613339896, |
| "grad_norm": 0.8877007365226746, |
| "learning_rate": 4.873672610119199e-06, |
| "loss": 0.6765, |
| "step": 1741 |
| }, |
| { |
| "epoch": 0.6431208466650259, |
| "grad_norm": 0.8830922842025757, |
| "learning_rate": 4.87351991124066e-06, |
| "loss": 0.6797, |
| "step": 1742 |
| }, |
| { |
| "epoch": 0.643490031996062, |
| "grad_norm": 0.8394154906272888, |
| "learning_rate": 4.873367122525142e-06, |
| "loss": 0.666, |
| "step": 1743 |
| }, |
| { |
| "epoch": 0.6438592173270982, |
| "grad_norm": 0.9082286953926086, |
| "learning_rate": 4.873214243978427e-06, |
| "loss": 0.6855, |
| "step": 1744 |
| }, |
| { |
| "epoch": 0.6442284026581344, |
| "grad_norm": 0.869299054145813, |
| "learning_rate": 4.873061275606302e-06, |
| "loss": 0.7026, |
| "step": 1745 |
| }, |
| { |
| "epoch": 0.6445975879891706, |
| "grad_norm": 0.8861920833587646, |
| "learning_rate": 4.872908217414557e-06, |
| "loss": 0.7525, |
| "step": 1746 |
| }, |
| { |
| "epoch": 0.6449667733202068, |
| "grad_norm": 0.9306628704071045, |
| "learning_rate": 4.8727550694089845e-06, |
| "loss": 0.7314, |
| "step": 1747 |
| }, |
| { |
| "epoch": 0.6453359586512429, |
| "grad_norm": 0.8352028131484985, |
| "learning_rate": 4.872601831595381e-06, |
| "loss": 0.6876, |
| "step": 1748 |
| }, |
| { |
| "epoch": 0.6457051439822791, |
| "grad_norm": 0.91121506690979, |
| "learning_rate": 4.872448503979548e-06, |
| "loss": 0.6885, |
| "step": 1749 |
| }, |
| { |
| "epoch": 0.6460743293133153, |
| "grad_norm": 0.9285972714424133, |
| "learning_rate": 4.872295086567288e-06, |
| "loss": 0.7105, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.6464435146443515, |
| "grad_norm": 0.914553701877594, |
| "learning_rate": 4.872141579364407e-06, |
| "loss": 0.7032, |
| "step": 1751 |
| }, |
| { |
| "epoch": 0.6468126999753876, |
| "grad_norm": 0.8394815325737, |
| "learning_rate": 4.871987982376716e-06, |
| "loss": 0.6784, |
| "step": 1752 |
| }, |
| { |
| "epoch": 0.6471818853064238, |
| "grad_norm": 0.8877943754196167, |
| "learning_rate": 4.871834295610028e-06, |
| "loss": 0.7183, |
| "step": 1753 |
| }, |
| { |
| "epoch": 0.64755107063746, |
| "grad_norm": 0.8248438239097595, |
| "learning_rate": 4.871680519070162e-06, |
| "loss": 0.6573, |
| "step": 1754 |
| }, |
| { |
| "epoch": 0.6479202559684962, |
| "grad_norm": 0.88385409116745, |
| "learning_rate": 4.871526652762936e-06, |
| "loss": 0.7206, |
| "step": 1755 |
| }, |
| { |
| "epoch": 0.6482894412995324, |
| "grad_norm": 0.8734132647514343, |
| "learning_rate": 4.8713726966941745e-06, |
| "loss": 0.7712, |
| "step": 1756 |
| }, |
| { |
| "epoch": 0.6486586266305685, |
| "grad_norm": 0.8956114649772644, |
| "learning_rate": 4.871218650869704e-06, |
| "loss": 0.7221, |
| "step": 1757 |
| }, |
| { |
| "epoch": 0.6490278119616048, |
| "grad_norm": 0.9125884771347046, |
| "learning_rate": 4.871064515295357e-06, |
| "loss": 0.7374, |
| "step": 1758 |
| }, |
| { |
| "epoch": 0.6493969972926409, |
| "grad_norm": 0.8915518522262573, |
| "learning_rate": 4.870910289976967e-06, |
| "loss": 0.7126, |
| "step": 1759 |
| }, |
| { |
| "epoch": 0.6497661826236771, |
| "grad_norm": 0.9576418995857239, |
| "learning_rate": 4.870755974920369e-06, |
| "loss": 0.7538, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.6501353679547133, |
| "grad_norm": 0.8745632171630859, |
| "learning_rate": 4.870601570131407e-06, |
| "loss": 0.6837, |
| "step": 1761 |
| }, |
| { |
| "epoch": 0.6505045532857494, |
| "grad_norm": 0.9216246604919434, |
| "learning_rate": 4.870447075615923e-06, |
| "loss": 0.7076, |
| "step": 1762 |
| }, |
| { |
| "epoch": 0.6508737386167857, |
| "grad_norm": 0.8735246658325195, |
| "learning_rate": 4.870292491379765e-06, |
| "loss": 0.6905, |
| "step": 1763 |
| }, |
| { |
| "epoch": 0.6512429239478218, |
| "grad_norm": 0.8984786868095398, |
| "learning_rate": 4.870137817428786e-06, |
| "loss": 0.6877, |
| "step": 1764 |
| }, |
| { |
| "epoch": 0.6516121092788579, |
| "grad_norm": 0.8973082304000854, |
| "learning_rate": 4.869983053768838e-06, |
| "loss": 0.715, |
| "step": 1765 |
| }, |
| { |
| "epoch": 0.6519812946098942, |
| "grad_norm": 0.8570342063903809, |
| "learning_rate": 4.869828200405778e-06, |
| "loss": 0.6751, |
| "step": 1766 |
| }, |
| { |
| "epoch": 0.6523504799409303, |
| "grad_norm": 0.8807101845741272, |
| "learning_rate": 4.86967325734547e-06, |
| "loss": 0.7177, |
| "step": 1767 |
| }, |
| { |
| "epoch": 0.6527196652719666, |
| "grad_norm": 0.8817850947380066, |
| "learning_rate": 4.869518224593777e-06, |
| "loss": 0.7227, |
| "step": 1768 |
| }, |
| { |
| "epoch": 0.6530888506030027, |
| "grad_norm": 0.9274417161941528, |
| "learning_rate": 4.869363102156566e-06, |
| "loss": 0.7644, |
| "step": 1769 |
| }, |
| { |
| "epoch": 0.6534580359340388, |
| "grad_norm": 0.8698463439941406, |
| "learning_rate": 4.86920789003971e-06, |
| "loss": 0.69, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.6538272212650751, |
| "grad_norm": 0.9024192690849304, |
| "learning_rate": 4.869052588249083e-06, |
| "loss": 0.6817, |
| "step": 1771 |
| }, |
| { |
| "epoch": 0.6541964065961112, |
| "grad_norm": 0.8906611800193787, |
| "learning_rate": 4.868897196790563e-06, |
| "loss": 0.7094, |
| "step": 1772 |
| }, |
| { |
| "epoch": 0.6545655919271475, |
| "grad_norm": 0.9051023125648499, |
| "learning_rate": 4.868741715670032e-06, |
| "loss": 0.7047, |
| "step": 1773 |
| }, |
| { |
| "epoch": 0.6549347772581836, |
| "grad_norm": 0.8683571219444275, |
| "learning_rate": 4.868586144893375e-06, |
| "loss": 0.6969, |
| "step": 1774 |
| }, |
| { |
| "epoch": 0.6553039625892197, |
| "grad_norm": 0.8832207322120667, |
| "learning_rate": 4.8684304844664796e-06, |
| "loss": 0.7062, |
| "step": 1775 |
| }, |
| { |
| "epoch": 0.655673147920256, |
| "grad_norm": 0.8671314716339111, |
| "learning_rate": 4.868274734395238e-06, |
| "loss": 0.7214, |
| "step": 1776 |
| }, |
| { |
| "epoch": 0.6560423332512921, |
| "grad_norm": 0.9176437258720398, |
| "learning_rate": 4.8681188946855454e-06, |
| "loss": 0.7633, |
| "step": 1777 |
| }, |
| { |
| "epoch": 0.6564115185823284, |
| "grad_norm": 0.9051275849342346, |
| "learning_rate": 4.867962965343299e-06, |
| "loss": 0.7361, |
| "step": 1778 |
| }, |
| { |
| "epoch": 0.6567807039133645, |
| "grad_norm": 0.8660034537315369, |
| "learning_rate": 4.867806946374403e-06, |
| "loss": 0.6905, |
| "step": 1779 |
| }, |
| { |
| "epoch": 0.6571498892444007, |
| "grad_norm": 0.8938033580780029, |
| "learning_rate": 4.86765083778476e-06, |
| "loss": 0.7329, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.6575190745754369, |
| "grad_norm": 0.8933357000350952, |
| "learning_rate": 4.867494639580281e-06, |
| "loss": 0.7153, |
| "step": 1781 |
| }, |
| { |
| "epoch": 0.657888259906473, |
| "grad_norm": 0.8628551363945007, |
| "learning_rate": 4.867338351766877e-06, |
| "loss": 0.7198, |
| "step": 1782 |
| }, |
| { |
| "epoch": 0.6582574452375092, |
| "grad_norm": 0.8424699306488037, |
| "learning_rate": 4.867181974350463e-06, |
| "loss": 0.6751, |
| "step": 1783 |
| }, |
| { |
| "epoch": 0.6586266305685454, |
| "grad_norm": 0.8781000375747681, |
| "learning_rate": 4.867025507336959e-06, |
| "loss": 0.74, |
| "step": 1784 |
| }, |
| { |
| "epoch": 0.6589958158995816, |
| "grad_norm": 0.8817055821418762, |
| "learning_rate": 4.866868950732286e-06, |
| "loss": 0.6806, |
| "step": 1785 |
| }, |
| { |
| "epoch": 0.6593650012306178, |
| "grad_norm": 0.89399254322052, |
| "learning_rate": 4.8667123045423705e-06, |
| "loss": 0.7123, |
| "step": 1786 |
| }, |
| { |
| "epoch": 0.659734186561654, |
| "grad_norm": 0.8942681550979614, |
| "learning_rate": 4.866555568773141e-06, |
| "loss": 0.7426, |
| "step": 1787 |
| }, |
| { |
| "epoch": 0.6601033718926901, |
| "grad_norm": 0.8757315278053284, |
| "learning_rate": 4.866398743430531e-06, |
| "loss": 0.7476, |
| "step": 1788 |
| }, |
| { |
| "epoch": 0.6604725572237263, |
| "grad_norm": 0.8564402461051941, |
| "learning_rate": 4.866241828520475e-06, |
| "loss": 0.6926, |
| "step": 1789 |
| }, |
| { |
| "epoch": 0.6608417425547625, |
| "grad_norm": 0.8761123418807983, |
| "learning_rate": 4.866084824048913e-06, |
| "loss": 0.7061, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.6612109278857987, |
| "grad_norm": 0.8826016783714294, |
| "learning_rate": 4.8659277300217856e-06, |
| "loss": 0.6939, |
| "step": 1791 |
| }, |
| { |
| "epoch": 0.6615801132168349, |
| "grad_norm": 0.8721649646759033, |
| "learning_rate": 4.865770546445041e-06, |
| "loss": 0.727, |
| "step": 1792 |
| }, |
| { |
| "epoch": 0.661949298547871, |
| "grad_norm": 0.91274493932724, |
| "learning_rate": 4.865613273324629e-06, |
| "loss": 0.7393, |
| "step": 1793 |
| }, |
| { |
| "epoch": 0.6623184838789072, |
| "grad_norm": 0.8830939531326294, |
| "learning_rate": 4.8654559106665e-06, |
| "loss": 0.687, |
| "step": 1794 |
| }, |
| { |
| "epoch": 0.6626876692099434, |
| "grad_norm": 0.8717511892318726, |
| "learning_rate": 4.865298458476612e-06, |
| "loss": 0.6921, |
| "step": 1795 |
| }, |
| { |
| "epoch": 0.6630568545409796, |
| "grad_norm": 0.8554193377494812, |
| "learning_rate": 4.865140916760923e-06, |
| "loss": 0.6921, |
| "step": 1796 |
| }, |
| { |
| "epoch": 0.6634260398720158, |
| "grad_norm": 0.8773930668830872, |
| "learning_rate": 4.864983285525397e-06, |
| "loss": 0.7317, |
| "step": 1797 |
| }, |
| { |
| "epoch": 0.6637952252030519, |
| "grad_norm": 0.8963366150856018, |
| "learning_rate": 4.864825564776e-06, |
| "loss": 0.7034, |
| "step": 1798 |
| }, |
| { |
| "epoch": 0.6641644105340881, |
| "grad_norm": 0.8847402334213257, |
| "learning_rate": 4.864667754518702e-06, |
| "loss": 0.7242, |
| "step": 1799 |
| }, |
| { |
| "epoch": 0.6645335958651243, |
| "grad_norm": 0.8819407224655151, |
| "learning_rate": 4.864509854759476e-06, |
| "loss": 0.7329, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.6649027811961604, |
| "grad_norm": 0.8914307951927185, |
| "learning_rate": 4.864351865504298e-06, |
| "loss": 0.7128, |
| "step": 1801 |
| }, |
| { |
| "epoch": 0.6652719665271967, |
| "grad_norm": 0.8843200206756592, |
| "learning_rate": 4.864193786759148e-06, |
| "loss": 0.7198, |
| "step": 1802 |
| }, |
| { |
| "epoch": 0.6656411518582328, |
| "grad_norm": 0.8661046028137207, |
| "learning_rate": 4.8640356185300094e-06, |
| "loss": 0.7281, |
| "step": 1803 |
| }, |
| { |
| "epoch": 0.666010337189269, |
| "grad_norm": 0.8948192596435547, |
| "learning_rate": 4.863877360822869e-06, |
| "loss": 0.7508, |
| "step": 1804 |
| }, |
| { |
| "epoch": 0.6663795225203052, |
| "grad_norm": 0.8964792490005493, |
| "learning_rate": 4.863719013643716e-06, |
| "loss": 0.7193, |
| "step": 1805 |
| }, |
| { |
| "epoch": 0.6667487078513413, |
| "grad_norm": 0.8771764039993286, |
| "learning_rate": 4.863560576998545e-06, |
| "loss": 0.7127, |
| "step": 1806 |
| }, |
| { |
| "epoch": 0.6671178931823776, |
| "grad_norm": 0.8624410033226013, |
| "learning_rate": 4.8634020508933524e-06, |
| "loss": 0.6808, |
| "step": 1807 |
| }, |
| { |
| "epoch": 0.6674870785134137, |
| "grad_norm": 0.901315450668335, |
| "learning_rate": 4.863243435334137e-06, |
| "loss": 0.6907, |
| "step": 1808 |
| }, |
| { |
| "epoch": 0.66785626384445, |
| "grad_norm": 0.8859581351280212, |
| "learning_rate": 4.8630847303269034e-06, |
| "loss": 0.7351, |
| "step": 1809 |
| }, |
| { |
| "epoch": 0.6682254491754861, |
| "grad_norm": 0.9065508842468262, |
| "learning_rate": 4.862925935877659e-06, |
| "loss": 0.7049, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.6685946345065222, |
| "grad_norm": 0.8637186288833618, |
| "learning_rate": 4.8627670519924146e-06, |
| "loss": 0.702, |
| "step": 1811 |
| }, |
| { |
| "epoch": 0.6689638198375585, |
| "grad_norm": 0.8939265012741089, |
| "learning_rate": 4.862608078677181e-06, |
| "loss": 0.7209, |
| "step": 1812 |
| }, |
| { |
| "epoch": 0.6693330051685946, |
| "grad_norm": 0.9074069857597351, |
| "learning_rate": 4.862449015937979e-06, |
| "loss": 0.7215, |
| "step": 1813 |
| }, |
| { |
| "epoch": 0.6697021904996309, |
| "grad_norm": 0.898177444934845, |
| "learning_rate": 4.8622898637808265e-06, |
| "loss": 0.7341, |
| "step": 1814 |
| }, |
| { |
| "epoch": 0.670071375830667, |
| "grad_norm": 0.8927603960037231, |
| "learning_rate": 4.862130622211749e-06, |
| "loss": 0.7372, |
| "step": 1815 |
| }, |
| { |
| "epoch": 0.6704405611617031, |
| "grad_norm": 0.9042506814002991, |
| "learning_rate": 4.861971291236772e-06, |
| "loss": 0.6699, |
| "step": 1816 |
| }, |
| { |
| "epoch": 0.6708097464927394, |
| "grad_norm": 0.867923378944397, |
| "learning_rate": 4.861811870861928e-06, |
| "loss": 0.7065, |
| "step": 1817 |
| }, |
| { |
| "epoch": 0.6711789318237755, |
| "grad_norm": 0.8792146444320679, |
| "learning_rate": 4.861652361093249e-06, |
| "loss": 0.7359, |
| "step": 1818 |
| }, |
| { |
| "epoch": 0.6715481171548117, |
| "grad_norm": 0.8699401021003723, |
| "learning_rate": 4.861492761936774e-06, |
| "loss": 0.73, |
| "step": 1819 |
| }, |
| { |
| "epoch": 0.6719173024858479, |
| "grad_norm": 0.9019505381584167, |
| "learning_rate": 4.861333073398543e-06, |
| "loss": 0.7142, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.672286487816884, |
| "grad_norm": 0.8983092904090881, |
| "learning_rate": 4.8611732954846015e-06, |
| "loss": 0.7232, |
| "step": 1821 |
| }, |
| { |
| "epoch": 0.6726556731479203, |
| "grad_norm": 0.8775074481964111, |
| "learning_rate": 4.861013428200995e-06, |
| "loss": 0.7051, |
| "step": 1822 |
| }, |
| { |
| "epoch": 0.6730248584789564, |
| "grad_norm": 0.8952973484992981, |
| "learning_rate": 4.8608534715537755e-06, |
| "loss": 0.7544, |
| "step": 1823 |
| }, |
| { |
| "epoch": 0.6733940438099926, |
| "grad_norm": 0.857286274433136, |
| "learning_rate": 4.860693425548997e-06, |
| "loss": 0.6949, |
| "step": 1824 |
| }, |
| { |
| "epoch": 0.6737632291410288, |
| "grad_norm": 0.850751519203186, |
| "learning_rate": 4.8605332901927175e-06, |
| "loss": 0.7082, |
| "step": 1825 |
| }, |
| { |
| "epoch": 0.674132414472065, |
| "grad_norm": 0.872599720954895, |
| "learning_rate": 4.860373065490998e-06, |
| "loss": 0.7073, |
| "step": 1826 |
| }, |
| { |
| "epoch": 0.6745015998031012, |
| "grad_norm": 0.9076393842697144, |
| "learning_rate": 4.860212751449903e-06, |
| "loss": 0.6977, |
| "step": 1827 |
| }, |
| { |
| "epoch": 0.6748707851341373, |
| "grad_norm": 0.8853808045387268, |
| "learning_rate": 4.8600523480755e-06, |
| "loss": 0.6811, |
| "step": 1828 |
| }, |
| { |
| "epoch": 0.6752399704651735, |
| "grad_norm": 0.8611722588539124, |
| "learning_rate": 4.859891855373861e-06, |
| "loss": 0.7209, |
| "step": 1829 |
| }, |
| { |
| "epoch": 0.6756091557962097, |
| "grad_norm": 0.8485976457595825, |
| "learning_rate": 4.85973127335106e-06, |
| "loss": 0.6629, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.6759783411272459, |
| "grad_norm": 0.875453770160675, |
| "learning_rate": 4.859570602013175e-06, |
| "loss": 0.7349, |
| "step": 1831 |
| }, |
| { |
| "epoch": 0.6763475264582821, |
| "grad_norm": 0.8506700992584229, |
| "learning_rate": 4.859409841366287e-06, |
| "loss": 0.6818, |
| "step": 1832 |
| }, |
| { |
| "epoch": 0.6767167117893182, |
| "grad_norm": 0.8715220093727112, |
| "learning_rate": 4.859248991416481e-06, |
| "loss": 0.7189, |
| "step": 1833 |
| }, |
| { |
| "epoch": 0.6770858971203544, |
| "grad_norm": 0.9876884818077087, |
| "learning_rate": 4.859088052169845e-06, |
| "loss": 0.7445, |
| "step": 1834 |
| }, |
| { |
| "epoch": 0.6774550824513906, |
| "grad_norm": 0.8883238434791565, |
| "learning_rate": 4.858927023632472e-06, |
| "loss": 0.7338, |
| "step": 1835 |
| }, |
| { |
| "epoch": 0.6778242677824268, |
| "grad_norm": 0.8532505035400391, |
| "learning_rate": 4.858765905810455e-06, |
| "loss": 0.6844, |
| "step": 1836 |
| }, |
| { |
| "epoch": 0.6781934531134629, |
| "grad_norm": 0.8952226042747498, |
| "learning_rate": 4.8586046987098935e-06, |
| "loss": 0.6937, |
| "step": 1837 |
| }, |
| { |
| "epoch": 0.6785626384444992, |
| "grad_norm": 0.8690524101257324, |
| "learning_rate": 4.858443402336888e-06, |
| "loss": 0.7029, |
| "step": 1838 |
| }, |
| { |
| "epoch": 0.6789318237755353, |
| "grad_norm": 0.8886831998825073, |
| "learning_rate": 4.858282016697544e-06, |
| "loss": 0.728, |
| "step": 1839 |
| }, |
| { |
| "epoch": 0.6793010091065715, |
| "grad_norm": 0.9667893052101135, |
| "learning_rate": 4.85812054179797e-06, |
| "loss": 0.7331, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.6796701944376077, |
| "grad_norm": 0.8873503804206848, |
| "learning_rate": 4.857958977644278e-06, |
| "loss": 0.7073, |
| "step": 1841 |
| }, |
| { |
| "epoch": 0.6800393797686438, |
| "grad_norm": 0.8646997213363647, |
| "learning_rate": 4.857797324242582e-06, |
| "loss": 0.6857, |
| "step": 1842 |
| }, |
| { |
| "epoch": 0.6804085650996801, |
| "grad_norm": 0.8797730207443237, |
| "learning_rate": 4.857635581599003e-06, |
| "loss": 0.7488, |
| "step": 1843 |
| }, |
| { |
| "epoch": 0.6807777504307162, |
| "grad_norm": 0.8577935695648193, |
| "learning_rate": 4.85747374971966e-06, |
| "loss": 0.7206, |
| "step": 1844 |
| }, |
| { |
| "epoch": 0.6811469357617524, |
| "grad_norm": 0.871494472026825, |
| "learning_rate": 4.8573118286106805e-06, |
| "loss": 0.707, |
| "step": 1845 |
| }, |
| { |
| "epoch": 0.6815161210927886, |
| "grad_norm": 0.8755682110786438, |
| "learning_rate": 4.857149818278192e-06, |
| "loss": 0.7693, |
| "step": 1846 |
| }, |
| { |
| "epoch": 0.6818853064238247, |
| "grad_norm": 0.8881521821022034, |
| "learning_rate": 4.8569877187283255e-06, |
| "loss": 0.7216, |
| "step": 1847 |
| }, |
| { |
| "epoch": 0.682254491754861, |
| "grad_norm": 0.8813753724098206, |
| "learning_rate": 4.856825529967219e-06, |
| "loss": 0.6836, |
| "step": 1848 |
| }, |
| { |
| "epoch": 0.6826236770858971, |
| "grad_norm": 0.8883729577064514, |
| "learning_rate": 4.85666325200101e-06, |
| "loss": 0.6993, |
| "step": 1849 |
| }, |
| { |
| "epoch": 0.6829928624169334, |
| "grad_norm": 0.9154224395751953, |
| "learning_rate": 4.8565008848358405e-06, |
| "loss": 0.7361, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.6833620477479695, |
| "grad_norm": 0.8938775062561035, |
| "learning_rate": 4.856338428477856e-06, |
| "loss": 0.7191, |
| "step": 1851 |
| }, |
| { |
| "epoch": 0.6837312330790056, |
| "grad_norm": 0.8818868398666382, |
| "learning_rate": 4.8561758829332064e-06, |
| "loss": 0.687, |
| "step": 1852 |
| }, |
| { |
| "epoch": 0.6841004184100419, |
| "grad_norm": 0.9043888449668884, |
| "learning_rate": 4.856013248208043e-06, |
| "loss": 0.7375, |
| "step": 1853 |
| }, |
| { |
| "epoch": 0.684469603741078, |
| "grad_norm": 0.8634012341499329, |
| "learning_rate": 4.855850524308521e-06, |
| "loss": 0.7032, |
| "step": 1854 |
| }, |
| { |
| "epoch": 0.6848387890721142, |
| "grad_norm": 0.8524016737937927, |
| "learning_rate": 4.8556877112408e-06, |
| "loss": 0.6875, |
| "step": 1855 |
| }, |
| { |
| "epoch": 0.6852079744031504, |
| "grad_norm": 0.8432945013046265, |
| "learning_rate": 4.855524809011043e-06, |
| "loss": 0.6705, |
| "step": 1856 |
| }, |
| { |
| "epoch": 0.6855771597341865, |
| "grad_norm": 0.8706541657447815, |
| "learning_rate": 4.855361817625416e-06, |
| "loss": 0.7322, |
| "step": 1857 |
| }, |
| { |
| "epoch": 0.6859463450652228, |
| "grad_norm": 0.8889358043670654, |
| "learning_rate": 4.8551987370900875e-06, |
| "loss": 0.7295, |
| "step": 1858 |
| }, |
| { |
| "epoch": 0.6863155303962589, |
| "grad_norm": 0.8888987302780151, |
| "learning_rate": 4.8550355674112295e-06, |
| "loss": 0.7163, |
| "step": 1859 |
| }, |
| { |
| "epoch": 0.6866847157272951, |
| "grad_norm": 0.8585571646690369, |
| "learning_rate": 4.854872308595019e-06, |
| "loss": 0.7041, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.6870539010583313, |
| "grad_norm": 0.872947096824646, |
| "learning_rate": 4.8547089606476335e-06, |
| "loss": 0.7142, |
| "step": 1861 |
| }, |
| { |
| "epoch": 0.6874230863893674, |
| "grad_norm": 0.9010311961174011, |
| "learning_rate": 4.854545523575259e-06, |
| "loss": 0.729, |
| "step": 1862 |
| }, |
| { |
| "epoch": 0.6877922717204037, |
| "grad_norm": 0.8814377784729004, |
| "learning_rate": 4.854381997384079e-06, |
| "loss": 0.6988, |
| "step": 1863 |
| }, |
| { |
| "epoch": 0.6881614570514398, |
| "grad_norm": 0.8774176239967346, |
| "learning_rate": 4.854218382080283e-06, |
| "loss": 0.7103, |
| "step": 1864 |
| }, |
| { |
| "epoch": 0.688530642382476, |
| "grad_norm": 0.8719117641448975, |
| "learning_rate": 4.854054677670064e-06, |
| "loss": 0.7084, |
| "step": 1865 |
| }, |
| { |
| "epoch": 0.6888998277135122, |
| "grad_norm": 0.874610424041748, |
| "learning_rate": 4.853890884159619e-06, |
| "loss": 0.6931, |
| "step": 1866 |
| }, |
| { |
| "epoch": 0.6892690130445484, |
| "grad_norm": 0.9094902276992798, |
| "learning_rate": 4.853727001555146e-06, |
| "loss": 0.7156, |
| "step": 1867 |
| }, |
| { |
| "epoch": 0.6896381983755845, |
| "grad_norm": 0.8830024003982544, |
| "learning_rate": 4.85356302986285e-06, |
| "loss": 0.7119, |
| "step": 1868 |
| }, |
| { |
| "epoch": 0.6900073837066207, |
| "grad_norm": 0.9095394611358643, |
| "learning_rate": 4.853398969088936e-06, |
| "loss": 0.7219, |
| "step": 1869 |
| }, |
| { |
| "epoch": 0.6903765690376569, |
| "grad_norm": 0.8986232280731201, |
| "learning_rate": 4.853234819239613e-06, |
| "loss": 0.71, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.6907457543686931, |
| "grad_norm": 0.8806168437004089, |
| "learning_rate": 4.8530705803210955e-06, |
| "loss": 0.6731, |
| "step": 1871 |
| }, |
| { |
| "epoch": 0.6911149396997293, |
| "grad_norm": 0.9023184180259705, |
| "learning_rate": 4.852906252339598e-06, |
| "loss": 0.7086, |
| "step": 1872 |
| }, |
| { |
| "epoch": 0.6914841250307654, |
| "grad_norm": 0.8981339931488037, |
| "learning_rate": 4.852741835301343e-06, |
| "loss": 0.7037, |
| "step": 1873 |
| }, |
| { |
| "epoch": 0.6918533103618016, |
| "grad_norm": 0.9292305111885071, |
| "learning_rate": 4.852577329212551e-06, |
| "loss": 0.7096, |
| "step": 1874 |
| }, |
| { |
| "epoch": 0.6922224956928378, |
| "grad_norm": 0.903359591960907, |
| "learning_rate": 4.85241273407945e-06, |
| "loss": 0.6792, |
| "step": 1875 |
| }, |
| { |
| "epoch": 0.692591681023874, |
| "grad_norm": 0.893671452999115, |
| "learning_rate": 4.85224804990827e-06, |
| "loss": 0.7367, |
| "step": 1876 |
| }, |
| { |
| "epoch": 0.6929608663549102, |
| "grad_norm": 0.9473923444747925, |
| "learning_rate": 4.852083276705243e-06, |
| "loss": 0.6995, |
| "step": 1877 |
| }, |
| { |
| "epoch": 0.6933300516859463, |
| "grad_norm": 0.9057194590568542, |
| "learning_rate": 4.851918414476606e-06, |
| "loss": 0.7498, |
| "step": 1878 |
| }, |
| { |
| "epoch": 0.6936992370169826, |
| "grad_norm": 0.8625937700271606, |
| "learning_rate": 4.8517534632286e-06, |
| "loss": 0.6826, |
| "step": 1879 |
| }, |
| { |
| "epoch": 0.6940684223480187, |
| "grad_norm": 0.8820124864578247, |
| "learning_rate": 4.851588422967467e-06, |
| "loss": 0.7204, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.6944376076790549, |
| "grad_norm": 0.9044023752212524, |
| "learning_rate": 4.851423293699455e-06, |
| "loss": 0.7131, |
| "step": 1881 |
| }, |
| { |
| "epoch": 0.6948067930100911, |
| "grad_norm": 0.8808709979057312, |
| "learning_rate": 4.851258075430813e-06, |
| "loss": 0.68, |
| "step": 1882 |
| }, |
| { |
| "epoch": 0.6951759783411272, |
| "grad_norm": 0.875217854976654, |
| "learning_rate": 4.851092768167795e-06, |
| "loss": 0.7038, |
| "step": 1883 |
| }, |
| { |
| "epoch": 0.6955451636721635, |
| "grad_norm": 0.8607890605926514, |
| "learning_rate": 4.850927371916658e-06, |
| "loss": 0.6845, |
| "step": 1884 |
| }, |
| { |
| "epoch": 0.6959143490031996, |
| "grad_norm": 0.9063311815261841, |
| "learning_rate": 4.850761886683662e-06, |
| "loss": 0.7337, |
| "step": 1885 |
| }, |
| { |
| "epoch": 0.6962835343342357, |
| "grad_norm": 0.9188293814659119, |
| "learning_rate": 4.85059631247507e-06, |
| "loss": 0.6916, |
| "step": 1886 |
| }, |
| { |
| "epoch": 0.696652719665272, |
| "grad_norm": 0.913374125957489, |
| "learning_rate": 4.85043064929715e-06, |
| "loss": 0.759, |
| "step": 1887 |
| }, |
| { |
| "epoch": 0.6970219049963081, |
| "grad_norm": 0.8925139904022217, |
| "learning_rate": 4.850264897156171e-06, |
| "loss": 0.7173, |
| "step": 1888 |
| }, |
| { |
| "epoch": 0.6973910903273444, |
| "grad_norm": 0.9073317050933838, |
| "learning_rate": 4.8500990560584075e-06, |
| "loss": 0.7424, |
| "step": 1889 |
| }, |
| { |
| "epoch": 0.6977602756583805, |
| "grad_norm": 0.8561435341835022, |
| "learning_rate": 4.8499331260101365e-06, |
| "loss": 0.7152, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.6981294609894166, |
| "grad_norm": 0.8652263283729553, |
| "learning_rate": 4.8497671070176385e-06, |
| "loss": 0.6963, |
| "step": 1891 |
| }, |
| { |
| "epoch": 0.6984986463204529, |
| "grad_norm": 0.8837151527404785, |
| "learning_rate": 4.849600999087197e-06, |
| "loss": 0.7357, |
| "step": 1892 |
| }, |
| { |
| "epoch": 0.698867831651489, |
| "grad_norm": 0.8606382012367249, |
| "learning_rate": 4.8494348022251e-06, |
| "loss": 0.7131, |
| "step": 1893 |
| }, |
| { |
| "epoch": 0.6992370169825253, |
| "grad_norm": 0.8533560633659363, |
| "learning_rate": 4.8492685164376365e-06, |
| "loss": 0.695, |
| "step": 1894 |
| }, |
| { |
| "epoch": 0.6996062023135614, |
| "grad_norm": 0.9554173946380615, |
| "learning_rate": 4.849102141731101e-06, |
| "loss": 0.6683, |
| "step": 1895 |
| }, |
| { |
| "epoch": 0.6999753876445975, |
| "grad_norm": 0.9184376001358032, |
| "learning_rate": 4.848935678111792e-06, |
| "loss": 0.7477, |
| "step": 1896 |
| }, |
| { |
| "epoch": 0.7003445729756338, |
| "grad_norm": 0.9730942845344543, |
| "learning_rate": 4.848769125586007e-06, |
| "loss": 0.7392, |
| "step": 1897 |
| }, |
| { |
| "epoch": 0.7007137583066699, |
| "grad_norm": 0.8965922594070435, |
| "learning_rate": 4.848602484160053e-06, |
| "loss": 0.7275, |
| "step": 1898 |
| }, |
| { |
| "epoch": 0.7010829436377062, |
| "grad_norm": 0.9296282529830933, |
| "learning_rate": 4.848435753840236e-06, |
| "loss": 0.7158, |
| "step": 1899 |
| }, |
| { |
| "epoch": 0.7014521289687423, |
| "grad_norm": 0.9205310940742493, |
| "learning_rate": 4.8482689346328674e-06, |
| "loss": 0.6932, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.7018213142997785, |
| "grad_norm": 0.8746544718742371, |
| "learning_rate": 4.84810202654426e-06, |
| "loss": 0.6765, |
| "step": 1901 |
| }, |
| { |
| "epoch": 0.7021904996308147, |
| "grad_norm": 0.890127956867218, |
| "learning_rate": 4.847935029580732e-06, |
| "loss": 0.6847, |
| "step": 1902 |
| }, |
| { |
| "epoch": 0.7025596849618508, |
| "grad_norm": 0.8811405897140503, |
| "learning_rate": 4.847767943748605e-06, |
| "loss": 0.6643, |
| "step": 1903 |
| }, |
| { |
| "epoch": 0.702928870292887, |
| "grad_norm": 0.8962213397026062, |
| "learning_rate": 4.847600769054201e-06, |
| "loss": 0.7078, |
| "step": 1904 |
| }, |
| { |
| "epoch": 0.7032980556239232, |
| "grad_norm": 0.8875841498374939, |
| "learning_rate": 4.84743350550385e-06, |
| "loss": 0.707, |
| "step": 1905 |
| }, |
| { |
| "epoch": 0.7036672409549594, |
| "grad_norm": 0.8978013396263123, |
| "learning_rate": 4.8472661531038815e-06, |
| "loss": 0.6716, |
| "step": 1906 |
| }, |
| { |
| "epoch": 0.7040364262859956, |
| "grad_norm": 0.9015927910804749, |
| "learning_rate": 4.847098711860629e-06, |
| "loss": 0.6643, |
| "step": 1907 |
| }, |
| { |
| "epoch": 0.7044056116170317, |
| "grad_norm": 0.893324077129364, |
| "learning_rate": 4.846931181780431e-06, |
| "loss": 0.7005, |
| "step": 1908 |
| }, |
| { |
| "epoch": 0.7047747969480679, |
| "grad_norm": 0.9275810122489929, |
| "learning_rate": 4.84676356286963e-06, |
| "loss": 0.724, |
| "step": 1909 |
| }, |
| { |
| "epoch": 0.7051439822791041, |
| "grad_norm": 0.8800505995750427, |
| "learning_rate": 4.8465958551345675e-06, |
| "loss": 0.7251, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.7055131676101403, |
| "grad_norm": 0.8828743100166321, |
| "learning_rate": 4.846428058581593e-06, |
| "loss": 0.7277, |
| "step": 1911 |
| }, |
| { |
| "epoch": 0.7058823529411765, |
| "grad_norm": 0.9125253558158875, |
| "learning_rate": 4.846260173217056e-06, |
| "loss": 0.7619, |
| "step": 1912 |
| }, |
| { |
| "epoch": 0.7062515382722127, |
| "grad_norm": 0.9037159085273743, |
| "learning_rate": 4.846092199047314e-06, |
| "loss": 0.7182, |
| "step": 1913 |
| }, |
| { |
| "epoch": 0.7066207236032488, |
| "grad_norm": 0.8811241984367371, |
| "learning_rate": 4.8459241360787215e-06, |
| "loss": 0.749, |
| "step": 1914 |
| }, |
| { |
| "epoch": 0.706989908934285, |
| "grad_norm": 0.8958812952041626, |
| "learning_rate": 4.845755984317641e-06, |
| "loss": 0.7191, |
| "step": 1915 |
| }, |
| { |
| "epoch": 0.7073590942653212, |
| "grad_norm": 0.9097388386726379, |
| "learning_rate": 4.845587743770436e-06, |
| "loss": 0.7166, |
| "step": 1916 |
| }, |
| { |
| "epoch": 0.7077282795963574, |
| "grad_norm": 0.8992327451705933, |
| "learning_rate": 4.8454194144434765e-06, |
| "loss": 0.7493, |
| "step": 1917 |
| }, |
| { |
| "epoch": 0.7080974649273936, |
| "grad_norm": 0.9153758883476257, |
| "learning_rate": 4.845250996343132e-06, |
| "loss": 0.7036, |
| "step": 1918 |
| }, |
| { |
| "epoch": 0.7084666502584297, |
| "grad_norm": 0.9150279760360718, |
| "learning_rate": 4.845082489475777e-06, |
| "loss": 0.7278, |
| "step": 1919 |
| }, |
| { |
| "epoch": 0.708835835589466, |
| "grad_norm": 0.8316643238067627, |
| "learning_rate": 4.84491389384779e-06, |
| "loss": 0.6683, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.7092050209205021, |
| "grad_norm": 0.8955804705619812, |
| "learning_rate": 4.844745209465552e-06, |
| "loss": 0.7344, |
| "step": 1921 |
| }, |
| { |
| "epoch": 0.7095742062515382, |
| "grad_norm": 0.9217088222503662, |
| "learning_rate": 4.844576436335448e-06, |
| "loss": 0.6768, |
| "step": 1922 |
| }, |
| { |
| "epoch": 0.7099433915825745, |
| "grad_norm": 0.9741485714912415, |
| "learning_rate": 4.844407574463866e-06, |
| "loss": 0.713, |
| "step": 1923 |
| }, |
| { |
| "epoch": 0.7103125769136106, |
| "grad_norm": 0.9079376459121704, |
| "learning_rate": 4.844238623857197e-06, |
| "loss": 0.7543, |
| "step": 1924 |
| }, |
| { |
| "epoch": 0.7106817622446469, |
| "grad_norm": 0.8815346956253052, |
| "learning_rate": 4.844069584521836e-06, |
| "loss": 0.7318, |
| "step": 1925 |
| }, |
| { |
| "epoch": 0.711050947575683, |
| "grad_norm": 0.9132115244865417, |
| "learning_rate": 4.843900456464181e-06, |
| "loss": 0.7084, |
| "step": 1926 |
| }, |
| { |
| "epoch": 0.7114201329067191, |
| "grad_norm": 0.9009430408477783, |
| "learning_rate": 4.843731239690634e-06, |
| "loss": 0.7461, |
| "step": 1927 |
| }, |
| { |
| "epoch": 0.7117893182377554, |
| "grad_norm": 0.847509503364563, |
| "learning_rate": 4.843561934207599e-06, |
| "loss": 0.6802, |
| "step": 1928 |
| }, |
| { |
| "epoch": 0.7121585035687915, |
| "grad_norm": 0.8662732243537903, |
| "learning_rate": 4.843392540021485e-06, |
| "loss": 0.6892, |
| "step": 1929 |
| }, |
| { |
| "epoch": 0.7125276888998278, |
| "grad_norm": 0.9007134437561035, |
| "learning_rate": 4.843223057138701e-06, |
| "loss": 0.7029, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.7128968742308639, |
| "grad_norm": 0.882623553276062, |
| "learning_rate": 4.8430534855656655e-06, |
| "loss": 0.6805, |
| "step": 1931 |
| }, |
| { |
| "epoch": 0.7132660595619, |
| "grad_norm": 0.91568922996521, |
| "learning_rate": 4.842883825308794e-06, |
| "loss": 0.7135, |
| "step": 1932 |
| }, |
| { |
| "epoch": 0.7136352448929363, |
| "grad_norm": 0.9128447771072388, |
| "learning_rate": 4.84271407637451e-06, |
| "loss": 0.7521, |
| "step": 1933 |
| }, |
| { |
| "epoch": 0.7140044302239724, |
| "grad_norm": 0.890592098236084, |
| "learning_rate": 4.842544238769238e-06, |
| "loss": 0.7424, |
| "step": 1934 |
| }, |
| { |
| "epoch": 0.7143736155550087, |
| "grad_norm": 0.8989784717559814, |
| "learning_rate": 4.842374312499405e-06, |
| "loss": 0.7288, |
| "step": 1935 |
| }, |
| { |
| "epoch": 0.7147428008860448, |
| "grad_norm": 0.8863556981086731, |
| "learning_rate": 4.842204297571444e-06, |
| "loss": 0.7189, |
| "step": 1936 |
| }, |
| { |
| "epoch": 0.7151119862170809, |
| "grad_norm": 0.9078112840652466, |
| "learning_rate": 4.842034193991789e-06, |
| "loss": 0.7186, |
| "step": 1937 |
| }, |
| { |
| "epoch": 0.7154811715481172, |
| "grad_norm": 0.8869308829307556, |
| "learning_rate": 4.841864001766879e-06, |
| "loss": 0.7004, |
| "step": 1938 |
| }, |
| { |
| "epoch": 0.7158503568791533, |
| "grad_norm": 0.8935455083847046, |
| "learning_rate": 4.8416937209031555e-06, |
| "loss": 0.7231, |
| "step": 1939 |
| }, |
| { |
| "epoch": 0.7162195422101895, |
| "grad_norm": 0.877627968788147, |
| "learning_rate": 4.841523351407064e-06, |
| "loss": 0.6868, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.7165887275412257, |
| "grad_norm": 0.8890548944473267, |
| "learning_rate": 4.841352893285053e-06, |
| "loss": 0.7483, |
| "step": 1941 |
| }, |
| { |
| "epoch": 0.7169579128722618, |
| "grad_norm": 0.8524648547172546, |
| "learning_rate": 4.841182346543574e-06, |
| "loss": 0.7152, |
| "step": 1942 |
| }, |
| { |
| "epoch": 0.7173270982032981, |
| "grad_norm": 0.8882040977478027, |
| "learning_rate": 4.841011711189081e-06, |
| "loss": 0.7057, |
| "step": 1943 |
| }, |
| { |
| "epoch": 0.7176962835343342, |
| "grad_norm": 0.8520607352256775, |
| "learning_rate": 4.840840987228035e-06, |
| "loss": 0.6976, |
| "step": 1944 |
| }, |
| { |
| "epoch": 0.7180654688653704, |
| "grad_norm": 0.883282482624054, |
| "learning_rate": 4.840670174666896e-06, |
| "loss": 0.6937, |
| "step": 1945 |
| }, |
| { |
| "epoch": 0.7184346541964066, |
| "grad_norm": 0.8982214331626892, |
| "learning_rate": 4.840499273512129e-06, |
| "loss": 0.6427, |
| "step": 1946 |
| }, |
| { |
| "epoch": 0.7188038395274428, |
| "grad_norm": 0.8696303367614746, |
| "learning_rate": 4.840328283770203e-06, |
| "loss": 0.7672, |
| "step": 1947 |
| }, |
| { |
| "epoch": 0.719173024858479, |
| "grad_norm": 0.8526875972747803, |
| "learning_rate": 4.840157205447591e-06, |
| "loss": 0.7058, |
| "step": 1948 |
| }, |
| { |
| "epoch": 0.7195422101895151, |
| "grad_norm": 0.8616173267364502, |
| "learning_rate": 4.839986038550767e-06, |
| "loss": 0.6861, |
| "step": 1949 |
| }, |
| { |
| "epoch": 0.7199113955205513, |
| "grad_norm": 0.8847987651824951, |
| "learning_rate": 4.83981478308621e-06, |
| "loss": 0.7424, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.7202805808515875, |
| "grad_norm": 0.9206272959709167, |
| "learning_rate": 4.839643439060401e-06, |
| "loss": 0.712, |
| "step": 1951 |
| }, |
| { |
| "epoch": 0.7206497661826237, |
| "grad_norm": 0.8666979670524597, |
| "learning_rate": 4.8394720064798275e-06, |
| "loss": 0.7077, |
| "step": 1952 |
| }, |
| { |
| "epoch": 0.7210189515136599, |
| "grad_norm": 0.8352959752082825, |
| "learning_rate": 4.839300485350976e-06, |
| "loss": 0.6755, |
| "step": 1953 |
| }, |
| { |
| "epoch": 0.721388136844696, |
| "grad_norm": 0.8747813105583191, |
| "learning_rate": 4.83912887568034e-06, |
| "loss": 0.7047, |
| "step": 1954 |
| }, |
| { |
| "epoch": 0.7217573221757322, |
| "grad_norm": 0.9319245219230652, |
| "learning_rate": 4.838957177474414e-06, |
| "loss": 0.752, |
| "step": 1955 |
| }, |
| { |
| "epoch": 0.7221265075067684, |
| "grad_norm": 0.8869343996047974, |
| "learning_rate": 4.838785390739698e-06, |
| "loss": 0.7211, |
| "step": 1956 |
| }, |
| { |
| "epoch": 0.7224956928378046, |
| "grad_norm": 0.9012927412986755, |
| "learning_rate": 4.838613515482692e-06, |
| "loss": 0.7184, |
| "step": 1957 |
| }, |
| { |
| "epoch": 0.7228648781688407, |
| "grad_norm": 0.8678253293037415, |
| "learning_rate": 4.838441551709902e-06, |
| "loss": 0.7164, |
| "step": 1958 |
| }, |
| { |
| "epoch": 0.723234063499877, |
| "grad_norm": 0.8944018483161926, |
| "learning_rate": 4.838269499427838e-06, |
| "loss": 0.7141, |
| "step": 1959 |
| }, |
| { |
| "epoch": 0.7236032488309131, |
| "grad_norm": 0.868712306022644, |
| "learning_rate": 4.838097358643012e-06, |
| "loss": 0.6841, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.7239724341619493, |
| "grad_norm": 0.8801015615463257, |
| "learning_rate": 4.837925129361938e-06, |
| "loss": 0.6913, |
| "step": 1961 |
| }, |
| { |
| "epoch": 0.7243416194929855, |
| "grad_norm": 0.8590583801269531, |
| "learning_rate": 4.837752811591136e-06, |
| "loss": 0.7065, |
| "step": 1962 |
| }, |
| { |
| "epoch": 0.7247108048240216, |
| "grad_norm": 0.895354151725769, |
| "learning_rate": 4.837580405337128e-06, |
| "loss": 0.7574, |
| "step": 1963 |
| }, |
| { |
| "epoch": 0.7250799901550579, |
| "grad_norm": 0.8873820304870605, |
| "learning_rate": 4.83740791060644e-06, |
| "loss": 0.7083, |
| "step": 1964 |
| }, |
| { |
| "epoch": 0.725449175486094, |
| "grad_norm": 0.8843436241149902, |
| "learning_rate": 4.837235327405599e-06, |
| "loss": 0.7178, |
| "step": 1965 |
| }, |
| { |
| "epoch": 0.7258183608171302, |
| "grad_norm": 0.9182543158531189, |
| "learning_rate": 4.837062655741139e-06, |
| "loss": 0.7311, |
| "step": 1966 |
| }, |
| { |
| "epoch": 0.7261875461481664, |
| "grad_norm": 0.8999276161193848, |
| "learning_rate": 4.836889895619595e-06, |
| "loss": 0.7329, |
| "step": 1967 |
| }, |
| { |
| "epoch": 0.7265567314792025, |
| "grad_norm": 0.9033870697021484, |
| "learning_rate": 4.836717047047507e-06, |
| "loss": 0.7254, |
| "step": 1968 |
| }, |
| { |
| "epoch": 0.7269259168102388, |
| "grad_norm": 0.8504629731178284, |
| "learning_rate": 4.836544110031415e-06, |
| "loss": 0.6982, |
| "step": 1969 |
| }, |
| { |
| "epoch": 0.7272951021412749, |
| "grad_norm": 0.8877577185630798, |
| "learning_rate": 4.836371084577867e-06, |
| "loss": 0.7121, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.7276642874723112, |
| "grad_norm": 0.845675528049469, |
| "learning_rate": 4.8361979706934096e-06, |
| "loss": 0.7187, |
| "step": 1971 |
| }, |
| { |
| "epoch": 0.7280334728033473, |
| "grad_norm": 0.8912767767906189, |
| "learning_rate": 4.836024768384597e-06, |
| "loss": 0.7221, |
| "step": 1972 |
| }, |
| { |
| "epoch": 0.7284026581343834, |
| "grad_norm": 0.9066804647445679, |
| "learning_rate": 4.8358514776579835e-06, |
| "loss": 0.737, |
| "step": 1973 |
| }, |
| { |
| "epoch": 0.7287718434654197, |
| "grad_norm": 0.874573826789856, |
| "learning_rate": 4.83567809852013e-06, |
| "loss": 0.6928, |
| "step": 1974 |
| }, |
| { |
| "epoch": 0.7291410287964558, |
| "grad_norm": 0.9108884930610657, |
| "learning_rate": 4.835504630977597e-06, |
| "loss": 0.6933, |
| "step": 1975 |
| }, |
| { |
| "epoch": 0.729510214127492, |
| "grad_norm": 0.8197352290153503, |
| "learning_rate": 4.8353310750369496e-06, |
| "loss": 0.6412, |
| "step": 1976 |
| }, |
| { |
| "epoch": 0.7298793994585282, |
| "grad_norm": 0.8398105502128601, |
| "learning_rate": 4.83515743070476e-06, |
| "loss": 0.6667, |
| "step": 1977 |
| }, |
| { |
| "epoch": 0.7302485847895643, |
| "grad_norm": 0.8997299075126648, |
| "learning_rate": 4.834983697987597e-06, |
| "loss": 0.6776, |
| "step": 1978 |
| }, |
| { |
| "epoch": 0.7306177701206006, |
| "grad_norm": 0.8984546661376953, |
| "learning_rate": 4.834809876892039e-06, |
| "loss": 0.7515, |
| "step": 1979 |
| }, |
| { |
| "epoch": 0.7309869554516367, |
| "grad_norm": 0.8575339913368225, |
| "learning_rate": 4.834635967424664e-06, |
| "loss": 0.7179, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.7313561407826729, |
| "grad_norm": 0.8993721008300781, |
| "learning_rate": 4.8344619695920545e-06, |
| "loss": 0.7405, |
| "step": 1981 |
| }, |
| { |
| "epoch": 0.7317253261137091, |
| "grad_norm": 0.8856477737426758, |
| "learning_rate": 4.8342878834007955e-06, |
| "loss": 0.766, |
| "step": 1982 |
| }, |
| { |
| "epoch": 0.7320945114447452, |
| "grad_norm": 0.8776651620864868, |
| "learning_rate": 4.834113708857477e-06, |
| "loss": 0.6993, |
| "step": 1983 |
| }, |
| { |
| "epoch": 0.7324636967757815, |
| "grad_norm": 0.8624359965324402, |
| "learning_rate": 4.8339394459686925e-06, |
| "loss": 0.7049, |
| "step": 1984 |
| }, |
| { |
| "epoch": 0.7328328821068176, |
| "grad_norm": 0.9040077924728394, |
| "learning_rate": 4.833765094741035e-06, |
| "loss": 0.6616, |
| "step": 1985 |
| }, |
| { |
| "epoch": 0.7332020674378538, |
| "grad_norm": 0.8632827997207642, |
| "learning_rate": 4.833590655181106e-06, |
| "loss": 0.6911, |
| "step": 1986 |
| }, |
| { |
| "epoch": 0.73357125276889, |
| "grad_norm": 0.869616687297821, |
| "learning_rate": 4.833416127295507e-06, |
| "loss": 0.7216, |
| "step": 1987 |
| }, |
| { |
| "epoch": 0.7339404380999262, |
| "grad_norm": 0.8821524977684021, |
| "learning_rate": 4.833241511090845e-06, |
| "loss": 0.7398, |
| "step": 1988 |
| }, |
| { |
| "epoch": 0.7343096234309623, |
| "grad_norm": 0.9026939272880554, |
| "learning_rate": 4.833066806573727e-06, |
| "loss": 0.6981, |
| "step": 1989 |
| }, |
| { |
| "epoch": 0.7346788087619985, |
| "grad_norm": 0.9178372621536255, |
| "learning_rate": 4.8328920137507665e-06, |
| "loss": 0.7219, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.7350479940930347, |
| "grad_norm": 0.8885320425033569, |
| "learning_rate": 4.832717132628581e-06, |
| "loss": 0.7093, |
| "step": 1991 |
| }, |
| { |
| "epoch": 0.7354171794240709, |
| "grad_norm": 0.8704083561897278, |
| "learning_rate": 4.832542163213787e-06, |
| "loss": 0.7475, |
| "step": 1992 |
| }, |
| { |
| "epoch": 0.7357863647551071, |
| "grad_norm": 0.8328550457954407, |
| "learning_rate": 4.832367105513008e-06, |
| "loss": 0.6655, |
| "step": 1993 |
| }, |
| { |
| "epoch": 0.7361555500861432, |
| "grad_norm": 0.900768518447876, |
| "learning_rate": 4.832191959532871e-06, |
| "loss": 0.6967, |
| "step": 1994 |
| }, |
| { |
| "epoch": 0.7365247354171794, |
| "grad_norm": 0.8467245697975159, |
| "learning_rate": 4.832016725280005e-06, |
| "loss": 0.6984, |
| "step": 1995 |
| }, |
| { |
| "epoch": 0.7368939207482156, |
| "grad_norm": 0.8155560493469238, |
| "learning_rate": 4.83184140276104e-06, |
| "loss": 0.6468, |
| "step": 1996 |
| }, |
| { |
| "epoch": 0.7372631060792518, |
| "grad_norm": 0.8829312920570374, |
| "learning_rate": 4.831665991982615e-06, |
| "loss": 0.7444, |
| "step": 1997 |
| }, |
| { |
| "epoch": 0.737632291410288, |
| "grad_norm": 0.8596073389053345, |
| "learning_rate": 4.831490492951368e-06, |
| "loss": 0.686, |
| "step": 1998 |
| }, |
| { |
| "epoch": 0.7380014767413241, |
| "grad_norm": 0.895241916179657, |
| "learning_rate": 4.831314905673942e-06, |
| "loss": 0.6905, |
| "step": 1999 |
| }, |
| { |
| "epoch": 0.7383706620723604, |
| "grad_norm": 0.8932267427444458, |
| "learning_rate": 4.831139230156982e-06, |
| "loss": 0.6809, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.7387398474033965, |
| "grad_norm": 0.8914393186569214, |
| "learning_rate": 4.8309634664071385e-06, |
| "loss": 0.6851, |
| "step": 2001 |
| }, |
| { |
| "epoch": 0.7391090327344327, |
| "grad_norm": 0.8881421685218811, |
| "learning_rate": 4.830787614431062e-06, |
| "loss": 0.7322, |
| "step": 2002 |
| }, |
| { |
| "epoch": 0.7394782180654689, |
| "grad_norm": 0.8394930958747864, |
| "learning_rate": 4.830611674235411e-06, |
| "loss": 0.6989, |
| "step": 2003 |
| }, |
| { |
| "epoch": 0.739847403396505, |
| "grad_norm": 0.8811700940132141, |
| "learning_rate": 4.830435645826844e-06, |
| "loss": 0.7009, |
| "step": 2004 |
| }, |
| { |
| "epoch": 0.7402165887275413, |
| "grad_norm": 0.8765476942062378, |
| "learning_rate": 4.830259529212023e-06, |
| "loss": 0.7325, |
| "step": 2005 |
| }, |
| { |
| "epoch": 0.7405857740585774, |
| "grad_norm": 0.8366778492927551, |
| "learning_rate": 4.830083324397614e-06, |
| "loss": 0.6912, |
| "step": 2006 |
| }, |
| { |
| "epoch": 0.7409549593896135, |
| "grad_norm": 0.8748518228530884, |
| "learning_rate": 4.829907031390287e-06, |
| "loss": 0.7269, |
| "step": 2007 |
| }, |
| { |
| "epoch": 0.7413241447206498, |
| "grad_norm": 0.93858802318573, |
| "learning_rate": 4.829730650196714e-06, |
| "loss": 0.7176, |
| "step": 2008 |
| }, |
| { |
| "epoch": 0.7416933300516859, |
| "grad_norm": 0.8684030771255493, |
| "learning_rate": 4.8295541808235715e-06, |
| "loss": 0.7018, |
| "step": 2009 |
| }, |
| { |
| "epoch": 0.7420625153827222, |
| "grad_norm": 0.8710829019546509, |
| "learning_rate": 4.829377623277538e-06, |
| "loss": 0.7137, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.7424317007137583, |
| "grad_norm": 0.8900696039199829, |
| "learning_rate": 4.829200977565296e-06, |
| "loss": 0.695, |
| "step": 2011 |
| }, |
| { |
| "epoch": 0.7428008860447944, |
| "grad_norm": 0.8644715547561646, |
| "learning_rate": 4.8290242436935334e-06, |
| "loss": 0.6685, |
| "step": 2012 |
| }, |
| { |
| "epoch": 0.7431700713758307, |
| "grad_norm": 0.8664246797561646, |
| "learning_rate": 4.828847421668938e-06, |
| "loss": 0.6827, |
| "step": 2013 |
| }, |
| { |
| "epoch": 0.7435392567068668, |
| "grad_norm": 0.9115155935287476, |
| "learning_rate": 4.828670511498202e-06, |
| "loss": 0.7093, |
| "step": 2014 |
| }, |
| { |
| "epoch": 0.7439084420379031, |
| "grad_norm": 0.9124789834022522, |
| "learning_rate": 4.828493513188022e-06, |
| "loss": 0.7339, |
| "step": 2015 |
| }, |
| { |
| "epoch": 0.7442776273689392, |
| "grad_norm": 0.8920673131942749, |
| "learning_rate": 4.828316426745098e-06, |
| "loss": 0.7109, |
| "step": 2016 |
| }, |
| { |
| "epoch": 0.7446468126999753, |
| "grad_norm": 0.9076531529426575, |
| "learning_rate": 4.828139252176131e-06, |
| "loss": 0.7117, |
| "step": 2017 |
| }, |
| { |
| "epoch": 0.7450159980310116, |
| "grad_norm": 0.8628717660903931, |
| "learning_rate": 4.82796198948783e-06, |
| "loss": 0.7317, |
| "step": 2018 |
| }, |
| { |
| "epoch": 0.7453851833620477, |
| "grad_norm": 0.8688773512840271, |
| "learning_rate": 4.827784638686901e-06, |
| "loss": 0.7004, |
| "step": 2019 |
| }, |
| { |
| "epoch": 0.745754368693084, |
| "grad_norm": 0.8732299208641052, |
| "learning_rate": 4.827607199780059e-06, |
| "loss": 0.7333, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.7461235540241201, |
| "grad_norm": 0.8918249011039734, |
| "learning_rate": 4.8274296727740185e-06, |
| "loss": 0.6741, |
| "step": 2021 |
| }, |
| { |
| "epoch": 0.7464927393551563, |
| "grad_norm": 0.8658198714256287, |
| "learning_rate": 4.827252057675499e-06, |
| "loss": 0.7014, |
| "step": 2022 |
| }, |
| { |
| "epoch": 0.7468619246861925, |
| "grad_norm": 0.9010634422302246, |
| "learning_rate": 4.827074354491225e-06, |
| "loss": 0.7067, |
| "step": 2023 |
| }, |
| { |
| "epoch": 0.7472311100172286, |
| "grad_norm": 0.8897079825401306, |
| "learning_rate": 4.8268965632279194e-06, |
| "loss": 0.7272, |
| "step": 2024 |
| }, |
| { |
| "epoch": 0.7476002953482648, |
| "grad_norm": 0.8660743236541748, |
| "learning_rate": 4.8267186838923145e-06, |
| "loss": 0.7262, |
| "step": 2025 |
| }, |
| { |
| "epoch": 0.747969480679301, |
| "grad_norm": 0.8774269819259644, |
| "learning_rate": 4.826540716491141e-06, |
| "loss": 0.7368, |
| "step": 2026 |
| }, |
| { |
| "epoch": 0.7483386660103372, |
| "grad_norm": 0.8606297969818115, |
| "learning_rate": 4.826362661031136e-06, |
| "loss": 0.7107, |
| "step": 2027 |
| }, |
| { |
| "epoch": 0.7487078513413734, |
| "grad_norm": 0.8835901618003845, |
| "learning_rate": 4.826184517519038e-06, |
| "loss": 0.7234, |
| "step": 2028 |
| }, |
| { |
| "epoch": 0.7490770366724095, |
| "grad_norm": 0.8923384547233582, |
| "learning_rate": 4.8260062859615915e-06, |
| "loss": 0.68, |
| "step": 2029 |
| }, |
| { |
| "epoch": 0.7494462220034457, |
| "grad_norm": 0.8915189504623413, |
| "learning_rate": 4.825827966365541e-06, |
| "loss": 0.6991, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.7498154073344819, |
| "grad_norm": 0.8923548460006714, |
| "learning_rate": 4.825649558737635e-06, |
| "loss": 0.75, |
| "step": 2031 |
| }, |
| { |
| "epoch": 0.7501845926655181, |
| "grad_norm": 0.8493019342422485, |
| "learning_rate": 4.825471063084627e-06, |
| "loss": 0.6659, |
| "step": 2032 |
| }, |
| { |
| "epoch": 0.7505537779965543, |
| "grad_norm": 0.8680519461631775, |
| "learning_rate": 4.825292479413274e-06, |
| "loss": 0.6889, |
| "step": 2033 |
| }, |
| { |
| "epoch": 0.7509229633275905, |
| "grad_norm": 0.9089414477348328, |
| "learning_rate": 4.825113807730334e-06, |
| "loss": 0.7363, |
| "step": 2034 |
| }, |
| { |
| "epoch": 0.7512921486586266, |
| "grad_norm": 0.8787106275558472, |
| "learning_rate": 4.8249350480425704e-06, |
| "loss": 0.6808, |
| "step": 2035 |
| }, |
| { |
| "epoch": 0.7516613339896628, |
| "grad_norm": 0.8691989779472351, |
| "learning_rate": 4.8247562003567486e-06, |
| "loss": 0.6974, |
| "step": 2036 |
| }, |
| { |
| "epoch": 0.752030519320699, |
| "grad_norm": 0.8437734246253967, |
| "learning_rate": 4.824577264679639e-06, |
| "loss": 0.6861, |
| "step": 2037 |
| }, |
| { |
| "epoch": 0.7523997046517352, |
| "grad_norm": 0.8832452893257141, |
| "learning_rate": 4.824398241018014e-06, |
| "loss": 0.7166, |
| "step": 2038 |
| }, |
| { |
| "epoch": 0.7527688899827714, |
| "grad_norm": 0.8944876194000244, |
| "learning_rate": 4.824219129378648e-06, |
| "loss": 0.7272, |
| "step": 2039 |
| }, |
| { |
| "epoch": 0.7531380753138075, |
| "grad_norm": 0.8749659657478333, |
| "learning_rate": 4.824039929768322e-06, |
| "loss": 0.7176, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.7535072606448437, |
| "grad_norm": 0.8656901717185974, |
| "learning_rate": 4.823860642193818e-06, |
| "loss": 0.7335, |
| "step": 2041 |
| }, |
| { |
| "epoch": 0.7538764459758799, |
| "grad_norm": 0.874985933303833, |
| "learning_rate": 4.823681266661923e-06, |
| "loss": 0.6942, |
| "step": 2042 |
| }, |
| { |
| "epoch": 0.754245631306916, |
| "grad_norm": 0.8945735692977905, |
| "learning_rate": 4.823501803179424e-06, |
| "loss": 0.733, |
| "step": 2043 |
| }, |
| { |
| "epoch": 0.7546148166379523, |
| "grad_norm": 0.8628880977630615, |
| "learning_rate": 4.823322251753115e-06, |
| "loss": 0.7065, |
| "step": 2044 |
| }, |
| { |
| "epoch": 0.7549840019689884, |
| "grad_norm": 0.850796639919281, |
| "learning_rate": 4.823142612389793e-06, |
| "loss": 0.6986, |
| "step": 2045 |
| }, |
| { |
| "epoch": 0.7553531873000247, |
| "grad_norm": 0.8502240180969238, |
| "learning_rate": 4.822962885096256e-06, |
| "loss": 0.7364, |
| "step": 2046 |
| }, |
| { |
| "epoch": 0.7557223726310608, |
| "grad_norm": 0.8640246987342834, |
| "learning_rate": 4.8227830698793076e-06, |
| "loss": 0.7207, |
| "step": 2047 |
| }, |
| { |
| "epoch": 0.7560915579620969, |
| "grad_norm": 0.8806377649307251, |
| "learning_rate": 4.8226031667457516e-06, |
| "loss": 0.7591, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.7564607432931332, |
| "grad_norm": 0.9259816408157349, |
| "learning_rate": 4.8224231757024e-06, |
| "loss": 0.7162, |
| "step": 2049 |
| }, |
| { |
| "epoch": 0.7568299286241693, |
| "grad_norm": 0.8613938689231873, |
| "learning_rate": 4.822243096756064e-06, |
| "loss": 0.7024, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.7571991139552056, |
| "grad_norm": 0.8919888734817505, |
| "learning_rate": 4.822062929913559e-06, |
| "loss": 0.7056, |
| "step": 2051 |
| }, |
| { |
| "epoch": 0.7575682992862417, |
| "grad_norm": 0.8805553913116455, |
| "learning_rate": 4.821882675181706e-06, |
| "loss": 0.7197, |
| "step": 2052 |
| }, |
| { |
| "epoch": 0.7579374846172778, |
| "grad_norm": 0.8903268575668335, |
| "learning_rate": 4.821702332567326e-06, |
| "loss": 0.7158, |
| "step": 2053 |
| }, |
| { |
| "epoch": 0.7583066699483141, |
| "grad_norm": 0.8912745118141174, |
| "learning_rate": 4.8215219020772455e-06, |
| "loss": 0.6996, |
| "step": 2054 |
| }, |
| { |
| "epoch": 0.7586758552793502, |
| "grad_norm": 0.8754813075065613, |
| "learning_rate": 4.821341383718293e-06, |
| "loss": 0.7052, |
| "step": 2055 |
| }, |
| { |
| "epoch": 0.7590450406103865, |
| "grad_norm": 0.8683738708496094, |
| "learning_rate": 4.821160777497303e-06, |
| "loss": 0.7288, |
| "step": 2056 |
| }, |
| { |
| "epoch": 0.7594142259414226, |
| "grad_norm": 0.8737871646881104, |
| "learning_rate": 4.820980083421109e-06, |
| "loss": 0.7289, |
| "step": 2057 |
| }, |
| { |
| "epoch": 0.7597834112724587, |
| "grad_norm": 0.8430957794189453, |
| "learning_rate": 4.820799301496552e-06, |
| "loss": 0.6643, |
| "step": 2058 |
| }, |
| { |
| "epoch": 0.760152596603495, |
| "grad_norm": 0.8619078397750854, |
| "learning_rate": 4.820618431730474e-06, |
| "loss": 0.6849, |
| "step": 2059 |
| }, |
| { |
| "epoch": 0.7605217819345311, |
| "grad_norm": 0.8729053139686584, |
| "learning_rate": 4.820437474129721e-06, |
| "loss": 0.7081, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.7608909672655673, |
| "grad_norm": 0.8700425028800964, |
| "learning_rate": 4.820256428701141e-06, |
| "loss": 0.6952, |
| "step": 2061 |
| }, |
| { |
| "epoch": 0.7612601525966035, |
| "grad_norm": 0.8315994143486023, |
| "learning_rate": 4.8200752954515885e-06, |
| "loss": 0.6861, |
| "step": 2062 |
| }, |
| { |
| "epoch": 0.7616293379276396, |
| "grad_norm": 0.8591640591621399, |
| "learning_rate": 4.819894074387917e-06, |
| "loss": 0.7155, |
| "step": 2063 |
| }, |
| { |
| "epoch": 0.7619985232586759, |
| "grad_norm": 0.9315993189811707, |
| "learning_rate": 4.8197127655169885e-06, |
| "loss": 0.7121, |
| "step": 2064 |
| }, |
| { |
| "epoch": 0.762367708589712, |
| "grad_norm": 0.8699647188186646, |
| "learning_rate": 4.819531368845662e-06, |
| "loss": 0.7095, |
| "step": 2065 |
| }, |
| { |
| "epoch": 0.7627368939207482, |
| "grad_norm": 0.8588031530380249, |
| "learning_rate": 4.819349884380807e-06, |
| "loss": 0.6605, |
| "step": 2066 |
| }, |
| { |
| "epoch": 0.7631060792517844, |
| "grad_norm": 0.898235559463501, |
| "learning_rate": 4.81916831212929e-06, |
| "loss": 0.7332, |
| "step": 2067 |
| }, |
| { |
| "epoch": 0.7634752645828206, |
| "grad_norm": 0.8969528079032898, |
| "learning_rate": 4.818986652097985e-06, |
| "loss": 0.7423, |
| "step": 2068 |
| }, |
| { |
| "epoch": 0.7638444499138568, |
| "grad_norm": 0.8572843074798584, |
| "learning_rate": 4.818804904293767e-06, |
| "loss": 0.7698, |
| "step": 2069 |
| }, |
| { |
| "epoch": 0.7642136352448929, |
| "grad_norm": 0.8814283609390259, |
| "learning_rate": 4.8186230687235145e-06, |
| "loss": 0.709, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.7645828205759291, |
| "grad_norm": 0.9045025110244751, |
| "learning_rate": 4.818441145394111e-06, |
| "loss": 0.7065, |
| "step": 2071 |
| }, |
| { |
| "epoch": 0.7649520059069653, |
| "grad_norm": 0.8909565210342407, |
| "learning_rate": 4.818259134312442e-06, |
| "loss": 0.6989, |
| "step": 2072 |
| }, |
| { |
| "epoch": 0.7653211912380015, |
| "grad_norm": 0.874191164970398, |
| "learning_rate": 4.818077035485396e-06, |
| "loss": 0.6966, |
| "step": 2073 |
| }, |
| { |
| "epoch": 0.7656903765690377, |
| "grad_norm": 0.8939563035964966, |
| "learning_rate": 4.817894848919866e-06, |
| "loss": 0.7279, |
| "step": 2074 |
| }, |
| { |
| "epoch": 0.7660595619000738, |
| "grad_norm": 0.8762744665145874, |
| "learning_rate": 4.817712574622748e-06, |
| "loss": 0.7097, |
| "step": 2075 |
| }, |
| { |
| "epoch": 0.76642874723111, |
| "grad_norm": 0.9104212522506714, |
| "learning_rate": 4.81753021260094e-06, |
| "loss": 0.7419, |
| "step": 2076 |
| }, |
| { |
| "epoch": 0.7667979325621462, |
| "grad_norm": 0.8775395154953003, |
| "learning_rate": 4.817347762861345e-06, |
| "loss": 0.6853, |
| "step": 2077 |
| }, |
| { |
| "epoch": 0.7671671178931824, |
| "grad_norm": 0.9023780822753906, |
| "learning_rate": 4.817165225410868e-06, |
| "loss": 0.7256, |
| "step": 2078 |
| }, |
| { |
| "epoch": 0.7675363032242185, |
| "grad_norm": 0.8750520944595337, |
| "learning_rate": 4.816982600256419e-06, |
| "loss": 0.6715, |
| "step": 2079 |
| }, |
| { |
| "epoch": 0.7679054885552548, |
| "grad_norm": 0.8693894743919373, |
| "learning_rate": 4.816799887404911e-06, |
| "loss": 0.694, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.7682746738862909, |
| "grad_norm": 0.8720349073410034, |
| "learning_rate": 4.816617086863256e-06, |
| "loss": 0.7284, |
| "step": 2081 |
| }, |
| { |
| "epoch": 0.7686438592173271, |
| "grad_norm": 0.8731261491775513, |
| "learning_rate": 4.816434198638378e-06, |
| "loss": 0.7152, |
| "step": 2082 |
| }, |
| { |
| "epoch": 0.7690130445483633, |
| "grad_norm": 0.8482458591461182, |
| "learning_rate": 4.816251222737195e-06, |
| "loss": 0.6944, |
| "step": 2083 |
| }, |
| { |
| "epoch": 0.7693822298793994, |
| "grad_norm": 0.9315195083618164, |
| "learning_rate": 4.816068159166635e-06, |
| "loss": 0.6859, |
| "step": 2084 |
| }, |
| { |
| "epoch": 0.7697514152104357, |
| "grad_norm": 0.8510304689407349, |
| "learning_rate": 4.815885007933625e-06, |
| "loss": 0.6863, |
| "step": 2085 |
| }, |
| { |
| "epoch": 0.7701206005414718, |
| "grad_norm": 0.8732700347900391, |
| "learning_rate": 4.815701769045099e-06, |
| "loss": 0.7242, |
| "step": 2086 |
| }, |
| { |
| "epoch": 0.770489785872508, |
| "grad_norm": 0.8937168121337891, |
| "learning_rate": 4.815518442507992e-06, |
| "loss": 0.7239, |
| "step": 2087 |
| }, |
| { |
| "epoch": 0.7708589712035442, |
| "grad_norm": 0.8584597706794739, |
| "learning_rate": 4.815335028329243e-06, |
| "loss": 0.7197, |
| "step": 2088 |
| }, |
| { |
| "epoch": 0.7712281565345803, |
| "grad_norm": 0.8576000928878784, |
| "learning_rate": 4.815151526515794e-06, |
| "loss": 0.6708, |
| "step": 2089 |
| }, |
| { |
| "epoch": 0.7715973418656166, |
| "grad_norm": 0.9142547845840454, |
| "learning_rate": 4.814967937074589e-06, |
| "loss": 0.7148, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.7719665271966527, |
| "grad_norm": 0.8664703369140625, |
| "learning_rate": 4.81478426001258e-06, |
| "loss": 0.7159, |
| "step": 2091 |
| }, |
| { |
| "epoch": 0.7723357125276888, |
| "grad_norm": 0.8845775723457336, |
| "learning_rate": 4.814600495336716e-06, |
| "loss": 0.7031, |
| "step": 2092 |
| }, |
| { |
| "epoch": 0.7727048978587251, |
| "grad_norm": 0.8626143932342529, |
| "learning_rate": 4.8144166430539555e-06, |
| "loss": 0.6828, |
| "step": 2093 |
| }, |
| { |
| "epoch": 0.7730740831897612, |
| "grad_norm": 0.8681825399398804, |
| "learning_rate": 4.814232703171254e-06, |
| "loss": 0.7169, |
| "step": 2094 |
| }, |
| { |
| "epoch": 0.7734432685207975, |
| "grad_norm": 0.9187625050544739, |
| "learning_rate": 4.8140486756955755e-06, |
| "loss": 0.7019, |
| "step": 2095 |
| }, |
| { |
| "epoch": 0.7738124538518336, |
| "grad_norm": 0.8887724876403809, |
| "learning_rate": 4.813864560633885e-06, |
| "loss": 0.7349, |
| "step": 2096 |
| }, |
| { |
| "epoch": 0.7741816391828698, |
| "grad_norm": 0.8629727959632874, |
| "learning_rate": 4.81368035799315e-06, |
| "loss": 0.7328, |
| "step": 2097 |
| }, |
| { |
| "epoch": 0.774550824513906, |
| "grad_norm": 0.8609279990196228, |
| "learning_rate": 4.813496067780345e-06, |
| "loss": 0.6909, |
| "step": 2098 |
| }, |
| { |
| "epoch": 0.7749200098449421, |
| "grad_norm": 0.8608946800231934, |
| "learning_rate": 4.813311690002444e-06, |
| "loss": 0.6885, |
| "step": 2099 |
| }, |
| { |
| "epoch": 0.7752891951759784, |
| "grad_norm": 0.8858616352081299, |
| "learning_rate": 4.813127224666425e-06, |
| "loss": 0.6919, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.7756583805070145, |
| "grad_norm": 0.8694943785667419, |
| "learning_rate": 4.812942671779271e-06, |
| "loss": 0.6736, |
| "step": 2101 |
| }, |
| { |
| "epoch": 0.7760275658380507, |
| "grad_norm": 0.8370912671089172, |
| "learning_rate": 4.812758031347967e-06, |
| "loss": 0.6608, |
| "step": 2102 |
| }, |
| { |
| "epoch": 0.7763967511690869, |
| "grad_norm": 0.8685939908027649, |
| "learning_rate": 4.812573303379501e-06, |
| "loss": 0.74, |
| "step": 2103 |
| }, |
| { |
| "epoch": 0.776765936500123, |
| "grad_norm": 0.8643637299537659, |
| "learning_rate": 4.812388487880865e-06, |
| "loss": 0.6988, |
| "step": 2104 |
| }, |
| { |
| "epoch": 0.7771351218311593, |
| "grad_norm": 0.9060371518135071, |
| "learning_rate": 4.8122035848590555e-06, |
| "loss": 0.731, |
| "step": 2105 |
| }, |
| { |
| "epoch": 0.7775043071621954, |
| "grad_norm": 0.8900934457778931, |
| "learning_rate": 4.81201859432107e-06, |
| "loss": 0.7363, |
| "step": 2106 |
| }, |
| { |
| "epoch": 0.7778734924932316, |
| "grad_norm": 0.8641214966773987, |
| "learning_rate": 4.8118335162739096e-06, |
| "loss": 0.6838, |
| "step": 2107 |
| }, |
| { |
| "epoch": 0.7782426778242678, |
| "grad_norm": 0.926879346370697, |
| "learning_rate": 4.81164835072458e-06, |
| "loss": 0.7187, |
| "step": 2108 |
| }, |
| { |
| "epoch": 0.778611863155304, |
| "grad_norm": 0.8715068697929382, |
| "learning_rate": 4.81146309768009e-06, |
| "loss": 0.7339, |
| "step": 2109 |
| }, |
| { |
| "epoch": 0.7789810484863401, |
| "grad_norm": 0.9065748453140259, |
| "learning_rate": 4.811277757147452e-06, |
| "loss": 0.7526, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.7793502338173763, |
| "grad_norm": 0.8723341822624207, |
| "learning_rate": 4.8110923291336804e-06, |
| "loss": 0.7001, |
| "step": 2111 |
| }, |
| { |
| "epoch": 0.7797194191484125, |
| "grad_norm": 0.8835813999176025, |
| "learning_rate": 4.810906813645793e-06, |
| "loss": 0.7187, |
| "step": 2112 |
| }, |
| { |
| "epoch": 0.7800886044794487, |
| "grad_norm": 0.8894395232200623, |
| "learning_rate": 4.810721210690812e-06, |
| "loss": 0.7349, |
| "step": 2113 |
| }, |
| { |
| "epoch": 0.7804577898104849, |
| "grad_norm": 0.8809918761253357, |
| "learning_rate": 4.8105355202757635e-06, |
| "loss": 0.6857, |
| "step": 2114 |
| }, |
| { |
| "epoch": 0.780826975141521, |
| "grad_norm": 0.8665323257446289, |
| "learning_rate": 4.810349742407673e-06, |
| "loss": 0.724, |
| "step": 2115 |
| }, |
| { |
| "epoch": 0.7811961604725572, |
| "grad_norm": 0.9003410935401917, |
| "learning_rate": 4.810163877093575e-06, |
| "loss": 0.6776, |
| "step": 2116 |
| }, |
| { |
| "epoch": 0.7815653458035934, |
| "grad_norm": 0.9015949368476868, |
| "learning_rate": 4.8099779243405035e-06, |
| "loss": 0.7107, |
| "step": 2117 |
| }, |
| { |
| "epoch": 0.7819345311346296, |
| "grad_norm": 0.9056869149208069, |
| "learning_rate": 4.8097918841554965e-06, |
| "loss": 0.7009, |
| "step": 2118 |
| }, |
| { |
| "epoch": 0.7823037164656658, |
| "grad_norm": 0.8645703792572021, |
| "learning_rate": 4.809605756545596e-06, |
| "loss": 0.7285, |
| "step": 2119 |
| }, |
| { |
| "epoch": 0.7826729017967019, |
| "grad_norm": 0.9308361411094666, |
| "learning_rate": 4.809419541517845e-06, |
| "loss": 0.7097, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.7830420871277382, |
| "grad_norm": 0.8967791795730591, |
| "learning_rate": 4.809233239079295e-06, |
| "loss": 0.7163, |
| "step": 2121 |
| }, |
| { |
| "epoch": 0.7834112724587743, |
| "grad_norm": 0.868552565574646, |
| "learning_rate": 4.809046849236995e-06, |
| "loss": 0.7397, |
| "step": 2122 |
| }, |
| { |
| "epoch": 0.7837804577898105, |
| "grad_norm": 0.894079864025116, |
| "learning_rate": 4.808860371998e-06, |
| "loss": 0.7136, |
| "step": 2123 |
| }, |
| { |
| "epoch": 0.7841496431208467, |
| "grad_norm": 0.874018132686615, |
| "learning_rate": 4.808673807369369e-06, |
| "loss": 0.6998, |
| "step": 2124 |
| }, |
| { |
| "epoch": 0.7845188284518828, |
| "grad_norm": 0.8726391792297363, |
| "learning_rate": 4.808487155358163e-06, |
| "loss": 0.7335, |
| "step": 2125 |
| }, |
| { |
| "epoch": 0.7848880137829191, |
| "grad_norm": 0.8656647205352783, |
| "learning_rate": 4.8083004159714465e-06, |
| "loss": 0.6756, |
| "step": 2126 |
| }, |
| { |
| "epoch": 0.7852571991139552, |
| "grad_norm": 0.8752986788749695, |
| "learning_rate": 4.808113589216288e-06, |
| "loss": 0.6846, |
| "step": 2127 |
| }, |
| { |
| "epoch": 0.7856263844449913, |
| "grad_norm": 0.8898423910140991, |
| "learning_rate": 4.807926675099759e-06, |
| "loss": 0.7056, |
| "step": 2128 |
| }, |
| { |
| "epoch": 0.7859955697760276, |
| "grad_norm": 0.9080005288124084, |
| "learning_rate": 4.807739673628934e-06, |
| "loss": 0.6836, |
| "step": 2129 |
| }, |
| { |
| "epoch": 0.7863647551070637, |
| "grad_norm": 0.9017002582550049, |
| "learning_rate": 4.8075525848108895e-06, |
| "loss": 0.7043, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.7867339404381, |
| "grad_norm": 0.9311098456382751, |
| "learning_rate": 4.807365408652709e-06, |
| "loss": 0.7493, |
| "step": 2131 |
| }, |
| { |
| "epoch": 0.7871031257691361, |
| "grad_norm": 1.0424463748931885, |
| "learning_rate": 4.807178145161475e-06, |
| "loss": 0.708, |
| "step": 2132 |
| }, |
| { |
| "epoch": 0.7874723111001722, |
| "grad_norm": 0.8803859949111938, |
| "learning_rate": 4.8069907943442775e-06, |
| "loss": 0.7011, |
| "step": 2133 |
| }, |
| { |
| "epoch": 0.7878414964312085, |
| "grad_norm": 0.8854016661643982, |
| "learning_rate": 4.806803356208205e-06, |
| "loss": 0.7101, |
| "step": 2134 |
| }, |
| { |
| "epoch": 0.7882106817622446, |
| "grad_norm": 0.8736090064048767, |
| "learning_rate": 4.806615830760355e-06, |
| "loss": 0.7102, |
| "step": 2135 |
| }, |
| { |
| "epoch": 0.7885798670932809, |
| "grad_norm": 0.8740845918655396, |
| "learning_rate": 4.806428218007823e-06, |
| "loss": 0.691, |
| "step": 2136 |
| }, |
| { |
| "epoch": 0.788949052424317, |
| "grad_norm": 0.9291470050811768, |
| "learning_rate": 4.806240517957711e-06, |
| "loss": 0.6972, |
| "step": 2137 |
| }, |
| { |
| "epoch": 0.7893182377553531, |
| "grad_norm": 0.8928626775741577, |
| "learning_rate": 4.8060527306171235e-06, |
| "loss": 0.6803, |
| "step": 2138 |
| }, |
| { |
| "epoch": 0.7896874230863894, |
| "grad_norm": 0.90341717004776, |
| "learning_rate": 4.805864855993167e-06, |
| "loss": 0.7155, |
| "step": 2139 |
| }, |
| { |
| "epoch": 0.7900566084174255, |
| "grad_norm": 0.8977493047714233, |
| "learning_rate": 4.805676894092956e-06, |
| "loss": 0.7205, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.7904257937484618, |
| "grad_norm": 0.8815012574195862, |
| "learning_rate": 4.805488844923599e-06, |
| "loss": 0.7137, |
| "step": 2141 |
| }, |
| { |
| "epoch": 0.7907949790794979, |
| "grad_norm": 0.8667290806770325, |
| "learning_rate": 4.8053007084922185e-06, |
| "loss": 0.7219, |
| "step": 2142 |
| }, |
| { |
| "epoch": 0.791164164410534, |
| "grad_norm": 0.8803372979164124, |
| "learning_rate": 4.805112484805934e-06, |
| "loss": 0.737, |
| "step": 2143 |
| }, |
| { |
| "epoch": 0.7915333497415703, |
| "grad_norm": 0.9153837561607361, |
| "learning_rate": 4.804924173871869e-06, |
| "loss": 0.7093, |
| "step": 2144 |
| }, |
| { |
| "epoch": 0.7919025350726064, |
| "grad_norm": 0.875203013420105, |
| "learning_rate": 4.804735775697151e-06, |
| "loss": 0.7147, |
| "step": 2145 |
| }, |
| { |
| "epoch": 0.7922717204036426, |
| "grad_norm": 0.8866360187530518, |
| "learning_rate": 4.8045472902889125e-06, |
| "loss": 0.7184, |
| "step": 2146 |
| }, |
| { |
| "epoch": 0.7926409057346788, |
| "grad_norm": 0.8125059604644775, |
| "learning_rate": 4.804358717654286e-06, |
| "loss": 0.6916, |
| "step": 2147 |
| }, |
| { |
| "epoch": 0.793010091065715, |
| "grad_norm": 0.8981543183326721, |
| "learning_rate": 4.804170057800408e-06, |
| "loss": 0.7109, |
| "step": 2148 |
| }, |
| { |
| "epoch": 0.7933792763967512, |
| "grad_norm": 0.841891348361969, |
| "learning_rate": 4.803981310734422e-06, |
| "loss": 0.6623, |
| "step": 2149 |
| }, |
| { |
| "epoch": 0.7937484617277873, |
| "grad_norm": 0.9186368584632874, |
| "learning_rate": 4.80379247646347e-06, |
| "loss": 0.7312, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.7941176470588235, |
| "grad_norm": 0.8802003860473633, |
| "learning_rate": 4.8036035549947e-06, |
| "loss": 0.7202, |
| "step": 2151 |
| }, |
| { |
| "epoch": 0.7944868323898597, |
| "grad_norm": 0.8542637825012207, |
| "learning_rate": 4.803414546335262e-06, |
| "loss": 0.6933, |
| "step": 2152 |
| }, |
| { |
| "epoch": 0.7948560177208959, |
| "grad_norm": 0.8632619976997375, |
| "learning_rate": 4.803225450492311e-06, |
| "loss": 0.7034, |
| "step": 2153 |
| }, |
| { |
| "epoch": 0.7952252030519321, |
| "grad_norm": 0.9003282785415649, |
| "learning_rate": 4.803036267473003e-06, |
| "loss": 0.707, |
| "step": 2154 |
| }, |
| { |
| "epoch": 0.7955943883829683, |
| "grad_norm": 0.8903799057006836, |
| "learning_rate": 4.802846997284499e-06, |
| "loss": 0.7226, |
| "step": 2155 |
| }, |
| { |
| "epoch": 0.7959635737140044, |
| "grad_norm": 0.8972636461257935, |
| "learning_rate": 4.802657639933964e-06, |
| "loss": 0.7459, |
| "step": 2156 |
| }, |
| { |
| "epoch": 0.7963327590450406, |
| "grad_norm": 0.8760960102081299, |
| "learning_rate": 4.802468195428563e-06, |
| "loss": 0.6983, |
| "step": 2157 |
| }, |
| { |
| "epoch": 0.7967019443760768, |
| "grad_norm": 0.8755945563316345, |
| "learning_rate": 4.802278663775468e-06, |
| "loss": 0.6942, |
| "step": 2158 |
| }, |
| { |
| "epoch": 0.797071129707113, |
| "grad_norm": 0.8775022625923157, |
| "learning_rate": 4.8020890449818524e-06, |
| "loss": 0.7312, |
| "step": 2159 |
| }, |
| { |
| "epoch": 0.7974403150381492, |
| "grad_norm": 0.8853294253349304, |
| "learning_rate": 4.801899339054893e-06, |
| "loss": 0.7129, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.7978095003691853, |
| "grad_norm": 0.8880541920661926, |
| "learning_rate": 4.801709546001769e-06, |
| "loss": 0.6857, |
| "step": 2161 |
| }, |
| { |
| "epoch": 0.7981786857002215, |
| "grad_norm": 0.9117274880409241, |
| "learning_rate": 4.801519665829666e-06, |
| "loss": 0.7093, |
| "step": 2162 |
| }, |
| { |
| "epoch": 0.7985478710312577, |
| "grad_norm": 0.8845729827880859, |
| "learning_rate": 4.8013296985457705e-06, |
| "loss": 0.7214, |
| "step": 2163 |
| }, |
| { |
| "epoch": 0.7989170563622938, |
| "grad_norm": 0.8805612325668335, |
| "learning_rate": 4.801139644157272e-06, |
| "loss": 0.6788, |
| "step": 2164 |
| }, |
| { |
| "epoch": 0.7992862416933301, |
| "grad_norm": 0.8353918790817261, |
| "learning_rate": 4.800949502671364e-06, |
| "loss": 0.6798, |
| "step": 2165 |
| }, |
| { |
| "epoch": 0.7996554270243662, |
| "grad_norm": 0.8719606995582581, |
| "learning_rate": 4.800759274095243e-06, |
| "loss": 0.6992, |
| "step": 2166 |
| }, |
| { |
| "epoch": 0.8000246123554025, |
| "grad_norm": 0.9136744141578674, |
| "learning_rate": 4.800568958436111e-06, |
| "loss": 0.7132, |
| "step": 2167 |
| }, |
| { |
| "epoch": 0.8003937976864386, |
| "grad_norm": 0.8668189644813538, |
| "learning_rate": 4.800378555701168e-06, |
| "loss": 0.686, |
| "step": 2168 |
| }, |
| { |
| "epoch": 0.8007629830174747, |
| "grad_norm": 0.8641545176506042, |
| "learning_rate": 4.800188065897624e-06, |
| "loss": 0.6975, |
| "step": 2169 |
| }, |
| { |
| "epoch": 0.801132168348511, |
| "grad_norm": 0.8470262289047241, |
| "learning_rate": 4.799997489032687e-06, |
| "loss": 0.7277, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.8015013536795471, |
| "grad_norm": 0.8653181791305542, |
| "learning_rate": 4.799806825113571e-06, |
| "loss": 0.7211, |
| "step": 2171 |
| }, |
| { |
| "epoch": 0.8018705390105834, |
| "grad_norm": 0.8891727328300476, |
| "learning_rate": 4.799616074147493e-06, |
| "loss": 0.7161, |
| "step": 2172 |
| }, |
| { |
| "epoch": 0.8022397243416195, |
| "grad_norm": 0.8755497336387634, |
| "learning_rate": 4.799425236141672e-06, |
| "loss": 0.694, |
| "step": 2173 |
| }, |
| { |
| "epoch": 0.8026089096726556, |
| "grad_norm": 0.8740184903144836, |
| "learning_rate": 4.7992343111033314e-06, |
| "loss": 0.6935, |
| "step": 2174 |
| }, |
| { |
| "epoch": 0.8029780950036919, |
| "grad_norm": 0.885025143623352, |
| "learning_rate": 4.7990432990396985e-06, |
| "loss": 0.6768, |
| "step": 2175 |
| }, |
| { |
| "epoch": 0.803347280334728, |
| "grad_norm": 0.9006462097167969, |
| "learning_rate": 4.798852199958002e-06, |
| "loss": 0.7212, |
| "step": 2176 |
| }, |
| { |
| "epoch": 0.8037164656657643, |
| "grad_norm": 0.8840904831886292, |
| "learning_rate": 4.798661013865475e-06, |
| "loss": 0.7266, |
| "step": 2177 |
| }, |
| { |
| "epoch": 0.8040856509968004, |
| "grad_norm": 0.8590693473815918, |
| "learning_rate": 4.798469740769354e-06, |
| "loss": 0.6822, |
| "step": 2178 |
| }, |
| { |
| "epoch": 0.8044548363278365, |
| "grad_norm": 0.8712232112884521, |
| "learning_rate": 4.798278380676879e-06, |
| "loss": 0.724, |
| "step": 2179 |
| }, |
| { |
| "epoch": 0.8048240216588728, |
| "grad_norm": 0.9065150618553162, |
| "learning_rate": 4.798086933595293e-06, |
| "loss": 0.6891, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.8051932069899089, |
| "grad_norm": 0.8752925395965576, |
| "learning_rate": 4.797895399531841e-06, |
| "loss": 0.71, |
| "step": 2181 |
| }, |
| { |
| "epoch": 0.8055623923209451, |
| "grad_norm": 0.8934815526008606, |
| "learning_rate": 4.797703778493774e-06, |
| "loss": 0.7422, |
| "step": 2182 |
| }, |
| { |
| "epoch": 0.8059315776519813, |
| "grad_norm": 0.8814444541931152, |
| "learning_rate": 4.7975120704883435e-06, |
| "loss": 0.7572, |
| "step": 2183 |
| }, |
| { |
| "epoch": 0.8063007629830174, |
| "grad_norm": 0.9211199879646301, |
| "learning_rate": 4.797320275522806e-06, |
| "loss": 0.7112, |
| "step": 2184 |
| }, |
| { |
| "epoch": 0.8066699483140537, |
| "grad_norm": 0.8932773470878601, |
| "learning_rate": 4.7971283936044226e-06, |
| "loss": 0.7328, |
| "step": 2185 |
| }, |
| { |
| "epoch": 0.8070391336450898, |
| "grad_norm": 0.8870787620544434, |
| "learning_rate": 4.796936424740454e-06, |
| "loss": 0.7048, |
| "step": 2186 |
| }, |
| { |
| "epoch": 0.807408318976126, |
| "grad_norm": 0.8720287680625916, |
| "learning_rate": 4.796744368938166e-06, |
| "loss": 0.6703, |
| "step": 2187 |
| }, |
| { |
| "epoch": 0.8077775043071622, |
| "grad_norm": 0.8679975271224976, |
| "learning_rate": 4.79655222620483e-06, |
| "loss": 0.7078, |
| "step": 2188 |
| }, |
| { |
| "epoch": 0.8081466896381984, |
| "grad_norm": 0.8599095940589905, |
| "learning_rate": 4.796359996547715e-06, |
| "loss": 0.6839, |
| "step": 2189 |
| }, |
| { |
| "epoch": 0.8085158749692346, |
| "grad_norm": 0.8417700529098511, |
| "learning_rate": 4.7961676799741e-06, |
| "loss": 0.6757, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.8088850603002707, |
| "grad_norm": 0.8529911637306213, |
| "learning_rate": 4.795975276491262e-06, |
| "loss": 0.7033, |
| "step": 2191 |
| }, |
| { |
| "epoch": 0.8092542456313069, |
| "grad_norm": 0.8772951364517212, |
| "learning_rate": 4.7957827861064855e-06, |
| "loss": 0.6562, |
| "step": 2192 |
| }, |
| { |
| "epoch": 0.8096234309623431, |
| "grad_norm": 0.9334204196929932, |
| "learning_rate": 4.795590208827054e-06, |
| "loss": 0.7505, |
| "step": 2193 |
| }, |
| { |
| "epoch": 0.8099926162933793, |
| "grad_norm": 0.8638662695884705, |
| "learning_rate": 4.795397544660258e-06, |
| "loss": 0.6816, |
| "step": 2194 |
| }, |
| { |
| "epoch": 0.8103618016244155, |
| "grad_norm": 0.9105616211891174, |
| "learning_rate": 4.795204793613391e-06, |
| "loss": 0.714, |
| "step": 2195 |
| }, |
| { |
| "epoch": 0.8107309869554516, |
| "grad_norm": 0.8942384719848633, |
| "learning_rate": 4.7950119556937455e-06, |
| "loss": 0.7327, |
| "step": 2196 |
| }, |
| { |
| "epoch": 0.8111001722864878, |
| "grad_norm": 0.8448975086212158, |
| "learning_rate": 4.794819030908622e-06, |
| "loss": 0.6941, |
| "step": 2197 |
| }, |
| { |
| "epoch": 0.811469357617524, |
| "grad_norm": 0.912215530872345, |
| "learning_rate": 4.7946260192653215e-06, |
| "loss": 0.7216, |
| "step": 2198 |
| }, |
| { |
| "epoch": 0.8118385429485602, |
| "grad_norm": 0.8788778185844421, |
| "learning_rate": 4.794432920771152e-06, |
| "loss": 0.7399, |
| "step": 2199 |
| }, |
| { |
| "epoch": 0.8122077282795963, |
| "grad_norm": 0.8956362009048462, |
| "learning_rate": 4.794239735433419e-06, |
| "loss": 0.7282, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.8125769136106326, |
| "grad_norm": 0.9069665670394897, |
| "learning_rate": 4.7940464632594376e-06, |
| "loss": 0.7298, |
| "step": 2201 |
| }, |
| { |
| "epoch": 0.8129460989416687, |
| "grad_norm": 0.8730249404907227, |
| "learning_rate": 4.793853104256521e-06, |
| "loss": 0.7101, |
| "step": 2202 |
| }, |
| { |
| "epoch": 0.8133152842727049, |
| "grad_norm": 1.0605006217956543, |
| "learning_rate": 4.793659658431988e-06, |
| "loss": 0.7115, |
| "step": 2203 |
| }, |
| { |
| "epoch": 0.8136844696037411, |
| "grad_norm": 0.9141219258308411, |
| "learning_rate": 4.793466125793161e-06, |
| "loss": 0.7554, |
| "step": 2204 |
| }, |
| { |
| "epoch": 0.8140536549347772, |
| "grad_norm": 0.8501049876213074, |
| "learning_rate": 4.793272506347365e-06, |
| "loss": 0.6839, |
| "step": 2205 |
| }, |
| { |
| "epoch": 0.8144228402658135, |
| "grad_norm": 0.8409802317619324, |
| "learning_rate": 4.793078800101929e-06, |
| "loss": 0.6518, |
| "step": 2206 |
| }, |
| { |
| "epoch": 0.8147920255968496, |
| "grad_norm": 0.8758774995803833, |
| "learning_rate": 4.792885007064183e-06, |
| "loss": 0.7349, |
| "step": 2207 |
| }, |
| { |
| "epoch": 0.8151612109278858, |
| "grad_norm": 0.8612200021743774, |
| "learning_rate": 4.792691127241463e-06, |
| "loss": 0.6952, |
| "step": 2208 |
| }, |
| { |
| "epoch": 0.815530396258922, |
| "grad_norm": 0.8541130423545837, |
| "learning_rate": 4.792497160641108e-06, |
| "loss": 0.6723, |
| "step": 2209 |
| }, |
| { |
| "epoch": 0.8158995815899581, |
| "grad_norm": 0.884846568107605, |
| "learning_rate": 4.7923031072704595e-06, |
| "loss": 0.7165, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.8162687669209944, |
| "grad_norm": 0.8823457956314087, |
| "learning_rate": 4.792108967136861e-06, |
| "loss": 0.7599, |
| "step": 2211 |
| }, |
| { |
| "epoch": 0.8166379522520305, |
| "grad_norm": 0.8571475744247437, |
| "learning_rate": 4.791914740247663e-06, |
| "loss": 0.7122, |
| "step": 2212 |
| }, |
| { |
| "epoch": 0.8170071375830666, |
| "grad_norm": 0.8436484336853027, |
| "learning_rate": 4.791720426610214e-06, |
| "loss": 0.6781, |
| "step": 2213 |
| }, |
| { |
| "epoch": 0.8173763229141029, |
| "grad_norm": 0.8832083344459534, |
| "learning_rate": 4.791526026231871e-06, |
| "loss": 0.7202, |
| "step": 2214 |
| }, |
| { |
| "epoch": 0.817745508245139, |
| "grad_norm": 0.9136930108070374, |
| "learning_rate": 4.79133153911999e-06, |
| "loss": 0.756, |
| "step": 2215 |
| }, |
| { |
| "epoch": 0.8181146935761753, |
| "grad_norm": 0.87180095911026, |
| "learning_rate": 4.791136965281934e-06, |
| "loss": 0.6874, |
| "step": 2216 |
| }, |
| { |
| "epoch": 0.8184838789072114, |
| "grad_norm": 0.8829808831214905, |
| "learning_rate": 4.790942304725067e-06, |
| "loss": 0.6974, |
| "step": 2217 |
| }, |
| { |
| "epoch": 0.8188530642382476, |
| "grad_norm": 0.8646160960197449, |
| "learning_rate": 4.790747557456757e-06, |
| "loss": 0.704, |
| "step": 2218 |
| }, |
| { |
| "epoch": 0.8192222495692838, |
| "grad_norm": 0.8445609211921692, |
| "learning_rate": 4.790552723484375e-06, |
| "loss": 0.7028, |
| "step": 2219 |
| }, |
| { |
| "epoch": 0.8195914349003199, |
| "grad_norm": 0.8569789528846741, |
| "learning_rate": 4.7903578028152946e-06, |
| "loss": 0.7304, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.8199606202313562, |
| "grad_norm": 0.8919884562492371, |
| "learning_rate": 4.790162795456895e-06, |
| "loss": 0.7303, |
| "step": 2221 |
| }, |
| { |
| "epoch": 0.8203298055623923, |
| "grad_norm": 0.8782804012298584, |
| "learning_rate": 4.789967701416556e-06, |
| "loss": 0.6676, |
| "step": 2222 |
| }, |
| { |
| "epoch": 0.8206989908934285, |
| "grad_norm": 0.8411016464233398, |
| "learning_rate": 4.789772520701662e-06, |
| "loss": 0.6856, |
| "step": 2223 |
| }, |
| { |
| "epoch": 0.8210681762244647, |
| "grad_norm": 0.8851430416107178, |
| "learning_rate": 4.7895772533196e-06, |
| "loss": 0.6995, |
| "step": 2224 |
| }, |
| { |
| "epoch": 0.8214373615555008, |
| "grad_norm": 0.8756270408630371, |
| "learning_rate": 4.789381899277763e-06, |
| "loss": 0.6872, |
| "step": 2225 |
| }, |
| { |
| "epoch": 0.8218065468865371, |
| "grad_norm": 0.8848443031311035, |
| "learning_rate": 4.7891864585835435e-06, |
| "loss": 0.7128, |
| "step": 2226 |
| }, |
| { |
| "epoch": 0.8221757322175732, |
| "grad_norm": 0.8853998780250549, |
| "learning_rate": 4.788990931244338e-06, |
| "loss": 0.7123, |
| "step": 2227 |
| }, |
| { |
| "epoch": 0.8225449175486094, |
| "grad_norm": 0.8603769540786743, |
| "learning_rate": 4.788795317267549e-06, |
| "loss": 0.7094, |
| "step": 2228 |
| }, |
| { |
| "epoch": 0.8229141028796456, |
| "grad_norm": 0.8783960342407227, |
| "learning_rate": 4.7885996166605795e-06, |
| "loss": 0.6871, |
| "step": 2229 |
| }, |
| { |
| "epoch": 0.8232832882106818, |
| "grad_norm": 0.9068373441696167, |
| "learning_rate": 4.788403829430837e-06, |
| "loss": 0.7508, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.8236524735417179, |
| "grad_norm": 0.9134407043457031, |
| "learning_rate": 4.788207955585732e-06, |
| "loss": 0.685, |
| "step": 2231 |
| }, |
| { |
| "epoch": 0.8240216588727541, |
| "grad_norm": 0.8849846720695496, |
| "learning_rate": 4.788011995132678e-06, |
| "loss": 0.6984, |
| "step": 2232 |
| }, |
| { |
| "epoch": 0.8243908442037903, |
| "grad_norm": 0.8880661725997925, |
| "learning_rate": 4.787815948079092e-06, |
| "loss": 0.6916, |
| "step": 2233 |
| }, |
| { |
| "epoch": 0.8247600295348265, |
| "grad_norm": 0.8438676595687866, |
| "learning_rate": 4.787619814432394e-06, |
| "loss": 0.6781, |
| "step": 2234 |
| }, |
| { |
| "epoch": 0.8251292148658627, |
| "grad_norm": 0.8555399775505066, |
| "learning_rate": 4.787423594200007e-06, |
| "loss": 0.7133, |
| "step": 2235 |
| }, |
| { |
| "epoch": 0.8254984001968988, |
| "grad_norm": 0.8856319189071655, |
| "learning_rate": 4.787227287389361e-06, |
| "loss": 0.7102, |
| "step": 2236 |
| }, |
| { |
| "epoch": 0.825867585527935, |
| "grad_norm": 0.8559306859970093, |
| "learning_rate": 4.787030894007882e-06, |
| "loss": 0.6828, |
| "step": 2237 |
| }, |
| { |
| "epoch": 0.8262367708589712, |
| "grad_norm": 0.8558578491210938, |
| "learning_rate": 4.7868344140630076e-06, |
| "loss": 0.692, |
| "step": 2238 |
| }, |
| { |
| "epoch": 0.8266059561900074, |
| "grad_norm": 0.8673276901245117, |
| "learning_rate": 4.786637847562171e-06, |
| "loss": 0.7351, |
| "step": 2239 |
| }, |
| { |
| "epoch": 0.8269751415210436, |
| "grad_norm": 0.88542640209198, |
| "learning_rate": 4.786441194512814e-06, |
| "loss": 0.7116, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.8273443268520797, |
| "grad_norm": 0.8700323700904846, |
| "learning_rate": 4.786244454922379e-06, |
| "loss": 0.707, |
| "step": 2241 |
| }, |
| { |
| "epoch": 0.827713512183116, |
| "grad_norm": 0.8809252977371216, |
| "learning_rate": 4.7860476287983124e-06, |
| "loss": 0.715, |
| "step": 2242 |
| }, |
| { |
| "epoch": 0.8280826975141521, |
| "grad_norm": 0.9027058482170105, |
| "learning_rate": 4.785850716148066e-06, |
| "loss": 0.7354, |
| "step": 2243 |
| }, |
| { |
| "epoch": 0.8284518828451883, |
| "grad_norm": 0.8742669224739075, |
| "learning_rate": 4.7856537169790905e-06, |
| "loss": 0.6748, |
| "step": 2244 |
| }, |
| { |
| "epoch": 0.8288210681762245, |
| "grad_norm": 0.8710356950759888, |
| "learning_rate": 4.7854566312988435e-06, |
| "loss": 0.7133, |
| "step": 2245 |
| }, |
| { |
| "epoch": 0.8291902535072606, |
| "grad_norm": 0.9114298224449158, |
| "learning_rate": 4.785259459114784e-06, |
| "loss": 0.7176, |
| "step": 2246 |
| }, |
| { |
| "epoch": 0.8295594388382969, |
| "grad_norm": 0.8762310147285461, |
| "learning_rate": 4.785062200434376e-06, |
| "loss": 0.7265, |
| "step": 2247 |
| }, |
| { |
| "epoch": 0.829928624169333, |
| "grad_norm": 0.8757422566413879, |
| "learning_rate": 4.784864855265083e-06, |
| "loss": 0.6755, |
| "step": 2248 |
| }, |
| { |
| "epoch": 0.8302978095003691, |
| "grad_norm": 0.8273042440414429, |
| "learning_rate": 4.784667423614379e-06, |
| "loss": 0.6903, |
| "step": 2249 |
| }, |
| { |
| "epoch": 0.8306669948314054, |
| "grad_norm": 0.911002516746521, |
| "learning_rate": 4.784469905489732e-06, |
| "loss": 0.7196, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.8310361801624415, |
| "grad_norm": 0.8729998469352722, |
| "learning_rate": 4.784272300898621e-06, |
| "loss": 0.727, |
| "step": 2251 |
| }, |
| { |
| "epoch": 0.8314053654934778, |
| "grad_norm": 0.897255003452301, |
| "learning_rate": 4.784074609848524e-06, |
| "loss": 0.7145, |
| "step": 2252 |
| }, |
| { |
| "epoch": 0.8317745508245139, |
| "grad_norm": 0.8995161056518555, |
| "learning_rate": 4.783876832346924e-06, |
| "loss": 0.6983, |
| "step": 2253 |
| }, |
| { |
| "epoch": 0.83214373615555, |
| "grad_norm": 0.8717133402824402, |
| "learning_rate": 4.783678968401306e-06, |
| "loss": 0.7164, |
| "step": 2254 |
| }, |
| { |
| "epoch": 0.8325129214865863, |
| "grad_norm": 0.8860628604888916, |
| "learning_rate": 4.783481018019161e-06, |
| "loss": 0.6891, |
| "step": 2255 |
| }, |
| { |
| "epoch": 0.8328821068176224, |
| "grad_norm": 0.8644296526908875, |
| "learning_rate": 4.783282981207979e-06, |
| "loss": 0.7449, |
| "step": 2256 |
| }, |
| { |
| "epoch": 0.8332512921486587, |
| "grad_norm": 0.8703963160514832, |
| "learning_rate": 4.783084857975258e-06, |
| "loss": 0.7067, |
| "step": 2257 |
| }, |
| { |
| "epoch": 0.8336204774796948, |
| "grad_norm": 0.8384194374084473, |
| "learning_rate": 4.782886648328495e-06, |
| "loss": 0.6811, |
| "step": 2258 |
| }, |
| { |
| "epoch": 0.833989662810731, |
| "grad_norm": 0.8807311654090881, |
| "learning_rate": 4.7826883522751934e-06, |
| "loss": 0.7016, |
| "step": 2259 |
| }, |
| { |
| "epoch": 0.8343588481417672, |
| "grad_norm": 0.8632846474647522, |
| "learning_rate": 4.782489969822857e-06, |
| "loss": 0.6956, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.8347280334728033, |
| "grad_norm": 0.852558434009552, |
| "learning_rate": 4.7822915009789965e-06, |
| "loss": 0.7076, |
| "step": 2261 |
| }, |
| { |
| "epoch": 0.8350972188038396, |
| "grad_norm": 0.8728243708610535, |
| "learning_rate": 4.782092945751122e-06, |
| "loss": 0.6962, |
| "step": 2262 |
| }, |
| { |
| "epoch": 0.8354664041348757, |
| "grad_norm": 0.8713400363922119, |
| "learning_rate": 4.781894304146751e-06, |
| "loss": 0.7163, |
| "step": 2263 |
| }, |
| { |
| "epoch": 0.8358355894659119, |
| "grad_norm": 0.893301784992218, |
| "learning_rate": 4.7816955761734e-06, |
| "loss": 0.7268, |
| "step": 2264 |
| }, |
| { |
| "epoch": 0.8362047747969481, |
| "grad_norm": 0.854168713092804, |
| "learning_rate": 4.781496761838592e-06, |
| "loss": 0.6719, |
| "step": 2265 |
| }, |
| { |
| "epoch": 0.8365739601279842, |
| "grad_norm": 0.8409112095832825, |
| "learning_rate": 4.781297861149852e-06, |
| "loss": 0.7148, |
| "step": 2266 |
| }, |
| { |
| "epoch": 0.8369431454590204, |
| "grad_norm": 0.8801021575927734, |
| "learning_rate": 4.781098874114707e-06, |
| "loss": 0.7365, |
| "step": 2267 |
| }, |
| { |
| "epoch": 0.8373123307900566, |
| "grad_norm": 0.849139392375946, |
| "learning_rate": 4.780899800740689e-06, |
| "loss": 0.7219, |
| "step": 2268 |
| }, |
| { |
| "epoch": 0.8376815161210928, |
| "grad_norm": 0.8867851495742798, |
| "learning_rate": 4.780700641035335e-06, |
| "loss": 0.7369, |
| "step": 2269 |
| }, |
| { |
| "epoch": 0.838050701452129, |
| "grad_norm": 0.877516508102417, |
| "learning_rate": 4.780501395006181e-06, |
| "loss": 0.7083, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.8384198867831651, |
| "grad_norm": 0.8607859015464783, |
| "learning_rate": 4.7803020626607686e-06, |
| "loss": 0.6921, |
| "step": 2271 |
| }, |
| { |
| "epoch": 0.8387890721142013, |
| "grad_norm": 0.8483718633651733, |
| "learning_rate": 4.7801026440066425e-06, |
| "loss": 0.6989, |
| "step": 2272 |
| }, |
| { |
| "epoch": 0.8391582574452375, |
| "grad_norm": 0.8639686107635498, |
| "learning_rate": 4.779903139051352e-06, |
| "loss": 0.6932, |
| "step": 2273 |
| }, |
| { |
| "epoch": 0.8395274427762737, |
| "grad_norm": 0.8545430302619934, |
| "learning_rate": 4.779703547802446e-06, |
| "loss": 0.7133, |
| "step": 2274 |
| }, |
| { |
| "epoch": 0.8398966281073099, |
| "grad_norm": 0.8846385478973389, |
| "learning_rate": 4.7795038702674816e-06, |
| "loss": 0.6923, |
| "step": 2275 |
| }, |
| { |
| "epoch": 0.840265813438346, |
| "grad_norm": 0.8536352515220642, |
| "learning_rate": 4.7793041064540135e-06, |
| "loss": 0.6885, |
| "step": 2276 |
| }, |
| { |
| "epoch": 0.8406349987693822, |
| "grad_norm": 0.8723476529121399, |
| "learning_rate": 4.779104256369605e-06, |
| "loss": 0.7001, |
| "step": 2277 |
| }, |
| { |
| "epoch": 0.8410041841004184, |
| "grad_norm": 0.8628838062286377, |
| "learning_rate": 4.77890432002182e-06, |
| "loss": 0.6879, |
| "step": 2278 |
| }, |
| { |
| "epoch": 0.8413733694314546, |
| "grad_norm": 0.8553763031959534, |
| "learning_rate": 4.778704297418226e-06, |
| "loss": 0.6245, |
| "step": 2279 |
| }, |
| { |
| "epoch": 0.8417425547624908, |
| "grad_norm": 0.9173882603645325, |
| "learning_rate": 4.778504188566393e-06, |
| "loss": 0.7236, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.842111740093527, |
| "grad_norm": 0.8579378128051758, |
| "learning_rate": 4.7783039934738955e-06, |
| "loss": 0.7327, |
| "step": 2281 |
| }, |
| { |
| "epoch": 0.8424809254245631, |
| "grad_norm": 0.8806033134460449, |
| "learning_rate": 4.778103712148311e-06, |
| "loss": 0.7236, |
| "step": 2282 |
| }, |
| { |
| "epoch": 0.8428501107555993, |
| "grad_norm": 0.8968467116355896, |
| "learning_rate": 4.77790334459722e-06, |
| "loss": 0.744, |
| "step": 2283 |
| }, |
| { |
| "epoch": 0.8432192960866355, |
| "grad_norm": 0.8785833120346069, |
| "learning_rate": 4.777702890828206e-06, |
| "loss": 0.6963, |
| "step": 2284 |
| }, |
| { |
| "epoch": 0.8435884814176716, |
| "grad_norm": 0.8928925395011902, |
| "learning_rate": 4.777502350848857e-06, |
| "loss": 0.7089, |
| "step": 2285 |
| }, |
| { |
| "epoch": 0.8439576667487079, |
| "grad_norm": 0.8602617383003235, |
| "learning_rate": 4.777301724666763e-06, |
| "loss": 0.7025, |
| "step": 2286 |
| }, |
| { |
| "epoch": 0.844326852079744, |
| "grad_norm": 0.9113879799842834, |
| "learning_rate": 4.777101012289517e-06, |
| "loss": 0.6959, |
| "step": 2287 |
| }, |
| { |
| "epoch": 0.8446960374107803, |
| "grad_norm": 0.8733735680580139, |
| "learning_rate": 4.776900213724717e-06, |
| "loss": 0.7307, |
| "step": 2288 |
| }, |
| { |
| "epoch": 0.8450652227418164, |
| "grad_norm": 0.8886083364486694, |
| "learning_rate": 4.776699328979961e-06, |
| "loss": 0.7134, |
| "step": 2289 |
| }, |
| { |
| "epoch": 0.8454344080728525, |
| "grad_norm": 0.9018360376358032, |
| "learning_rate": 4.776498358062855e-06, |
| "loss": 0.7246, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.8458035934038888, |
| "grad_norm": 0.9006841778755188, |
| "learning_rate": 4.776297300981005e-06, |
| "loss": 0.7188, |
| "step": 2291 |
| }, |
| { |
| "epoch": 0.8461727787349249, |
| "grad_norm": 0.8728944063186646, |
| "learning_rate": 4.77609615774202e-06, |
| "loss": 0.685, |
| "step": 2292 |
| }, |
| { |
| "epoch": 0.8465419640659612, |
| "grad_norm": 0.8995937705039978, |
| "learning_rate": 4.775894928353514e-06, |
| "loss": 0.6868, |
| "step": 2293 |
| }, |
| { |
| "epoch": 0.8469111493969973, |
| "grad_norm": 0.8754889369010925, |
| "learning_rate": 4.7756936128231026e-06, |
| "loss": 0.7013, |
| "step": 2294 |
| }, |
| { |
| "epoch": 0.8472803347280334, |
| "grad_norm": 0.8546683192253113, |
| "learning_rate": 4.775492211158407e-06, |
| "loss": 0.6874, |
| "step": 2295 |
| }, |
| { |
| "epoch": 0.8476495200590697, |
| "grad_norm": 0.8704326152801514, |
| "learning_rate": 4.775290723367048e-06, |
| "loss": 0.7295, |
| "step": 2296 |
| }, |
| { |
| "epoch": 0.8480187053901058, |
| "grad_norm": 0.9398274421691895, |
| "learning_rate": 4.7750891494566555e-06, |
| "loss": 0.7252, |
| "step": 2297 |
| }, |
| { |
| "epoch": 0.8483878907211421, |
| "grad_norm": 0.8432691097259521, |
| "learning_rate": 4.774887489434855e-06, |
| "loss": 0.6964, |
| "step": 2298 |
| }, |
| { |
| "epoch": 0.8487570760521782, |
| "grad_norm": 0.8544699549674988, |
| "learning_rate": 4.774685743309282e-06, |
| "loss": 0.6654, |
| "step": 2299 |
| }, |
| { |
| "epoch": 0.8491262613832143, |
| "grad_norm": 0.8489146828651428, |
| "learning_rate": 4.774483911087571e-06, |
| "loss": 0.7227, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.8494954467142506, |
| "grad_norm": 0.882328987121582, |
| "learning_rate": 4.774281992777361e-06, |
| "loss": 0.7194, |
| "step": 2301 |
| }, |
| { |
| "epoch": 0.8498646320452867, |
| "grad_norm": 0.8312026858329773, |
| "learning_rate": 4.7740799883862966e-06, |
| "loss": 0.6877, |
| "step": 2302 |
| }, |
| { |
| "epoch": 0.8502338173763229, |
| "grad_norm": 0.8677752017974854, |
| "learning_rate": 4.7738778979220215e-06, |
| "loss": 0.7522, |
| "step": 2303 |
| }, |
| { |
| "epoch": 0.8506030027073591, |
| "grad_norm": 0.8952152729034424, |
| "learning_rate": 4.773675721392186e-06, |
| "loss": 0.7023, |
| "step": 2304 |
| }, |
| { |
| "epoch": 0.8509721880383952, |
| "grad_norm": 0.8487045764923096, |
| "learning_rate": 4.773473458804442e-06, |
| "loss": 0.6788, |
| "step": 2305 |
| }, |
| { |
| "epoch": 0.8513413733694315, |
| "grad_norm": 0.8884228467941284, |
| "learning_rate": 4.7732711101664455e-06, |
| "loss": 0.6975, |
| "step": 2306 |
| }, |
| { |
| "epoch": 0.8517105587004676, |
| "grad_norm": 0.8592703342437744, |
| "learning_rate": 4.773068675485854e-06, |
| "loss": 0.6663, |
| "step": 2307 |
| }, |
| { |
| "epoch": 0.8520797440315038, |
| "grad_norm": 0.8684633374214172, |
| "learning_rate": 4.772866154770331e-06, |
| "loss": 0.7363, |
| "step": 2308 |
| }, |
| { |
| "epoch": 0.85244892936254, |
| "grad_norm": 0.8979021906852722, |
| "learning_rate": 4.772663548027542e-06, |
| "loss": 0.7207, |
| "step": 2309 |
| }, |
| { |
| "epoch": 0.8528181146935762, |
| "grad_norm": 0.8448242545127869, |
| "learning_rate": 4.772460855265154e-06, |
| "loss": 0.6676, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.8531873000246124, |
| "grad_norm": 0.8736171722412109, |
| "learning_rate": 4.77225807649084e-06, |
| "loss": 0.6814, |
| "step": 2311 |
| }, |
| { |
| "epoch": 0.8535564853556485, |
| "grad_norm": 0.908427357673645, |
| "learning_rate": 4.772055211712276e-06, |
| "loss": 0.7004, |
| "step": 2312 |
| }, |
| { |
| "epoch": 0.8539256706866847, |
| "grad_norm": 0.9255645275115967, |
| "learning_rate": 4.771852260937138e-06, |
| "loss": 0.7046, |
| "step": 2313 |
| }, |
| { |
| "epoch": 0.8542948560177209, |
| "grad_norm": 0.8731818795204163, |
| "learning_rate": 4.771649224173109e-06, |
| "loss": 0.6838, |
| "step": 2314 |
| }, |
| { |
| "epoch": 0.8546640413487571, |
| "grad_norm": 0.9106831550598145, |
| "learning_rate": 4.7714461014278745e-06, |
| "loss": 0.6892, |
| "step": 2315 |
| }, |
| { |
| "epoch": 0.8550332266797932, |
| "grad_norm": 0.9361001253128052, |
| "learning_rate": 4.771242892709121e-06, |
| "loss": 0.7194, |
| "step": 2316 |
| }, |
| { |
| "epoch": 0.8554024120108294, |
| "grad_norm": 0.9008825421333313, |
| "learning_rate": 4.771039598024542e-06, |
| "loss": 0.7086, |
| "step": 2317 |
| }, |
| { |
| "epoch": 0.8557715973418656, |
| "grad_norm": 0.8849498629570007, |
| "learning_rate": 4.77083621738183e-06, |
| "loss": 0.7, |
| "step": 2318 |
| }, |
| { |
| "epoch": 0.8561407826729018, |
| "grad_norm": 0.9198765158653259, |
| "learning_rate": 4.770632750788685e-06, |
| "loss": 0.7492, |
| "step": 2319 |
| }, |
| { |
| "epoch": 0.856509968003938, |
| "grad_norm": 0.8527234196662903, |
| "learning_rate": 4.770429198252806e-06, |
| "loss": 0.7013, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.8568791533349741, |
| "grad_norm": 0.8671658635139465, |
| "learning_rate": 4.770225559781899e-06, |
| "loss": 0.6557, |
| "step": 2321 |
| }, |
| { |
| "epoch": 0.8572483386660104, |
| "grad_norm": 0.9001504778862, |
| "learning_rate": 4.77002183538367e-06, |
| "loss": 0.7024, |
| "step": 2322 |
| }, |
| { |
| "epoch": 0.8576175239970465, |
| "grad_norm": 0.8563042879104614, |
| "learning_rate": 4.769818025065832e-06, |
| "loss": 0.6986, |
| "step": 2323 |
| }, |
| { |
| "epoch": 0.8579867093280827, |
| "grad_norm": 0.8947487473487854, |
| "learning_rate": 4.769614128836098e-06, |
| "loss": 0.696, |
| "step": 2324 |
| }, |
| { |
| "epoch": 0.8583558946591189, |
| "grad_norm": 0.8475277423858643, |
| "learning_rate": 4.769410146702186e-06, |
| "loss": 0.6987, |
| "step": 2325 |
| }, |
| { |
| "epoch": 0.858725079990155, |
| "grad_norm": 0.8660178184509277, |
| "learning_rate": 4.769206078671815e-06, |
| "loss": 0.7162, |
| "step": 2326 |
| }, |
| { |
| "epoch": 0.8590942653211913, |
| "grad_norm": 0.9072052836418152, |
| "learning_rate": 4.769001924752711e-06, |
| "loss": 0.7051, |
| "step": 2327 |
| }, |
| { |
| "epoch": 0.8594634506522274, |
| "grad_norm": 0.893517255783081, |
| "learning_rate": 4.7687976849526e-06, |
| "loss": 0.6847, |
| "step": 2328 |
| }, |
| { |
| "epoch": 0.8598326359832636, |
| "grad_norm": 0.8827802538871765, |
| "learning_rate": 4.768593359279212e-06, |
| "loss": 0.732, |
| "step": 2329 |
| }, |
| { |
| "epoch": 0.8602018213142998, |
| "grad_norm": 0.8519994616508484, |
| "learning_rate": 4.768388947740282e-06, |
| "loss": 0.7077, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.8605710066453359, |
| "grad_norm": 0.8811690211296082, |
| "learning_rate": 4.768184450343546e-06, |
| "loss": 0.724, |
| "step": 2331 |
| }, |
| { |
| "epoch": 0.8609401919763722, |
| "grad_norm": 0.8675876259803772, |
| "learning_rate": 4.7679798670967446e-06, |
| "loss": 0.723, |
| "step": 2332 |
| }, |
| { |
| "epoch": 0.8613093773074083, |
| "grad_norm": 0.8784676194190979, |
| "learning_rate": 4.767775198007621e-06, |
| "loss": 0.7162, |
| "step": 2333 |
| }, |
| { |
| "epoch": 0.8616785626384444, |
| "grad_norm": 0.904371440410614, |
| "learning_rate": 4.767570443083922e-06, |
| "loss": 0.746, |
| "step": 2334 |
| }, |
| { |
| "epoch": 0.8620477479694807, |
| "grad_norm": 0.9030969738960266, |
| "learning_rate": 4.767365602333397e-06, |
| "loss": 0.7272, |
| "step": 2335 |
| }, |
| { |
| "epoch": 0.8624169333005168, |
| "grad_norm": 0.9071660041809082, |
| "learning_rate": 4.7671606757638e-06, |
| "loss": 0.7358, |
| "step": 2336 |
| }, |
| { |
| "epoch": 0.8627861186315531, |
| "grad_norm": 0.8928775191307068, |
| "learning_rate": 4.766955663382887e-06, |
| "loss": 0.7025, |
| "step": 2337 |
| }, |
| { |
| "epoch": 0.8631553039625892, |
| "grad_norm": 0.8183935880661011, |
| "learning_rate": 4.766750565198417e-06, |
| "loss": 0.6496, |
| "step": 2338 |
| }, |
| { |
| "epoch": 0.8635244892936254, |
| "grad_norm": 0.9024012088775635, |
| "learning_rate": 4.7665453812181535e-06, |
| "loss": 0.7378, |
| "step": 2339 |
| }, |
| { |
| "epoch": 0.8638936746246616, |
| "grad_norm": 0.8578570485115051, |
| "learning_rate": 4.766340111449863e-06, |
| "loss": 0.7127, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.8642628599556977, |
| "grad_norm": 0.8736898303031921, |
| "learning_rate": 4.766134755901315e-06, |
| "loss": 0.7021, |
| "step": 2341 |
| }, |
| { |
| "epoch": 0.864632045286734, |
| "grad_norm": 0.8850502371788025, |
| "learning_rate": 4.765929314580281e-06, |
| "loss": 0.7092, |
| "step": 2342 |
| }, |
| { |
| "epoch": 0.8650012306177701, |
| "grad_norm": 0.8683915734291077, |
| "learning_rate": 4.765723787494538e-06, |
| "loss": 0.6966, |
| "step": 2343 |
| }, |
| { |
| "epoch": 0.8653704159488063, |
| "grad_norm": 0.8678285479545593, |
| "learning_rate": 4.765518174651864e-06, |
| "loss": 0.6836, |
| "step": 2344 |
| }, |
| { |
| "epoch": 0.8657396012798425, |
| "grad_norm": 0.8723390698432922, |
| "learning_rate": 4.7653124760600435e-06, |
| "loss": 0.6934, |
| "step": 2345 |
| }, |
| { |
| "epoch": 0.8661087866108786, |
| "grad_norm": 0.8977248072624207, |
| "learning_rate": 4.7651066917268595e-06, |
| "loss": 0.6904, |
| "step": 2346 |
| }, |
| { |
| "epoch": 0.8664779719419149, |
| "grad_norm": 0.8875908255577087, |
| "learning_rate": 4.764900821660102e-06, |
| "loss": 0.7268, |
| "step": 2347 |
| }, |
| { |
| "epoch": 0.866847157272951, |
| "grad_norm": 0.8535262942314148, |
| "learning_rate": 4.764694865867564e-06, |
| "loss": 0.6876, |
| "step": 2348 |
| }, |
| { |
| "epoch": 0.8672163426039872, |
| "grad_norm": 0.8672966361045837, |
| "learning_rate": 4.76448882435704e-06, |
| "loss": 0.7274, |
| "step": 2349 |
| }, |
| { |
| "epoch": 0.8675855279350234, |
| "grad_norm": 0.8733333945274353, |
| "learning_rate": 4.764282697136328e-06, |
| "loss": 0.7304, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.8679547132660596, |
| "grad_norm": 0.8675313591957092, |
| "learning_rate": 4.764076484213232e-06, |
| "loss": 0.73, |
| "step": 2351 |
| }, |
| { |
| "epoch": 0.8683238985970957, |
| "grad_norm": 0.9445770382881165, |
| "learning_rate": 4.763870185595554e-06, |
| "loss": 0.7039, |
| "step": 2352 |
| }, |
| { |
| "epoch": 0.8686930839281319, |
| "grad_norm": 0.8547798991203308, |
| "learning_rate": 4.763663801291104e-06, |
| "loss": 0.6863, |
| "step": 2353 |
| }, |
| { |
| "epoch": 0.8690622692591681, |
| "grad_norm": 0.8876418471336365, |
| "learning_rate": 4.763457331307695e-06, |
| "loss": 0.7051, |
| "step": 2354 |
| }, |
| { |
| "epoch": 0.8694314545902043, |
| "grad_norm": 0.8787064552307129, |
| "learning_rate": 4.763250775653139e-06, |
| "loss": 0.667, |
| "step": 2355 |
| }, |
| { |
| "epoch": 0.8698006399212405, |
| "grad_norm": 0.8645234704017639, |
| "learning_rate": 4.763044134335256e-06, |
| "loss": 0.7211, |
| "step": 2356 |
| }, |
| { |
| "epoch": 0.8701698252522766, |
| "grad_norm": 0.8852728605270386, |
| "learning_rate": 4.762837407361866e-06, |
| "loss": 0.6929, |
| "step": 2357 |
| }, |
| { |
| "epoch": 0.8705390105833128, |
| "grad_norm": 0.8681321144104004, |
| "learning_rate": 4.7626305947407944e-06, |
| "loss": 0.6783, |
| "step": 2358 |
| }, |
| { |
| "epoch": 0.870908195914349, |
| "grad_norm": 0.8449599146842957, |
| "learning_rate": 4.7624236964798695e-06, |
| "loss": 0.6916, |
| "step": 2359 |
| }, |
| { |
| "epoch": 0.8712773812453852, |
| "grad_norm": 0.8922649621963501, |
| "learning_rate": 4.762216712586922e-06, |
| "loss": 0.7218, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.8716465665764214, |
| "grad_norm": 0.8748049736022949, |
| "learning_rate": 4.762009643069786e-06, |
| "loss": 0.705, |
| "step": 2361 |
| }, |
| { |
| "epoch": 0.8720157519074575, |
| "grad_norm": 0.90827476978302, |
| "learning_rate": 4.761802487936298e-06, |
| "loss": 0.7234, |
| "step": 2362 |
| }, |
| { |
| "epoch": 0.8723849372384938, |
| "grad_norm": 0.8977128863334656, |
| "learning_rate": 4.7615952471943006e-06, |
| "loss": 0.7423, |
| "step": 2363 |
| }, |
| { |
| "epoch": 0.8727541225695299, |
| "grad_norm": 0.8443285822868347, |
| "learning_rate": 4.761387920851636e-06, |
| "loss": 0.6924, |
| "step": 2364 |
| }, |
| { |
| "epoch": 0.8731233079005661, |
| "grad_norm": 0.8841611742973328, |
| "learning_rate": 4.761180508916152e-06, |
| "loss": 0.6902, |
| "step": 2365 |
| }, |
| { |
| "epoch": 0.8734924932316023, |
| "grad_norm": 0.8554913401603699, |
| "learning_rate": 4.760973011395701e-06, |
| "loss": 0.6946, |
| "step": 2366 |
| }, |
| { |
| "epoch": 0.8738616785626384, |
| "grad_norm": 0.8738844990730286, |
| "learning_rate": 4.760765428298134e-06, |
| "loss": 0.6728, |
| "step": 2367 |
| }, |
| { |
| "epoch": 0.8742308638936747, |
| "grad_norm": 0.8898583650588989, |
| "learning_rate": 4.760557759631309e-06, |
| "loss": 0.7027, |
| "step": 2368 |
| }, |
| { |
| "epoch": 0.8746000492247108, |
| "grad_norm": 0.8664471507072449, |
| "learning_rate": 4.760350005403086e-06, |
| "loss": 0.7114, |
| "step": 2369 |
| }, |
| { |
| "epoch": 0.8749692345557469, |
| "grad_norm": 0.902643084526062, |
| "learning_rate": 4.76014216562133e-06, |
| "loss": 0.7317, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.8753384198867832, |
| "grad_norm": 0.8991020917892456, |
| "learning_rate": 4.759934240293906e-06, |
| "loss": 0.7036, |
| "step": 2371 |
| }, |
| { |
| "epoch": 0.8757076052178193, |
| "grad_norm": 0.8848997950553894, |
| "learning_rate": 4.759726229428683e-06, |
| "loss": 0.7416, |
| "step": 2372 |
| }, |
| { |
| "epoch": 0.8760767905488556, |
| "grad_norm": 0.8591799736022949, |
| "learning_rate": 4.759518133033536e-06, |
| "loss": 0.6713, |
| "step": 2373 |
| }, |
| { |
| "epoch": 0.8764459758798917, |
| "grad_norm": 0.8584043979644775, |
| "learning_rate": 4.7593099511163405e-06, |
| "loss": 0.6793, |
| "step": 2374 |
| }, |
| { |
| "epoch": 0.8768151612109278, |
| "grad_norm": 0.8347330689430237, |
| "learning_rate": 4.759101683684977e-06, |
| "loss": 0.6585, |
| "step": 2375 |
| }, |
| { |
| "epoch": 0.8771843465419641, |
| "grad_norm": 0.8343052864074707, |
| "learning_rate": 4.7588933307473275e-06, |
| "loss": 0.6982, |
| "step": 2376 |
| }, |
| { |
| "epoch": 0.8775535318730002, |
| "grad_norm": 0.8740595579147339, |
| "learning_rate": 4.758684892311278e-06, |
| "loss": 0.6836, |
| "step": 2377 |
| }, |
| { |
| "epoch": 0.8779227172040365, |
| "grad_norm": 0.8705688118934631, |
| "learning_rate": 4.758476368384719e-06, |
| "loss": 0.6859, |
| "step": 2378 |
| }, |
| { |
| "epoch": 0.8782919025350726, |
| "grad_norm": 1.0071030855178833, |
| "learning_rate": 4.758267758975541e-06, |
| "loss": 0.7224, |
| "step": 2379 |
| }, |
| { |
| "epoch": 0.8786610878661087, |
| "grad_norm": 0.8606764078140259, |
| "learning_rate": 4.758059064091642e-06, |
| "loss": 0.6912, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.879030273197145, |
| "grad_norm": 0.9006412625312805, |
| "learning_rate": 4.75785028374092e-06, |
| "loss": 0.7044, |
| "step": 2381 |
| }, |
| { |
| "epoch": 0.8793994585281811, |
| "grad_norm": 0.8861314058303833, |
| "learning_rate": 4.757641417931278e-06, |
| "loss": 0.6825, |
| "step": 2382 |
| }, |
| { |
| "epoch": 0.8797686438592174, |
| "grad_norm": 0.8772760033607483, |
| "learning_rate": 4.75743246667062e-06, |
| "loss": 0.6987, |
| "step": 2383 |
| }, |
| { |
| "epoch": 0.8801378291902535, |
| "grad_norm": 0.8669841289520264, |
| "learning_rate": 4.757223429966855e-06, |
| "loss": 0.6761, |
| "step": 2384 |
| }, |
| { |
| "epoch": 0.8805070145212897, |
| "grad_norm": 0.8585817813873291, |
| "learning_rate": 4.757014307827897e-06, |
| "loss": 0.6793, |
| "step": 2385 |
| }, |
| { |
| "epoch": 0.8808761998523259, |
| "grad_norm": 0.8737130165100098, |
| "learning_rate": 4.756805100261658e-06, |
| "loss": 0.6956, |
| "step": 2386 |
| }, |
| { |
| "epoch": 0.881245385183362, |
| "grad_norm": 0.8613405823707581, |
| "learning_rate": 4.75659580727606e-06, |
| "loss": 0.6852, |
| "step": 2387 |
| }, |
| { |
| "epoch": 0.8816145705143982, |
| "grad_norm": 0.8970152735710144, |
| "learning_rate": 4.756386428879022e-06, |
| "loss": 0.7446, |
| "step": 2388 |
| }, |
| { |
| "epoch": 0.8819837558454344, |
| "grad_norm": 0.8712779879570007, |
| "learning_rate": 4.75617696507847e-06, |
| "loss": 0.7216, |
| "step": 2389 |
| }, |
| { |
| "epoch": 0.8823529411764706, |
| "grad_norm": 0.8699203133583069, |
| "learning_rate": 4.755967415882331e-06, |
| "loss": 0.7225, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.8827221265075068, |
| "grad_norm": 0.8482356071472168, |
| "learning_rate": 4.755757781298539e-06, |
| "loss": 0.7013, |
| "step": 2391 |
| }, |
| { |
| "epoch": 0.883091311838543, |
| "grad_norm": 0.8718639016151428, |
| "learning_rate": 4.7555480613350255e-06, |
| "loss": 0.7017, |
| "step": 2392 |
| }, |
| { |
| "epoch": 0.8834604971695791, |
| "grad_norm": 0.886998176574707, |
| "learning_rate": 4.7553382559997305e-06, |
| "loss": 0.7333, |
| "step": 2393 |
| }, |
| { |
| "epoch": 0.8838296825006153, |
| "grad_norm": 0.9135888814926147, |
| "learning_rate": 4.755128365300594e-06, |
| "loss": 0.7033, |
| "step": 2394 |
| }, |
| { |
| "epoch": 0.8841988678316515, |
| "grad_norm": 0.8773205280303955, |
| "learning_rate": 4.7549183892455605e-06, |
| "loss": 0.71, |
| "step": 2395 |
| }, |
| { |
| "epoch": 0.8845680531626877, |
| "grad_norm": 0.8733408451080322, |
| "learning_rate": 4.754708327842579e-06, |
| "loss": 0.6728, |
| "step": 2396 |
| }, |
| { |
| "epoch": 0.8849372384937239, |
| "grad_norm": 0.86775141954422, |
| "learning_rate": 4.7544981810995976e-06, |
| "loss": 0.6991, |
| "step": 2397 |
| }, |
| { |
| "epoch": 0.88530642382476, |
| "grad_norm": 0.8773441314697266, |
| "learning_rate": 4.7542879490245705e-06, |
| "loss": 0.7403, |
| "step": 2398 |
| }, |
| { |
| "epoch": 0.8856756091557962, |
| "grad_norm": 0.8608452677726746, |
| "learning_rate": 4.754077631625457e-06, |
| "loss": 0.6506, |
| "step": 2399 |
| }, |
| { |
| "epoch": 0.8860447944868324, |
| "grad_norm": 0.866370677947998, |
| "learning_rate": 4.753867228910217e-06, |
| "loss": 0.7008, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.8864139798178686, |
| "grad_norm": 0.8670253753662109, |
| "learning_rate": 4.753656740886814e-06, |
| "loss": 0.7363, |
| "step": 2401 |
| }, |
| { |
| "epoch": 0.8867831651489048, |
| "grad_norm": 0.8610836267471313, |
| "learning_rate": 4.753446167563214e-06, |
| "loss": 0.6961, |
| "step": 2402 |
| }, |
| { |
| "epoch": 0.8871523504799409, |
| "grad_norm": 0.8401498198509216, |
| "learning_rate": 4.753235508947388e-06, |
| "loss": 0.6867, |
| "step": 2403 |
| }, |
| { |
| "epoch": 0.8875215358109771, |
| "grad_norm": 0.8600946068763733, |
| "learning_rate": 4.753024765047309e-06, |
| "loss": 0.6911, |
| "step": 2404 |
| }, |
| { |
| "epoch": 0.8878907211420133, |
| "grad_norm": 0.8780009150505066, |
| "learning_rate": 4.752813935870954e-06, |
| "loss": 0.7171, |
| "step": 2405 |
| }, |
| { |
| "epoch": 0.8882599064730494, |
| "grad_norm": 0.8487856984138489, |
| "learning_rate": 4.752603021426302e-06, |
| "loss": 0.6779, |
| "step": 2406 |
| }, |
| { |
| "epoch": 0.8886290918040857, |
| "grad_norm": 0.8483532071113586, |
| "learning_rate": 4.752392021721337e-06, |
| "loss": 0.6661, |
| "step": 2407 |
| }, |
| { |
| "epoch": 0.8889982771351218, |
| "grad_norm": 0.858508825302124, |
| "learning_rate": 4.752180936764044e-06, |
| "loss": 0.715, |
| "step": 2408 |
| }, |
| { |
| "epoch": 0.889367462466158, |
| "grad_norm": 0.850054919719696, |
| "learning_rate": 4.751969766562414e-06, |
| "loss": 0.6773, |
| "step": 2409 |
| }, |
| { |
| "epoch": 0.8897366477971942, |
| "grad_norm": 0.8784133195877075, |
| "learning_rate": 4.751758511124439e-06, |
| "loss": 0.6924, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.8901058331282303, |
| "grad_norm": 0.9067288041114807, |
| "learning_rate": 4.751547170458115e-06, |
| "loss": 0.698, |
| "step": 2411 |
| }, |
| { |
| "epoch": 0.8904750184592666, |
| "grad_norm": 0.8615005016326904, |
| "learning_rate": 4.751335744571441e-06, |
| "loss": 0.6891, |
| "step": 2412 |
| }, |
| { |
| "epoch": 0.8908442037903027, |
| "grad_norm": 0.8885693550109863, |
| "learning_rate": 4.75112423347242e-06, |
| "loss": 0.7179, |
| "step": 2413 |
| }, |
| { |
| "epoch": 0.891213389121339, |
| "grad_norm": 0.8749710321426392, |
| "learning_rate": 4.750912637169057e-06, |
| "loss": 0.7343, |
| "step": 2414 |
| }, |
| { |
| "epoch": 0.8915825744523751, |
| "grad_norm": 0.8808736205101013, |
| "learning_rate": 4.750700955669362e-06, |
| "loss": 0.7388, |
| "step": 2415 |
| }, |
| { |
| "epoch": 0.8919517597834112, |
| "grad_norm": 0.8593876957893372, |
| "learning_rate": 4.750489188981345e-06, |
| "loss": 0.6887, |
| "step": 2416 |
| }, |
| { |
| "epoch": 0.8923209451144475, |
| "grad_norm": 0.8734657168388367, |
| "learning_rate": 4.7502773371130225e-06, |
| "loss": 0.6933, |
| "step": 2417 |
| }, |
| { |
| "epoch": 0.8926901304454836, |
| "grad_norm": 0.8681308031082153, |
| "learning_rate": 4.750065400072413e-06, |
| "loss": 0.6763, |
| "step": 2418 |
| }, |
| { |
| "epoch": 0.8930593157765198, |
| "grad_norm": 0.8781231641769409, |
| "learning_rate": 4.7498533778675386e-06, |
| "loss": 0.6863, |
| "step": 2419 |
| }, |
| { |
| "epoch": 0.893428501107556, |
| "grad_norm": 1.038848876953125, |
| "learning_rate": 4.749641270506424e-06, |
| "loss": 0.6943, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.8937976864385921, |
| "grad_norm": 0.8760762214660645, |
| "learning_rate": 4.749429077997098e-06, |
| "loss": 0.7224, |
| "step": 2421 |
| }, |
| { |
| "epoch": 0.8941668717696284, |
| "grad_norm": 0.8649592995643616, |
| "learning_rate": 4.7492168003475894e-06, |
| "loss": 0.7052, |
| "step": 2422 |
| }, |
| { |
| "epoch": 0.8945360571006645, |
| "grad_norm": 0.8721056580543518, |
| "learning_rate": 4.749004437565936e-06, |
| "loss": 0.7032, |
| "step": 2423 |
| }, |
| { |
| "epoch": 0.8949052424317007, |
| "grad_norm": 0.9054531455039978, |
| "learning_rate": 4.748791989660174e-06, |
| "loss": 0.7161, |
| "step": 2424 |
| }, |
| { |
| "epoch": 0.8952744277627369, |
| "grad_norm": 0.8350791335105896, |
| "learning_rate": 4.748579456638346e-06, |
| "loss": 0.6682, |
| "step": 2425 |
| }, |
| { |
| "epoch": 0.895643613093773, |
| "grad_norm": 0.8753942251205444, |
| "learning_rate": 4.748366838508494e-06, |
| "loss": 0.6904, |
| "step": 2426 |
| }, |
| { |
| "epoch": 0.8960127984248093, |
| "grad_norm": 0.9068854451179504, |
| "learning_rate": 4.748154135278667e-06, |
| "loss": 0.7247, |
| "step": 2427 |
| }, |
| { |
| "epoch": 0.8963819837558454, |
| "grad_norm": 0.9158604145050049, |
| "learning_rate": 4.747941346956916e-06, |
| "loss": 0.7043, |
| "step": 2428 |
| }, |
| { |
| "epoch": 0.8967511690868816, |
| "grad_norm": 0.8779573440551758, |
| "learning_rate": 4.747728473551294e-06, |
| "loss": 0.7254, |
| "step": 2429 |
| }, |
| { |
| "epoch": 0.8971203544179178, |
| "grad_norm": 0.8369114398956299, |
| "learning_rate": 4.7475155150698585e-06, |
| "loss": 0.687, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.897489539748954, |
| "grad_norm": 0.8734737038612366, |
| "learning_rate": 4.747302471520671e-06, |
| "loss": 0.7011, |
| "step": 2431 |
| }, |
| { |
| "epoch": 0.8978587250799902, |
| "grad_norm": 0.8805413246154785, |
| "learning_rate": 4.747089342911793e-06, |
| "loss": 0.6706, |
| "step": 2432 |
| }, |
| { |
| "epoch": 0.8982279104110263, |
| "grad_norm": 0.8578121662139893, |
| "learning_rate": 4.746876129251293e-06, |
| "loss": 0.666, |
| "step": 2433 |
| }, |
| { |
| "epoch": 0.8985970957420625, |
| "grad_norm": 0.8652933835983276, |
| "learning_rate": 4.746662830547242e-06, |
| "loss": 0.6837, |
| "step": 2434 |
| }, |
| { |
| "epoch": 0.8989662810730987, |
| "grad_norm": 0.8619236350059509, |
| "learning_rate": 4.74644944680771e-06, |
| "loss": 0.7021, |
| "step": 2435 |
| }, |
| { |
| "epoch": 0.8993354664041349, |
| "grad_norm": 0.9408307075500488, |
| "learning_rate": 4.746235978040776e-06, |
| "loss": 0.7223, |
| "step": 2436 |
| }, |
| { |
| "epoch": 0.899704651735171, |
| "grad_norm": 0.8929667472839355, |
| "learning_rate": 4.74602242425452e-06, |
| "loss": 0.7132, |
| "step": 2437 |
| }, |
| { |
| "epoch": 0.9000738370662072, |
| "grad_norm": 0.8759931921958923, |
| "learning_rate": 4.745808785457023e-06, |
| "loss": 0.7078, |
| "step": 2438 |
| }, |
| { |
| "epoch": 0.9004430223972434, |
| "grad_norm": 0.8628905415534973, |
| "learning_rate": 4.745595061656372e-06, |
| "loss": 0.685, |
| "step": 2439 |
| }, |
| { |
| "epoch": 0.9008122077282796, |
| "grad_norm": 0.8453993797302246, |
| "learning_rate": 4.745381252860658e-06, |
| "loss": 0.6745, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.9011813930593158, |
| "grad_norm": 0.8623847365379333, |
| "learning_rate": 4.745167359077971e-06, |
| "loss": 0.6999, |
| "step": 2441 |
| }, |
| { |
| "epoch": 0.9015505783903519, |
| "grad_norm": 0.8913419246673584, |
| "learning_rate": 4.7449533803164085e-06, |
| "loss": 0.7087, |
| "step": 2442 |
| }, |
| { |
| "epoch": 0.9019197637213882, |
| "grad_norm": 0.8496670722961426, |
| "learning_rate": 4.744739316584069e-06, |
| "loss": 0.7265, |
| "step": 2443 |
| }, |
| { |
| "epoch": 0.9022889490524243, |
| "grad_norm": 0.8533394932746887, |
| "learning_rate": 4.7445251678890555e-06, |
| "loss": 0.7205, |
| "step": 2444 |
| }, |
| { |
| "epoch": 0.9026581343834605, |
| "grad_norm": 0.886719286441803, |
| "learning_rate": 4.744310934239472e-06, |
| "loss": 0.7136, |
| "step": 2445 |
| }, |
| { |
| "epoch": 0.9030273197144967, |
| "grad_norm": 0.8670817613601685, |
| "learning_rate": 4.744096615643428e-06, |
| "loss": 0.7349, |
| "step": 2446 |
| }, |
| { |
| "epoch": 0.9033965050455328, |
| "grad_norm": 0.8604776263237, |
| "learning_rate": 4.743882212109036e-06, |
| "loss": 0.6867, |
| "step": 2447 |
| }, |
| { |
| "epoch": 0.9037656903765691, |
| "grad_norm": 0.8604888916015625, |
| "learning_rate": 4.74366772364441e-06, |
| "loss": 0.6832, |
| "step": 2448 |
| }, |
| { |
| "epoch": 0.9041348757076052, |
| "grad_norm": 0.9036632776260376, |
| "learning_rate": 4.743453150257668e-06, |
| "loss": 0.7155, |
| "step": 2449 |
| }, |
| { |
| "epoch": 0.9045040610386414, |
| "grad_norm": 0.8700803518295288, |
| "learning_rate": 4.743238491956934e-06, |
| "loss": 0.6916, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.9048732463696776, |
| "grad_norm": 0.8630246520042419, |
| "learning_rate": 4.74302374875033e-06, |
| "loss": 0.6927, |
| "step": 2451 |
| }, |
| { |
| "epoch": 0.9052424317007137, |
| "grad_norm": 0.8841362595558167, |
| "learning_rate": 4.7428089206459845e-06, |
| "loss": 0.7137, |
| "step": 2452 |
| }, |
| { |
| "epoch": 0.90561161703175, |
| "grad_norm": 0.8694362044334412, |
| "learning_rate": 4.742594007652031e-06, |
| "loss": 0.6853, |
| "step": 2453 |
| }, |
| { |
| "epoch": 0.9059808023627861, |
| "grad_norm": 0.8494770526885986, |
| "learning_rate": 4.7423790097766006e-06, |
| "loss": 0.6711, |
| "step": 2454 |
| }, |
| { |
| "epoch": 0.9063499876938222, |
| "grad_norm": 0.8646619915962219, |
| "learning_rate": 4.742163927027833e-06, |
| "loss": 0.7179, |
| "step": 2455 |
| }, |
| { |
| "epoch": 0.9067191730248585, |
| "grad_norm": 0.877597987651825, |
| "learning_rate": 4.741948759413868e-06, |
| "loss": 0.7234, |
| "step": 2456 |
| }, |
| { |
| "epoch": 0.9070883583558946, |
| "grad_norm": 0.850517988204956, |
| "learning_rate": 4.741733506942849e-06, |
| "loss": 0.7318, |
| "step": 2457 |
| }, |
| { |
| "epoch": 0.9074575436869309, |
| "grad_norm": 0.8301242589950562, |
| "learning_rate": 4.741518169622926e-06, |
| "loss": 0.694, |
| "step": 2458 |
| }, |
| { |
| "epoch": 0.907826729017967, |
| "grad_norm": 0.8391947150230408, |
| "learning_rate": 4.741302747462248e-06, |
| "loss": 0.7091, |
| "step": 2459 |
| }, |
| { |
| "epoch": 0.9081959143490032, |
| "grad_norm": 0.8886072039604187, |
| "learning_rate": 4.741087240468967e-06, |
| "loss": 0.7058, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.9085650996800394, |
| "grad_norm": 0.8848855495452881, |
| "learning_rate": 4.7408716486512416e-06, |
| "loss": 0.7176, |
| "step": 2461 |
| }, |
| { |
| "epoch": 0.9089342850110755, |
| "grad_norm": 0.8657143712043762, |
| "learning_rate": 4.740655972017232e-06, |
| "loss": 0.72, |
| "step": 2462 |
| }, |
| { |
| "epoch": 0.9093034703421118, |
| "grad_norm": 0.879115104675293, |
| "learning_rate": 4.7404402105751e-06, |
| "loss": 0.7008, |
| "step": 2463 |
| }, |
| { |
| "epoch": 0.9096726556731479, |
| "grad_norm": 0.8512206673622131, |
| "learning_rate": 4.740224364333013e-06, |
| "loss": 0.7051, |
| "step": 2464 |
| }, |
| { |
| "epoch": 0.9100418410041841, |
| "grad_norm": 0.8688360452651978, |
| "learning_rate": 4.740008433299142e-06, |
| "loss": 0.7031, |
| "step": 2465 |
| }, |
| { |
| "epoch": 0.9104110263352203, |
| "grad_norm": 0.8307510614395142, |
| "learning_rate": 4.739792417481659e-06, |
| "loss": 0.6484, |
| "step": 2466 |
| }, |
| { |
| "epoch": 0.9107802116662564, |
| "grad_norm": 0.8932550549507141, |
| "learning_rate": 4.7395763168887395e-06, |
| "loss": 0.7204, |
| "step": 2467 |
| }, |
| { |
| "epoch": 0.9111493969972927, |
| "grad_norm": 0.8596043586730957, |
| "learning_rate": 4.739360131528563e-06, |
| "loss": 0.7266, |
| "step": 2468 |
| }, |
| { |
| "epoch": 0.9115185823283288, |
| "grad_norm": 0.8368245959281921, |
| "learning_rate": 4.739143861409312e-06, |
| "loss": 0.6906, |
| "step": 2469 |
| }, |
| { |
| "epoch": 0.911887767659365, |
| "grad_norm": 0.8623902797698975, |
| "learning_rate": 4.738927506539173e-06, |
| "loss": 0.6946, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.9122569529904012, |
| "grad_norm": 0.8611836433410645, |
| "learning_rate": 4.738711066926335e-06, |
| "loss": 0.6655, |
| "step": 2471 |
| }, |
| { |
| "epoch": 0.9126261383214374, |
| "grad_norm": 0.8788560032844543, |
| "learning_rate": 4.738494542578989e-06, |
| "loss": 0.7327, |
| "step": 2472 |
| }, |
| { |
| "epoch": 0.9129953236524735, |
| "grad_norm": 0.9105246663093567, |
| "learning_rate": 4.73827793350533e-06, |
| "loss": 0.7424, |
| "step": 2473 |
| }, |
| { |
| "epoch": 0.9133645089835097, |
| "grad_norm": 0.8795875310897827, |
| "learning_rate": 4.738061239713559e-06, |
| "loss": 0.7092, |
| "step": 2474 |
| }, |
| { |
| "epoch": 0.9137336943145459, |
| "grad_norm": 0.8684108257293701, |
| "learning_rate": 4.737844461211876e-06, |
| "loss": 0.6988, |
| "step": 2475 |
| }, |
| { |
| "epoch": 0.9141028796455821, |
| "grad_norm": 0.8691182136535645, |
| "learning_rate": 4.737627598008486e-06, |
| "loss": 0.7363, |
| "step": 2476 |
| }, |
| { |
| "epoch": 0.9144720649766183, |
| "grad_norm": 0.8747261166572571, |
| "learning_rate": 4.737410650111599e-06, |
| "loss": 0.6877, |
| "step": 2477 |
| }, |
| { |
| "epoch": 0.9148412503076544, |
| "grad_norm": 0.8895252346992493, |
| "learning_rate": 4.7371936175294246e-06, |
| "loss": 0.7078, |
| "step": 2478 |
| }, |
| { |
| "epoch": 0.9152104356386906, |
| "grad_norm": 0.877252459526062, |
| "learning_rate": 4.736976500270177e-06, |
| "loss": 0.7118, |
| "step": 2479 |
| }, |
| { |
| "epoch": 0.9155796209697268, |
| "grad_norm": 0.8511465787887573, |
| "learning_rate": 4.736759298342075e-06, |
| "loss": 0.6753, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.915948806300763, |
| "grad_norm": 0.8913504481315613, |
| "learning_rate": 4.7365420117533404e-06, |
| "loss": 0.6777, |
| "step": 2481 |
| }, |
| { |
| "epoch": 0.9163179916317992, |
| "grad_norm": 0.8873251080513, |
| "learning_rate": 4.736324640512195e-06, |
| "loss": 0.7191, |
| "step": 2482 |
| }, |
| { |
| "epoch": 0.9166871769628353, |
| "grad_norm": 0.8914928436279297, |
| "learning_rate": 4.736107184626869e-06, |
| "loss": 0.7094, |
| "step": 2483 |
| }, |
| { |
| "epoch": 0.9170563622938716, |
| "grad_norm": 0.8406426310539246, |
| "learning_rate": 4.735889644105591e-06, |
| "loss": 0.7082, |
| "step": 2484 |
| }, |
| { |
| "epoch": 0.9174255476249077, |
| "grad_norm": 0.9501475095748901, |
| "learning_rate": 4.735672018956596e-06, |
| "loss": 0.7158, |
| "step": 2485 |
| }, |
| { |
| "epoch": 0.9177947329559439, |
| "grad_norm": 0.8835409283638, |
| "learning_rate": 4.735454309188121e-06, |
| "loss": 0.6742, |
| "step": 2486 |
| }, |
| { |
| "epoch": 0.9181639182869801, |
| "grad_norm": 0.883091926574707, |
| "learning_rate": 4.735236514808406e-06, |
| "loss": 0.6994, |
| "step": 2487 |
| }, |
| { |
| "epoch": 0.9185331036180162, |
| "grad_norm": 0.8853040337562561, |
| "learning_rate": 4.735018635825693e-06, |
| "loss": 0.7197, |
| "step": 2488 |
| }, |
| { |
| "epoch": 0.9189022889490525, |
| "grad_norm": 0.8868618011474609, |
| "learning_rate": 4.734800672248231e-06, |
| "loss": 0.6744, |
| "step": 2489 |
| }, |
| { |
| "epoch": 0.9192714742800886, |
| "grad_norm": 0.9185392260551453, |
| "learning_rate": 4.73458262408427e-06, |
| "loss": 0.6616, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.9196406596111247, |
| "grad_norm": 0.8852180242538452, |
| "learning_rate": 4.734364491342061e-06, |
| "loss": 0.6921, |
| "step": 2491 |
| }, |
| { |
| "epoch": 0.920009844942161, |
| "grad_norm": 0.8949165344238281, |
| "learning_rate": 4.7341462740298605e-06, |
| "loss": 0.7063, |
| "step": 2492 |
| }, |
| { |
| "epoch": 0.9203790302731971, |
| "grad_norm": 0.8711039423942566, |
| "learning_rate": 4.7339279721559285e-06, |
| "loss": 0.7267, |
| "step": 2493 |
| }, |
| { |
| "epoch": 0.9207482156042334, |
| "grad_norm": 0.8843966126441956, |
| "learning_rate": 4.733709585728528e-06, |
| "loss": 0.7179, |
| "step": 2494 |
| }, |
| { |
| "epoch": 0.9211174009352695, |
| "grad_norm": 0.93259596824646, |
| "learning_rate": 4.733491114755926e-06, |
| "loss": 0.7065, |
| "step": 2495 |
| }, |
| { |
| "epoch": 0.9214865862663056, |
| "grad_norm": 0.9091447591781616, |
| "learning_rate": 4.733272559246389e-06, |
| "loss": 0.7224, |
| "step": 2496 |
| }, |
| { |
| "epoch": 0.9218557715973419, |
| "grad_norm": 0.8795716762542725, |
| "learning_rate": 4.73305391920819e-06, |
| "loss": 0.7119, |
| "step": 2497 |
| }, |
| { |
| "epoch": 0.922224956928378, |
| "grad_norm": 0.8672060966491699, |
| "learning_rate": 4.732835194649607e-06, |
| "loss": 0.7081, |
| "step": 2498 |
| }, |
| { |
| "epoch": 0.9225941422594143, |
| "grad_norm": 0.9056263566017151, |
| "learning_rate": 4.732616385578914e-06, |
| "loss": 0.7254, |
| "step": 2499 |
| }, |
| { |
| "epoch": 0.9229633275904504, |
| "grad_norm": 0.9208519458770752, |
| "learning_rate": 4.7323974920043965e-06, |
| "loss": 0.7183, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.9233325129214865, |
| "grad_norm": 0.8479480743408203, |
| "learning_rate": 4.732178513934339e-06, |
| "loss": 0.7086, |
| "step": 2501 |
| }, |
| { |
| "epoch": 0.9237016982525228, |
| "grad_norm": 0.84977126121521, |
| "learning_rate": 4.73195945137703e-06, |
| "loss": 0.701, |
| "step": 2502 |
| }, |
| { |
| "epoch": 0.9240708835835589, |
| "grad_norm": 0.8850725889205933, |
| "learning_rate": 4.7317403043407584e-06, |
| "loss": 0.6984, |
| "step": 2503 |
| }, |
| { |
| "epoch": 0.9244400689145952, |
| "grad_norm": 0.8803204298019409, |
| "learning_rate": 4.7315210728338215e-06, |
| "loss": 0.7318, |
| "step": 2504 |
| }, |
| { |
| "epoch": 0.9248092542456313, |
| "grad_norm": 0.8582308888435364, |
| "learning_rate": 4.731301756864516e-06, |
| "loss": 0.7015, |
| "step": 2505 |
| }, |
| { |
| "epoch": 0.9251784395766675, |
| "grad_norm": 0.8383595943450928, |
| "learning_rate": 4.731082356441143e-06, |
| "loss": 0.6385, |
| "step": 2506 |
| }, |
| { |
| "epoch": 0.9255476249077037, |
| "grad_norm": 0.8716678619384766, |
| "learning_rate": 4.730862871572008e-06, |
| "loss": 0.6989, |
| "step": 2507 |
| }, |
| { |
| "epoch": 0.9259168102387398, |
| "grad_norm": 0.8813826441764832, |
| "learning_rate": 4.730643302265416e-06, |
| "loss": 0.6775, |
| "step": 2508 |
| }, |
| { |
| "epoch": 0.926285995569776, |
| "grad_norm": 0.8515213131904602, |
| "learning_rate": 4.730423648529679e-06, |
| "loss": 0.7048, |
| "step": 2509 |
| }, |
| { |
| "epoch": 0.9266551809008122, |
| "grad_norm": 0.8710786700248718, |
| "learning_rate": 4.730203910373112e-06, |
| "loss": 0.7262, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.9270243662318484, |
| "grad_norm": 0.8492597341537476, |
| "learning_rate": 4.729984087804031e-06, |
| "loss": 0.7031, |
| "step": 2511 |
| }, |
| { |
| "epoch": 0.9273935515628846, |
| "grad_norm": 0.8640585541725159, |
| "learning_rate": 4.729764180830754e-06, |
| "loss": 0.683, |
| "step": 2512 |
| }, |
| { |
| "epoch": 0.9277627368939207, |
| "grad_norm": 0.85969078540802, |
| "learning_rate": 4.729544189461608e-06, |
| "loss": 0.6709, |
| "step": 2513 |
| }, |
| { |
| "epoch": 0.9281319222249569, |
| "grad_norm": 0.8955800533294678, |
| "learning_rate": 4.729324113704918e-06, |
| "loss": 0.7304, |
| "step": 2514 |
| }, |
| { |
| "epoch": 0.9285011075559931, |
| "grad_norm": 0.882088303565979, |
| "learning_rate": 4.729103953569014e-06, |
| "loss": 0.7078, |
| "step": 2515 |
| }, |
| { |
| "epoch": 0.9288702928870293, |
| "grad_norm": 0.8810111880302429, |
| "learning_rate": 4.728883709062229e-06, |
| "loss": 0.7412, |
| "step": 2516 |
| }, |
| { |
| "epoch": 0.9292394782180655, |
| "grad_norm": 0.9056873321533203, |
| "learning_rate": 4.728663380192898e-06, |
| "loss": 0.7202, |
| "step": 2517 |
| }, |
| { |
| "epoch": 0.9296086635491017, |
| "grad_norm": 0.8942249417304993, |
| "learning_rate": 4.728442966969363e-06, |
| "loss": 0.6895, |
| "step": 2518 |
| }, |
| { |
| "epoch": 0.9299778488801378, |
| "grad_norm": 0.8909201622009277, |
| "learning_rate": 4.728222469399964e-06, |
| "loss": 0.7394, |
| "step": 2519 |
| }, |
| { |
| "epoch": 0.930347034211174, |
| "grad_norm": 0.8551014065742493, |
| "learning_rate": 4.728001887493048e-06, |
| "loss": 0.6906, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.9307162195422102, |
| "grad_norm": 0.8875094652175903, |
| "learning_rate": 4.727781221256963e-06, |
| "loss": 0.7134, |
| "step": 2521 |
| }, |
| { |
| "epoch": 0.9310854048732464, |
| "grad_norm": 0.8559072613716125, |
| "learning_rate": 4.727560470700064e-06, |
| "loss": 0.6838, |
| "step": 2522 |
| }, |
| { |
| "epoch": 0.9314545902042826, |
| "grad_norm": 0.8632515668869019, |
| "learning_rate": 4.727339635830702e-06, |
| "loss": 0.6783, |
| "step": 2523 |
| }, |
| { |
| "epoch": 0.9318237755353187, |
| "grad_norm": 0.8152856230735779, |
| "learning_rate": 4.727118716657239e-06, |
| "loss": 0.6142, |
| "step": 2524 |
| }, |
| { |
| "epoch": 0.932192960866355, |
| "grad_norm": 0.9158169031143188, |
| "learning_rate": 4.726897713188035e-06, |
| "loss": 0.7147, |
| "step": 2525 |
| }, |
| { |
| "epoch": 0.9325621461973911, |
| "grad_norm": 0.8849809765815735, |
| "learning_rate": 4.726676625431454e-06, |
| "loss": 0.6753, |
| "step": 2526 |
| }, |
| { |
| "epoch": 0.9329313315284272, |
| "grad_norm": 0.8620732426643372, |
| "learning_rate": 4.726455453395867e-06, |
| "loss": 0.6979, |
| "step": 2527 |
| }, |
| { |
| "epoch": 0.9333005168594635, |
| "grad_norm": 0.8598245978355408, |
| "learning_rate": 4.726234197089644e-06, |
| "loss": 0.7447, |
| "step": 2528 |
| }, |
| { |
| "epoch": 0.9336697021904996, |
| "grad_norm": 0.8678368330001831, |
| "learning_rate": 4.726012856521158e-06, |
| "loss": 0.693, |
| "step": 2529 |
| }, |
| { |
| "epoch": 0.9340388875215359, |
| "grad_norm": 0.8540114164352417, |
| "learning_rate": 4.72579143169879e-06, |
| "loss": 0.6663, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.934408072852572, |
| "grad_norm": 0.8417086601257324, |
| "learning_rate": 4.725569922630917e-06, |
| "loss": 0.689, |
| "step": 2531 |
| }, |
| { |
| "epoch": 0.9347772581836081, |
| "grad_norm": 0.8707804679870605, |
| "learning_rate": 4.725348329325925e-06, |
| "loss": 0.7039, |
| "step": 2532 |
| }, |
| { |
| "epoch": 0.9351464435146444, |
| "grad_norm": 0.8656783103942871, |
| "learning_rate": 4.725126651792202e-06, |
| "loss": 0.7056, |
| "step": 2533 |
| }, |
| { |
| "epoch": 0.9355156288456805, |
| "grad_norm": 0.8286459445953369, |
| "learning_rate": 4.724904890038137e-06, |
| "loss": 0.6859, |
| "step": 2534 |
| }, |
| { |
| "epoch": 0.9358848141767168, |
| "grad_norm": 0.8686536550521851, |
| "learning_rate": 4.724683044072124e-06, |
| "loss": 0.6817, |
| "step": 2535 |
| }, |
| { |
| "epoch": 0.9362539995077529, |
| "grad_norm": 0.8872658014297485, |
| "learning_rate": 4.7244611139025595e-06, |
| "loss": 0.7093, |
| "step": 2536 |
| }, |
| { |
| "epoch": 0.936623184838789, |
| "grad_norm": 0.8674843907356262, |
| "learning_rate": 4.724239099537845e-06, |
| "loss": 0.7042, |
| "step": 2537 |
| }, |
| { |
| "epoch": 0.9369923701698253, |
| "grad_norm": 0.8639495968818665, |
| "learning_rate": 4.7240170009863816e-06, |
| "loss": 0.7111, |
| "step": 2538 |
| }, |
| { |
| "epoch": 0.9373615555008614, |
| "grad_norm": 0.8369062542915344, |
| "learning_rate": 4.7237948182565765e-06, |
| "loss": 0.6811, |
| "step": 2539 |
| }, |
| { |
| "epoch": 0.9377307408318976, |
| "grad_norm": 0.8846485614776611, |
| "learning_rate": 4.72357255135684e-06, |
| "loss": 0.6932, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.9380999261629338, |
| "grad_norm": 0.8939961194992065, |
| "learning_rate": 4.723350200295584e-06, |
| "loss": 0.7092, |
| "step": 2541 |
| }, |
| { |
| "epoch": 0.9384691114939699, |
| "grad_norm": 0.864433228969574, |
| "learning_rate": 4.723127765081225e-06, |
| "loss": 0.6878, |
| "step": 2542 |
| }, |
| { |
| "epoch": 0.9388382968250062, |
| "grad_norm": 0.8481778502464294, |
| "learning_rate": 4.7229052457221816e-06, |
| "loss": 0.6775, |
| "step": 2543 |
| }, |
| { |
| "epoch": 0.9392074821560423, |
| "grad_norm": 0.8581600785255432, |
| "learning_rate": 4.722682642226875e-06, |
| "loss": 0.6961, |
| "step": 2544 |
| }, |
| { |
| "epoch": 0.9395766674870785, |
| "grad_norm": 0.8689659833908081, |
| "learning_rate": 4.722459954603733e-06, |
| "loss": 0.6986, |
| "step": 2545 |
| }, |
| { |
| "epoch": 0.9399458528181147, |
| "grad_norm": 0.8631998896598816, |
| "learning_rate": 4.722237182861183e-06, |
| "loss": 0.6756, |
| "step": 2546 |
| }, |
| { |
| "epoch": 0.9403150381491509, |
| "grad_norm": 0.8720018863677979, |
| "learning_rate": 4.722014327007657e-06, |
| "loss": 0.7085, |
| "step": 2547 |
| }, |
| { |
| "epoch": 0.9406842234801871, |
| "grad_norm": 0.8763655424118042, |
| "learning_rate": 4.72179138705159e-06, |
| "loss": 0.6919, |
| "step": 2548 |
| }, |
| { |
| "epoch": 0.9410534088112232, |
| "grad_norm": 0.8881059288978577, |
| "learning_rate": 4.72156836300142e-06, |
| "loss": 0.6924, |
| "step": 2549 |
| }, |
| { |
| "epoch": 0.9414225941422594, |
| "grad_norm": 0.8810322284698486, |
| "learning_rate": 4.721345254865589e-06, |
| "loss": 0.7164, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.9417917794732956, |
| "grad_norm": 0.8332381248474121, |
| "learning_rate": 4.721122062652541e-06, |
| "loss": 0.7418, |
| "step": 2551 |
| }, |
| { |
| "epoch": 0.9421609648043318, |
| "grad_norm": 0.8442291617393494, |
| "learning_rate": 4.720898786370723e-06, |
| "loss": 0.6884, |
| "step": 2552 |
| }, |
| { |
| "epoch": 0.942530150135368, |
| "grad_norm": 0.8065565824508667, |
| "learning_rate": 4.720675426028588e-06, |
| "loss": 0.6191, |
| "step": 2553 |
| }, |
| { |
| "epoch": 0.9428993354664041, |
| "grad_norm": 0.8640344142913818, |
| "learning_rate": 4.720451981634589e-06, |
| "loss": 0.7072, |
| "step": 2554 |
| }, |
| { |
| "epoch": 0.9432685207974403, |
| "grad_norm": 0.8636446595191956, |
| "learning_rate": 4.720228453197183e-06, |
| "loss": 0.6901, |
| "step": 2555 |
| }, |
| { |
| "epoch": 0.9436377061284765, |
| "grad_norm": 0.8631062507629395, |
| "learning_rate": 4.720004840724831e-06, |
| "loss": 0.7035, |
| "step": 2556 |
| }, |
| { |
| "epoch": 0.9440068914595127, |
| "grad_norm": 0.8414477705955505, |
| "learning_rate": 4.7197811442259955e-06, |
| "loss": 0.6541, |
| "step": 2557 |
| }, |
| { |
| "epoch": 0.9443760767905488, |
| "grad_norm": 0.8783524632453918, |
| "learning_rate": 4.719557363709145e-06, |
| "loss": 0.6971, |
| "step": 2558 |
| }, |
| { |
| "epoch": 0.944745262121585, |
| "grad_norm": 0.8537473678588867, |
| "learning_rate": 4.7193334991827486e-06, |
| "loss": 0.6927, |
| "step": 2559 |
| }, |
| { |
| "epoch": 0.9451144474526212, |
| "grad_norm": 0.9084930419921875, |
| "learning_rate": 4.7191095506552795e-06, |
| "loss": 0.6974, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.9454836327836574, |
| "grad_norm": 0.9035632610321045, |
| "learning_rate": 4.718885518135215e-06, |
| "loss": 0.746, |
| "step": 2561 |
| }, |
| { |
| "epoch": 0.9458528181146936, |
| "grad_norm": 0.8823369741439819, |
| "learning_rate": 4.718661401631033e-06, |
| "loss": 0.6662, |
| "step": 2562 |
| }, |
| { |
| "epoch": 0.9462220034457297, |
| "grad_norm": 0.8840197324752808, |
| "learning_rate": 4.718437201151218e-06, |
| "loss": 0.692, |
| "step": 2563 |
| }, |
| { |
| "epoch": 0.946591188776766, |
| "grad_norm": 0.865744948387146, |
| "learning_rate": 4.718212916704254e-06, |
| "loss": 0.7252, |
| "step": 2564 |
| }, |
| { |
| "epoch": 0.9469603741078021, |
| "grad_norm": 0.8541647791862488, |
| "learning_rate": 4.717988548298633e-06, |
| "loss": 0.69, |
| "step": 2565 |
| }, |
| { |
| "epoch": 0.9473295594388383, |
| "grad_norm": 0.9258102774620056, |
| "learning_rate": 4.717764095942844e-06, |
| "loss": 0.7265, |
| "step": 2566 |
| }, |
| { |
| "epoch": 0.9476987447698745, |
| "grad_norm": 0.8850582242012024, |
| "learning_rate": 4.717539559645384e-06, |
| "loss": 0.7063, |
| "step": 2567 |
| }, |
| { |
| "epoch": 0.9480679301009106, |
| "grad_norm": 0.8705160021781921, |
| "learning_rate": 4.717314939414752e-06, |
| "loss": 0.713, |
| "step": 2568 |
| }, |
| { |
| "epoch": 0.9484371154319469, |
| "grad_norm": 0.8894596099853516, |
| "learning_rate": 4.717090235259449e-06, |
| "loss": 0.6615, |
| "step": 2569 |
| }, |
| { |
| "epoch": 0.948806300762983, |
| "grad_norm": 0.8761441707611084, |
| "learning_rate": 4.7168654471879806e-06, |
| "loss": 0.6738, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.9491754860940192, |
| "grad_norm": 0.9032109975814819, |
| "learning_rate": 4.716640575208855e-06, |
| "loss": 0.7081, |
| "step": 2571 |
| }, |
| { |
| "epoch": 0.9495446714250554, |
| "grad_norm": 0.8425540924072266, |
| "learning_rate": 4.716415619330582e-06, |
| "loss": 0.6321, |
| "step": 2572 |
| }, |
| { |
| "epoch": 0.9499138567560915, |
| "grad_norm": 0.87245112657547, |
| "learning_rate": 4.716190579561678e-06, |
| "loss": 0.7024, |
| "step": 2573 |
| }, |
| { |
| "epoch": 0.9502830420871278, |
| "grad_norm": 0.8776750564575195, |
| "learning_rate": 4.71596545591066e-06, |
| "loss": 0.6897, |
| "step": 2574 |
| }, |
| { |
| "epoch": 0.9506522274181639, |
| "grad_norm": 0.8822212815284729, |
| "learning_rate": 4.7157402483860496e-06, |
| "loss": 0.6829, |
| "step": 2575 |
| }, |
| { |
| "epoch": 0.9510214127492, |
| "grad_norm": 0.8839916586875916, |
| "learning_rate": 4.7155149569963696e-06, |
| "loss": 0.6821, |
| "step": 2576 |
| }, |
| { |
| "epoch": 0.9513905980802363, |
| "grad_norm": 0.8598265647888184, |
| "learning_rate": 4.715289581750147e-06, |
| "loss": 0.714, |
| "step": 2577 |
| }, |
| { |
| "epoch": 0.9517597834112724, |
| "grad_norm": 0.8639992475509644, |
| "learning_rate": 4.7150641226559136e-06, |
| "loss": 0.7089, |
| "step": 2578 |
| }, |
| { |
| "epoch": 0.9521289687423087, |
| "grad_norm": 0.8500651717185974, |
| "learning_rate": 4.714838579722202e-06, |
| "loss": 0.6681, |
| "step": 2579 |
| }, |
| { |
| "epoch": 0.9524981540733448, |
| "grad_norm": 0.8705741763114929, |
| "learning_rate": 4.714612952957549e-06, |
| "loss": 0.6966, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.952867339404381, |
| "grad_norm": 0.8744728565216064, |
| "learning_rate": 4.714387242370495e-06, |
| "loss": 0.7105, |
| "step": 2581 |
| }, |
| { |
| "epoch": 0.9532365247354172, |
| "grad_norm": 0.844121515750885, |
| "learning_rate": 4.714161447969583e-06, |
| "loss": 0.6552, |
| "step": 2582 |
| }, |
| { |
| "epoch": 0.9536057100664533, |
| "grad_norm": 0.8838944435119629, |
| "learning_rate": 4.7139355697633584e-06, |
| "loss": 0.6735, |
| "step": 2583 |
| }, |
| { |
| "epoch": 0.9539748953974896, |
| "grad_norm": 0.8674659132957458, |
| "learning_rate": 4.713709607760371e-06, |
| "loss": 0.6755, |
| "step": 2584 |
| }, |
| { |
| "epoch": 0.9543440807285257, |
| "grad_norm": 0.8746424913406372, |
| "learning_rate": 4.713483561969175e-06, |
| "loss": 0.7173, |
| "step": 2585 |
| }, |
| { |
| "epoch": 0.9547132660595619, |
| "grad_norm": 0.8750790357589722, |
| "learning_rate": 4.713257432398324e-06, |
| "loss": 0.7156, |
| "step": 2586 |
| }, |
| { |
| "epoch": 0.9550824513905981, |
| "grad_norm": 0.9194653034210205, |
| "learning_rate": 4.713031219056377e-06, |
| "loss": 0.7156, |
| "step": 2587 |
| }, |
| { |
| "epoch": 0.9554516367216342, |
| "grad_norm": 0.8654546737670898, |
| "learning_rate": 4.712804921951898e-06, |
| "loss": 0.6755, |
| "step": 2588 |
| }, |
| { |
| "epoch": 0.9558208220526705, |
| "grad_norm": 0.8833775520324707, |
| "learning_rate": 4.71257854109345e-06, |
| "loss": 0.6777, |
| "step": 2589 |
| }, |
| { |
| "epoch": 0.9561900073837066, |
| "grad_norm": 0.8929355144500732, |
| "learning_rate": 4.712352076489603e-06, |
| "loss": 0.7201, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.9565591927147428, |
| "grad_norm": 0.8803203105926514, |
| "learning_rate": 4.7121255281489275e-06, |
| "loss": 0.7037, |
| "step": 2591 |
| }, |
| { |
| "epoch": 0.956928378045779, |
| "grad_norm": 3.147451877593994, |
| "learning_rate": 4.71189889608e-06, |
| "loss": 0.7059, |
| "step": 2592 |
| }, |
| { |
| "epoch": 0.9572975633768152, |
| "grad_norm": 0.8626553416252136, |
| "learning_rate": 4.711672180291397e-06, |
| "loss": 0.7017, |
| "step": 2593 |
| }, |
| { |
| "epoch": 0.9576667487078513, |
| "grad_norm": 0.8505844473838806, |
| "learning_rate": 4.711445380791699e-06, |
| "loss": 0.6857, |
| "step": 2594 |
| }, |
| { |
| "epoch": 0.9580359340388875, |
| "grad_norm": 0.8895529508590698, |
| "learning_rate": 4.711218497589493e-06, |
| "loss": 0.7022, |
| "step": 2595 |
| }, |
| { |
| "epoch": 0.9584051193699237, |
| "grad_norm": 0.8883222341537476, |
| "learning_rate": 4.710991530693364e-06, |
| "loss": 0.6423, |
| "step": 2596 |
| }, |
| { |
| "epoch": 0.9587743047009599, |
| "grad_norm": 0.8487628102302551, |
| "learning_rate": 4.710764480111903e-06, |
| "loss": 0.6979, |
| "step": 2597 |
| }, |
| { |
| "epoch": 0.9591434900319961, |
| "grad_norm": 0.8244770765304565, |
| "learning_rate": 4.710537345853704e-06, |
| "loss": 0.6925, |
| "step": 2598 |
| }, |
| { |
| "epoch": 0.9595126753630322, |
| "grad_norm": 0.9382370710372925, |
| "learning_rate": 4.710310127927364e-06, |
| "loss": 0.7106, |
| "step": 2599 |
| }, |
| { |
| "epoch": 0.9598818606940684, |
| "grad_norm": 0.8556570410728455, |
| "learning_rate": 4.710082826341484e-06, |
| "loss": 0.6918, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.9602510460251046, |
| "grad_norm": 0.8350138068199158, |
| "learning_rate": 4.709855441104667e-06, |
| "loss": 0.6908, |
| "step": 2601 |
| }, |
| { |
| "epoch": 0.9606202313561408, |
| "grad_norm": 0.8622647523880005, |
| "learning_rate": 4.7096279722255175e-06, |
| "loss": 0.7214, |
| "step": 2602 |
| }, |
| { |
| "epoch": 0.960989416687177, |
| "grad_norm": 0.8928597569465637, |
| "learning_rate": 4.709400419712648e-06, |
| "loss": 0.7067, |
| "step": 2603 |
| }, |
| { |
| "epoch": 0.9613586020182131, |
| "grad_norm": 0.8485970497131348, |
| "learning_rate": 4.709172783574669e-06, |
| "loss": 0.691, |
| "step": 2604 |
| }, |
| { |
| "epoch": 0.9617277873492494, |
| "grad_norm": 0.8822405934333801, |
| "learning_rate": 4.708945063820198e-06, |
| "loss": 0.684, |
| "step": 2605 |
| }, |
| { |
| "epoch": 0.9620969726802855, |
| "grad_norm": 0.854975700378418, |
| "learning_rate": 4.708717260457853e-06, |
| "loss": 0.6718, |
| "step": 2606 |
| }, |
| { |
| "epoch": 0.9624661580113217, |
| "grad_norm": 0.86021488904953, |
| "learning_rate": 4.7084893734962565e-06, |
| "loss": 0.6677, |
| "step": 2607 |
| }, |
| { |
| "epoch": 0.9628353433423579, |
| "grad_norm": 0.9012849926948547, |
| "learning_rate": 4.708261402944036e-06, |
| "loss": 0.7292, |
| "step": 2608 |
| }, |
| { |
| "epoch": 0.963204528673394, |
| "grad_norm": 0.864490270614624, |
| "learning_rate": 4.708033348809816e-06, |
| "loss": 0.7166, |
| "step": 2609 |
| }, |
| { |
| "epoch": 0.9635737140044303, |
| "grad_norm": 0.8407897353172302, |
| "learning_rate": 4.707805211102232e-06, |
| "loss": 0.7277, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.9639428993354664, |
| "grad_norm": 0.8841310739517212, |
| "learning_rate": 4.707576989829917e-06, |
| "loss": 0.6973, |
| "step": 2611 |
| }, |
| { |
| "epoch": 0.9643120846665025, |
| "grad_norm": 0.887823760509491, |
| "learning_rate": 4.7073486850015095e-06, |
| "loss": 0.71, |
| "step": 2612 |
| }, |
| { |
| "epoch": 0.9646812699975388, |
| "grad_norm": 0.8758772015571594, |
| "learning_rate": 4.707120296625651e-06, |
| "loss": 0.6605, |
| "step": 2613 |
| }, |
| { |
| "epoch": 0.9650504553285749, |
| "grad_norm": 0.8583731055259705, |
| "learning_rate": 4.7068918247109865e-06, |
| "loss": 0.6964, |
| "step": 2614 |
| }, |
| { |
| "epoch": 0.9654196406596112, |
| "grad_norm": 0.8955153822898865, |
| "learning_rate": 4.706663269266163e-06, |
| "loss": 0.7276, |
| "step": 2615 |
| }, |
| { |
| "epoch": 0.9657888259906473, |
| "grad_norm": 0.8411804437637329, |
| "learning_rate": 4.70643463029983e-06, |
| "loss": 0.6319, |
| "step": 2616 |
| }, |
| { |
| "epoch": 0.9661580113216834, |
| "grad_norm": 0.8701980710029602, |
| "learning_rate": 4.706205907820643e-06, |
| "loss": 0.6796, |
| "step": 2617 |
| }, |
| { |
| "epoch": 0.9665271966527197, |
| "grad_norm": 0.8797522783279419, |
| "learning_rate": 4.705977101837259e-06, |
| "loss": 0.7139, |
| "step": 2618 |
| }, |
| { |
| "epoch": 0.9668963819837558, |
| "grad_norm": 0.8831002712249756, |
| "learning_rate": 4.705748212358339e-06, |
| "loss": 0.6813, |
| "step": 2619 |
| }, |
| { |
| "epoch": 0.9672655673147921, |
| "grad_norm": 0.8374364376068115, |
| "learning_rate": 4.705519239392544e-06, |
| "loss": 0.6509, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.9676347526458282, |
| "grad_norm": 0.8649409413337708, |
| "learning_rate": 4.705290182948542e-06, |
| "loss": 0.6692, |
| "step": 2621 |
| }, |
| { |
| "epoch": 0.9680039379768643, |
| "grad_norm": 0.9725558757781982, |
| "learning_rate": 4.705061043035002e-06, |
| "loss": 0.6921, |
| "step": 2622 |
| }, |
| { |
| "epoch": 0.9683731233079006, |
| "grad_norm": 1.5726717710494995, |
| "learning_rate": 4.704831819660598e-06, |
| "loss": 0.73, |
| "step": 2623 |
| }, |
| { |
| "epoch": 0.9687423086389367, |
| "grad_norm": 0.8848654627799988, |
| "learning_rate": 4.704602512834006e-06, |
| "loss": 0.692, |
| "step": 2624 |
| }, |
| { |
| "epoch": 0.969111493969973, |
| "grad_norm": 0.8604928255081177, |
| "learning_rate": 4.7043731225639045e-06, |
| "loss": 0.6849, |
| "step": 2625 |
| }, |
| { |
| "epoch": 0.9694806793010091, |
| "grad_norm": 0.9043660759925842, |
| "learning_rate": 4.704143648858976e-06, |
| "loss": 0.7108, |
| "step": 2626 |
| }, |
| { |
| "epoch": 0.9698498646320453, |
| "grad_norm": 0.8489352464675903, |
| "learning_rate": 4.703914091727906e-06, |
| "loss": 0.6897, |
| "step": 2627 |
| }, |
| { |
| "epoch": 0.9702190499630815, |
| "grad_norm": 0.8445559144020081, |
| "learning_rate": 4.703684451179382e-06, |
| "loss": 0.7079, |
| "step": 2628 |
| }, |
| { |
| "epoch": 0.9705882352941176, |
| "grad_norm": 0.8575627207756042, |
| "learning_rate": 4.7034547272220985e-06, |
| "loss": 0.7166, |
| "step": 2629 |
| }, |
| { |
| "epoch": 0.9709574206251538, |
| "grad_norm": 0.8811299800872803, |
| "learning_rate": 4.703224919864748e-06, |
| "loss": 0.6726, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.97132660595619, |
| "grad_norm": 0.8279196619987488, |
| "learning_rate": 4.702995029116031e-06, |
| "loss": 0.6713, |
| "step": 2631 |
| }, |
| { |
| "epoch": 0.9716957912872262, |
| "grad_norm": 0.8848355412483215, |
| "learning_rate": 4.702765054984646e-06, |
| "loss": 0.6671, |
| "step": 2632 |
| }, |
| { |
| "epoch": 0.9720649766182624, |
| "grad_norm": 0.8621148467063904, |
| "learning_rate": 4.7025349974793e-06, |
| "loss": 0.6558, |
| "step": 2633 |
| }, |
| { |
| "epoch": 0.9724341619492985, |
| "grad_norm": 0.8555501699447632, |
| "learning_rate": 4.702304856608698e-06, |
| "loss": 0.7102, |
| "step": 2634 |
| }, |
| { |
| "epoch": 0.9728033472803347, |
| "grad_norm": 0.8470124006271362, |
| "learning_rate": 4.702074632381553e-06, |
| "loss": 0.6579, |
| "step": 2635 |
| }, |
| { |
| "epoch": 0.9731725326113709, |
| "grad_norm": 0.8950283527374268, |
| "learning_rate": 4.701844324806579e-06, |
| "loss": 0.7557, |
| "step": 2636 |
| }, |
| { |
| "epoch": 0.9735417179424071, |
| "grad_norm": 0.8791554570198059, |
| "learning_rate": 4.701613933892491e-06, |
| "loss": 0.7336, |
| "step": 2637 |
| }, |
| { |
| "epoch": 0.9739109032734433, |
| "grad_norm": 0.8484707474708557, |
| "learning_rate": 4.701383459648011e-06, |
| "loss": 0.6983, |
| "step": 2638 |
| }, |
| { |
| "epoch": 0.9742800886044795, |
| "grad_norm": 0.8885069489479065, |
| "learning_rate": 4.701152902081863e-06, |
| "loss": 0.6982, |
| "step": 2639 |
| }, |
| { |
| "epoch": 0.9746492739355156, |
| "grad_norm": 0.8474615216255188, |
| "learning_rate": 4.700922261202771e-06, |
| "loss": 0.6982, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.9750184592665518, |
| "grad_norm": 0.8749250769615173, |
| "learning_rate": 4.7006915370194655e-06, |
| "loss": 0.6771, |
| "step": 2641 |
| }, |
| { |
| "epoch": 0.975387644597588, |
| "grad_norm": 0.8137475848197937, |
| "learning_rate": 4.70046072954068e-06, |
| "loss": 0.6859, |
| "step": 2642 |
| }, |
| { |
| "epoch": 0.9757568299286241, |
| "grad_norm": 0.8566490411758423, |
| "learning_rate": 4.700229838775151e-06, |
| "loss": 0.7159, |
| "step": 2643 |
| }, |
| { |
| "epoch": 0.9761260152596604, |
| "grad_norm": 0.8723783493041992, |
| "learning_rate": 4.699998864731617e-06, |
| "loss": 0.7309, |
| "step": 2644 |
| }, |
| { |
| "epoch": 0.9764952005906965, |
| "grad_norm": 0.8612366914749146, |
| "learning_rate": 4.699767807418821e-06, |
| "loss": 0.6921, |
| "step": 2645 |
| }, |
| { |
| "epoch": 0.9768643859217327, |
| "grad_norm": 0.8468891382217407, |
| "learning_rate": 4.699536666845507e-06, |
| "loss": 0.6677, |
| "step": 2646 |
| }, |
| { |
| "epoch": 0.9772335712527689, |
| "grad_norm": 0.8550125956535339, |
| "learning_rate": 4.699305443020424e-06, |
| "loss": 0.6939, |
| "step": 2647 |
| }, |
| { |
| "epoch": 0.977602756583805, |
| "grad_norm": 0.9459806680679321, |
| "learning_rate": 4.699074135952324e-06, |
| "loss": 0.6429, |
| "step": 2648 |
| }, |
| { |
| "epoch": 0.9779719419148413, |
| "grad_norm": 0.8765318989753723, |
| "learning_rate": 4.6988427456499624e-06, |
| "loss": 0.6997, |
| "step": 2649 |
| }, |
| { |
| "epoch": 0.9783411272458774, |
| "grad_norm": 0.8627922534942627, |
| "learning_rate": 4.698611272122097e-06, |
| "loss": 0.6853, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.9787103125769137, |
| "grad_norm": 0.8556888103485107, |
| "learning_rate": 4.698379715377489e-06, |
| "loss": 0.6969, |
| "step": 2651 |
| }, |
| { |
| "epoch": 0.9790794979079498, |
| "grad_norm": 0.8721787929534912, |
| "learning_rate": 4.698148075424902e-06, |
| "loss": 0.6976, |
| "step": 2652 |
| }, |
| { |
| "epoch": 0.9794486832389859, |
| "grad_norm": 0.8606870770454407, |
| "learning_rate": 4.697916352273104e-06, |
| "loss": 0.6786, |
| "step": 2653 |
| }, |
| { |
| "epoch": 0.9798178685700222, |
| "grad_norm": 0.8361164927482605, |
| "learning_rate": 4.6976845459308664e-06, |
| "loss": 0.6847, |
| "step": 2654 |
| }, |
| { |
| "epoch": 0.9801870539010583, |
| "grad_norm": 0.8861737847328186, |
| "learning_rate": 4.697452656406963e-06, |
| "loss": 0.721, |
| "step": 2655 |
| }, |
| { |
| "epoch": 0.9805562392320946, |
| "grad_norm": 0.8805361390113831, |
| "learning_rate": 4.697220683710168e-06, |
| "loss": 0.681, |
| "step": 2656 |
| }, |
| { |
| "epoch": 0.9809254245631307, |
| "grad_norm": 0.8715226650238037, |
| "learning_rate": 4.696988627849265e-06, |
| "loss": 0.7021, |
| "step": 2657 |
| }, |
| { |
| "epoch": 0.9812946098941668, |
| "grad_norm": 0.866218090057373, |
| "learning_rate": 4.696756488833035e-06, |
| "loss": 0.6957, |
| "step": 2658 |
| }, |
| { |
| "epoch": 0.9816637952252031, |
| "grad_norm": 0.8797804713249207, |
| "learning_rate": 4.696524266670266e-06, |
| "loss": 0.7064, |
| "step": 2659 |
| }, |
| { |
| "epoch": 0.9820329805562392, |
| "grad_norm": 0.9008505940437317, |
| "learning_rate": 4.696291961369747e-06, |
| "loss": 0.6895, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.9824021658872754, |
| "grad_norm": 0.8940179944038391, |
| "learning_rate": 4.696059572940269e-06, |
| "loss": 0.6735, |
| "step": 2661 |
| }, |
| { |
| "epoch": 0.9827713512183116, |
| "grad_norm": 0.8762912750244141, |
| "learning_rate": 4.69582710139063e-06, |
| "loss": 0.7487, |
| "step": 2662 |
| }, |
| { |
| "epoch": 0.9831405365493477, |
| "grad_norm": 0.8566141724586487, |
| "learning_rate": 4.695594546729629e-06, |
| "loss": 0.7056, |
| "step": 2663 |
| }, |
| { |
| "epoch": 0.983509721880384, |
| "grad_norm": 0.8390647172927856, |
| "learning_rate": 4.695361908966066e-06, |
| "loss": 0.6885, |
| "step": 2664 |
| }, |
| { |
| "epoch": 0.9838789072114201, |
| "grad_norm": 0.8426521420478821, |
| "learning_rate": 4.6951291881087495e-06, |
| "loss": 0.7038, |
| "step": 2665 |
| }, |
| { |
| "epoch": 0.9842480925424563, |
| "grad_norm": 0.8740254640579224, |
| "learning_rate": 4.6948963841664845e-06, |
| "loss": 0.7153, |
| "step": 2666 |
| }, |
| { |
| "epoch": 0.9846172778734925, |
| "grad_norm": 0.8205375075340271, |
| "learning_rate": 4.694663497148084e-06, |
| "loss": 0.7011, |
| "step": 2667 |
| }, |
| { |
| "epoch": 0.9849864632045287, |
| "grad_norm": 0.8569832444190979, |
| "learning_rate": 4.6944305270623634e-06, |
| "loss": 0.6939, |
| "step": 2668 |
| }, |
| { |
| "epoch": 0.9853556485355649, |
| "grad_norm": 0.8959291577339172, |
| "learning_rate": 4.694197473918139e-06, |
| "loss": 0.7057, |
| "step": 2669 |
| }, |
| { |
| "epoch": 0.985724833866601, |
| "grad_norm": 0.8417198657989502, |
| "learning_rate": 4.6939643377242335e-06, |
| "loss": 0.6881, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.9860940191976372, |
| "grad_norm": 0.8587856888771057, |
| "learning_rate": 4.693731118489471e-06, |
| "loss": 0.7073, |
| "step": 2671 |
| }, |
| { |
| "epoch": 0.9864632045286734, |
| "grad_norm": 0.8757145404815674, |
| "learning_rate": 4.693497816222676e-06, |
| "loss": 0.7277, |
| "step": 2672 |
| }, |
| { |
| "epoch": 0.9868323898597096, |
| "grad_norm": 0.8414481282234192, |
| "learning_rate": 4.693264430932683e-06, |
| "loss": 0.68, |
| "step": 2673 |
| }, |
| { |
| "epoch": 0.9872015751907458, |
| "grad_norm": 0.8073148131370544, |
| "learning_rate": 4.693030962628322e-06, |
| "loss": 0.6434, |
| "step": 2674 |
| }, |
| { |
| "epoch": 0.9875707605217819, |
| "grad_norm": 0.8849506378173828, |
| "learning_rate": 4.692797411318432e-06, |
| "loss": 0.7039, |
| "step": 2675 |
| }, |
| { |
| "epoch": 0.9879399458528181, |
| "grad_norm": 0.829002857208252, |
| "learning_rate": 4.692563777011852e-06, |
| "loss": 0.6811, |
| "step": 2676 |
| }, |
| { |
| "epoch": 0.9883091311838543, |
| "grad_norm": 0.8336803317070007, |
| "learning_rate": 4.692330059717425e-06, |
| "loss": 0.6915, |
| "step": 2677 |
| }, |
| { |
| "epoch": 0.9886783165148905, |
| "grad_norm": 0.8502470850944519, |
| "learning_rate": 4.6920962594439965e-06, |
| "loss": 0.6678, |
| "step": 2678 |
| }, |
| { |
| "epoch": 0.9890475018459266, |
| "grad_norm": 0.8810563683509827, |
| "learning_rate": 4.691862376200418e-06, |
| "loss": 0.702, |
| "step": 2679 |
| }, |
| { |
| "epoch": 0.9894166871769629, |
| "grad_norm": 0.8533656597137451, |
| "learning_rate": 4.691628409995539e-06, |
| "loss": 0.6748, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.989785872507999, |
| "grad_norm": 0.8637438416481018, |
| "learning_rate": 4.6913943608382166e-06, |
| "loss": 0.7256, |
| "step": 2681 |
| }, |
| { |
| "epoch": 0.9901550578390352, |
| "grad_norm": 0.8353433609008789, |
| "learning_rate": 4.691160228737309e-06, |
| "loss": 0.6965, |
| "step": 2682 |
| }, |
| { |
| "epoch": 0.9905242431700714, |
| "grad_norm": 0.8654287457466125, |
| "learning_rate": 4.690926013701678e-06, |
| "loss": 0.6864, |
| "step": 2683 |
| }, |
| { |
| "epoch": 0.9908934285011075, |
| "grad_norm": 0.8387684226036072, |
| "learning_rate": 4.69069171574019e-06, |
| "loss": 0.6389, |
| "step": 2684 |
| }, |
| { |
| "epoch": 0.9912626138321438, |
| "grad_norm": 0.908822774887085, |
| "learning_rate": 4.690457334861711e-06, |
| "loss": 0.7105, |
| "step": 2685 |
| }, |
| { |
| "epoch": 0.9916317991631799, |
| "grad_norm": 0.8545365333557129, |
| "learning_rate": 4.690222871075114e-06, |
| "loss": 0.722, |
| "step": 2686 |
| }, |
| { |
| "epoch": 0.9920009844942161, |
| "grad_norm": 0.8538405299186707, |
| "learning_rate": 4.689988324389272e-06, |
| "loss": 0.6503, |
| "step": 2687 |
| }, |
| { |
| "epoch": 0.9923701698252523, |
| "grad_norm": 0.8731146454811096, |
| "learning_rate": 4.689753694813063e-06, |
| "loss": 0.6982, |
| "step": 2688 |
| }, |
| { |
| "epoch": 0.9927393551562884, |
| "grad_norm": 0.8791890144348145, |
| "learning_rate": 4.689518982355369e-06, |
| "loss": 0.6837, |
| "step": 2689 |
| }, |
| { |
| "epoch": 0.9931085404873247, |
| "grad_norm": 0.8614819049835205, |
| "learning_rate": 4.68928418702507e-06, |
| "loss": 0.7137, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.9934777258183608, |
| "grad_norm": 0.8743491768836975, |
| "learning_rate": 4.689049308831057e-06, |
| "loss": 0.7015, |
| "step": 2691 |
| }, |
| { |
| "epoch": 0.993846911149397, |
| "grad_norm": 0.8765535354614258, |
| "learning_rate": 4.688814347782219e-06, |
| "loss": 0.6774, |
| "step": 2692 |
| }, |
| { |
| "epoch": 0.9942160964804332, |
| "grad_norm": 0.8722323775291443, |
| "learning_rate": 4.6885793038874486e-06, |
| "loss": 0.6862, |
| "step": 2693 |
| }, |
| { |
| "epoch": 0.9945852818114693, |
| "grad_norm": 0.8379456400871277, |
| "learning_rate": 4.688344177155642e-06, |
| "loss": 0.6752, |
| "step": 2694 |
| }, |
| { |
| "epoch": 0.9949544671425056, |
| "grad_norm": 0.8696368932723999, |
| "learning_rate": 4.688108967595699e-06, |
| "loss": 0.6786, |
| "step": 2695 |
| }, |
| { |
| "epoch": 0.9953236524735417, |
| "grad_norm": 0.8123127818107605, |
| "learning_rate": 4.687873675216522e-06, |
| "loss": 0.6704, |
| "step": 2696 |
| }, |
| { |
| "epoch": 0.9956928378045778, |
| "grad_norm": 0.8309715390205383, |
| "learning_rate": 4.687638300027016e-06, |
| "loss": 0.6552, |
| "step": 2697 |
| }, |
| { |
| "epoch": 0.9960620231356141, |
| "grad_norm": 0.8737226724624634, |
| "learning_rate": 4.687402842036092e-06, |
| "loss": 0.7223, |
| "step": 2698 |
| }, |
| { |
| "epoch": 0.9964312084666502, |
| "grad_norm": 0.8684542179107666, |
| "learning_rate": 4.687167301252661e-06, |
| "loss": 0.6992, |
| "step": 2699 |
| }, |
| { |
| "epoch": 0.9968003937976865, |
| "grad_norm": 0.848534345626831, |
| "learning_rate": 4.686931677685637e-06, |
| "loss": 0.7061, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.9971695791287226, |
| "grad_norm": 0.862679123878479, |
| "learning_rate": 4.68669597134394e-06, |
| "loss": 0.7094, |
| "step": 2701 |
| }, |
| { |
| "epoch": 0.9975387644597588, |
| "grad_norm": 0.8820730447769165, |
| "learning_rate": 4.68646018223649e-06, |
| "loss": 0.6943, |
| "step": 2702 |
| }, |
| { |
| "epoch": 0.997907949790795, |
| "grad_norm": 0.8731999397277832, |
| "learning_rate": 4.686224310372213e-06, |
| "loss": 0.7011, |
| "step": 2703 |
| }, |
| { |
| "epoch": 0.9982771351218311, |
| "grad_norm": 0.852350115776062, |
| "learning_rate": 4.685988355760035e-06, |
| "loss": 0.6635, |
| "step": 2704 |
| }, |
| { |
| "epoch": 0.9986463204528674, |
| "grad_norm": 0.8917254209518433, |
| "learning_rate": 4.6857523184088875e-06, |
| "loss": 0.7044, |
| "step": 2705 |
| }, |
| { |
| "epoch": 0.9990155057839035, |
| "grad_norm": 0.900327742099762, |
| "learning_rate": 4.685516198327705e-06, |
| "loss": 0.7145, |
| "step": 2706 |
| }, |
| { |
| "epoch": 0.9993846911149397, |
| "grad_norm": 0.9128772020339966, |
| "learning_rate": 4.685279995525424e-06, |
| "loss": 0.7169, |
| "step": 2707 |
| }, |
| { |
| "epoch": 0.9997538764459759, |
| "grad_norm": 0.8816156983375549, |
| "learning_rate": 4.685043710010985e-06, |
| "loss": 0.7372, |
| "step": 2708 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 16248, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 6, |
| "save_steps": 2708, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 6.840507374826947e+18, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|