| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 5.0, |
| "eval_steps": 500, |
| "global_step": 900, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.005555555555555556, |
| "grad_norm": 0.03562309220433235, |
| "learning_rate": 0.0, |
| "loss": 1.3897, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.011111111111111112, |
| "grad_norm": 0.03496583178639412, |
| "learning_rate": 4e-05, |
| "loss": 1.2107, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.016666666666666666, |
| "grad_norm": 0.0381351076066494, |
| "learning_rate": 8e-05, |
| "loss": 1.3566, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.022222222222222223, |
| "grad_norm": 0.05364065244793892, |
| "learning_rate": 0.00012, |
| "loss": 1.4139, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.027777777777777776, |
| "grad_norm": 0.0683208778500557, |
| "learning_rate": 0.00016, |
| "loss": 1.2812, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.03333333333333333, |
| "grad_norm": 0.08995749801397324, |
| "learning_rate": 0.0002, |
| "loss": 1.3688, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.03888888888888889, |
| "grad_norm": 0.11530529707670212, |
| "learning_rate": 0.00019977653631284917, |
| "loss": 1.2782, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.044444444444444446, |
| "grad_norm": 0.08583567291498184, |
| "learning_rate": 0.00019955307262569833, |
| "loss": 1.2665, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 0.05587838962674141, |
| "learning_rate": 0.0001993296089385475, |
| "loss": 1.2385, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.05555555555555555, |
| "grad_norm": 0.11535882204771042, |
| "learning_rate": 0.00019910614525139666, |
| "loss": 1.0813, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.06111111111111111, |
| "grad_norm": 0.13201650977134705, |
| "learning_rate": 0.00019888268156424582, |
| "loss": 1.1436, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.06666666666666667, |
| "grad_norm": 0.11374582350254059, |
| "learning_rate": 0.00019865921787709498, |
| "loss": 1.1855, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.07222222222222222, |
| "grad_norm": 0.06903394311666489, |
| "learning_rate": 0.00019843575418994415, |
| "loss": 1.0681, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.07777777777777778, |
| "grad_norm": 0.04648435115814209, |
| "learning_rate": 0.0001982122905027933, |
| "loss": 1.2021, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.08333333333333333, |
| "grad_norm": 0.044310279190540314, |
| "learning_rate": 0.00019798882681564247, |
| "loss": 1.2205, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.08888888888888889, |
| "grad_norm": 0.04107815772294998, |
| "learning_rate": 0.00019776536312849163, |
| "loss": 1.2386, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.09444444444444444, |
| "grad_norm": 0.04393622279167175, |
| "learning_rate": 0.0001975418994413408, |
| "loss": 1.0073, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 0.0427585206925869, |
| "learning_rate": 0.00019731843575418996, |
| "loss": 1.1893, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.10555555555555556, |
| "grad_norm": 0.04664256423711777, |
| "learning_rate": 0.00019709497206703912, |
| "loss": 0.9927, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.1111111111111111, |
| "grad_norm": 0.04508348926901817, |
| "learning_rate": 0.00019687150837988828, |
| "loss": 1.0925, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.11666666666666667, |
| "grad_norm": 0.039513278752565384, |
| "learning_rate": 0.00019664804469273744, |
| "loss": 0.9871, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.12222222222222222, |
| "grad_norm": 0.03960327059030533, |
| "learning_rate": 0.0001964245810055866, |
| "loss": 1.0176, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.12777777777777777, |
| "grad_norm": 0.04235127568244934, |
| "learning_rate": 0.00019620111731843577, |
| "loss": 1.049, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.13333333333333333, |
| "grad_norm": 0.0568842850625515, |
| "learning_rate": 0.00019597765363128493, |
| "loss": 1.2287, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.1388888888888889, |
| "grad_norm": 0.04214571416378021, |
| "learning_rate": 0.0001957541899441341, |
| "loss": 1.1542, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.14444444444444443, |
| "grad_norm": 0.047842007130384445, |
| "learning_rate": 0.00019553072625698326, |
| "loss": 1.016, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 0.044961851090192795, |
| "learning_rate": 0.00019530726256983242, |
| "loss": 1.0749, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.15555555555555556, |
| "grad_norm": 0.04217054322361946, |
| "learning_rate": 0.00019508379888268158, |
| "loss": 0.9727, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.16111111111111112, |
| "grad_norm": 0.0383504293859005, |
| "learning_rate": 0.00019486033519553074, |
| "loss": 1.0661, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.16666666666666666, |
| "grad_norm": 0.03532204404473305, |
| "learning_rate": 0.0001946368715083799, |
| "loss": 1.1068, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.17222222222222222, |
| "grad_norm": 0.038037098944187164, |
| "learning_rate": 0.00019441340782122907, |
| "loss": 1.1115, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.17777777777777778, |
| "grad_norm": 0.037036098539829254, |
| "learning_rate": 0.00019418994413407823, |
| "loss": 1.0929, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.18333333333333332, |
| "grad_norm": 0.0363038145005703, |
| "learning_rate": 0.00019396648044692737, |
| "loss": 1.0947, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.18888888888888888, |
| "grad_norm": 0.034751344472169876, |
| "learning_rate": 0.00019374301675977655, |
| "loss": 0.9976, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.19444444444444445, |
| "grad_norm": 0.034770041704177856, |
| "learning_rate": 0.0001935195530726257, |
| "loss": 0.957, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.039629194885492325, |
| "learning_rate": 0.00019329608938547488, |
| "loss": 1.008, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.20555555555555555, |
| "grad_norm": 0.0362899973988533, |
| "learning_rate": 0.00019307262569832401, |
| "loss": 1.0663, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.2111111111111111, |
| "grad_norm": 0.03607248142361641, |
| "learning_rate": 0.0001928491620111732, |
| "loss": 0.9987, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.21666666666666667, |
| "grad_norm": 0.039631396532058716, |
| "learning_rate": 0.00019262569832402234, |
| "loss": 1.0823, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.2222222222222222, |
| "grad_norm": 0.035003967583179474, |
| "learning_rate": 0.00019240223463687153, |
| "loss": 1.1496, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.22777777777777777, |
| "grad_norm": 0.037705037742853165, |
| "learning_rate": 0.00019217877094972066, |
| "loss": 1.0588, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.23333333333333334, |
| "grad_norm": 0.04814685508608818, |
| "learning_rate": 0.00019195530726256985, |
| "loss": 1.1946, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.2388888888888889, |
| "grad_norm": 0.04323168098926544, |
| "learning_rate": 0.000191731843575419, |
| "loss": 0.9522, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.24444444444444444, |
| "grad_norm": 0.03730936348438263, |
| "learning_rate": 0.00019150837988826818, |
| "loss": 0.9357, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 0.038458798080682755, |
| "learning_rate": 0.0001912849162011173, |
| "loss": 0.9934, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.25555555555555554, |
| "grad_norm": 0.0425235778093338, |
| "learning_rate": 0.0001910614525139665, |
| "loss": 0.9022, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.2611111111111111, |
| "grad_norm": 0.037031710147857666, |
| "learning_rate": 0.00019083798882681564, |
| "loss": 1.0644, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.26666666666666666, |
| "grad_norm": 0.037151917815208435, |
| "learning_rate": 0.00019061452513966483, |
| "loss": 1.0072, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.2722222222222222, |
| "grad_norm": 0.03987253084778786, |
| "learning_rate": 0.00019039106145251396, |
| "loss": 1.1129, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.2777777777777778, |
| "grad_norm": 0.04249132424592972, |
| "learning_rate": 0.00019016759776536315, |
| "loss": 0.8697, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.2833333333333333, |
| "grad_norm": 0.03178909793496132, |
| "learning_rate": 0.0001899441340782123, |
| "loss": 0.9633, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.28888888888888886, |
| "grad_norm": 0.03648042678833008, |
| "learning_rate": 0.00018972067039106148, |
| "loss": 0.9787, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.29444444444444445, |
| "grad_norm": 0.037291720509529114, |
| "learning_rate": 0.0001894972067039106, |
| "loss": 1.0091, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 0.051739390939474106, |
| "learning_rate": 0.00018927374301675977, |
| "loss": 0.8872, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.3055555555555556, |
| "grad_norm": 0.05843161419034004, |
| "learning_rate": 0.00018905027932960894, |
| "loss": 1.1597, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.3111111111111111, |
| "grad_norm": 0.038655612617731094, |
| "learning_rate": 0.0001888268156424581, |
| "loss": 0.9919, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.31666666666666665, |
| "grad_norm": 0.038581885397434235, |
| "learning_rate": 0.00018860335195530726, |
| "loss": 1.0442, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.32222222222222224, |
| "grad_norm": 0.04138307645916939, |
| "learning_rate": 0.00018837988826815642, |
| "loss": 0.9998, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.3277777777777778, |
| "grad_norm": 0.037572942674160004, |
| "learning_rate": 0.00018815642458100559, |
| "loss": 1.1027, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.3333333333333333, |
| "grad_norm": 0.03829139843583107, |
| "learning_rate": 0.00018793296089385475, |
| "loss": 0.9609, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.3388888888888889, |
| "grad_norm": 0.041106369346380234, |
| "learning_rate": 0.0001877094972067039, |
| "loss": 0.8585, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.34444444444444444, |
| "grad_norm": 0.036691080778837204, |
| "learning_rate": 0.00018748603351955307, |
| "loss": 0.8896, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 0.03947650268673897, |
| "learning_rate": 0.00018726256983240224, |
| "loss": 0.9298, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.35555555555555557, |
| "grad_norm": 0.03706413879990578, |
| "learning_rate": 0.0001870391061452514, |
| "loss": 1.0745, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.3611111111111111, |
| "grad_norm": 0.0503302700817585, |
| "learning_rate": 0.00018681564245810056, |
| "loss": 1.0301, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.36666666666666664, |
| "grad_norm": 0.040827762335538864, |
| "learning_rate": 0.00018659217877094972, |
| "loss": 0.9669, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.37222222222222223, |
| "grad_norm": 0.042443402111530304, |
| "learning_rate": 0.00018636871508379888, |
| "loss": 0.9118, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.37777777777777777, |
| "grad_norm": 0.04341750964522362, |
| "learning_rate": 0.00018614525139664805, |
| "loss": 1.0042, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.38333333333333336, |
| "grad_norm": 0.04730634391307831, |
| "learning_rate": 0.0001859217877094972, |
| "loss": 1.049, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.3888888888888889, |
| "grad_norm": 0.03982226550579071, |
| "learning_rate": 0.00018569832402234637, |
| "loss": 1.1031, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.39444444444444443, |
| "grad_norm": 0.04159389063715935, |
| "learning_rate": 0.00018547486033519553, |
| "loss": 0.9836, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.043531641364097595, |
| "learning_rate": 0.0001852513966480447, |
| "loss": 1.0529, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.40555555555555556, |
| "grad_norm": 0.042536310851573944, |
| "learning_rate": 0.00018502793296089386, |
| "loss": 0.9352, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.4111111111111111, |
| "grad_norm": 0.04230835288763046, |
| "learning_rate": 0.00018480446927374302, |
| "loss": 0.9066, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.4166666666666667, |
| "grad_norm": 0.04795027896761894, |
| "learning_rate": 0.00018458100558659218, |
| "loss": 0.9301, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.4222222222222222, |
| "grad_norm": 0.04159845784306526, |
| "learning_rate": 0.00018435754189944135, |
| "loss": 1.0217, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.42777777777777776, |
| "grad_norm": 0.046765729784965515, |
| "learning_rate": 0.0001841340782122905, |
| "loss": 0.9583, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.43333333333333335, |
| "grad_norm": 0.04374508187174797, |
| "learning_rate": 0.00018391061452513967, |
| "loss": 1.0433, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.4388888888888889, |
| "grad_norm": 0.04641956463456154, |
| "learning_rate": 0.00018368715083798883, |
| "loss": 0.9135, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.4444444444444444, |
| "grad_norm": 0.0653461143374443, |
| "learning_rate": 0.000183463687150838, |
| "loss": 0.8327, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 0.05362270772457123, |
| "learning_rate": 0.00018324022346368716, |
| "loss": 0.8941, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.45555555555555555, |
| "grad_norm": 0.054075635969638824, |
| "learning_rate": 0.00018301675977653632, |
| "loss": 0.9034, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.46111111111111114, |
| "grad_norm": 0.05428635701537132, |
| "learning_rate": 0.00018279329608938548, |
| "loss": 0.9889, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.4666666666666667, |
| "grad_norm": 0.046751026064157486, |
| "learning_rate": 0.00018256983240223464, |
| "loss": 0.9755, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.4722222222222222, |
| "grad_norm": 0.055183425545692444, |
| "learning_rate": 0.0001823463687150838, |
| "loss": 0.8958, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.4777777777777778, |
| "grad_norm": 0.045744914561510086, |
| "learning_rate": 0.00018212290502793297, |
| "loss": 1.1, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.48333333333333334, |
| "grad_norm": 0.047536078840494156, |
| "learning_rate": 0.00018189944134078213, |
| "loss": 0.9612, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.4888888888888889, |
| "grad_norm": 0.04590754956007004, |
| "learning_rate": 0.0001816759776536313, |
| "loss": 0.9353, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.49444444444444446, |
| "grad_norm": 0.04202994331717491, |
| "learning_rate": 0.00018145251396648046, |
| "loss": 1.0445, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.047461919486522675, |
| "learning_rate": 0.00018122905027932962, |
| "loss": 0.9836, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.5055555555555555, |
| "grad_norm": 0.0570930652320385, |
| "learning_rate": 0.00018100558659217878, |
| "loss": 1.0391, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.5111111111111111, |
| "grad_norm": 0.04890509322285652, |
| "learning_rate": 0.00018078212290502794, |
| "loss": 0.9243, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.5166666666666667, |
| "grad_norm": 0.04897640645503998, |
| "learning_rate": 0.0001805586592178771, |
| "loss": 1.0666, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.5222222222222223, |
| "grad_norm": 0.04432108625769615, |
| "learning_rate": 0.00018033519553072627, |
| "loss": 0.9082, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.5277777777777778, |
| "grad_norm": 0.04745204374194145, |
| "learning_rate": 0.00018011173184357543, |
| "loss": 1.0106, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.5333333333333333, |
| "grad_norm": 0.052698925137519836, |
| "learning_rate": 0.0001798882681564246, |
| "loss": 0.9143, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.5388888888888889, |
| "grad_norm": 0.04712430015206337, |
| "learning_rate": 0.00017966480446927375, |
| "loss": 0.9098, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.5444444444444444, |
| "grad_norm": 0.046263325959444046, |
| "learning_rate": 0.00017944134078212292, |
| "loss": 1.0247, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 0.05879923328757286, |
| "learning_rate": 0.00017921787709497208, |
| "loss": 0.8499, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.5555555555555556, |
| "grad_norm": 0.05413980782032013, |
| "learning_rate": 0.00017899441340782124, |
| "loss": 0.8356, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.5611111111111111, |
| "grad_norm": 0.043978795409202576, |
| "learning_rate": 0.00017877094972067038, |
| "loss": 1.003, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.5666666666666667, |
| "grad_norm": 0.04537949338555336, |
| "learning_rate": 0.00017854748603351957, |
| "loss": 0.9116, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.5722222222222222, |
| "grad_norm": 0.046335939317941666, |
| "learning_rate": 0.0001783240223463687, |
| "loss": 0.8829, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.5777777777777777, |
| "grad_norm": 0.06141021102666855, |
| "learning_rate": 0.0001781005586592179, |
| "loss": 0.9723, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.5833333333333334, |
| "grad_norm": 0.048380546271800995, |
| "learning_rate": 0.00017787709497206703, |
| "loss": 1.0035, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.5888888888888889, |
| "grad_norm": 0.05503736436367035, |
| "learning_rate": 0.00017765363128491622, |
| "loss": 0.9792, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.5944444444444444, |
| "grad_norm": 0.05387064814567566, |
| "learning_rate": 0.00017743016759776535, |
| "loss": 0.9593, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 0.04959545284509659, |
| "learning_rate": 0.00017720670391061454, |
| "loss": 1.1505, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.6055555555555555, |
| "grad_norm": 0.05498025193810463, |
| "learning_rate": 0.00017698324022346368, |
| "loss": 1.0054, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.6111111111111112, |
| "grad_norm": 0.05924772098660469, |
| "learning_rate": 0.00017675977653631287, |
| "loss": 0.9245, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.6166666666666667, |
| "grad_norm": 0.05125448480248451, |
| "learning_rate": 0.000176536312849162, |
| "loss": 0.9627, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.6222222222222222, |
| "grad_norm": 0.04882131516933441, |
| "learning_rate": 0.0001763128491620112, |
| "loss": 1.0934, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.6277777777777778, |
| "grad_norm": 0.06039188802242279, |
| "learning_rate": 0.00017608938547486033, |
| "loss": 0.9644, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.6333333333333333, |
| "grad_norm": 0.05410723015666008, |
| "learning_rate": 0.00017586592178770951, |
| "loss": 0.8913, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.6388888888888888, |
| "grad_norm": 0.042146410793066025, |
| "learning_rate": 0.00017564245810055865, |
| "loss": 0.9539, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.6444444444444445, |
| "grad_norm": 0.04869828000664711, |
| "learning_rate": 0.00017541899441340784, |
| "loss": 1.0292, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 0.04636748507618904, |
| "learning_rate": 0.00017519553072625697, |
| "loss": 0.9346, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.6555555555555556, |
| "grad_norm": 0.049692243337631226, |
| "learning_rate": 0.00017497206703910616, |
| "loss": 0.9506, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.6611111111111111, |
| "grad_norm": 0.05080572888255119, |
| "learning_rate": 0.0001747486033519553, |
| "loss": 0.9036, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 0.050407495349645615, |
| "learning_rate": 0.0001745251396648045, |
| "loss": 0.9325, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.6722222222222223, |
| "grad_norm": 0.04986334592103958, |
| "learning_rate": 0.00017430167597765362, |
| "loss": 0.9872, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.6777777777777778, |
| "grad_norm": 0.05374254286289215, |
| "learning_rate": 0.0001740782122905028, |
| "loss": 0.9968, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.6833333333333333, |
| "grad_norm": 0.04589278623461723, |
| "learning_rate": 0.00017385474860335195, |
| "loss": 0.8981, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.6888888888888889, |
| "grad_norm": 0.05185263976454735, |
| "learning_rate": 0.00017363128491620114, |
| "loss": 0.8315, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.6944444444444444, |
| "grad_norm": 0.06388653814792633, |
| "learning_rate": 0.00017340782122905027, |
| "loss": 0.9927, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 0.05252877622842789, |
| "learning_rate": 0.00017318435754189946, |
| "loss": 1.0065, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.7055555555555556, |
| "grad_norm": 0.04771338775753975, |
| "learning_rate": 0.0001729608938547486, |
| "loss": 1.011, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.7111111111111111, |
| "grad_norm": 0.04832189902663231, |
| "learning_rate": 0.0001727374301675978, |
| "loss": 0.9703, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.7166666666666667, |
| "grad_norm": 0.058851100504398346, |
| "learning_rate": 0.00017251396648044692, |
| "loss": 0.9536, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.7222222222222222, |
| "grad_norm": 0.05162525922060013, |
| "learning_rate": 0.0001722905027932961, |
| "loss": 0.9376, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.7277777777777777, |
| "grad_norm": 0.04926559329032898, |
| "learning_rate": 0.00017206703910614525, |
| "loss": 0.9675, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.7333333333333333, |
| "grad_norm": 0.045212697237730026, |
| "learning_rate": 0.00017184357541899444, |
| "loss": 1.1185, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.7388888888888889, |
| "grad_norm": 0.048744745552539825, |
| "learning_rate": 0.00017162011173184357, |
| "loss": 0.8514, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.7444444444444445, |
| "grad_norm": 0.05711376294493675, |
| "learning_rate": 0.00017139664804469276, |
| "loss": 0.8688, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 0.054411597549915314, |
| "learning_rate": 0.0001711731843575419, |
| "loss": 0.9715, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.7555555555555555, |
| "grad_norm": 0.05230865627527237, |
| "learning_rate": 0.00017094972067039109, |
| "loss": 0.8751, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.7611111111111111, |
| "grad_norm": 0.049234066158533096, |
| "learning_rate": 0.00017072625698324022, |
| "loss": 0.9204, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.7666666666666667, |
| "grad_norm": 0.050416141748428345, |
| "learning_rate": 0.0001705027932960894, |
| "loss": 1.0438, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.7722222222222223, |
| "grad_norm": 0.044703587889671326, |
| "learning_rate": 0.00017027932960893855, |
| "loss": 0.9383, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.7777777777777778, |
| "grad_norm": 0.050353001803159714, |
| "learning_rate": 0.00017005586592178774, |
| "loss": 0.9202, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.7833333333333333, |
| "grad_norm": 0.050380364060401917, |
| "learning_rate": 0.00016983240223463687, |
| "loss": 0.9781, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.7888888888888889, |
| "grad_norm": 0.049743395298719406, |
| "learning_rate": 0.00016960893854748606, |
| "loss": 1.0145, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.7944444444444444, |
| "grad_norm": 0.04901986941695213, |
| "learning_rate": 0.0001693854748603352, |
| "loss": 0.917, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.04620293155312538, |
| "learning_rate": 0.00016916201117318438, |
| "loss": 0.9965, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.8055555555555556, |
| "grad_norm": 0.0640861988067627, |
| "learning_rate": 0.00016893854748603352, |
| "loss": 0.8774, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.8111111111111111, |
| "grad_norm": 0.06719637662172318, |
| "learning_rate": 0.0001687150837988827, |
| "loss": 0.8781, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.8166666666666667, |
| "grad_norm": 0.048986878246068954, |
| "learning_rate": 0.00016849162011173184, |
| "loss": 0.9208, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.8222222222222222, |
| "grad_norm": 0.04842989146709442, |
| "learning_rate": 0.000168268156424581, |
| "loss": 0.872, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.8277777777777777, |
| "grad_norm": 0.04718875512480736, |
| "learning_rate": 0.00016804469273743017, |
| "loss": 0.9608, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.8333333333333334, |
| "grad_norm": 0.04663201794028282, |
| "learning_rate": 0.00016782122905027933, |
| "loss": 0.9788, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.8388888888888889, |
| "grad_norm": 0.06023184582591057, |
| "learning_rate": 0.0001675977653631285, |
| "loss": 0.9136, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.8444444444444444, |
| "grad_norm": 0.04846452176570892, |
| "learning_rate": 0.00016737430167597766, |
| "loss": 0.9934, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 0.04904169216752052, |
| "learning_rate": 0.00016715083798882682, |
| "loss": 0.8698, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.8555555555555555, |
| "grad_norm": 0.05261608958244324, |
| "learning_rate": 0.00016692737430167598, |
| "loss": 0.9769, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.8611111111111112, |
| "grad_norm": 0.05482027307152748, |
| "learning_rate": 0.00016670391061452514, |
| "loss": 0.9682, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.8666666666666667, |
| "grad_norm": 0.04853905364871025, |
| "learning_rate": 0.0001664804469273743, |
| "loss": 0.8324, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.8722222222222222, |
| "grad_norm": 0.052008483558893204, |
| "learning_rate": 0.00016625698324022347, |
| "loss": 0.9262, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.8777777777777778, |
| "grad_norm": 0.057141151279211044, |
| "learning_rate": 0.00016603351955307263, |
| "loss": 0.9687, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.8833333333333333, |
| "grad_norm": 0.053614623844623566, |
| "learning_rate": 0.0001658100558659218, |
| "loss": 1.0153, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.8888888888888888, |
| "grad_norm": 0.05462003871798515, |
| "learning_rate": 0.00016558659217877095, |
| "loss": 1.066, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.8944444444444445, |
| "grad_norm": 0.06442496925592422, |
| "learning_rate": 0.00016536312849162012, |
| "loss": 0.9421, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 0.0469006709754467, |
| "learning_rate": 0.00016513966480446928, |
| "loss": 0.9178, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.9055555555555556, |
| "grad_norm": 0.04962185025215149, |
| "learning_rate": 0.00016491620111731844, |
| "loss": 1.0243, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.9111111111111111, |
| "grad_norm": 0.048993416130542755, |
| "learning_rate": 0.0001646927374301676, |
| "loss": 0.987, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.9166666666666666, |
| "grad_norm": 0.05156516283750534, |
| "learning_rate": 0.00016446927374301677, |
| "loss": 0.8975, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.9222222222222223, |
| "grad_norm": 0.06078791618347168, |
| "learning_rate": 0.00016424581005586593, |
| "loss": 0.9709, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.9277777777777778, |
| "grad_norm": 0.04482847452163696, |
| "learning_rate": 0.0001640223463687151, |
| "loss": 0.955, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.9333333333333333, |
| "grad_norm": 0.04539848864078522, |
| "learning_rate": 0.00016379888268156425, |
| "loss": 0.9566, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.9388888888888889, |
| "grad_norm": 0.05767229571938515, |
| "learning_rate": 0.00016357541899441342, |
| "loss": 0.8146, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.9444444444444444, |
| "grad_norm": 0.054371606558561325, |
| "learning_rate": 0.00016335195530726258, |
| "loss": 1.0004, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 0.05640481039881706, |
| "learning_rate": 0.00016312849162011174, |
| "loss": 1.0064, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.9555555555555556, |
| "grad_norm": 0.05987238138914108, |
| "learning_rate": 0.0001629050279329609, |
| "loss": 1.0158, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.9611111111111111, |
| "grad_norm": 0.052737317979335785, |
| "learning_rate": 0.00016268156424581007, |
| "loss": 0.9256, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.9666666666666667, |
| "grad_norm": 0.049311403185129166, |
| "learning_rate": 0.00016245810055865923, |
| "loss": 0.9628, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.9722222222222222, |
| "grad_norm": 0.056468550115823746, |
| "learning_rate": 0.0001622346368715084, |
| "loss": 0.9609, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.9777777777777777, |
| "grad_norm": 0.06114795804023743, |
| "learning_rate": 0.00016201117318435755, |
| "loss": 1.0214, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.9833333333333333, |
| "grad_norm": 0.05194453150033951, |
| "learning_rate": 0.00016178770949720671, |
| "loss": 0.9056, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.9888888888888889, |
| "grad_norm": 0.0524967759847641, |
| "learning_rate": 0.00016156424581005588, |
| "loss": 0.9775, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.9944444444444445, |
| "grad_norm": 0.057767920196056366, |
| "learning_rate": 0.00016134078212290504, |
| "loss": 0.9784, |
| "step": 179 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.05753879249095917, |
| "learning_rate": 0.0001611173184357542, |
| "loss": 0.9531, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.0055555555555555, |
| "grad_norm": 0.06806821376085281, |
| "learning_rate": 0.00016089385474860336, |
| "loss": 0.7893, |
| "step": 181 |
| }, |
| { |
| "epoch": 1.011111111111111, |
| "grad_norm": 0.04672916978597641, |
| "learning_rate": 0.00016067039106145253, |
| "loss": 0.8986, |
| "step": 182 |
| }, |
| { |
| "epoch": 1.0166666666666666, |
| "grad_norm": 0.062532939016819, |
| "learning_rate": 0.0001604469273743017, |
| "loss": 0.9351, |
| "step": 183 |
| }, |
| { |
| "epoch": 1.0222222222222221, |
| "grad_norm": 0.062364086508750916, |
| "learning_rate": 0.00016022346368715085, |
| "loss": 0.9116, |
| "step": 184 |
| }, |
| { |
| "epoch": 1.0277777777777777, |
| "grad_norm": 0.05442197248339653, |
| "learning_rate": 0.00016, |
| "loss": 0.9064, |
| "step": 185 |
| }, |
| { |
| "epoch": 1.0333333333333334, |
| "grad_norm": 0.0521186888217926, |
| "learning_rate": 0.00015977653631284918, |
| "loss": 0.9243, |
| "step": 186 |
| }, |
| { |
| "epoch": 1.038888888888889, |
| "grad_norm": 0.05240177735686302, |
| "learning_rate": 0.00015955307262569834, |
| "loss": 0.9321, |
| "step": 187 |
| }, |
| { |
| "epoch": 1.0444444444444445, |
| "grad_norm": 0.07044881582260132, |
| "learning_rate": 0.0001593296089385475, |
| "loss": 0.8636, |
| "step": 188 |
| }, |
| { |
| "epoch": 1.05, |
| "grad_norm": 0.050740137696266174, |
| "learning_rate": 0.00015910614525139666, |
| "loss": 0.9124, |
| "step": 189 |
| }, |
| { |
| "epoch": 1.0555555555555556, |
| "grad_norm": 0.05037765949964523, |
| "learning_rate": 0.00015888268156424582, |
| "loss": 0.8686, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.0611111111111111, |
| "grad_norm": 0.057770561426877975, |
| "learning_rate": 0.000158659217877095, |
| "loss": 0.8354, |
| "step": 191 |
| }, |
| { |
| "epoch": 1.0666666666666667, |
| "grad_norm": 0.06425413489341736, |
| "learning_rate": 0.00015843575418994415, |
| "loss": 0.8598, |
| "step": 192 |
| }, |
| { |
| "epoch": 1.0722222222222222, |
| "grad_norm": 0.0666314959526062, |
| "learning_rate": 0.0001582122905027933, |
| "loss": 0.9796, |
| "step": 193 |
| }, |
| { |
| "epoch": 1.0777777777777777, |
| "grad_norm": 0.055953703820705414, |
| "learning_rate": 0.00015798882681564247, |
| "loss": 0.9676, |
| "step": 194 |
| }, |
| { |
| "epoch": 1.0833333333333333, |
| "grad_norm": 0.05948743224143982, |
| "learning_rate": 0.0001577653631284916, |
| "loss": 1.017, |
| "step": 195 |
| }, |
| { |
| "epoch": 1.0888888888888888, |
| "grad_norm": 0.06179089844226837, |
| "learning_rate": 0.0001575418994413408, |
| "loss": 0.8852, |
| "step": 196 |
| }, |
| { |
| "epoch": 1.0944444444444446, |
| "grad_norm": 0.054043907672166824, |
| "learning_rate": 0.00015731843575418993, |
| "loss": 0.9897, |
| "step": 197 |
| }, |
| { |
| "epoch": 1.1, |
| "grad_norm": 0.053820669651031494, |
| "learning_rate": 0.00015709497206703912, |
| "loss": 0.9258, |
| "step": 198 |
| }, |
| { |
| "epoch": 1.1055555555555556, |
| "grad_norm": 0.05031691491603851, |
| "learning_rate": 0.00015687150837988826, |
| "loss": 1.0193, |
| "step": 199 |
| }, |
| { |
| "epoch": 1.1111111111111112, |
| "grad_norm": 0.05572971701622009, |
| "learning_rate": 0.00015664804469273745, |
| "loss": 0.8231, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.1166666666666667, |
| "grad_norm": 0.06840377300977707, |
| "learning_rate": 0.00015642458100558658, |
| "loss": 0.9018, |
| "step": 201 |
| }, |
| { |
| "epoch": 1.1222222222222222, |
| "grad_norm": 0.048216886818408966, |
| "learning_rate": 0.00015620111731843577, |
| "loss": 0.8592, |
| "step": 202 |
| }, |
| { |
| "epoch": 1.1277777777777778, |
| "grad_norm": 0.05099362134933472, |
| "learning_rate": 0.0001559776536312849, |
| "loss": 0.9244, |
| "step": 203 |
| }, |
| { |
| "epoch": 1.1333333333333333, |
| "grad_norm": 0.07767224311828613, |
| "learning_rate": 0.0001557541899441341, |
| "loss": 0.888, |
| "step": 204 |
| }, |
| { |
| "epoch": 1.1388888888888888, |
| "grad_norm": 0.05542586371302605, |
| "learning_rate": 0.00015553072625698323, |
| "loss": 0.9489, |
| "step": 205 |
| }, |
| { |
| "epoch": 1.1444444444444444, |
| "grad_norm": 0.05055686831474304, |
| "learning_rate": 0.00015530726256983242, |
| "loss": 0.958, |
| "step": 206 |
| }, |
| { |
| "epoch": 1.15, |
| "grad_norm": 0.06010639667510986, |
| "learning_rate": 0.00015508379888268156, |
| "loss": 0.867, |
| "step": 207 |
| }, |
| { |
| "epoch": 1.1555555555555554, |
| "grad_norm": 0.056401461362838745, |
| "learning_rate": 0.00015486033519553075, |
| "loss": 0.8492, |
| "step": 208 |
| }, |
| { |
| "epoch": 1.1611111111111112, |
| "grad_norm": 0.04562723636627197, |
| "learning_rate": 0.00015463687150837988, |
| "loss": 0.8798, |
| "step": 209 |
| }, |
| { |
| "epoch": 1.1666666666666667, |
| "grad_norm": 0.05210921913385391, |
| "learning_rate": 0.00015441340782122907, |
| "loss": 1.0221, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.1722222222222223, |
| "grad_norm": 0.05825547128915787, |
| "learning_rate": 0.0001541899441340782, |
| "loss": 0.8814, |
| "step": 211 |
| }, |
| { |
| "epoch": 1.1777777777777778, |
| "grad_norm": 0.054828494787216187, |
| "learning_rate": 0.0001539664804469274, |
| "loss": 0.9182, |
| "step": 212 |
| }, |
| { |
| "epoch": 1.1833333333333333, |
| "grad_norm": 0.05495524778962135, |
| "learning_rate": 0.00015374301675977653, |
| "loss": 0.9478, |
| "step": 213 |
| }, |
| { |
| "epoch": 1.1888888888888889, |
| "grad_norm": 0.05834140256047249, |
| "learning_rate": 0.00015351955307262572, |
| "loss": 0.8907, |
| "step": 214 |
| }, |
| { |
| "epoch": 1.1944444444444444, |
| "grad_norm": 0.06122478097677231, |
| "learning_rate": 0.00015329608938547486, |
| "loss": 0.9009, |
| "step": 215 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 0.056065574288368225, |
| "learning_rate": 0.00015307262569832405, |
| "loss": 0.8999, |
| "step": 216 |
| }, |
| { |
| "epoch": 1.2055555555555555, |
| "grad_norm": 0.06170939654111862, |
| "learning_rate": 0.00015284916201117318, |
| "loss": 0.9633, |
| "step": 217 |
| }, |
| { |
| "epoch": 1.211111111111111, |
| "grad_norm": 0.06926306337118149, |
| "learning_rate": 0.00015262569832402237, |
| "loss": 0.872, |
| "step": 218 |
| }, |
| { |
| "epoch": 1.2166666666666668, |
| "grad_norm": 0.04986730217933655, |
| "learning_rate": 0.0001524022346368715, |
| "loss": 0.8654, |
| "step": 219 |
| }, |
| { |
| "epoch": 1.2222222222222223, |
| "grad_norm": 0.06529076397418976, |
| "learning_rate": 0.0001521787709497207, |
| "loss": 0.8414, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.2277777777777779, |
| "grad_norm": 0.05794944614171982, |
| "learning_rate": 0.00015195530726256983, |
| "loss": 1.003, |
| "step": 221 |
| }, |
| { |
| "epoch": 1.2333333333333334, |
| "grad_norm": 0.05351187661290169, |
| "learning_rate": 0.00015173184357541902, |
| "loss": 0.9542, |
| "step": 222 |
| }, |
| { |
| "epoch": 1.238888888888889, |
| "grad_norm": 0.055845387279987335, |
| "learning_rate": 0.00015150837988826815, |
| "loss": 1.0061, |
| "step": 223 |
| }, |
| { |
| "epoch": 1.2444444444444445, |
| "grad_norm": 0.0538068562746048, |
| "learning_rate": 0.00015128491620111734, |
| "loss": 0.8362, |
| "step": 224 |
| }, |
| { |
| "epoch": 1.25, |
| "grad_norm": 0.05743606016039848, |
| "learning_rate": 0.00015106145251396648, |
| "loss": 0.9349, |
| "step": 225 |
| }, |
| { |
| "epoch": 1.2555555555555555, |
| "grad_norm": 0.05550825595855713, |
| "learning_rate": 0.00015083798882681567, |
| "loss": 0.9371, |
| "step": 226 |
| }, |
| { |
| "epoch": 1.261111111111111, |
| "grad_norm": 0.06106347590684891, |
| "learning_rate": 0.0001506145251396648, |
| "loss": 1.0165, |
| "step": 227 |
| }, |
| { |
| "epoch": 1.2666666666666666, |
| "grad_norm": 0.05469049885869026, |
| "learning_rate": 0.000150391061452514, |
| "loss": 0.779, |
| "step": 228 |
| }, |
| { |
| "epoch": 1.2722222222222221, |
| "grad_norm": 0.057001929730176926, |
| "learning_rate": 0.00015016759776536313, |
| "loss": 0.9395, |
| "step": 229 |
| }, |
| { |
| "epoch": 1.2777777777777777, |
| "grad_norm": 0.05202470347285271, |
| "learning_rate": 0.00014994413407821232, |
| "loss": 0.8192, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.2833333333333332, |
| "grad_norm": 0.05119827017188072, |
| "learning_rate": 0.00014972067039106145, |
| "loss": 0.8434, |
| "step": 231 |
| }, |
| { |
| "epoch": 1.2888888888888888, |
| "grad_norm": 0.05172817409038544, |
| "learning_rate": 0.00014949720670391064, |
| "loss": 0.853, |
| "step": 232 |
| }, |
| { |
| "epoch": 1.2944444444444445, |
| "grad_norm": 0.05262301489710808, |
| "learning_rate": 0.00014927374301675978, |
| "loss": 1.0772, |
| "step": 233 |
| }, |
| { |
| "epoch": 1.3, |
| "grad_norm": 0.051478542387485504, |
| "learning_rate": 0.00014905027932960897, |
| "loss": 0.8971, |
| "step": 234 |
| }, |
| { |
| "epoch": 1.3055555555555556, |
| "grad_norm": 0.05555481091141701, |
| "learning_rate": 0.0001488268156424581, |
| "loss": 0.8355, |
| "step": 235 |
| }, |
| { |
| "epoch": 1.3111111111111111, |
| "grad_norm": 0.053314127027988434, |
| "learning_rate": 0.0001486033519553073, |
| "loss": 0.9963, |
| "step": 236 |
| }, |
| { |
| "epoch": 1.3166666666666667, |
| "grad_norm": 0.0556037463247776, |
| "learning_rate": 0.00014837988826815643, |
| "loss": 0.8912, |
| "step": 237 |
| }, |
| { |
| "epoch": 1.3222222222222222, |
| "grad_norm": 0.05510379374027252, |
| "learning_rate": 0.00014815642458100562, |
| "loss": 0.9354, |
| "step": 238 |
| }, |
| { |
| "epoch": 1.3277777777777777, |
| "grad_norm": 0.051465388387441635, |
| "learning_rate": 0.00014793296089385475, |
| "loss": 0.9561, |
| "step": 239 |
| }, |
| { |
| "epoch": 1.3333333333333333, |
| "grad_norm": 0.0541684553027153, |
| "learning_rate": 0.00014770949720670394, |
| "loss": 0.9456, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.338888888888889, |
| "grad_norm": 0.05365219712257385, |
| "learning_rate": 0.00014748603351955308, |
| "loss": 0.8872, |
| "step": 241 |
| }, |
| { |
| "epoch": 1.3444444444444446, |
| "grad_norm": 0.05588521808385849, |
| "learning_rate": 0.00014726256983240224, |
| "loss": 0.9394, |
| "step": 242 |
| }, |
| { |
| "epoch": 1.35, |
| "grad_norm": 0.05516066029667854, |
| "learning_rate": 0.0001470391061452514, |
| "loss": 0.8989, |
| "step": 243 |
| }, |
| { |
| "epoch": 1.3555555555555556, |
| "grad_norm": 0.051985450088977814, |
| "learning_rate": 0.00014681564245810056, |
| "loss": 0.8705, |
| "step": 244 |
| }, |
| { |
| "epoch": 1.3611111111111112, |
| "grad_norm": 0.059811756014823914, |
| "learning_rate": 0.00014659217877094973, |
| "loss": 0.8617, |
| "step": 245 |
| }, |
| { |
| "epoch": 1.3666666666666667, |
| "grad_norm": 0.05300361290574074, |
| "learning_rate": 0.0001463687150837989, |
| "loss": 0.92, |
| "step": 246 |
| }, |
| { |
| "epoch": 1.3722222222222222, |
| "grad_norm": 0.05296464264392853, |
| "learning_rate": 0.00014614525139664805, |
| "loss": 0.9194, |
| "step": 247 |
| }, |
| { |
| "epoch": 1.3777777777777778, |
| "grad_norm": 0.05771278962492943, |
| "learning_rate": 0.0001459217877094972, |
| "loss": 0.9094, |
| "step": 248 |
| }, |
| { |
| "epoch": 1.3833333333333333, |
| "grad_norm": 0.05478692054748535, |
| "learning_rate": 0.00014569832402234638, |
| "loss": 0.9012, |
| "step": 249 |
| }, |
| { |
| "epoch": 1.3888888888888888, |
| "grad_norm": 0.06128135323524475, |
| "learning_rate": 0.00014547486033519554, |
| "loss": 0.8545, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.3944444444444444, |
| "grad_norm": 0.05041109025478363, |
| "learning_rate": 0.0001452513966480447, |
| "loss": 0.9212, |
| "step": 251 |
| }, |
| { |
| "epoch": 1.4, |
| "grad_norm": 0.06289547681808472, |
| "learning_rate": 0.00014502793296089386, |
| "loss": 0.8682, |
| "step": 252 |
| }, |
| { |
| "epoch": 1.4055555555555554, |
| "grad_norm": 0.05629614740610123, |
| "learning_rate": 0.00014480446927374302, |
| "loss": 0.9318, |
| "step": 253 |
| }, |
| { |
| "epoch": 1.411111111111111, |
| "grad_norm": 0.051047634333372116, |
| "learning_rate": 0.0001445810055865922, |
| "loss": 0.8505, |
| "step": 254 |
| }, |
| { |
| "epoch": 1.4166666666666667, |
| "grad_norm": 0.05612725391983986, |
| "learning_rate": 0.00014435754189944135, |
| "loss": 1.0799, |
| "step": 255 |
| }, |
| { |
| "epoch": 1.4222222222222223, |
| "grad_norm": 0.06313491612672806, |
| "learning_rate": 0.0001441340782122905, |
| "loss": 0.9158, |
| "step": 256 |
| }, |
| { |
| "epoch": 1.4277777777777778, |
| "grad_norm": 0.06289134919643402, |
| "learning_rate": 0.00014391061452513967, |
| "loss": 1.0624, |
| "step": 257 |
| }, |
| { |
| "epoch": 1.4333333333333333, |
| "grad_norm": 0.055123552680015564, |
| "learning_rate": 0.00014368715083798884, |
| "loss": 0.9525, |
| "step": 258 |
| }, |
| { |
| "epoch": 1.4388888888888889, |
| "grad_norm": 0.0679507851600647, |
| "learning_rate": 0.000143463687150838, |
| "loss": 0.9827, |
| "step": 259 |
| }, |
| { |
| "epoch": 1.4444444444444444, |
| "grad_norm": 0.05274106189608574, |
| "learning_rate": 0.00014324022346368716, |
| "loss": 0.9233, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.45, |
| "grad_norm": 0.05777543783187866, |
| "learning_rate": 0.00014301675977653632, |
| "loss": 0.9792, |
| "step": 261 |
| }, |
| { |
| "epoch": 1.4555555555555555, |
| "grad_norm": 0.06628414243459702, |
| "learning_rate": 0.00014279329608938549, |
| "loss": 1.0126, |
| "step": 262 |
| }, |
| { |
| "epoch": 1.4611111111111112, |
| "grad_norm": 0.056076258420944214, |
| "learning_rate": 0.00014256983240223465, |
| "loss": 0.8909, |
| "step": 263 |
| }, |
| { |
| "epoch": 1.4666666666666668, |
| "grad_norm": 0.05960844084620476, |
| "learning_rate": 0.0001423463687150838, |
| "loss": 0.9109, |
| "step": 264 |
| }, |
| { |
| "epoch": 1.4722222222222223, |
| "grad_norm": 0.056989919394254684, |
| "learning_rate": 0.00014212290502793297, |
| "loss": 0.9186, |
| "step": 265 |
| }, |
| { |
| "epoch": 1.4777777777777779, |
| "grad_norm": 0.05334057658910751, |
| "learning_rate": 0.00014189944134078214, |
| "loss": 0.7592, |
| "step": 266 |
| }, |
| { |
| "epoch": 1.4833333333333334, |
| "grad_norm": 0.05288785323500633, |
| "learning_rate": 0.0001416759776536313, |
| "loss": 0.8798, |
| "step": 267 |
| }, |
| { |
| "epoch": 1.488888888888889, |
| "grad_norm": 0.05396222323179245, |
| "learning_rate": 0.00014145251396648046, |
| "loss": 0.8969, |
| "step": 268 |
| }, |
| { |
| "epoch": 1.4944444444444445, |
| "grad_norm": 0.06071707606315613, |
| "learning_rate": 0.00014122905027932962, |
| "loss": 0.9187, |
| "step": 269 |
| }, |
| { |
| "epoch": 1.5, |
| "grad_norm": 0.06300662457942963, |
| "learning_rate": 0.00014100558659217878, |
| "loss": 0.8843, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.5055555555555555, |
| "grad_norm": 0.051903385668992996, |
| "learning_rate": 0.00014078212290502795, |
| "loss": 0.8527, |
| "step": 271 |
| }, |
| { |
| "epoch": 1.511111111111111, |
| "grad_norm": 0.05332471430301666, |
| "learning_rate": 0.0001405586592178771, |
| "loss": 1.0175, |
| "step": 272 |
| }, |
| { |
| "epoch": 1.5166666666666666, |
| "grad_norm": 0.05092576891183853, |
| "learning_rate": 0.00014033519553072627, |
| "loss": 0.8844, |
| "step": 273 |
| }, |
| { |
| "epoch": 1.5222222222222221, |
| "grad_norm": 0.056000836193561554, |
| "learning_rate": 0.00014011173184357543, |
| "loss": 0.8721, |
| "step": 274 |
| }, |
| { |
| "epoch": 1.5277777777777777, |
| "grad_norm": 0.06461361795663834, |
| "learning_rate": 0.0001398882681564246, |
| "loss": 0.9402, |
| "step": 275 |
| }, |
| { |
| "epoch": 1.5333333333333332, |
| "grad_norm": 0.05129456892609596, |
| "learning_rate": 0.00013966480446927376, |
| "loss": 0.845, |
| "step": 276 |
| }, |
| { |
| "epoch": 1.5388888888888888, |
| "grad_norm": 0.05994970351457596, |
| "learning_rate": 0.00013944134078212292, |
| "loss": 0.9381, |
| "step": 277 |
| }, |
| { |
| "epoch": 1.5444444444444443, |
| "grad_norm": 0.0574822761118412, |
| "learning_rate": 0.00013921787709497208, |
| "loss": 0.8692, |
| "step": 278 |
| }, |
| { |
| "epoch": 1.55, |
| "grad_norm": 0.06318029761314392, |
| "learning_rate": 0.00013899441340782125, |
| "loss": 0.8749, |
| "step": 279 |
| }, |
| { |
| "epoch": 1.5555555555555556, |
| "grad_norm": 0.05438155308365822, |
| "learning_rate": 0.0001387709497206704, |
| "loss": 0.9355, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.5611111111111111, |
| "grad_norm": 0.053403034806251526, |
| "learning_rate": 0.00013854748603351957, |
| "loss": 1.0251, |
| "step": 281 |
| }, |
| { |
| "epoch": 1.5666666666666667, |
| "grad_norm": 0.05166739597916603, |
| "learning_rate": 0.00013832402234636873, |
| "loss": 0.8564, |
| "step": 282 |
| }, |
| { |
| "epoch": 1.5722222222222222, |
| "grad_norm": 0.05890066921710968, |
| "learning_rate": 0.0001381005586592179, |
| "loss": 0.9476, |
| "step": 283 |
| }, |
| { |
| "epoch": 1.5777777777777777, |
| "grad_norm": 0.05818413943052292, |
| "learning_rate": 0.00013787709497206706, |
| "loss": 1.0245, |
| "step": 284 |
| }, |
| { |
| "epoch": 1.5833333333333335, |
| "grad_norm": 0.05561775341629982, |
| "learning_rate": 0.00013765363128491622, |
| "loss": 0.9022, |
| "step": 285 |
| }, |
| { |
| "epoch": 1.588888888888889, |
| "grad_norm": 0.07865152508020401, |
| "learning_rate": 0.00013743016759776538, |
| "loss": 0.8565, |
| "step": 286 |
| }, |
| { |
| "epoch": 1.5944444444444446, |
| "grad_norm": 0.06238972768187523, |
| "learning_rate": 0.00013720670391061454, |
| "loss": 0.9641, |
| "step": 287 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 0.06291598081588745, |
| "learning_rate": 0.0001369832402234637, |
| "loss": 0.8763, |
| "step": 288 |
| }, |
| { |
| "epoch": 1.6055555555555556, |
| "grad_norm": 0.05643616244196892, |
| "learning_rate": 0.00013675977653631284, |
| "loss": 0.928, |
| "step": 289 |
| }, |
| { |
| "epoch": 1.6111111111111112, |
| "grad_norm": 0.0563821904361248, |
| "learning_rate": 0.00013653631284916203, |
| "loss": 1.0347, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.6166666666666667, |
| "grad_norm": 0.05175093561410904, |
| "learning_rate": 0.00013631284916201117, |
| "loss": 0.9617, |
| "step": 291 |
| }, |
| { |
| "epoch": 1.6222222222222222, |
| "grad_norm": 0.06567844748497009, |
| "learning_rate": 0.00013608938547486036, |
| "loss": 0.9322, |
| "step": 292 |
| }, |
| { |
| "epoch": 1.6277777777777778, |
| "grad_norm": 0.0599331296980381, |
| "learning_rate": 0.0001358659217877095, |
| "loss": 0.8131, |
| "step": 293 |
| }, |
| { |
| "epoch": 1.6333333333333333, |
| "grad_norm": 0.062242232263088226, |
| "learning_rate": 0.00013564245810055868, |
| "loss": 0.8291, |
| "step": 294 |
| }, |
| { |
| "epoch": 1.6388888888888888, |
| "grad_norm": 0.060543399304151535, |
| "learning_rate": 0.00013541899441340782, |
| "loss": 0.979, |
| "step": 295 |
| }, |
| { |
| "epoch": 1.6444444444444444, |
| "grad_norm": 0.0498482882976532, |
| "learning_rate": 0.00013519553072625698, |
| "loss": 0.8559, |
| "step": 296 |
| }, |
| { |
| "epoch": 1.65, |
| "grad_norm": 0.057573337107896805, |
| "learning_rate": 0.00013497206703910614, |
| "loss": 0.9465, |
| "step": 297 |
| }, |
| { |
| "epoch": 1.6555555555555554, |
| "grad_norm": 0.06115110218524933, |
| "learning_rate": 0.0001347486033519553, |
| "loss": 0.9466, |
| "step": 298 |
| }, |
| { |
| "epoch": 1.661111111111111, |
| "grad_norm": 0.06601329892873764, |
| "learning_rate": 0.00013452513966480446, |
| "loss": 0.7758, |
| "step": 299 |
| }, |
| { |
| "epoch": 1.6666666666666665, |
| "grad_norm": 0.06447427719831467, |
| "learning_rate": 0.00013430167597765363, |
| "loss": 0.8847, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.6722222222222223, |
| "grad_norm": 0.05786776542663574, |
| "learning_rate": 0.0001340782122905028, |
| "loss": 0.9338, |
| "step": 301 |
| }, |
| { |
| "epoch": 1.6777777777777778, |
| "grad_norm": 0.0608854703605175, |
| "learning_rate": 0.00013385474860335195, |
| "loss": 0.895, |
| "step": 302 |
| }, |
| { |
| "epoch": 1.6833333333333333, |
| "grad_norm": 0.07569009065628052, |
| "learning_rate": 0.00013363128491620111, |
| "loss": 1.1542, |
| "step": 303 |
| }, |
| { |
| "epoch": 1.6888888888888889, |
| "grad_norm": 0.05544517934322357, |
| "learning_rate": 0.00013340782122905028, |
| "loss": 0.8586, |
| "step": 304 |
| }, |
| { |
| "epoch": 1.6944444444444444, |
| "grad_norm": 0.06403032690286636, |
| "learning_rate": 0.00013318435754189944, |
| "loss": 1.04, |
| "step": 305 |
| }, |
| { |
| "epoch": 1.7, |
| "grad_norm": 0.054366301745176315, |
| "learning_rate": 0.0001329608938547486, |
| "loss": 0.8516, |
| "step": 306 |
| }, |
| { |
| "epoch": 1.7055555555555557, |
| "grad_norm": 0.053673211485147476, |
| "learning_rate": 0.00013273743016759776, |
| "loss": 0.9132, |
| "step": 307 |
| }, |
| { |
| "epoch": 1.7111111111111112, |
| "grad_norm": 0.05420944094657898, |
| "learning_rate": 0.00013251396648044693, |
| "loss": 0.8347, |
| "step": 308 |
| }, |
| { |
| "epoch": 1.7166666666666668, |
| "grad_norm": 0.05830496922135353, |
| "learning_rate": 0.0001322905027932961, |
| "loss": 0.9329, |
| "step": 309 |
| }, |
| { |
| "epoch": 1.7222222222222223, |
| "grad_norm": 0.06897345185279846, |
| "learning_rate": 0.00013206703910614525, |
| "loss": 1.0272, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.7277777777777779, |
| "grad_norm": 0.056916285306215286, |
| "learning_rate": 0.0001318435754189944, |
| "loss": 0.8287, |
| "step": 311 |
| }, |
| { |
| "epoch": 1.7333333333333334, |
| "grad_norm": 0.06664946675300598, |
| "learning_rate": 0.00013162011173184358, |
| "loss": 0.8709, |
| "step": 312 |
| }, |
| { |
| "epoch": 1.738888888888889, |
| "grad_norm": 0.06383366882801056, |
| "learning_rate": 0.00013139664804469274, |
| "loss": 0.8616, |
| "step": 313 |
| }, |
| { |
| "epoch": 1.7444444444444445, |
| "grad_norm": 0.05588764324784279, |
| "learning_rate": 0.0001311731843575419, |
| "loss": 0.9632, |
| "step": 314 |
| }, |
| { |
| "epoch": 1.75, |
| "grad_norm": 0.07679787278175354, |
| "learning_rate": 0.00013094972067039106, |
| "loss": 1.0148, |
| "step": 315 |
| }, |
| { |
| "epoch": 1.7555555555555555, |
| "grad_norm": 0.05762128904461861, |
| "learning_rate": 0.00013072625698324022, |
| "loss": 0.8947, |
| "step": 316 |
| }, |
| { |
| "epoch": 1.761111111111111, |
| "grad_norm": 0.058024149388074875, |
| "learning_rate": 0.0001305027932960894, |
| "loss": 0.8333, |
| "step": 317 |
| }, |
| { |
| "epoch": 1.7666666666666666, |
| "grad_norm": 0.05518782511353493, |
| "learning_rate": 0.00013027932960893855, |
| "loss": 0.8047, |
| "step": 318 |
| }, |
| { |
| "epoch": 1.7722222222222221, |
| "grad_norm": 0.06511031091213226, |
| "learning_rate": 0.0001300558659217877, |
| "loss": 0.9108, |
| "step": 319 |
| }, |
| { |
| "epoch": 1.7777777777777777, |
| "grad_norm": 0.05511653795838356, |
| "learning_rate": 0.00012983240223463687, |
| "loss": 0.8318, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.7833333333333332, |
| "grad_norm": 0.06081447750329971, |
| "learning_rate": 0.00012960893854748604, |
| "loss": 0.9912, |
| "step": 321 |
| }, |
| { |
| "epoch": 1.7888888888888888, |
| "grad_norm": 0.05632082372903824, |
| "learning_rate": 0.0001293854748603352, |
| "loss": 0.8826, |
| "step": 322 |
| }, |
| { |
| "epoch": 1.7944444444444443, |
| "grad_norm": 0.06698265671730042, |
| "learning_rate": 0.00012916201117318436, |
| "loss": 0.8744, |
| "step": 323 |
| }, |
| { |
| "epoch": 1.8, |
| "grad_norm": 0.055358611047267914, |
| "learning_rate": 0.00012893854748603352, |
| "loss": 0.8341, |
| "step": 324 |
| }, |
| { |
| "epoch": 1.8055555555555556, |
| "grad_norm": 0.05644188076257706, |
| "learning_rate": 0.00012871508379888269, |
| "loss": 0.8476, |
| "step": 325 |
| }, |
| { |
| "epoch": 1.8111111111111111, |
| "grad_norm": 0.06225137785077095, |
| "learning_rate": 0.00012849162011173185, |
| "loss": 0.8155, |
| "step": 326 |
| }, |
| { |
| "epoch": 1.8166666666666667, |
| "grad_norm": 0.0710151419043541, |
| "learning_rate": 0.000128268156424581, |
| "loss": 0.8062, |
| "step": 327 |
| }, |
| { |
| "epoch": 1.8222222222222222, |
| "grad_norm": 0.0803740844130516, |
| "learning_rate": 0.00012804469273743017, |
| "loss": 0.8069, |
| "step": 328 |
| }, |
| { |
| "epoch": 1.8277777777777777, |
| "grad_norm": 0.060480996966362, |
| "learning_rate": 0.00012782122905027933, |
| "loss": 0.983, |
| "step": 329 |
| }, |
| { |
| "epoch": 1.8333333333333335, |
| "grad_norm": 0.057829972356557846, |
| "learning_rate": 0.0001275977653631285, |
| "loss": 0.9551, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.838888888888889, |
| "grad_norm": 0.05823640152812004, |
| "learning_rate": 0.00012737430167597766, |
| "loss": 0.8379, |
| "step": 331 |
| }, |
| { |
| "epoch": 1.8444444444444446, |
| "grad_norm": 0.06028196960687637, |
| "learning_rate": 0.00012715083798882682, |
| "loss": 0.9486, |
| "step": 332 |
| }, |
| { |
| "epoch": 1.85, |
| "grad_norm": 0.06556443870067596, |
| "learning_rate": 0.00012692737430167598, |
| "loss": 0.9407, |
| "step": 333 |
| }, |
| { |
| "epoch": 1.8555555555555556, |
| "grad_norm": 0.05368395894765854, |
| "learning_rate": 0.00012670391061452515, |
| "loss": 0.8535, |
| "step": 334 |
| }, |
| { |
| "epoch": 1.8611111111111112, |
| "grad_norm": 0.059937816113233566, |
| "learning_rate": 0.0001264804469273743, |
| "loss": 1.0518, |
| "step": 335 |
| }, |
| { |
| "epoch": 1.8666666666666667, |
| "grad_norm": 0.05733738839626312, |
| "learning_rate": 0.00012625698324022347, |
| "loss": 0.8455, |
| "step": 336 |
| }, |
| { |
| "epoch": 1.8722222222222222, |
| "grad_norm": 0.0627962127327919, |
| "learning_rate": 0.00012603351955307263, |
| "loss": 0.8769, |
| "step": 337 |
| }, |
| { |
| "epoch": 1.8777777777777778, |
| "grad_norm": 0.060158621519804, |
| "learning_rate": 0.0001258100558659218, |
| "loss": 1.0327, |
| "step": 338 |
| }, |
| { |
| "epoch": 1.8833333333333333, |
| "grad_norm": 0.061249684542417526, |
| "learning_rate": 0.00012558659217877096, |
| "loss": 1.0201, |
| "step": 339 |
| }, |
| { |
| "epoch": 1.8888888888888888, |
| "grad_norm": 0.06041628494858742, |
| "learning_rate": 0.00012536312849162012, |
| "loss": 0.9178, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.8944444444444444, |
| "grad_norm": 0.052881740033626556, |
| "learning_rate": 0.00012513966480446928, |
| "loss": 0.8148, |
| "step": 341 |
| }, |
| { |
| "epoch": 1.9, |
| "grad_norm": 0.05886710062623024, |
| "learning_rate": 0.00012491620111731845, |
| "loss": 0.9192, |
| "step": 342 |
| }, |
| { |
| "epoch": 1.9055555555555554, |
| "grad_norm": 0.06002869829535484, |
| "learning_rate": 0.0001246927374301676, |
| "loss": 1.0299, |
| "step": 343 |
| }, |
| { |
| "epoch": 1.911111111111111, |
| "grad_norm": 0.06028445437550545, |
| "learning_rate": 0.00012446927374301677, |
| "loss": 0.8417, |
| "step": 344 |
| }, |
| { |
| "epoch": 1.9166666666666665, |
| "grad_norm": 0.054097313433885574, |
| "learning_rate": 0.00012424581005586593, |
| "loss": 0.9451, |
| "step": 345 |
| }, |
| { |
| "epoch": 1.9222222222222223, |
| "grad_norm": 0.06041548028588295, |
| "learning_rate": 0.0001240223463687151, |
| "loss": 0.9961, |
| "step": 346 |
| }, |
| { |
| "epoch": 1.9277777777777778, |
| "grad_norm": 0.06818769127130508, |
| "learning_rate": 0.00012379888268156426, |
| "loss": 0.9391, |
| "step": 347 |
| }, |
| { |
| "epoch": 1.9333333333333333, |
| "grad_norm": 0.05610937625169754, |
| "learning_rate": 0.0001235754189944134, |
| "loss": 0.9148, |
| "step": 348 |
| }, |
| { |
| "epoch": 1.9388888888888889, |
| "grad_norm": 0.053130947053432465, |
| "learning_rate": 0.00012335195530726258, |
| "loss": 0.8921, |
| "step": 349 |
| }, |
| { |
| "epoch": 1.9444444444444444, |
| "grad_norm": 0.054186947643756866, |
| "learning_rate": 0.00012312849162011172, |
| "loss": 0.8349, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.95, |
| "grad_norm": 0.05862600356340408, |
| "learning_rate": 0.0001229050279329609, |
| "loss": 0.9988, |
| "step": 351 |
| }, |
| { |
| "epoch": 1.9555555555555557, |
| "grad_norm": 0.05840226262807846, |
| "learning_rate": 0.00012268156424581004, |
| "loss": 0.9984, |
| "step": 352 |
| }, |
| { |
| "epoch": 1.9611111111111112, |
| "grad_norm": 0.05921921879053116, |
| "learning_rate": 0.00012245810055865923, |
| "loss": 0.8881, |
| "step": 353 |
| }, |
| { |
| "epoch": 1.9666666666666668, |
| "grad_norm": 0.05658441781997681, |
| "learning_rate": 0.00012223463687150837, |
| "loss": 1.0116, |
| "step": 354 |
| }, |
| { |
| "epoch": 1.9722222222222223, |
| "grad_norm": 0.05988461151719093, |
| "learning_rate": 0.00012201117318435756, |
| "loss": 1.0666, |
| "step": 355 |
| }, |
| { |
| "epoch": 1.9777777777777779, |
| "grad_norm": 0.06414967775344849, |
| "learning_rate": 0.0001217877094972067, |
| "loss": 0.8861, |
| "step": 356 |
| }, |
| { |
| "epoch": 1.9833333333333334, |
| "grad_norm": 0.05677973851561546, |
| "learning_rate": 0.00012156424581005588, |
| "loss": 0.9792, |
| "step": 357 |
| }, |
| { |
| "epoch": 1.988888888888889, |
| "grad_norm": 0.05779249966144562, |
| "learning_rate": 0.00012134078212290503, |
| "loss": 0.7463, |
| "step": 358 |
| }, |
| { |
| "epoch": 1.9944444444444445, |
| "grad_norm": 0.05512448772788048, |
| "learning_rate": 0.0001211173184357542, |
| "loss": 0.862, |
| "step": 359 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.05796084180474281, |
| "learning_rate": 0.00012089385474860335, |
| "loss": 0.8388, |
| "step": 360 |
| }, |
| { |
| "epoch": 2.0055555555555555, |
| "grad_norm": 0.05795562267303467, |
| "learning_rate": 0.00012067039106145253, |
| "loss": 0.8016, |
| "step": 361 |
| }, |
| { |
| "epoch": 2.011111111111111, |
| "grad_norm": 0.050691671669483185, |
| "learning_rate": 0.00012044692737430168, |
| "loss": 0.7879, |
| "step": 362 |
| }, |
| { |
| "epoch": 2.0166666666666666, |
| "grad_norm": 0.05370429530739784, |
| "learning_rate": 0.00012022346368715085, |
| "loss": 0.9062, |
| "step": 363 |
| }, |
| { |
| "epoch": 2.022222222222222, |
| "grad_norm": 0.058020543307065964, |
| "learning_rate": 0.00012, |
| "loss": 0.9786, |
| "step": 364 |
| }, |
| { |
| "epoch": 2.0277777777777777, |
| "grad_norm": 0.058133818209171295, |
| "learning_rate": 0.00011977653631284918, |
| "loss": 0.8352, |
| "step": 365 |
| }, |
| { |
| "epoch": 2.033333333333333, |
| "grad_norm": 0.06408903002738953, |
| "learning_rate": 0.00011955307262569833, |
| "loss": 0.7529, |
| "step": 366 |
| }, |
| { |
| "epoch": 2.0388888888888888, |
| "grad_norm": 0.05270511284470558, |
| "learning_rate": 0.0001193296089385475, |
| "loss": 0.8984, |
| "step": 367 |
| }, |
| { |
| "epoch": 2.0444444444444443, |
| "grad_norm": 0.06286352872848511, |
| "learning_rate": 0.00011910614525139665, |
| "loss": 0.8179, |
| "step": 368 |
| }, |
| { |
| "epoch": 2.05, |
| "grad_norm": 0.057727862149477005, |
| "learning_rate": 0.00011888268156424583, |
| "loss": 0.9198, |
| "step": 369 |
| }, |
| { |
| "epoch": 2.0555555555555554, |
| "grad_norm": 0.054201334714889526, |
| "learning_rate": 0.00011865921787709498, |
| "loss": 0.9529, |
| "step": 370 |
| }, |
| { |
| "epoch": 2.061111111111111, |
| "grad_norm": 0.0592007115483284, |
| "learning_rate": 0.00011843575418994415, |
| "loss": 0.9469, |
| "step": 371 |
| }, |
| { |
| "epoch": 2.066666666666667, |
| "grad_norm": 0.059728022664785385, |
| "learning_rate": 0.0001182122905027933, |
| "loss": 0.8239, |
| "step": 372 |
| }, |
| { |
| "epoch": 2.0722222222222224, |
| "grad_norm": 0.059505872428417206, |
| "learning_rate": 0.00011798882681564248, |
| "loss": 0.9161, |
| "step": 373 |
| }, |
| { |
| "epoch": 2.077777777777778, |
| "grad_norm": 0.05772401764988899, |
| "learning_rate": 0.00011776536312849163, |
| "loss": 0.8978, |
| "step": 374 |
| }, |
| { |
| "epoch": 2.0833333333333335, |
| "grad_norm": 0.05417037755250931, |
| "learning_rate": 0.0001175418994413408, |
| "loss": 0.8191, |
| "step": 375 |
| }, |
| { |
| "epoch": 2.088888888888889, |
| "grad_norm": 0.056001752614974976, |
| "learning_rate": 0.00011731843575418995, |
| "loss": 0.8713, |
| "step": 376 |
| }, |
| { |
| "epoch": 2.0944444444444446, |
| "grad_norm": 0.051353681832551956, |
| "learning_rate": 0.00011709497206703913, |
| "loss": 0.9181, |
| "step": 377 |
| }, |
| { |
| "epoch": 2.1, |
| "grad_norm": 0.06121430918574333, |
| "learning_rate": 0.00011687150837988828, |
| "loss": 0.853, |
| "step": 378 |
| }, |
| { |
| "epoch": 2.1055555555555556, |
| "grad_norm": 0.06297145783901215, |
| "learning_rate": 0.00011664804469273745, |
| "loss": 0.8925, |
| "step": 379 |
| }, |
| { |
| "epoch": 2.111111111111111, |
| "grad_norm": 0.05027139186859131, |
| "learning_rate": 0.0001164245810055866, |
| "loss": 0.8682, |
| "step": 380 |
| }, |
| { |
| "epoch": 2.1166666666666667, |
| "grad_norm": 0.056529451161623, |
| "learning_rate": 0.00011620111731843578, |
| "loss": 0.788, |
| "step": 381 |
| }, |
| { |
| "epoch": 2.1222222222222222, |
| "grad_norm": 0.06378895789384842, |
| "learning_rate": 0.00011597765363128493, |
| "loss": 0.8812, |
| "step": 382 |
| }, |
| { |
| "epoch": 2.1277777777777778, |
| "grad_norm": 0.057098448276519775, |
| "learning_rate": 0.00011575418994413407, |
| "loss": 0.8691, |
| "step": 383 |
| }, |
| { |
| "epoch": 2.1333333333333333, |
| "grad_norm": 0.056282833218574524, |
| "learning_rate": 0.00011553072625698325, |
| "loss": 0.8211, |
| "step": 384 |
| }, |
| { |
| "epoch": 2.138888888888889, |
| "grad_norm": 0.057828135788440704, |
| "learning_rate": 0.0001153072625698324, |
| "loss": 0.9009, |
| "step": 385 |
| }, |
| { |
| "epoch": 2.1444444444444444, |
| "grad_norm": 0.06760100275278091, |
| "learning_rate": 0.00011508379888268157, |
| "loss": 0.9088, |
| "step": 386 |
| }, |
| { |
| "epoch": 2.15, |
| "grad_norm": 0.06131958216428757, |
| "learning_rate": 0.00011486033519553072, |
| "loss": 0.893, |
| "step": 387 |
| }, |
| { |
| "epoch": 2.1555555555555554, |
| "grad_norm": 0.06303835660219193, |
| "learning_rate": 0.0001146368715083799, |
| "loss": 0.8712, |
| "step": 388 |
| }, |
| { |
| "epoch": 2.161111111111111, |
| "grad_norm": 0.07197017967700958, |
| "learning_rate": 0.00011441340782122905, |
| "loss": 0.7844, |
| "step": 389 |
| }, |
| { |
| "epoch": 2.1666666666666665, |
| "grad_norm": 0.07960271835327148, |
| "learning_rate": 0.00011418994413407822, |
| "loss": 1.105, |
| "step": 390 |
| }, |
| { |
| "epoch": 2.172222222222222, |
| "grad_norm": 0.061151616275310516, |
| "learning_rate": 0.00011396648044692737, |
| "loss": 0.9528, |
| "step": 391 |
| }, |
| { |
| "epoch": 2.1777777777777776, |
| "grad_norm": 0.06251414120197296, |
| "learning_rate": 0.00011374301675977655, |
| "loss": 0.9686, |
| "step": 392 |
| }, |
| { |
| "epoch": 2.183333333333333, |
| "grad_norm": 0.05882757902145386, |
| "learning_rate": 0.0001135195530726257, |
| "loss": 0.8891, |
| "step": 393 |
| }, |
| { |
| "epoch": 2.188888888888889, |
| "grad_norm": 0.06422755867242813, |
| "learning_rate": 0.00011329608938547487, |
| "loss": 0.8485, |
| "step": 394 |
| }, |
| { |
| "epoch": 2.1944444444444446, |
| "grad_norm": 0.06758706271648407, |
| "learning_rate": 0.00011307262569832402, |
| "loss": 1.0025, |
| "step": 395 |
| }, |
| { |
| "epoch": 2.2, |
| "grad_norm": 0.05804925039410591, |
| "learning_rate": 0.0001128491620111732, |
| "loss": 0.9549, |
| "step": 396 |
| }, |
| { |
| "epoch": 2.2055555555555557, |
| "grad_norm": 0.0643215924501419, |
| "learning_rate": 0.00011262569832402235, |
| "loss": 0.9752, |
| "step": 397 |
| }, |
| { |
| "epoch": 2.2111111111111112, |
| "grad_norm": 0.07827122509479523, |
| "learning_rate": 0.00011240223463687152, |
| "loss": 0.7972, |
| "step": 398 |
| }, |
| { |
| "epoch": 2.216666666666667, |
| "grad_norm": 0.06952020525932312, |
| "learning_rate": 0.00011217877094972067, |
| "loss": 0.8841, |
| "step": 399 |
| }, |
| { |
| "epoch": 2.2222222222222223, |
| "grad_norm": 0.06420132517814636, |
| "learning_rate": 0.00011195530726256985, |
| "loss": 0.9259, |
| "step": 400 |
| }, |
| { |
| "epoch": 2.227777777777778, |
| "grad_norm": 0.06278407573699951, |
| "learning_rate": 0.000111731843575419, |
| "loss": 0.8838, |
| "step": 401 |
| }, |
| { |
| "epoch": 2.2333333333333334, |
| "grad_norm": 0.06624957174062729, |
| "learning_rate": 0.00011150837988826817, |
| "loss": 0.8352, |
| "step": 402 |
| }, |
| { |
| "epoch": 2.238888888888889, |
| "grad_norm": 0.0669984444975853, |
| "learning_rate": 0.00011128491620111732, |
| "loss": 1.096, |
| "step": 403 |
| }, |
| { |
| "epoch": 2.2444444444444445, |
| "grad_norm": 0.0525604784488678, |
| "learning_rate": 0.0001110614525139665, |
| "loss": 0.8419, |
| "step": 404 |
| }, |
| { |
| "epoch": 2.25, |
| "grad_norm": 0.06767426431179047, |
| "learning_rate": 0.00011083798882681565, |
| "loss": 0.8489, |
| "step": 405 |
| }, |
| { |
| "epoch": 2.2555555555555555, |
| "grad_norm": 0.06003541871905327, |
| "learning_rate": 0.00011061452513966482, |
| "loss": 0.8687, |
| "step": 406 |
| }, |
| { |
| "epoch": 2.261111111111111, |
| "grad_norm": 0.058969881385564804, |
| "learning_rate": 0.00011039106145251397, |
| "loss": 0.9746, |
| "step": 407 |
| }, |
| { |
| "epoch": 2.2666666666666666, |
| "grad_norm": 0.06415044516324997, |
| "learning_rate": 0.00011016759776536315, |
| "loss": 0.7584, |
| "step": 408 |
| }, |
| { |
| "epoch": 2.272222222222222, |
| "grad_norm": 0.05901414901018143, |
| "learning_rate": 0.0001099441340782123, |
| "loss": 0.9582, |
| "step": 409 |
| }, |
| { |
| "epoch": 2.2777777777777777, |
| "grad_norm": 0.05653878673911095, |
| "learning_rate": 0.00010972067039106147, |
| "loss": 0.9437, |
| "step": 410 |
| }, |
| { |
| "epoch": 2.283333333333333, |
| "grad_norm": 0.09101811051368713, |
| "learning_rate": 0.00010949720670391062, |
| "loss": 0.7102, |
| "step": 411 |
| }, |
| { |
| "epoch": 2.2888888888888888, |
| "grad_norm": 0.07861984521150589, |
| "learning_rate": 0.0001092737430167598, |
| "loss": 0.9758, |
| "step": 412 |
| }, |
| { |
| "epoch": 2.2944444444444443, |
| "grad_norm": 0.06312280148267746, |
| "learning_rate": 0.00010905027932960894, |
| "loss": 0.8661, |
| "step": 413 |
| }, |
| { |
| "epoch": 2.3, |
| "grad_norm": 0.05427064001560211, |
| "learning_rate": 0.00010882681564245812, |
| "loss": 0.8831, |
| "step": 414 |
| }, |
| { |
| "epoch": 2.3055555555555554, |
| "grad_norm": 0.06113605201244354, |
| "learning_rate": 0.00010860335195530727, |
| "loss": 0.9743, |
| "step": 415 |
| }, |
| { |
| "epoch": 2.311111111111111, |
| "grad_norm": 0.06222099810838699, |
| "learning_rate": 0.00010837988826815643, |
| "loss": 0.951, |
| "step": 416 |
| }, |
| { |
| "epoch": 2.3166666666666664, |
| "grad_norm": 0.06552956253290176, |
| "learning_rate": 0.00010815642458100559, |
| "loss": 0.9082, |
| "step": 417 |
| }, |
| { |
| "epoch": 2.3222222222222224, |
| "grad_norm": 0.056746430695056915, |
| "learning_rate": 0.00010793296089385476, |
| "loss": 0.8688, |
| "step": 418 |
| }, |
| { |
| "epoch": 2.327777777777778, |
| "grad_norm": 0.06256411224603653, |
| "learning_rate": 0.00010770949720670392, |
| "loss": 0.9633, |
| "step": 419 |
| }, |
| { |
| "epoch": 2.3333333333333335, |
| "grad_norm": 0.059118397533893585, |
| "learning_rate": 0.00010748603351955308, |
| "loss": 0.8636, |
| "step": 420 |
| }, |
| { |
| "epoch": 2.338888888888889, |
| "grad_norm": 0.0635746568441391, |
| "learning_rate": 0.00010726256983240224, |
| "loss": 0.9987, |
| "step": 421 |
| }, |
| { |
| "epoch": 2.3444444444444446, |
| "grad_norm": 0.060392703860998154, |
| "learning_rate": 0.0001070391061452514, |
| "loss": 0.9516, |
| "step": 422 |
| }, |
| { |
| "epoch": 2.35, |
| "grad_norm": 0.06231021136045456, |
| "learning_rate": 0.00010681564245810057, |
| "loss": 0.8542, |
| "step": 423 |
| }, |
| { |
| "epoch": 2.3555555555555556, |
| "grad_norm": 0.06495688110589981, |
| "learning_rate": 0.00010659217877094973, |
| "loss": 0.9057, |
| "step": 424 |
| }, |
| { |
| "epoch": 2.361111111111111, |
| "grad_norm": 0.060686029493808746, |
| "learning_rate": 0.00010636871508379889, |
| "loss": 0.8741, |
| "step": 425 |
| }, |
| { |
| "epoch": 2.3666666666666667, |
| "grad_norm": 0.05688054487109184, |
| "learning_rate": 0.00010614525139664805, |
| "loss": 0.9297, |
| "step": 426 |
| }, |
| { |
| "epoch": 2.3722222222222222, |
| "grad_norm": 0.06722468882799149, |
| "learning_rate": 0.00010592178770949722, |
| "loss": 0.8538, |
| "step": 427 |
| }, |
| { |
| "epoch": 2.3777777777777778, |
| "grad_norm": 0.05857893079519272, |
| "learning_rate": 0.00010569832402234638, |
| "loss": 0.999, |
| "step": 428 |
| }, |
| { |
| "epoch": 2.3833333333333333, |
| "grad_norm": 0.07357199490070343, |
| "learning_rate": 0.00010547486033519554, |
| "loss": 0.8309, |
| "step": 429 |
| }, |
| { |
| "epoch": 2.388888888888889, |
| "grad_norm": 0.05783568322658539, |
| "learning_rate": 0.00010525139664804469, |
| "loss": 0.8526, |
| "step": 430 |
| }, |
| { |
| "epoch": 2.3944444444444444, |
| "grad_norm": 0.06645176559686661, |
| "learning_rate": 0.00010502793296089387, |
| "loss": 0.9398, |
| "step": 431 |
| }, |
| { |
| "epoch": 2.4, |
| "grad_norm": 0.058013804256916046, |
| "learning_rate": 0.00010480446927374301, |
| "loss": 0.8838, |
| "step": 432 |
| }, |
| { |
| "epoch": 2.4055555555555554, |
| "grad_norm": 0.058770034462213516, |
| "learning_rate": 0.00010458100558659219, |
| "loss": 0.8896, |
| "step": 433 |
| }, |
| { |
| "epoch": 2.411111111111111, |
| "grad_norm": 0.06524931639432907, |
| "learning_rate": 0.00010435754189944134, |
| "loss": 0.8416, |
| "step": 434 |
| }, |
| { |
| "epoch": 2.4166666666666665, |
| "grad_norm": 0.06197141110897064, |
| "learning_rate": 0.00010413407821229052, |
| "loss": 0.9319, |
| "step": 435 |
| }, |
| { |
| "epoch": 2.422222222222222, |
| "grad_norm": 0.0624687522649765, |
| "learning_rate": 0.00010391061452513966, |
| "loss": 0.9049, |
| "step": 436 |
| }, |
| { |
| "epoch": 2.4277777777777776, |
| "grad_norm": 0.056374967098236084, |
| "learning_rate": 0.00010368715083798884, |
| "loss": 0.8338, |
| "step": 437 |
| }, |
| { |
| "epoch": 2.4333333333333336, |
| "grad_norm": 0.07068292796611786, |
| "learning_rate": 0.00010346368715083799, |
| "loss": 0.8846, |
| "step": 438 |
| }, |
| { |
| "epoch": 2.438888888888889, |
| "grad_norm": 0.062448639422655106, |
| "learning_rate": 0.00010324022346368716, |
| "loss": 0.9501, |
| "step": 439 |
| }, |
| { |
| "epoch": 2.4444444444444446, |
| "grad_norm": 0.05375821515917778, |
| "learning_rate": 0.00010301675977653631, |
| "loss": 0.9341, |
| "step": 440 |
| }, |
| { |
| "epoch": 2.45, |
| "grad_norm": 0.07366905361413956, |
| "learning_rate": 0.00010279329608938548, |
| "loss": 0.9159, |
| "step": 441 |
| }, |
| { |
| "epoch": 2.4555555555555557, |
| "grad_norm": 0.06192503124475479, |
| "learning_rate": 0.00010256983240223464, |
| "loss": 0.8413, |
| "step": 442 |
| }, |
| { |
| "epoch": 2.4611111111111112, |
| "grad_norm": 0.055158581584692, |
| "learning_rate": 0.0001023463687150838, |
| "loss": 0.8213, |
| "step": 443 |
| }, |
| { |
| "epoch": 2.466666666666667, |
| "grad_norm": 0.07399091124534607, |
| "learning_rate": 0.00010212290502793296, |
| "loss": 0.7782, |
| "step": 444 |
| }, |
| { |
| "epoch": 2.4722222222222223, |
| "grad_norm": 0.06105738878250122, |
| "learning_rate": 0.00010189944134078212, |
| "loss": 0.7667, |
| "step": 445 |
| }, |
| { |
| "epoch": 2.477777777777778, |
| "grad_norm": 0.05666331201791763, |
| "learning_rate": 0.00010167597765363129, |
| "loss": 0.9552, |
| "step": 446 |
| }, |
| { |
| "epoch": 2.4833333333333334, |
| "grad_norm": 0.0822184756398201, |
| "learning_rate": 0.00010145251396648045, |
| "loss": 0.769, |
| "step": 447 |
| }, |
| { |
| "epoch": 2.488888888888889, |
| "grad_norm": 0.05613156408071518, |
| "learning_rate": 0.00010122905027932961, |
| "loss": 0.8304, |
| "step": 448 |
| }, |
| { |
| "epoch": 2.4944444444444445, |
| "grad_norm": 0.06470299512147903, |
| "learning_rate": 0.00010100558659217877, |
| "loss": 0.8313, |
| "step": 449 |
| }, |
| { |
| "epoch": 2.5, |
| "grad_norm": 0.05422027409076691, |
| "learning_rate": 0.00010078212290502794, |
| "loss": 0.9173, |
| "step": 450 |
| }, |
| { |
| "epoch": 2.5055555555555555, |
| "grad_norm": 0.06566402316093445, |
| "learning_rate": 0.0001005586592178771, |
| "loss": 0.7651, |
| "step": 451 |
| }, |
| { |
| "epoch": 2.511111111111111, |
| "grad_norm": 0.05321276932954788, |
| "learning_rate": 0.00010033519553072626, |
| "loss": 1.0276, |
| "step": 452 |
| }, |
| { |
| "epoch": 2.5166666666666666, |
| "grad_norm": 0.05509009584784508, |
| "learning_rate": 0.00010011173184357542, |
| "loss": 0.8098, |
| "step": 453 |
| }, |
| { |
| "epoch": 2.522222222222222, |
| "grad_norm": 0.05475517362356186, |
| "learning_rate": 9.988826815642459e-05, |
| "loss": 0.8852, |
| "step": 454 |
| }, |
| { |
| "epoch": 2.5277777777777777, |
| "grad_norm": 0.07419954985380173, |
| "learning_rate": 9.966480446927375e-05, |
| "loss": 0.8484, |
| "step": 455 |
| }, |
| { |
| "epoch": 2.533333333333333, |
| "grad_norm": 0.055524036288261414, |
| "learning_rate": 9.944134078212291e-05, |
| "loss": 0.9487, |
| "step": 456 |
| }, |
| { |
| "epoch": 2.5388888888888888, |
| "grad_norm": 0.06296243518590927, |
| "learning_rate": 9.921787709497207e-05, |
| "loss": 0.8428, |
| "step": 457 |
| }, |
| { |
| "epoch": 2.5444444444444443, |
| "grad_norm": 0.06132779270410538, |
| "learning_rate": 9.899441340782124e-05, |
| "loss": 0.896, |
| "step": 458 |
| }, |
| { |
| "epoch": 2.55, |
| "grad_norm": 0.06100643426179886, |
| "learning_rate": 9.87709497206704e-05, |
| "loss": 0.8459, |
| "step": 459 |
| }, |
| { |
| "epoch": 2.5555555555555554, |
| "grad_norm": 0.06480073928833008, |
| "learning_rate": 9.854748603351956e-05, |
| "loss": 0.8554, |
| "step": 460 |
| }, |
| { |
| "epoch": 2.561111111111111, |
| "grad_norm": 0.06085658445954323, |
| "learning_rate": 9.832402234636872e-05, |
| "loss": 0.8376, |
| "step": 461 |
| }, |
| { |
| "epoch": 2.5666666666666664, |
| "grad_norm": 0.0531584732234478, |
| "learning_rate": 9.810055865921788e-05, |
| "loss": 0.8304, |
| "step": 462 |
| }, |
| { |
| "epoch": 2.572222222222222, |
| "grad_norm": 0.058641497045755386, |
| "learning_rate": 9.787709497206705e-05, |
| "loss": 0.9549, |
| "step": 463 |
| }, |
| { |
| "epoch": 2.5777777777777775, |
| "grad_norm": 0.06060364469885826, |
| "learning_rate": 9.765363128491621e-05, |
| "loss": 0.9154, |
| "step": 464 |
| }, |
| { |
| "epoch": 2.5833333333333335, |
| "grad_norm": 0.05848725885152817, |
| "learning_rate": 9.743016759776537e-05, |
| "loss": 0.8697, |
| "step": 465 |
| }, |
| { |
| "epoch": 2.588888888888889, |
| "grad_norm": 0.052560560405254364, |
| "learning_rate": 9.720670391061453e-05, |
| "loss": 0.8149, |
| "step": 466 |
| }, |
| { |
| "epoch": 2.5944444444444446, |
| "grad_norm": 0.05442071706056595, |
| "learning_rate": 9.698324022346368e-05, |
| "loss": 0.9492, |
| "step": 467 |
| }, |
| { |
| "epoch": 2.6, |
| "grad_norm": 0.07098367810249329, |
| "learning_rate": 9.675977653631285e-05, |
| "loss": 0.8444, |
| "step": 468 |
| }, |
| { |
| "epoch": 2.6055555555555556, |
| "grad_norm": 0.06888148933649063, |
| "learning_rate": 9.653631284916201e-05, |
| "loss": 0.869, |
| "step": 469 |
| }, |
| { |
| "epoch": 2.611111111111111, |
| "grad_norm": 0.06614090502262115, |
| "learning_rate": 9.631284916201117e-05, |
| "loss": 0.8199, |
| "step": 470 |
| }, |
| { |
| "epoch": 2.6166666666666667, |
| "grad_norm": 0.07912636548280716, |
| "learning_rate": 9.608938547486033e-05, |
| "loss": 0.9456, |
| "step": 471 |
| }, |
| { |
| "epoch": 2.6222222222222222, |
| "grad_norm": 0.07283525168895721, |
| "learning_rate": 9.58659217877095e-05, |
| "loss": 0.9366, |
| "step": 472 |
| }, |
| { |
| "epoch": 2.6277777777777778, |
| "grad_norm": 0.06150941178202629, |
| "learning_rate": 9.564245810055866e-05, |
| "loss": 0.8277, |
| "step": 473 |
| }, |
| { |
| "epoch": 2.6333333333333333, |
| "grad_norm": 0.061703942716121674, |
| "learning_rate": 9.541899441340782e-05, |
| "loss": 0.8633, |
| "step": 474 |
| }, |
| { |
| "epoch": 2.638888888888889, |
| "grad_norm": 0.06387785077095032, |
| "learning_rate": 9.519553072625698e-05, |
| "loss": 0.9069, |
| "step": 475 |
| }, |
| { |
| "epoch": 2.6444444444444444, |
| "grad_norm": 0.06995640695095062, |
| "learning_rate": 9.497206703910614e-05, |
| "loss": 0.8612, |
| "step": 476 |
| }, |
| { |
| "epoch": 2.65, |
| "grad_norm": 0.06264820694923401, |
| "learning_rate": 9.47486033519553e-05, |
| "loss": 0.9511, |
| "step": 477 |
| }, |
| { |
| "epoch": 2.6555555555555554, |
| "grad_norm": 0.0607755109667778, |
| "learning_rate": 9.452513966480447e-05, |
| "loss": 0.9067, |
| "step": 478 |
| }, |
| { |
| "epoch": 2.661111111111111, |
| "grad_norm": 0.056160129606723785, |
| "learning_rate": 9.430167597765363e-05, |
| "loss": 0.9151, |
| "step": 479 |
| }, |
| { |
| "epoch": 2.6666666666666665, |
| "grad_norm": 0.06626291573047638, |
| "learning_rate": 9.407821229050279e-05, |
| "loss": 0.8162, |
| "step": 480 |
| }, |
| { |
| "epoch": 2.6722222222222225, |
| "grad_norm": 0.0568249337375164, |
| "learning_rate": 9.385474860335196e-05, |
| "loss": 0.7755, |
| "step": 481 |
| }, |
| { |
| "epoch": 2.677777777777778, |
| "grad_norm": 0.06192711368203163, |
| "learning_rate": 9.363128491620112e-05, |
| "loss": 0.9505, |
| "step": 482 |
| }, |
| { |
| "epoch": 2.6833333333333336, |
| "grad_norm": 0.060802996158599854, |
| "learning_rate": 9.340782122905028e-05, |
| "loss": 0.7398, |
| "step": 483 |
| }, |
| { |
| "epoch": 2.688888888888889, |
| "grad_norm": 0.062490034848451614, |
| "learning_rate": 9.318435754189944e-05, |
| "loss": 0.7442, |
| "step": 484 |
| }, |
| { |
| "epoch": 2.6944444444444446, |
| "grad_norm": 0.057932499796152115, |
| "learning_rate": 9.29608938547486e-05, |
| "loss": 0.8254, |
| "step": 485 |
| }, |
| { |
| "epoch": 2.7, |
| "grad_norm": 0.05519195646047592, |
| "learning_rate": 9.273743016759777e-05, |
| "loss": 0.8601, |
| "step": 486 |
| }, |
| { |
| "epoch": 2.7055555555555557, |
| "grad_norm": 0.06532099097967148, |
| "learning_rate": 9.251396648044693e-05, |
| "loss": 0.9135, |
| "step": 487 |
| }, |
| { |
| "epoch": 2.7111111111111112, |
| "grad_norm": 0.06363623589277267, |
| "learning_rate": 9.229050279329609e-05, |
| "loss": 1.1652, |
| "step": 488 |
| }, |
| { |
| "epoch": 2.716666666666667, |
| "grad_norm": 0.10435904562473297, |
| "learning_rate": 9.206703910614525e-05, |
| "loss": 0.8777, |
| "step": 489 |
| }, |
| { |
| "epoch": 2.7222222222222223, |
| "grad_norm": 0.05765729770064354, |
| "learning_rate": 9.184357541899442e-05, |
| "loss": 0.8535, |
| "step": 490 |
| }, |
| { |
| "epoch": 2.727777777777778, |
| "grad_norm": 0.06165571138262749, |
| "learning_rate": 9.162011173184358e-05, |
| "loss": 0.9044, |
| "step": 491 |
| }, |
| { |
| "epoch": 2.7333333333333334, |
| "grad_norm": 0.07434576749801636, |
| "learning_rate": 9.139664804469274e-05, |
| "loss": 0.7882, |
| "step": 492 |
| }, |
| { |
| "epoch": 2.738888888888889, |
| "grad_norm": 0.05665205791592598, |
| "learning_rate": 9.11731843575419e-05, |
| "loss": 0.9364, |
| "step": 493 |
| }, |
| { |
| "epoch": 2.7444444444444445, |
| "grad_norm": 0.06064401566982269, |
| "learning_rate": 9.094972067039107e-05, |
| "loss": 0.8404, |
| "step": 494 |
| }, |
| { |
| "epoch": 2.75, |
| "grad_norm": 0.07089365273714066, |
| "learning_rate": 9.072625698324023e-05, |
| "loss": 0.8383, |
| "step": 495 |
| }, |
| { |
| "epoch": 2.7555555555555555, |
| "grad_norm": 0.05261383205652237, |
| "learning_rate": 9.050279329608939e-05, |
| "loss": 0.9759, |
| "step": 496 |
| }, |
| { |
| "epoch": 2.761111111111111, |
| "grad_norm": 0.05961441621184349, |
| "learning_rate": 9.027932960893855e-05, |
| "loss": 1.0338, |
| "step": 497 |
| }, |
| { |
| "epoch": 2.7666666666666666, |
| "grad_norm": 0.05751103162765503, |
| "learning_rate": 9.005586592178772e-05, |
| "loss": 0.8853, |
| "step": 498 |
| }, |
| { |
| "epoch": 2.772222222222222, |
| "grad_norm": 0.0545254722237587, |
| "learning_rate": 8.983240223463688e-05, |
| "loss": 0.8918, |
| "step": 499 |
| }, |
| { |
| "epoch": 2.7777777777777777, |
| "grad_norm": 0.06573151797056198, |
| "learning_rate": 8.960893854748604e-05, |
| "loss": 1.0418, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.783333333333333, |
| "grad_norm": 0.07065696269273758, |
| "learning_rate": 8.938547486033519e-05, |
| "loss": 0.9214, |
| "step": 501 |
| }, |
| { |
| "epoch": 2.7888888888888888, |
| "grad_norm": 0.05309811607003212, |
| "learning_rate": 8.916201117318435e-05, |
| "loss": 0.7963, |
| "step": 502 |
| }, |
| { |
| "epoch": 2.7944444444444443, |
| "grad_norm": 0.06055481359362602, |
| "learning_rate": 8.893854748603351e-05, |
| "loss": 0.902, |
| "step": 503 |
| }, |
| { |
| "epoch": 2.8, |
| "grad_norm": 0.06306245177984238, |
| "learning_rate": 8.871508379888268e-05, |
| "loss": 0.8308, |
| "step": 504 |
| }, |
| { |
| "epoch": 2.8055555555555554, |
| "grad_norm": 0.05715975910425186, |
| "learning_rate": 8.849162011173184e-05, |
| "loss": 0.8721, |
| "step": 505 |
| }, |
| { |
| "epoch": 2.811111111111111, |
| "grad_norm": 0.0644611120223999, |
| "learning_rate": 8.8268156424581e-05, |
| "loss": 0.9017, |
| "step": 506 |
| }, |
| { |
| "epoch": 2.8166666666666664, |
| "grad_norm": 0.055696483701467514, |
| "learning_rate": 8.804469273743016e-05, |
| "loss": 0.8612, |
| "step": 507 |
| }, |
| { |
| "epoch": 2.822222222222222, |
| "grad_norm": 0.06337593495845795, |
| "learning_rate": 8.782122905027932e-05, |
| "loss": 0.8918, |
| "step": 508 |
| }, |
| { |
| "epoch": 2.8277777777777775, |
| "grad_norm": 0.0640796646475792, |
| "learning_rate": 8.759776536312849e-05, |
| "loss": 0.7485, |
| "step": 509 |
| }, |
| { |
| "epoch": 2.8333333333333335, |
| "grad_norm": 0.059003688395023346, |
| "learning_rate": 8.737430167597765e-05, |
| "loss": 0.7898, |
| "step": 510 |
| }, |
| { |
| "epoch": 2.838888888888889, |
| "grad_norm": 0.06474993377923965, |
| "learning_rate": 8.715083798882681e-05, |
| "loss": 0.9111, |
| "step": 511 |
| }, |
| { |
| "epoch": 2.8444444444444446, |
| "grad_norm": 0.054653774946928024, |
| "learning_rate": 8.692737430167597e-05, |
| "loss": 0.8718, |
| "step": 512 |
| }, |
| { |
| "epoch": 2.85, |
| "grad_norm": 0.0752725899219513, |
| "learning_rate": 8.670391061452514e-05, |
| "loss": 0.9412, |
| "step": 513 |
| }, |
| { |
| "epoch": 2.8555555555555556, |
| "grad_norm": 0.058774758130311966, |
| "learning_rate": 8.64804469273743e-05, |
| "loss": 0.9482, |
| "step": 514 |
| }, |
| { |
| "epoch": 2.861111111111111, |
| "grad_norm": 0.0575130321085453, |
| "learning_rate": 8.625698324022346e-05, |
| "loss": 0.8133, |
| "step": 515 |
| }, |
| { |
| "epoch": 2.8666666666666667, |
| "grad_norm": 0.08530930429697037, |
| "learning_rate": 8.603351955307262e-05, |
| "loss": 0.8953, |
| "step": 516 |
| }, |
| { |
| "epoch": 2.8722222222222222, |
| "grad_norm": 0.0670664831995964, |
| "learning_rate": 8.581005586592179e-05, |
| "loss": 0.7985, |
| "step": 517 |
| }, |
| { |
| "epoch": 2.8777777777777778, |
| "grad_norm": 0.06128701567649841, |
| "learning_rate": 8.558659217877095e-05, |
| "loss": 0.9504, |
| "step": 518 |
| }, |
| { |
| "epoch": 2.8833333333333333, |
| "grad_norm": 0.06463362276554108, |
| "learning_rate": 8.536312849162011e-05, |
| "loss": 0.8723, |
| "step": 519 |
| }, |
| { |
| "epoch": 2.888888888888889, |
| "grad_norm": 0.05576709285378456, |
| "learning_rate": 8.513966480446927e-05, |
| "loss": 0.9225, |
| "step": 520 |
| }, |
| { |
| "epoch": 2.8944444444444444, |
| "grad_norm": 0.05458657816052437, |
| "learning_rate": 8.491620111731844e-05, |
| "loss": 0.9803, |
| "step": 521 |
| }, |
| { |
| "epoch": 2.9, |
| "grad_norm": 0.07112407684326172, |
| "learning_rate": 8.46927374301676e-05, |
| "loss": 0.871, |
| "step": 522 |
| }, |
| { |
| "epoch": 2.9055555555555554, |
| "grad_norm": 0.05647515505552292, |
| "learning_rate": 8.446927374301676e-05, |
| "loss": 0.8819, |
| "step": 523 |
| }, |
| { |
| "epoch": 2.911111111111111, |
| "grad_norm": 0.06105800345540047, |
| "learning_rate": 8.424581005586592e-05, |
| "loss": 0.904, |
| "step": 524 |
| }, |
| { |
| "epoch": 2.9166666666666665, |
| "grad_norm": 0.058846697211265564, |
| "learning_rate": 8.402234636871508e-05, |
| "loss": 0.9291, |
| "step": 525 |
| }, |
| { |
| "epoch": 2.9222222222222225, |
| "grad_norm": 0.06616098433732986, |
| "learning_rate": 8.379888268156425e-05, |
| "loss": 0.8572, |
| "step": 526 |
| }, |
| { |
| "epoch": 2.927777777777778, |
| "grad_norm": 0.07449668645858765, |
| "learning_rate": 8.357541899441341e-05, |
| "loss": 1.0035, |
| "step": 527 |
| }, |
| { |
| "epoch": 2.9333333333333336, |
| "grad_norm": 0.05502082407474518, |
| "learning_rate": 8.335195530726257e-05, |
| "loss": 0.8301, |
| "step": 528 |
| }, |
| { |
| "epoch": 2.938888888888889, |
| "grad_norm": 0.061066657304763794, |
| "learning_rate": 8.312849162011173e-05, |
| "loss": 1.0352, |
| "step": 529 |
| }, |
| { |
| "epoch": 2.9444444444444446, |
| "grad_norm": 0.06203857809305191, |
| "learning_rate": 8.29050279329609e-05, |
| "loss": 0.9107, |
| "step": 530 |
| }, |
| { |
| "epoch": 2.95, |
| "grad_norm": 0.06409101188182831, |
| "learning_rate": 8.268156424581006e-05, |
| "loss": 0.8879, |
| "step": 531 |
| }, |
| { |
| "epoch": 2.9555555555555557, |
| "grad_norm": 0.061299391090869904, |
| "learning_rate": 8.245810055865922e-05, |
| "loss": 0.9661, |
| "step": 532 |
| }, |
| { |
| "epoch": 2.9611111111111112, |
| "grad_norm": 0.06148533150553703, |
| "learning_rate": 8.223463687150838e-05, |
| "loss": 0.9174, |
| "step": 533 |
| }, |
| { |
| "epoch": 2.966666666666667, |
| "grad_norm": 0.07080871611833572, |
| "learning_rate": 8.201117318435755e-05, |
| "loss": 0.8577, |
| "step": 534 |
| }, |
| { |
| "epoch": 2.9722222222222223, |
| "grad_norm": 0.06243567168712616, |
| "learning_rate": 8.178770949720671e-05, |
| "loss": 0.8783, |
| "step": 535 |
| }, |
| { |
| "epoch": 2.977777777777778, |
| "grad_norm": 0.057299647480249405, |
| "learning_rate": 8.156424581005587e-05, |
| "loss": 0.9022, |
| "step": 536 |
| }, |
| { |
| "epoch": 2.9833333333333334, |
| "grad_norm": 0.07001875340938568, |
| "learning_rate": 8.134078212290503e-05, |
| "loss": 0.8131, |
| "step": 537 |
| }, |
| { |
| "epoch": 2.988888888888889, |
| "grad_norm": 0.07312119752168655, |
| "learning_rate": 8.11173184357542e-05, |
| "loss": 0.7677, |
| "step": 538 |
| }, |
| { |
| "epoch": 2.9944444444444445, |
| "grad_norm": 0.0653364360332489, |
| "learning_rate": 8.089385474860336e-05, |
| "loss": 0.7791, |
| "step": 539 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.06341056525707245, |
| "learning_rate": 8.067039106145252e-05, |
| "loss": 0.8752, |
| "step": 540 |
| }, |
| { |
| "epoch": 3.0055555555555555, |
| "grad_norm": 0.059537846595048904, |
| "learning_rate": 8.044692737430168e-05, |
| "loss": 0.9272, |
| "step": 541 |
| }, |
| { |
| "epoch": 3.011111111111111, |
| "grad_norm": 0.05694005265831947, |
| "learning_rate": 8.022346368715084e-05, |
| "loss": 0.9407, |
| "step": 542 |
| }, |
| { |
| "epoch": 3.0166666666666666, |
| "grad_norm": 0.05771559104323387, |
| "learning_rate": 8e-05, |
| "loss": 0.7792, |
| "step": 543 |
| }, |
| { |
| "epoch": 3.022222222222222, |
| "grad_norm": 0.07127627730369568, |
| "learning_rate": 7.977653631284917e-05, |
| "loss": 0.9883, |
| "step": 544 |
| }, |
| { |
| "epoch": 3.0277777777777777, |
| "grad_norm": 0.05568571761250496, |
| "learning_rate": 7.955307262569833e-05, |
| "loss": 0.8275, |
| "step": 545 |
| }, |
| { |
| "epoch": 3.033333333333333, |
| "grad_norm": 0.055315401405096054, |
| "learning_rate": 7.93296089385475e-05, |
| "loss": 0.8727, |
| "step": 546 |
| }, |
| { |
| "epoch": 3.0388888888888888, |
| "grad_norm": 0.055111952126026154, |
| "learning_rate": 7.910614525139666e-05, |
| "loss": 0.9536, |
| "step": 547 |
| }, |
| { |
| "epoch": 3.0444444444444443, |
| "grad_norm": 0.05845943093299866, |
| "learning_rate": 7.88826815642458e-05, |
| "loss": 0.911, |
| "step": 548 |
| }, |
| { |
| "epoch": 3.05, |
| "grad_norm": 0.07708202302455902, |
| "learning_rate": 7.865921787709497e-05, |
| "loss": 0.7579, |
| "step": 549 |
| }, |
| { |
| "epoch": 3.0555555555555554, |
| "grad_norm": 0.061242036521434784, |
| "learning_rate": 7.843575418994413e-05, |
| "loss": 0.8589, |
| "step": 550 |
| }, |
| { |
| "epoch": 3.061111111111111, |
| "grad_norm": 0.05471673607826233, |
| "learning_rate": 7.821229050279329e-05, |
| "loss": 0.8064, |
| "step": 551 |
| }, |
| { |
| "epoch": 3.066666666666667, |
| "grad_norm": 0.0710383802652359, |
| "learning_rate": 7.798882681564245e-05, |
| "loss": 0.6753, |
| "step": 552 |
| }, |
| { |
| "epoch": 3.0722222222222224, |
| "grad_norm": 0.05542483553290367, |
| "learning_rate": 7.776536312849162e-05, |
| "loss": 0.836, |
| "step": 553 |
| }, |
| { |
| "epoch": 3.077777777777778, |
| "grad_norm": 0.07493164390325546, |
| "learning_rate": 7.754189944134078e-05, |
| "loss": 0.8456, |
| "step": 554 |
| }, |
| { |
| "epoch": 3.0833333333333335, |
| "grad_norm": 0.06546488404273987, |
| "learning_rate": 7.731843575418994e-05, |
| "loss": 0.8458, |
| "step": 555 |
| }, |
| { |
| "epoch": 3.088888888888889, |
| "grad_norm": 0.06345933675765991, |
| "learning_rate": 7.70949720670391e-05, |
| "loss": 0.8942, |
| "step": 556 |
| }, |
| { |
| "epoch": 3.0944444444444446, |
| "grad_norm": 0.05905034765601158, |
| "learning_rate": 7.687150837988827e-05, |
| "loss": 0.7957, |
| "step": 557 |
| }, |
| { |
| "epoch": 3.1, |
| "grad_norm": 0.05303044617176056, |
| "learning_rate": 7.664804469273743e-05, |
| "loss": 0.8961, |
| "step": 558 |
| }, |
| { |
| "epoch": 3.1055555555555556, |
| "grad_norm": 0.06295602023601532, |
| "learning_rate": 7.642458100558659e-05, |
| "loss": 0.724, |
| "step": 559 |
| }, |
| { |
| "epoch": 3.111111111111111, |
| "grad_norm": 0.0815122202038765, |
| "learning_rate": 7.620111731843575e-05, |
| "loss": 0.8327, |
| "step": 560 |
| }, |
| { |
| "epoch": 3.1166666666666667, |
| "grad_norm": 0.06387761980295181, |
| "learning_rate": 7.597765363128491e-05, |
| "loss": 0.8112, |
| "step": 561 |
| }, |
| { |
| "epoch": 3.1222222222222222, |
| "grad_norm": 0.062088917940855026, |
| "learning_rate": 7.575418994413408e-05, |
| "loss": 0.839, |
| "step": 562 |
| }, |
| { |
| "epoch": 3.1277777777777778, |
| "grad_norm": 0.06654959172010422, |
| "learning_rate": 7.553072625698324e-05, |
| "loss": 0.9513, |
| "step": 563 |
| }, |
| { |
| "epoch": 3.1333333333333333, |
| "grad_norm": 0.06470506638288498, |
| "learning_rate": 7.53072625698324e-05, |
| "loss": 0.9015, |
| "step": 564 |
| }, |
| { |
| "epoch": 3.138888888888889, |
| "grad_norm": 0.06290192157030106, |
| "learning_rate": 7.508379888268156e-05, |
| "loss": 0.8237, |
| "step": 565 |
| }, |
| { |
| "epoch": 3.1444444444444444, |
| "grad_norm": 0.06542884558439255, |
| "learning_rate": 7.486033519553073e-05, |
| "loss": 0.995, |
| "step": 566 |
| }, |
| { |
| "epoch": 3.15, |
| "grad_norm": 0.055929798632860184, |
| "learning_rate": 7.463687150837989e-05, |
| "loss": 0.8617, |
| "step": 567 |
| }, |
| { |
| "epoch": 3.1555555555555554, |
| "grad_norm": 0.06138373166322708, |
| "learning_rate": 7.441340782122905e-05, |
| "loss": 0.9203, |
| "step": 568 |
| }, |
| { |
| "epoch": 3.161111111111111, |
| "grad_norm": 0.07268285751342773, |
| "learning_rate": 7.418994413407821e-05, |
| "loss": 0.6976, |
| "step": 569 |
| }, |
| { |
| "epoch": 3.1666666666666665, |
| "grad_norm": 0.0731147900223732, |
| "learning_rate": 7.396648044692738e-05, |
| "loss": 0.837, |
| "step": 570 |
| }, |
| { |
| "epoch": 3.172222222222222, |
| "grad_norm": 0.07046571373939514, |
| "learning_rate": 7.374301675977654e-05, |
| "loss": 1.009, |
| "step": 571 |
| }, |
| { |
| "epoch": 3.1777777777777776, |
| "grad_norm": 0.062040820717811584, |
| "learning_rate": 7.35195530726257e-05, |
| "loss": 0.7878, |
| "step": 572 |
| }, |
| { |
| "epoch": 3.183333333333333, |
| "grad_norm": 0.09983498603105545, |
| "learning_rate": 7.329608938547486e-05, |
| "loss": 0.9348, |
| "step": 573 |
| }, |
| { |
| "epoch": 3.188888888888889, |
| "grad_norm": 0.06784769147634506, |
| "learning_rate": 7.307262569832403e-05, |
| "loss": 0.8148, |
| "step": 574 |
| }, |
| { |
| "epoch": 3.1944444444444446, |
| "grad_norm": 0.0642843022942543, |
| "learning_rate": 7.284916201117319e-05, |
| "loss": 0.8725, |
| "step": 575 |
| }, |
| { |
| "epoch": 3.2, |
| "grad_norm": 0.05647365376353264, |
| "learning_rate": 7.262569832402235e-05, |
| "loss": 0.7609, |
| "step": 576 |
| }, |
| { |
| "epoch": 3.2055555555555557, |
| "grad_norm": 0.09439057856798172, |
| "learning_rate": 7.240223463687151e-05, |
| "loss": 0.7815, |
| "step": 577 |
| }, |
| { |
| "epoch": 3.2111111111111112, |
| "grad_norm": 0.06756071001291275, |
| "learning_rate": 7.217877094972067e-05, |
| "loss": 0.8371, |
| "step": 578 |
| }, |
| { |
| "epoch": 3.216666666666667, |
| "grad_norm": 0.059290554374456406, |
| "learning_rate": 7.195530726256984e-05, |
| "loss": 0.867, |
| "step": 579 |
| }, |
| { |
| "epoch": 3.2222222222222223, |
| "grad_norm": 0.06018024682998657, |
| "learning_rate": 7.1731843575419e-05, |
| "loss": 0.8788, |
| "step": 580 |
| }, |
| { |
| "epoch": 3.227777777777778, |
| "grad_norm": 0.05990862846374512, |
| "learning_rate": 7.150837988826816e-05, |
| "loss": 0.9285, |
| "step": 581 |
| }, |
| { |
| "epoch": 3.2333333333333334, |
| "grad_norm": 0.07019732892513275, |
| "learning_rate": 7.128491620111732e-05, |
| "loss": 0.7933, |
| "step": 582 |
| }, |
| { |
| "epoch": 3.238888888888889, |
| "grad_norm": 0.06324657797813416, |
| "learning_rate": 7.106145251396649e-05, |
| "loss": 0.8966, |
| "step": 583 |
| }, |
| { |
| "epoch": 3.2444444444444445, |
| "grad_norm": 0.06172401085495949, |
| "learning_rate": 7.083798882681565e-05, |
| "loss": 0.7741, |
| "step": 584 |
| }, |
| { |
| "epoch": 3.25, |
| "grad_norm": 0.05882123112678528, |
| "learning_rate": 7.061452513966481e-05, |
| "loss": 0.8038, |
| "step": 585 |
| }, |
| { |
| "epoch": 3.2555555555555555, |
| "grad_norm": 0.06002512946724892, |
| "learning_rate": 7.039106145251397e-05, |
| "loss": 0.8359, |
| "step": 586 |
| }, |
| { |
| "epoch": 3.261111111111111, |
| "grad_norm": 0.06072268262505531, |
| "learning_rate": 7.016759776536314e-05, |
| "loss": 0.8718, |
| "step": 587 |
| }, |
| { |
| "epoch": 3.2666666666666666, |
| "grad_norm": 0.06172073259949684, |
| "learning_rate": 6.99441340782123e-05, |
| "loss": 0.8243, |
| "step": 588 |
| }, |
| { |
| "epoch": 3.272222222222222, |
| "grad_norm": 0.07151300460100174, |
| "learning_rate": 6.972067039106146e-05, |
| "loss": 0.7819, |
| "step": 589 |
| }, |
| { |
| "epoch": 3.2777777777777777, |
| "grad_norm": 0.06847742199897766, |
| "learning_rate": 6.949720670391062e-05, |
| "loss": 0.9227, |
| "step": 590 |
| }, |
| { |
| "epoch": 3.283333333333333, |
| "grad_norm": 0.05952233448624611, |
| "learning_rate": 6.927374301675979e-05, |
| "loss": 0.9275, |
| "step": 591 |
| }, |
| { |
| "epoch": 3.2888888888888888, |
| "grad_norm": 0.0579167939722538, |
| "learning_rate": 6.905027932960895e-05, |
| "loss": 0.8781, |
| "step": 592 |
| }, |
| { |
| "epoch": 3.2944444444444443, |
| "grad_norm": 0.06359118968248367, |
| "learning_rate": 6.882681564245811e-05, |
| "loss": 0.8284, |
| "step": 593 |
| }, |
| { |
| "epoch": 3.3, |
| "grad_norm": 0.06277986615896225, |
| "learning_rate": 6.860335195530727e-05, |
| "loss": 0.8671, |
| "step": 594 |
| }, |
| { |
| "epoch": 3.3055555555555554, |
| "grad_norm": 0.06046503409743309, |
| "learning_rate": 6.837988826815642e-05, |
| "loss": 0.8964, |
| "step": 595 |
| }, |
| { |
| "epoch": 3.311111111111111, |
| "grad_norm": 0.062156785279512405, |
| "learning_rate": 6.815642458100558e-05, |
| "loss": 0.9618, |
| "step": 596 |
| }, |
| { |
| "epoch": 3.3166666666666664, |
| "grad_norm": 0.06117270141839981, |
| "learning_rate": 6.793296089385475e-05, |
| "loss": 0.9061, |
| "step": 597 |
| }, |
| { |
| "epoch": 3.3222222222222224, |
| "grad_norm": 0.06024372577667236, |
| "learning_rate": 6.770949720670391e-05, |
| "loss": 0.8553, |
| "step": 598 |
| }, |
| { |
| "epoch": 3.327777777777778, |
| "grad_norm": 0.07103978097438812, |
| "learning_rate": 6.748603351955307e-05, |
| "loss": 0.844, |
| "step": 599 |
| }, |
| { |
| "epoch": 3.3333333333333335, |
| "grad_norm": 0.06111348792910576, |
| "learning_rate": 6.726256983240223e-05, |
| "loss": 0.9836, |
| "step": 600 |
| }, |
| { |
| "epoch": 3.338888888888889, |
| "grad_norm": 0.07593747973442078, |
| "learning_rate": 6.70391061452514e-05, |
| "loss": 0.9246, |
| "step": 601 |
| }, |
| { |
| "epoch": 3.3444444444444446, |
| "grad_norm": 0.06724842637777328, |
| "learning_rate": 6.681564245810056e-05, |
| "loss": 0.8799, |
| "step": 602 |
| }, |
| { |
| "epoch": 3.35, |
| "grad_norm": 0.0651603415608406, |
| "learning_rate": 6.659217877094972e-05, |
| "loss": 0.9114, |
| "step": 603 |
| }, |
| { |
| "epoch": 3.3555555555555556, |
| "grad_norm": 0.06483855843544006, |
| "learning_rate": 6.636871508379888e-05, |
| "loss": 0.8418, |
| "step": 604 |
| }, |
| { |
| "epoch": 3.361111111111111, |
| "grad_norm": 0.07553449273109436, |
| "learning_rate": 6.614525139664804e-05, |
| "loss": 0.8239, |
| "step": 605 |
| }, |
| { |
| "epoch": 3.3666666666666667, |
| "grad_norm": 0.06333702057600021, |
| "learning_rate": 6.59217877094972e-05, |
| "loss": 0.8718, |
| "step": 606 |
| }, |
| { |
| "epoch": 3.3722222222222222, |
| "grad_norm": 0.06561273336410522, |
| "learning_rate": 6.569832402234637e-05, |
| "loss": 1.0049, |
| "step": 607 |
| }, |
| { |
| "epoch": 3.3777777777777778, |
| "grad_norm": 0.05982871726155281, |
| "learning_rate": 6.547486033519553e-05, |
| "loss": 0.8011, |
| "step": 608 |
| }, |
| { |
| "epoch": 3.3833333333333333, |
| "grad_norm": 0.06759478896856308, |
| "learning_rate": 6.52513966480447e-05, |
| "loss": 0.973, |
| "step": 609 |
| }, |
| { |
| "epoch": 3.388888888888889, |
| "grad_norm": 0.06581491231918335, |
| "learning_rate": 6.502793296089386e-05, |
| "loss": 0.9308, |
| "step": 610 |
| }, |
| { |
| "epoch": 3.3944444444444444, |
| "grad_norm": 0.06133756786584854, |
| "learning_rate": 6.480446927374302e-05, |
| "loss": 0.8157, |
| "step": 611 |
| }, |
| { |
| "epoch": 3.4, |
| "grad_norm": 0.07396920770406723, |
| "learning_rate": 6.458100558659218e-05, |
| "loss": 0.8828, |
| "step": 612 |
| }, |
| { |
| "epoch": 3.4055555555555554, |
| "grad_norm": 0.06472024321556091, |
| "learning_rate": 6.435754189944134e-05, |
| "loss": 0.7748, |
| "step": 613 |
| }, |
| { |
| "epoch": 3.411111111111111, |
| "grad_norm": 0.06640879809856415, |
| "learning_rate": 6.41340782122905e-05, |
| "loss": 0.8435, |
| "step": 614 |
| }, |
| { |
| "epoch": 3.4166666666666665, |
| "grad_norm": 0.07183702290058136, |
| "learning_rate": 6.391061452513967e-05, |
| "loss": 0.9378, |
| "step": 615 |
| }, |
| { |
| "epoch": 3.422222222222222, |
| "grad_norm": 0.0727454274892807, |
| "learning_rate": 6.368715083798883e-05, |
| "loss": 0.9332, |
| "step": 616 |
| }, |
| { |
| "epoch": 3.4277777777777776, |
| "grad_norm": 0.06644386053085327, |
| "learning_rate": 6.346368715083799e-05, |
| "loss": 0.9157, |
| "step": 617 |
| }, |
| { |
| "epoch": 3.4333333333333336, |
| "grad_norm": 0.07607833296060562, |
| "learning_rate": 6.324022346368715e-05, |
| "loss": 0.7545, |
| "step": 618 |
| }, |
| { |
| "epoch": 3.438888888888889, |
| "grad_norm": 0.056877944618463516, |
| "learning_rate": 6.301675977653632e-05, |
| "loss": 0.8431, |
| "step": 619 |
| }, |
| { |
| "epoch": 3.4444444444444446, |
| "grad_norm": 0.0674533024430275, |
| "learning_rate": 6.279329608938548e-05, |
| "loss": 0.908, |
| "step": 620 |
| }, |
| { |
| "epoch": 3.45, |
| "grad_norm": 0.06054288148880005, |
| "learning_rate": 6.256983240223464e-05, |
| "loss": 0.9227, |
| "step": 621 |
| }, |
| { |
| "epoch": 3.4555555555555557, |
| "grad_norm": 0.06399651616811752, |
| "learning_rate": 6.23463687150838e-05, |
| "loss": 0.8975, |
| "step": 622 |
| }, |
| { |
| "epoch": 3.4611111111111112, |
| "grad_norm": 0.06626058369874954, |
| "learning_rate": 6.212290502793297e-05, |
| "loss": 0.9773, |
| "step": 623 |
| }, |
| { |
| "epoch": 3.466666666666667, |
| "grad_norm": 0.060863859951496124, |
| "learning_rate": 6.189944134078213e-05, |
| "loss": 1.0029, |
| "step": 624 |
| }, |
| { |
| "epoch": 3.4722222222222223, |
| "grad_norm": 0.0591045580804348, |
| "learning_rate": 6.167597765363129e-05, |
| "loss": 0.8533, |
| "step": 625 |
| }, |
| { |
| "epoch": 3.477777777777778, |
| "grad_norm": 0.056986406445503235, |
| "learning_rate": 6.145251396648045e-05, |
| "loss": 0.8151, |
| "step": 626 |
| }, |
| { |
| "epoch": 3.4833333333333334, |
| "grad_norm": 0.05965357646346092, |
| "learning_rate": 6.122905027932962e-05, |
| "loss": 0.9094, |
| "step": 627 |
| }, |
| { |
| "epoch": 3.488888888888889, |
| "grad_norm": 0.05617017298936844, |
| "learning_rate": 6.100558659217878e-05, |
| "loss": 0.7653, |
| "step": 628 |
| }, |
| { |
| "epoch": 3.4944444444444445, |
| "grad_norm": 0.09308428317308426, |
| "learning_rate": 6.078212290502794e-05, |
| "loss": 0.9318, |
| "step": 629 |
| }, |
| { |
| "epoch": 3.5, |
| "grad_norm": 0.06707720458507538, |
| "learning_rate": 6.05586592178771e-05, |
| "loss": 1.0462, |
| "step": 630 |
| }, |
| { |
| "epoch": 3.5055555555555555, |
| "grad_norm": 0.05806177482008934, |
| "learning_rate": 6.0335195530726265e-05, |
| "loss": 0.8771, |
| "step": 631 |
| }, |
| { |
| "epoch": 3.511111111111111, |
| "grad_norm": 0.060027267783880234, |
| "learning_rate": 6.011173184357543e-05, |
| "loss": 0.8578, |
| "step": 632 |
| }, |
| { |
| "epoch": 3.5166666666666666, |
| "grad_norm": 0.06268207728862762, |
| "learning_rate": 5.988826815642459e-05, |
| "loss": 0.8026, |
| "step": 633 |
| }, |
| { |
| "epoch": 3.522222222222222, |
| "grad_norm": 0.06065778434276581, |
| "learning_rate": 5.966480446927375e-05, |
| "loss": 0.8887, |
| "step": 634 |
| }, |
| { |
| "epoch": 3.5277777777777777, |
| "grad_norm": 0.06879955530166626, |
| "learning_rate": 5.9441340782122914e-05, |
| "loss": 0.9636, |
| "step": 635 |
| }, |
| { |
| "epoch": 3.533333333333333, |
| "grad_norm": 0.062394339591264725, |
| "learning_rate": 5.9217877094972076e-05, |
| "loss": 0.9258, |
| "step": 636 |
| }, |
| { |
| "epoch": 3.5388888888888888, |
| "grad_norm": 0.06418924033641815, |
| "learning_rate": 5.899441340782124e-05, |
| "loss": 0.9875, |
| "step": 637 |
| }, |
| { |
| "epoch": 3.5444444444444443, |
| "grad_norm": 0.06523976475000381, |
| "learning_rate": 5.87709497206704e-05, |
| "loss": 0.8122, |
| "step": 638 |
| }, |
| { |
| "epoch": 3.55, |
| "grad_norm": 0.09336890280246735, |
| "learning_rate": 5.8547486033519563e-05, |
| "loss": 0.7608, |
| "step": 639 |
| }, |
| { |
| "epoch": 3.5555555555555554, |
| "grad_norm": 0.06870478391647339, |
| "learning_rate": 5.8324022346368726e-05, |
| "loss": 0.8859, |
| "step": 640 |
| }, |
| { |
| "epoch": 3.561111111111111, |
| "grad_norm": 0.06951097398996353, |
| "learning_rate": 5.810055865921789e-05, |
| "loss": 0.8773, |
| "step": 641 |
| }, |
| { |
| "epoch": 3.5666666666666664, |
| "grad_norm": 0.0712515339255333, |
| "learning_rate": 5.787709497206704e-05, |
| "loss": 0.7315, |
| "step": 642 |
| }, |
| { |
| "epoch": 3.572222222222222, |
| "grad_norm": 0.0626714825630188, |
| "learning_rate": 5.76536312849162e-05, |
| "loss": 0.8983, |
| "step": 643 |
| }, |
| { |
| "epoch": 3.5777777777777775, |
| "grad_norm": 0.06400678306818008, |
| "learning_rate": 5.743016759776536e-05, |
| "loss": 0.8323, |
| "step": 644 |
| }, |
| { |
| "epoch": 3.5833333333333335, |
| "grad_norm": 0.07090960443019867, |
| "learning_rate": 5.7206703910614524e-05, |
| "loss": 0.8491, |
| "step": 645 |
| }, |
| { |
| "epoch": 3.588888888888889, |
| "grad_norm": 0.07009242475032806, |
| "learning_rate": 5.6983240223463686e-05, |
| "loss": 0.8861, |
| "step": 646 |
| }, |
| { |
| "epoch": 3.5944444444444446, |
| "grad_norm": 0.056394800543785095, |
| "learning_rate": 5.675977653631285e-05, |
| "loss": 0.9701, |
| "step": 647 |
| }, |
| { |
| "epoch": 3.6, |
| "grad_norm": 0.0645764097571373, |
| "learning_rate": 5.653631284916201e-05, |
| "loss": 0.872, |
| "step": 648 |
| }, |
| { |
| "epoch": 3.6055555555555556, |
| "grad_norm": 0.05909927189350128, |
| "learning_rate": 5.631284916201117e-05, |
| "loss": 0.7737, |
| "step": 649 |
| }, |
| { |
| "epoch": 3.611111111111111, |
| "grad_norm": 0.06914041191339493, |
| "learning_rate": 5.6089385474860336e-05, |
| "loss": 0.7916, |
| "step": 650 |
| }, |
| { |
| "epoch": 3.6166666666666667, |
| "grad_norm": 0.06700372695922852, |
| "learning_rate": 5.58659217877095e-05, |
| "loss": 0.9287, |
| "step": 651 |
| }, |
| { |
| "epoch": 3.6222222222222222, |
| "grad_norm": 0.08735419064760208, |
| "learning_rate": 5.564245810055866e-05, |
| "loss": 0.6891, |
| "step": 652 |
| }, |
| { |
| "epoch": 3.6277777777777778, |
| "grad_norm": 0.05871176719665527, |
| "learning_rate": 5.541899441340782e-05, |
| "loss": 0.8665, |
| "step": 653 |
| }, |
| { |
| "epoch": 3.6333333333333333, |
| "grad_norm": 0.060677025467157364, |
| "learning_rate": 5.5195530726256985e-05, |
| "loss": 0.7776, |
| "step": 654 |
| }, |
| { |
| "epoch": 3.638888888888889, |
| "grad_norm": 0.06749715656042099, |
| "learning_rate": 5.497206703910615e-05, |
| "loss": 0.8589, |
| "step": 655 |
| }, |
| { |
| "epoch": 3.6444444444444444, |
| "grad_norm": 0.06428337097167969, |
| "learning_rate": 5.474860335195531e-05, |
| "loss": 0.8476, |
| "step": 656 |
| }, |
| { |
| "epoch": 3.65, |
| "grad_norm": 0.07902880758047104, |
| "learning_rate": 5.452513966480447e-05, |
| "loss": 0.975, |
| "step": 657 |
| }, |
| { |
| "epoch": 3.6555555555555554, |
| "grad_norm": 0.07362475991249084, |
| "learning_rate": 5.4301675977653634e-05, |
| "loss": 0.8756, |
| "step": 658 |
| }, |
| { |
| "epoch": 3.661111111111111, |
| "grad_norm": 0.07670604437589645, |
| "learning_rate": 5.4078212290502797e-05, |
| "loss": 0.9614, |
| "step": 659 |
| }, |
| { |
| "epoch": 3.6666666666666665, |
| "grad_norm": 0.05876456946134567, |
| "learning_rate": 5.385474860335196e-05, |
| "loss": 1.0271, |
| "step": 660 |
| }, |
| { |
| "epoch": 3.6722222222222225, |
| "grad_norm": 0.06619323790073395, |
| "learning_rate": 5.363128491620112e-05, |
| "loss": 0.9151, |
| "step": 661 |
| }, |
| { |
| "epoch": 3.677777777777778, |
| "grad_norm": 0.06175459921360016, |
| "learning_rate": 5.3407821229050284e-05, |
| "loss": 0.9548, |
| "step": 662 |
| }, |
| { |
| "epoch": 3.6833333333333336, |
| "grad_norm": 0.06050381436944008, |
| "learning_rate": 5.3184357541899446e-05, |
| "loss": 0.9309, |
| "step": 663 |
| }, |
| { |
| "epoch": 3.688888888888889, |
| "grad_norm": 0.07536690682172775, |
| "learning_rate": 5.296089385474861e-05, |
| "loss": 0.8292, |
| "step": 664 |
| }, |
| { |
| "epoch": 3.6944444444444446, |
| "grad_norm": 0.05690660700201988, |
| "learning_rate": 5.273743016759777e-05, |
| "loss": 0.8964, |
| "step": 665 |
| }, |
| { |
| "epoch": 3.7, |
| "grad_norm": 0.05835000425577164, |
| "learning_rate": 5.251396648044693e-05, |
| "loss": 0.8679, |
| "step": 666 |
| }, |
| { |
| "epoch": 3.7055555555555557, |
| "grad_norm": 0.061024926602840424, |
| "learning_rate": 5.2290502793296095e-05, |
| "loss": 0.8628, |
| "step": 667 |
| }, |
| { |
| "epoch": 3.7111111111111112, |
| "grad_norm": 0.07365045696496964, |
| "learning_rate": 5.206703910614526e-05, |
| "loss": 0.918, |
| "step": 668 |
| }, |
| { |
| "epoch": 3.716666666666667, |
| "grad_norm": 0.06592633575201035, |
| "learning_rate": 5.184357541899442e-05, |
| "loss": 0.9424, |
| "step": 669 |
| }, |
| { |
| "epoch": 3.7222222222222223, |
| "grad_norm": 0.056026358157396317, |
| "learning_rate": 5.162011173184358e-05, |
| "loss": 0.8151, |
| "step": 670 |
| }, |
| { |
| "epoch": 3.727777777777778, |
| "grad_norm": 0.07303276658058167, |
| "learning_rate": 5.139664804469274e-05, |
| "loss": 0.8787, |
| "step": 671 |
| }, |
| { |
| "epoch": 3.7333333333333334, |
| "grad_norm": 0.07177183032035828, |
| "learning_rate": 5.11731843575419e-05, |
| "loss": 0.867, |
| "step": 672 |
| }, |
| { |
| "epoch": 3.738888888888889, |
| "grad_norm": 0.06418969482183456, |
| "learning_rate": 5.094972067039106e-05, |
| "loss": 0.9005, |
| "step": 673 |
| }, |
| { |
| "epoch": 3.7444444444444445, |
| "grad_norm": 0.07607243955135345, |
| "learning_rate": 5.0726256983240225e-05, |
| "loss": 0.646, |
| "step": 674 |
| }, |
| { |
| "epoch": 3.75, |
| "grad_norm": 0.06639571487903595, |
| "learning_rate": 5.050279329608939e-05, |
| "loss": 0.9202, |
| "step": 675 |
| }, |
| { |
| "epoch": 3.7555555555555555, |
| "grad_norm": 0.06520118564367294, |
| "learning_rate": 5.027932960893855e-05, |
| "loss": 1.0109, |
| "step": 676 |
| }, |
| { |
| "epoch": 3.761111111111111, |
| "grad_norm": 0.06542754173278809, |
| "learning_rate": 5.005586592178771e-05, |
| "loss": 0.9124, |
| "step": 677 |
| }, |
| { |
| "epoch": 3.7666666666666666, |
| "grad_norm": 0.06325247138738632, |
| "learning_rate": 4.9832402234636874e-05, |
| "loss": 0.7919, |
| "step": 678 |
| }, |
| { |
| "epoch": 3.772222222222222, |
| "grad_norm": 0.06934817135334015, |
| "learning_rate": 4.9608938547486036e-05, |
| "loss": 0.9353, |
| "step": 679 |
| }, |
| { |
| "epoch": 3.7777777777777777, |
| "grad_norm": 0.06293205171823502, |
| "learning_rate": 4.93854748603352e-05, |
| "loss": 0.8681, |
| "step": 680 |
| }, |
| { |
| "epoch": 3.783333333333333, |
| "grad_norm": 0.05741385743021965, |
| "learning_rate": 4.916201117318436e-05, |
| "loss": 0.9049, |
| "step": 681 |
| }, |
| { |
| "epoch": 3.7888888888888888, |
| "grad_norm": 0.06111886352300644, |
| "learning_rate": 4.8938547486033523e-05, |
| "loss": 0.8285, |
| "step": 682 |
| }, |
| { |
| "epoch": 3.7944444444444443, |
| "grad_norm": 0.06742309778928757, |
| "learning_rate": 4.8715083798882686e-05, |
| "loss": 0.8506, |
| "step": 683 |
| }, |
| { |
| "epoch": 3.8, |
| "grad_norm": 0.07073011994361877, |
| "learning_rate": 4.849162011173184e-05, |
| "loss": 0.8671, |
| "step": 684 |
| }, |
| { |
| "epoch": 3.8055555555555554, |
| "grad_norm": 0.05728191137313843, |
| "learning_rate": 4.8268156424581004e-05, |
| "loss": 0.8419, |
| "step": 685 |
| }, |
| { |
| "epoch": 3.811111111111111, |
| "grad_norm": 0.06402203440666199, |
| "learning_rate": 4.8044692737430166e-05, |
| "loss": 0.841, |
| "step": 686 |
| }, |
| { |
| "epoch": 3.8166666666666664, |
| "grad_norm": 0.0669245570898056, |
| "learning_rate": 4.782122905027933e-05, |
| "loss": 0.8425, |
| "step": 687 |
| }, |
| { |
| "epoch": 3.822222222222222, |
| "grad_norm": 0.06659059971570969, |
| "learning_rate": 4.759776536312849e-05, |
| "loss": 0.9707, |
| "step": 688 |
| }, |
| { |
| "epoch": 3.8277777777777775, |
| "grad_norm": 0.06549696624279022, |
| "learning_rate": 4.737430167597765e-05, |
| "loss": 0.9782, |
| "step": 689 |
| }, |
| { |
| "epoch": 3.8333333333333335, |
| "grad_norm": 0.05991567671298981, |
| "learning_rate": 4.7150837988826815e-05, |
| "loss": 0.8978, |
| "step": 690 |
| }, |
| { |
| "epoch": 3.838888888888889, |
| "grad_norm": 0.07296551018953323, |
| "learning_rate": 4.692737430167598e-05, |
| "loss": 0.8963, |
| "step": 691 |
| }, |
| { |
| "epoch": 3.8444444444444446, |
| "grad_norm": 0.07527071982622147, |
| "learning_rate": 4.670391061452514e-05, |
| "loss": 0.8314, |
| "step": 692 |
| }, |
| { |
| "epoch": 3.85, |
| "grad_norm": 0.08402854949235916, |
| "learning_rate": 4.64804469273743e-05, |
| "loss": 0.7886, |
| "step": 693 |
| }, |
| { |
| "epoch": 3.8555555555555556, |
| "grad_norm": 0.06636254489421844, |
| "learning_rate": 4.6256983240223465e-05, |
| "loss": 0.8322, |
| "step": 694 |
| }, |
| { |
| "epoch": 3.861111111111111, |
| "grad_norm": 0.06297782063484192, |
| "learning_rate": 4.603351955307263e-05, |
| "loss": 0.9423, |
| "step": 695 |
| }, |
| { |
| "epoch": 3.8666666666666667, |
| "grad_norm": 0.07213331013917923, |
| "learning_rate": 4.581005586592179e-05, |
| "loss": 0.8553, |
| "step": 696 |
| }, |
| { |
| "epoch": 3.8722222222222222, |
| "grad_norm": 0.0674607902765274, |
| "learning_rate": 4.558659217877095e-05, |
| "loss": 0.8657, |
| "step": 697 |
| }, |
| { |
| "epoch": 3.8777777777777778, |
| "grad_norm": 0.06301897764205933, |
| "learning_rate": 4.5363128491620114e-05, |
| "loss": 0.8108, |
| "step": 698 |
| }, |
| { |
| "epoch": 3.8833333333333333, |
| "grad_norm": 0.059832848608493805, |
| "learning_rate": 4.5139664804469276e-05, |
| "loss": 0.918, |
| "step": 699 |
| }, |
| { |
| "epoch": 3.888888888888889, |
| "grad_norm": 0.06674478203058243, |
| "learning_rate": 4.491620111731844e-05, |
| "loss": 0.8521, |
| "step": 700 |
| }, |
| { |
| "epoch": 3.8944444444444444, |
| "grad_norm": 0.07494413107633591, |
| "learning_rate": 4.4692737430167594e-05, |
| "loss": 0.8805, |
| "step": 701 |
| }, |
| { |
| "epoch": 3.9, |
| "grad_norm": 0.06808764487504959, |
| "learning_rate": 4.4469273743016757e-05, |
| "loss": 0.822, |
| "step": 702 |
| }, |
| { |
| "epoch": 3.9055555555555554, |
| "grad_norm": 0.061348509043455124, |
| "learning_rate": 4.424581005586592e-05, |
| "loss": 0.8472, |
| "step": 703 |
| }, |
| { |
| "epoch": 3.911111111111111, |
| "grad_norm": 0.06749361008405685, |
| "learning_rate": 4.402234636871508e-05, |
| "loss": 0.8881, |
| "step": 704 |
| }, |
| { |
| "epoch": 3.9166666666666665, |
| "grad_norm": 0.05673949047923088, |
| "learning_rate": 4.3798882681564244e-05, |
| "loss": 0.7887, |
| "step": 705 |
| }, |
| { |
| "epoch": 3.9222222222222225, |
| "grad_norm": 0.06126287952065468, |
| "learning_rate": 4.3575418994413406e-05, |
| "loss": 0.9302, |
| "step": 706 |
| }, |
| { |
| "epoch": 3.927777777777778, |
| "grad_norm": 0.06536653637886047, |
| "learning_rate": 4.335195530726257e-05, |
| "loss": 0.8985, |
| "step": 707 |
| }, |
| { |
| "epoch": 3.9333333333333336, |
| "grad_norm": 0.06179942563176155, |
| "learning_rate": 4.312849162011173e-05, |
| "loss": 0.9524, |
| "step": 708 |
| }, |
| { |
| "epoch": 3.938888888888889, |
| "grad_norm": 0.06911918520927429, |
| "learning_rate": 4.290502793296089e-05, |
| "loss": 0.8222, |
| "step": 709 |
| }, |
| { |
| "epoch": 3.9444444444444446, |
| "grad_norm": 0.058220285922288895, |
| "learning_rate": 4.2681564245810055e-05, |
| "loss": 0.7177, |
| "step": 710 |
| }, |
| { |
| "epoch": 3.95, |
| "grad_norm": 0.06330408155918121, |
| "learning_rate": 4.245810055865922e-05, |
| "loss": 0.7577, |
| "step": 711 |
| }, |
| { |
| "epoch": 3.9555555555555557, |
| "grad_norm": 0.06855887174606323, |
| "learning_rate": 4.223463687150838e-05, |
| "loss": 0.927, |
| "step": 712 |
| }, |
| { |
| "epoch": 3.9611111111111112, |
| "grad_norm": 0.0702371895313263, |
| "learning_rate": 4.201117318435754e-05, |
| "loss": 0.7464, |
| "step": 713 |
| }, |
| { |
| "epoch": 3.966666666666667, |
| "grad_norm": 0.07532446086406708, |
| "learning_rate": 4.1787709497206705e-05, |
| "loss": 0.8202, |
| "step": 714 |
| }, |
| { |
| "epoch": 3.9722222222222223, |
| "grad_norm": 0.07662215083837509, |
| "learning_rate": 4.156424581005587e-05, |
| "loss": 0.8381, |
| "step": 715 |
| }, |
| { |
| "epoch": 3.977777777777778, |
| "grad_norm": 0.0621059276163578, |
| "learning_rate": 4.134078212290503e-05, |
| "loss": 0.7477, |
| "step": 716 |
| }, |
| { |
| "epoch": 3.9833333333333334, |
| "grad_norm": 0.06151144951581955, |
| "learning_rate": 4.111731843575419e-05, |
| "loss": 0.8781, |
| "step": 717 |
| }, |
| { |
| "epoch": 3.988888888888889, |
| "grad_norm": 0.0767519623041153, |
| "learning_rate": 4.0893854748603354e-05, |
| "loss": 0.9478, |
| "step": 718 |
| }, |
| { |
| "epoch": 3.9944444444444445, |
| "grad_norm": 0.07194758951663971, |
| "learning_rate": 4.0670391061452516e-05, |
| "loss": 0.7946, |
| "step": 719 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 0.07248340547084808, |
| "learning_rate": 4.044692737430168e-05, |
| "loss": 0.8131, |
| "step": 720 |
| }, |
| { |
| "epoch": 4.0055555555555555, |
| "grad_norm": 0.05817415192723274, |
| "learning_rate": 4.022346368715084e-05, |
| "loss": 0.9452, |
| "step": 721 |
| }, |
| { |
| "epoch": 4.011111111111111, |
| "grad_norm": 0.06506936997175217, |
| "learning_rate": 4e-05, |
| "loss": 0.8357, |
| "step": 722 |
| }, |
| { |
| "epoch": 4.016666666666667, |
| "grad_norm": 0.06475253403186798, |
| "learning_rate": 3.9776536312849166e-05, |
| "loss": 0.8215, |
| "step": 723 |
| }, |
| { |
| "epoch": 4.022222222222222, |
| "grad_norm": 0.06521406769752502, |
| "learning_rate": 3.955307262569833e-05, |
| "loss": 0.8996, |
| "step": 724 |
| }, |
| { |
| "epoch": 4.027777777777778, |
| "grad_norm": 0.06060001254081726, |
| "learning_rate": 3.9329608938547483e-05, |
| "loss": 0.7959, |
| "step": 725 |
| }, |
| { |
| "epoch": 4.033333333333333, |
| "grad_norm": 0.059213463217020035, |
| "learning_rate": 3.9106145251396646e-05, |
| "loss": 0.9704, |
| "step": 726 |
| }, |
| { |
| "epoch": 4.038888888888889, |
| "grad_norm": 0.05990111082792282, |
| "learning_rate": 3.888268156424581e-05, |
| "loss": 0.8538, |
| "step": 727 |
| }, |
| { |
| "epoch": 4.044444444444444, |
| "grad_norm": 0.057270485907793045, |
| "learning_rate": 3.865921787709497e-05, |
| "loss": 0.8653, |
| "step": 728 |
| }, |
| { |
| "epoch": 4.05, |
| "grad_norm": 0.0747293010354042, |
| "learning_rate": 3.843575418994413e-05, |
| "loss": 0.9716, |
| "step": 729 |
| }, |
| { |
| "epoch": 4.055555555555555, |
| "grad_norm": 0.06068810448050499, |
| "learning_rate": 3.8212290502793295e-05, |
| "loss": 0.8447, |
| "step": 730 |
| }, |
| { |
| "epoch": 4.061111111111111, |
| "grad_norm": 0.0651971846818924, |
| "learning_rate": 3.798882681564246e-05, |
| "loss": 0.7749, |
| "step": 731 |
| }, |
| { |
| "epoch": 4.066666666666666, |
| "grad_norm": 0.07306065410375595, |
| "learning_rate": 3.776536312849162e-05, |
| "loss": 0.9461, |
| "step": 732 |
| }, |
| { |
| "epoch": 4.072222222222222, |
| "grad_norm": 0.06800976395606995, |
| "learning_rate": 3.754189944134078e-05, |
| "loss": 0.8621, |
| "step": 733 |
| }, |
| { |
| "epoch": 4.0777777777777775, |
| "grad_norm": 0.07779917120933533, |
| "learning_rate": 3.7318435754189944e-05, |
| "loss": 0.8329, |
| "step": 734 |
| }, |
| { |
| "epoch": 4.083333333333333, |
| "grad_norm": 0.07067432999610901, |
| "learning_rate": 3.709497206703911e-05, |
| "loss": 0.8911, |
| "step": 735 |
| }, |
| { |
| "epoch": 4.088888888888889, |
| "grad_norm": 0.0689227506518364, |
| "learning_rate": 3.687150837988827e-05, |
| "loss": 0.8637, |
| "step": 736 |
| }, |
| { |
| "epoch": 4.094444444444444, |
| "grad_norm": 0.07578172534704208, |
| "learning_rate": 3.664804469273743e-05, |
| "loss": 0.7896, |
| "step": 737 |
| }, |
| { |
| "epoch": 4.1, |
| "grad_norm": 0.06386541575193405, |
| "learning_rate": 3.6424581005586594e-05, |
| "loss": 0.814, |
| "step": 738 |
| }, |
| { |
| "epoch": 4.105555555555555, |
| "grad_norm": 0.0776628702878952, |
| "learning_rate": 3.6201117318435756e-05, |
| "loss": 0.8236, |
| "step": 739 |
| }, |
| { |
| "epoch": 4.111111111111111, |
| "grad_norm": 0.07262839376926422, |
| "learning_rate": 3.597765363128492e-05, |
| "loss": 0.7797, |
| "step": 740 |
| }, |
| { |
| "epoch": 4.116666666666666, |
| "grad_norm": 0.06866869330406189, |
| "learning_rate": 3.575418994413408e-05, |
| "loss": 0.7972, |
| "step": 741 |
| }, |
| { |
| "epoch": 4.122222222222222, |
| "grad_norm": 0.0663917064666748, |
| "learning_rate": 3.553072625698324e-05, |
| "loss": 0.9338, |
| "step": 742 |
| }, |
| { |
| "epoch": 4.127777777777778, |
| "grad_norm": 0.06902816146612167, |
| "learning_rate": 3.5307262569832406e-05, |
| "loss": 1.0073, |
| "step": 743 |
| }, |
| { |
| "epoch": 4.133333333333334, |
| "grad_norm": 0.06993508338928223, |
| "learning_rate": 3.508379888268157e-05, |
| "loss": 0.7839, |
| "step": 744 |
| }, |
| { |
| "epoch": 4.138888888888889, |
| "grad_norm": 0.0647396519780159, |
| "learning_rate": 3.486033519553073e-05, |
| "loss": 0.8312, |
| "step": 745 |
| }, |
| { |
| "epoch": 4.144444444444445, |
| "grad_norm": 0.053603801876306534, |
| "learning_rate": 3.463687150837989e-05, |
| "loss": 0.7799, |
| "step": 746 |
| }, |
| { |
| "epoch": 4.15, |
| "grad_norm": 0.06209754943847656, |
| "learning_rate": 3.4413407821229055e-05, |
| "loss": 0.8136, |
| "step": 747 |
| }, |
| { |
| "epoch": 4.155555555555556, |
| "grad_norm": 0.09486910700798035, |
| "learning_rate": 3.418994413407821e-05, |
| "loss": 0.6833, |
| "step": 748 |
| }, |
| { |
| "epoch": 4.161111111111111, |
| "grad_norm": 0.06692782789468765, |
| "learning_rate": 3.396648044692737e-05, |
| "loss": 0.8619, |
| "step": 749 |
| }, |
| { |
| "epoch": 4.166666666666667, |
| "grad_norm": 0.05541648343205452, |
| "learning_rate": 3.3743016759776535e-05, |
| "loss": 0.8244, |
| "step": 750 |
| }, |
| { |
| "epoch": 4.1722222222222225, |
| "grad_norm": 0.06439889967441559, |
| "learning_rate": 3.35195530726257e-05, |
| "loss": 0.8413, |
| "step": 751 |
| }, |
| { |
| "epoch": 4.177777777777778, |
| "grad_norm": 0.07166414707899094, |
| "learning_rate": 3.329608938547486e-05, |
| "loss": 0.989, |
| "step": 752 |
| }, |
| { |
| "epoch": 4.183333333333334, |
| "grad_norm": 0.0640282854437828, |
| "learning_rate": 3.307262569832402e-05, |
| "loss": 0.9962, |
| "step": 753 |
| }, |
| { |
| "epoch": 4.188888888888889, |
| "grad_norm": 0.07067931443452835, |
| "learning_rate": 3.2849162011173184e-05, |
| "loss": 0.7365, |
| "step": 754 |
| }, |
| { |
| "epoch": 4.194444444444445, |
| "grad_norm": 0.07782124727964401, |
| "learning_rate": 3.262569832402235e-05, |
| "loss": 0.7209, |
| "step": 755 |
| }, |
| { |
| "epoch": 4.2, |
| "grad_norm": 0.06391673535108566, |
| "learning_rate": 3.240223463687151e-05, |
| "loss": 0.8479, |
| "step": 756 |
| }, |
| { |
| "epoch": 4.205555555555556, |
| "grad_norm": 0.06089261546730995, |
| "learning_rate": 3.217877094972067e-05, |
| "loss": 0.7635, |
| "step": 757 |
| }, |
| { |
| "epoch": 4.211111111111111, |
| "grad_norm": 0.06284917145967484, |
| "learning_rate": 3.1955307262569834e-05, |
| "loss": 0.9128, |
| "step": 758 |
| }, |
| { |
| "epoch": 4.216666666666667, |
| "grad_norm": 0.060241833329200745, |
| "learning_rate": 3.1731843575418996e-05, |
| "loss": 0.8521, |
| "step": 759 |
| }, |
| { |
| "epoch": 4.222222222222222, |
| "grad_norm": 0.07311747968196869, |
| "learning_rate": 3.150837988826816e-05, |
| "loss": 0.7396, |
| "step": 760 |
| }, |
| { |
| "epoch": 4.227777777777778, |
| "grad_norm": 0.06416069716215134, |
| "learning_rate": 3.128491620111732e-05, |
| "loss": 0.8738, |
| "step": 761 |
| }, |
| { |
| "epoch": 4.233333333333333, |
| "grad_norm": 0.0591534860432148, |
| "learning_rate": 3.106145251396648e-05, |
| "loss": 0.8741, |
| "step": 762 |
| }, |
| { |
| "epoch": 4.238888888888889, |
| "grad_norm": 0.06801345944404602, |
| "learning_rate": 3.0837988826815645e-05, |
| "loss": 1.0116, |
| "step": 763 |
| }, |
| { |
| "epoch": 4.2444444444444445, |
| "grad_norm": 0.06485697627067566, |
| "learning_rate": 3.061452513966481e-05, |
| "loss": 0.8626, |
| "step": 764 |
| }, |
| { |
| "epoch": 4.25, |
| "grad_norm": 0.07373441010713577, |
| "learning_rate": 3.039106145251397e-05, |
| "loss": 0.7986, |
| "step": 765 |
| }, |
| { |
| "epoch": 4.2555555555555555, |
| "grad_norm": 0.07167431712150574, |
| "learning_rate": 3.0167597765363132e-05, |
| "loss": 0.9507, |
| "step": 766 |
| }, |
| { |
| "epoch": 4.261111111111111, |
| "grad_norm": 0.06408189237117767, |
| "learning_rate": 2.9944134078212295e-05, |
| "loss": 0.7867, |
| "step": 767 |
| }, |
| { |
| "epoch": 4.266666666666667, |
| "grad_norm": 0.07456216216087341, |
| "learning_rate": 2.9720670391061457e-05, |
| "loss": 0.934, |
| "step": 768 |
| }, |
| { |
| "epoch": 4.272222222222222, |
| "grad_norm": 0.060554083436727524, |
| "learning_rate": 2.949720670391062e-05, |
| "loss": 0.8407, |
| "step": 769 |
| }, |
| { |
| "epoch": 4.277777777777778, |
| "grad_norm": 0.07266189157962799, |
| "learning_rate": 2.9273743016759782e-05, |
| "loss": 0.9627, |
| "step": 770 |
| }, |
| { |
| "epoch": 4.283333333333333, |
| "grad_norm": 0.06674201786518097, |
| "learning_rate": 2.9050279329608944e-05, |
| "loss": 0.9316, |
| "step": 771 |
| }, |
| { |
| "epoch": 4.288888888888889, |
| "grad_norm": 0.06580832600593567, |
| "learning_rate": 2.88268156424581e-05, |
| "loss": 0.7616, |
| "step": 772 |
| }, |
| { |
| "epoch": 4.294444444444444, |
| "grad_norm": 0.067823126912117, |
| "learning_rate": 2.8603351955307262e-05, |
| "loss": 0.7364, |
| "step": 773 |
| }, |
| { |
| "epoch": 4.3, |
| "grad_norm": 0.06775198131799698, |
| "learning_rate": 2.8379888268156424e-05, |
| "loss": 0.9093, |
| "step": 774 |
| }, |
| { |
| "epoch": 4.305555555555555, |
| "grad_norm": 0.06065399944782257, |
| "learning_rate": 2.8156424581005587e-05, |
| "loss": 0.6999, |
| "step": 775 |
| }, |
| { |
| "epoch": 4.311111111111111, |
| "grad_norm": 0.06072010472416878, |
| "learning_rate": 2.793296089385475e-05, |
| "loss": 0.8397, |
| "step": 776 |
| }, |
| { |
| "epoch": 4.316666666666666, |
| "grad_norm": 0.06833003461360931, |
| "learning_rate": 2.770949720670391e-05, |
| "loss": 0.7948, |
| "step": 777 |
| }, |
| { |
| "epoch": 4.322222222222222, |
| "grad_norm": 0.06961791962385178, |
| "learning_rate": 2.7486033519553074e-05, |
| "loss": 0.8539, |
| "step": 778 |
| }, |
| { |
| "epoch": 4.3277777777777775, |
| "grad_norm": 0.07114412635564804, |
| "learning_rate": 2.7262569832402236e-05, |
| "loss": 0.835, |
| "step": 779 |
| }, |
| { |
| "epoch": 4.333333333333333, |
| "grad_norm": 0.07904283702373505, |
| "learning_rate": 2.7039106145251398e-05, |
| "loss": 0.7917, |
| "step": 780 |
| }, |
| { |
| "epoch": 4.338888888888889, |
| "grad_norm": 0.06877896934747696, |
| "learning_rate": 2.681564245810056e-05, |
| "loss": 0.9066, |
| "step": 781 |
| }, |
| { |
| "epoch": 4.344444444444444, |
| "grad_norm": 0.06604032218456268, |
| "learning_rate": 2.6592178770949723e-05, |
| "loss": 0.8103, |
| "step": 782 |
| }, |
| { |
| "epoch": 4.35, |
| "grad_norm": 0.06570107489824295, |
| "learning_rate": 2.6368715083798885e-05, |
| "loss": 0.9216, |
| "step": 783 |
| }, |
| { |
| "epoch": 4.355555555555555, |
| "grad_norm": 0.0643831342458725, |
| "learning_rate": 2.6145251396648048e-05, |
| "loss": 0.8306, |
| "step": 784 |
| }, |
| { |
| "epoch": 4.361111111111111, |
| "grad_norm": 0.06995333731174469, |
| "learning_rate": 2.592178770949721e-05, |
| "loss": 0.8415, |
| "step": 785 |
| }, |
| { |
| "epoch": 4.366666666666666, |
| "grad_norm": 0.06058323010802269, |
| "learning_rate": 2.569832402234637e-05, |
| "loss": 0.9729, |
| "step": 786 |
| }, |
| { |
| "epoch": 4.372222222222222, |
| "grad_norm": 0.06180157512426376, |
| "learning_rate": 2.547486033519553e-05, |
| "loss": 0.8585, |
| "step": 787 |
| }, |
| { |
| "epoch": 4.377777777777778, |
| "grad_norm": 0.07014794647693634, |
| "learning_rate": 2.5251396648044694e-05, |
| "loss": 0.7898, |
| "step": 788 |
| }, |
| { |
| "epoch": 4.383333333333334, |
| "grad_norm": 0.06525201350450516, |
| "learning_rate": 2.5027932960893856e-05, |
| "loss": 0.8687, |
| "step": 789 |
| }, |
| { |
| "epoch": 4.388888888888889, |
| "grad_norm": 0.07381299883127213, |
| "learning_rate": 2.4804469273743018e-05, |
| "loss": 0.7705, |
| "step": 790 |
| }, |
| { |
| "epoch": 4.394444444444445, |
| "grad_norm": 0.06867001950740814, |
| "learning_rate": 2.458100558659218e-05, |
| "loss": 0.8818, |
| "step": 791 |
| }, |
| { |
| "epoch": 4.4, |
| "grad_norm": 0.08557435870170593, |
| "learning_rate": 2.4357541899441343e-05, |
| "loss": 0.7607, |
| "step": 792 |
| }, |
| { |
| "epoch": 4.405555555555556, |
| "grad_norm": 0.0627717450261116, |
| "learning_rate": 2.4134078212290502e-05, |
| "loss": 0.8485, |
| "step": 793 |
| }, |
| { |
| "epoch": 4.411111111111111, |
| "grad_norm": 0.06512073427438736, |
| "learning_rate": 2.3910614525139664e-05, |
| "loss": 0.9801, |
| "step": 794 |
| }, |
| { |
| "epoch": 4.416666666666667, |
| "grad_norm": 0.07923205196857452, |
| "learning_rate": 2.3687150837988827e-05, |
| "loss": 0.9049, |
| "step": 795 |
| }, |
| { |
| "epoch": 4.4222222222222225, |
| "grad_norm": 0.06704343855381012, |
| "learning_rate": 2.346368715083799e-05, |
| "loss": 0.8302, |
| "step": 796 |
| }, |
| { |
| "epoch": 4.427777777777778, |
| "grad_norm": 0.06392168998718262, |
| "learning_rate": 2.324022346368715e-05, |
| "loss": 0.7922, |
| "step": 797 |
| }, |
| { |
| "epoch": 4.433333333333334, |
| "grad_norm": 0.06558392196893692, |
| "learning_rate": 2.3016759776536314e-05, |
| "loss": 0.8636, |
| "step": 798 |
| }, |
| { |
| "epoch": 4.438888888888889, |
| "grad_norm": 0.06815050542354584, |
| "learning_rate": 2.2793296089385476e-05, |
| "loss": 0.836, |
| "step": 799 |
| }, |
| { |
| "epoch": 4.444444444444445, |
| "grad_norm": 0.06234079599380493, |
| "learning_rate": 2.2569832402234638e-05, |
| "loss": 0.863, |
| "step": 800 |
| }, |
| { |
| "epoch": 4.45, |
| "grad_norm": 0.060776371508836746, |
| "learning_rate": 2.2346368715083797e-05, |
| "loss": 0.768, |
| "step": 801 |
| }, |
| { |
| "epoch": 4.455555555555556, |
| "grad_norm": 0.06404553353786469, |
| "learning_rate": 2.212290502793296e-05, |
| "loss": 0.8383, |
| "step": 802 |
| }, |
| { |
| "epoch": 4.461111111111111, |
| "grad_norm": 0.0616544634103775, |
| "learning_rate": 2.1899441340782122e-05, |
| "loss": 0.9098, |
| "step": 803 |
| }, |
| { |
| "epoch": 4.466666666666667, |
| "grad_norm": 0.06308100372552872, |
| "learning_rate": 2.1675977653631284e-05, |
| "loss": 0.838, |
| "step": 804 |
| }, |
| { |
| "epoch": 4.472222222222222, |
| "grad_norm": 0.06575177609920502, |
| "learning_rate": 2.1452513966480446e-05, |
| "loss": 0.9344, |
| "step": 805 |
| }, |
| { |
| "epoch": 4.477777777777778, |
| "grad_norm": 0.073246531188488, |
| "learning_rate": 2.122905027932961e-05, |
| "loss": 0.8616, |
| "step": 806 |
| }, |
| { |
| "epoch": 4.483333333333333, |
| "grad_norm": 0.06593457609415054, |
| "learning_rate": 2.100558659217877e-05, |
| "loss": 0.7611, |
| "step": 807 |
| }, |
| { |
| "epoch": 4.488888888888889, |
| "grad_norm": 0.06681575626134872, |
| "learning_rate": 2.0782122905027933e-05, |
| "loss": 0.913, |
| "step": 808 |
| }, |
| { |
| "epoch": 4.4944444444444445, |
| "grad_norm": 0.07066091895103455, |
| "learning_rate": 2.0558659217877096e-05, |
| "loss": 0.8538, |
| "step": 809 |
| }, |
| { |
| "epoch": 4.5, |
| "grad_norm": 0.08290861546993256, |
| "learning_rate": 2.0335195530726258e-05, |
| "loss": 0.8322, |
| "step": 810 |
| }, |
| { |
| "epoch": 4.5055555555555555, |
| "grad_norm": 0.07008577138185501, |
| "learning_rate": 2.011173184357542e-05, |
| "loss": 0.7787, |
| "step": 811 |
| }, |
| { |
| "epoch": 4.511111111111111, |
| "grad_norm": 0.05752347782254219, |
| "learning_rate": 1.9888268156424583e-05, |
| "loss": 0.8318, |
| "step": 812 |
| }, |
| { |
| "epoch": 4.516666666666667, |
| "grad_norm": 0.06387963891029358, |
| "learning_rate": 1.9664804469273742e-05, |
| "loss": 0.9929, |
| "step": 813 |
| }, |
| { |
| "epoch": 4.522222222222222, |
| "grad_norm": 0.07318349927663803, |
| "learning_rate": 1.9441340782122904e-05, |
| "loss": 0.7613, |
| "step": 814 |
| }, |
| { |
| "epoch": 4.527777777777778, |
| "grad_norm": 0.06772953271865845, |
| "learning_rate": 1.9217877094972066e-05, |
| "loss": 0.8136, |
| "step": 815 |
| }, |
| { |
| "epoch": 4.533333333333333, |
| "grad_norm": 0.07693421840667725, |
| "learning_rate": 1.899441340782123e-05, |
| "loss": 0.9406, |
| "step": 816 |
| }, |
| { |
| "epoch": 4.538888888888889, |
| "grad_norm": 0.06594596058130264, |
| "learning_rate": 1.877094972067039e-05, |
| "loss": 0.8637, |
| "step": 817 |
| }, |
| { |
| "epoch": 4.544444444444444, |
| "grad_norm": 0.06739532947540283, |
| "learning_rate": 1.8547486033519553e-05, |
| "loss": 0.9043, |
| "step": 818 |
| }, |
| { |
| "epoch": 4.55, |
| "grad_norm": 0.07356348633766174, |
| "learning_rate": 1.8324022346368716e-05, |
| "loss": 0.9696, |
| "step": 819 |
| }, |
| { |
| "epoch": 4.555555555555555, |
| "grad_norm": 0.07079844176769257, |
| "learning_rate": 1.8100558659217878e-05, |
| "loss": 0.9524, |
| "step": 820 |
| }, |
| { |
| "epoch": 4.561111111111111, |
| "grad_norm": 0.07685678452253342, |
| "learning_rate": 1.787709497206704e-05, |
| "loss": 0.7816, |
| "step": 821 |
| }, |
| { |
| "epoch": 4.566666666666666, |
| "grad_norm": 0.06801366806030273, |
| "learning_rate": 1.7653631284916203e-05, |
| "loss": 0.7547, |
| "step": 822 |
| }, |
| { |
| "epoch": 4.572222222222222, |
| "grad_norm": 0.06588180363178253, |
| "learning_rate": 1.7430167597765365e-05, |
| "loss": 0.8329, |
| "step": 823 |
| }, |
| { |
| "epoch": 4.5777777777777775, |
| "grad_norm": 0.06872644275426865, |
| "learning_rate": 1.7206703910614527e-05, |
| "loss": 0.8596, |
| "step": 824 |
| }, |
| { |
| "epoch": 4.583333333333333, |
| "grad_norm": 0.07037709653377533, |
| "learning_rate": 1.6983240223463686e-05, |
| "loss": 0.8422, |
| "step": 825 |
| }, |
| { |
| "epoch": 4.588888888888889, |
| "grad_norm": 0.05817841365933418, |
| "learning_rate": 1.675977653631285e-05, |
| "loss": 0.9255, |
| "step": 826 |
| }, |
| { |
| "epoch": 4.594444444444444, |
| "grad_norm": 0.06256680935621262, |
| "learning_rate": 1.653631284916201e-05, |
| "loss": 0.9732, |
| "step": 827 |
| }, |
| { |
| "epoch": 4.6, |
| "grad_norm": 0.06470783799886703, |
| "learning_rate": 1.6312849162011173e-05, |
| "loss": 1.0082, |
| "step": 828 |
| }, |
| { |
| "epoch": 4.605555555555555, |
| "grad_norm": 0.06321726739406586, |
| "learning_rate": 1.6089385474860336e-05, |
| "loss": 0.7958, |
| "step": 829 |
| }, |
| { |
| "epoch": 4.611111111111111, |
| "grad_norm": 0.06101881340146065, |
| "learning_rate": 1.5865921787709498e-05, |
| "loss": 0.9288, |
| "step": 830 |
| }, |
| { |
| "epoch": 4.616666666666667, |
| "grad_norm": 0.061096347868442535, |
| "learning_rate": 1.564245810055866e-05, |
| "loss": 0.8065, |
| "step": 831 |
| }, |
| { |
| "epoch": 4.622222222222222, |
| "grad_norm": 0.07092749327421188, |
| "learning_rate": 1.5418994413407823e-05, |
| "loss": 0.9874, |
| "step": 832 |
| }, |
| { |
| "epoch": 4.627777777777778, |
| "grad_norm": 0.06334253400564194, |
| "learning_rate": 1.5195530726256985e-05, |
| "loss": 0.8788, |
| "step": 833 |
| }, |
| { |
| "epoch": 4.633333333333333, |
| "grad_norm": 0.06317714601755142, |
| "learning_rate": 1.4972067039106147e-05, |
| "loss": 0.864, |
| "step": 834 |
| }, |
| { |
| "epoch": 4.638888888888889, |
| "grad_norm": 0.06688254326581955, |
| "learning_rate": 1.474860335195531e-05, |
| "loss": 0.9415, |
| "step": 835 |
| }, |
| { |
| "epoch": 4.644444444444445, |
| "grad_norm": 0.07368700951337814, |
| "learning_rate": 1.4525139664804472e-05, |
| "loss": 0.8026, |
| "step": 836 |
| }, |
| { |
| "epoch": 4.65, |
| "grad_norm": 0.06767589598894119, |
| "learning_rate": 1.4301675977653631e-05, |
| "loss": 0.7902, |
| "step": 837 |
| }, |
| { |
| "epoch": 4.655555555555556, |
| "grad_norm": 0.06546707451343536, |
| "learning_rate": 1.4078212290502793e-05, |
| "loss": 0.8749, |
| "step": 838 |
| }, |
| { |
| "epoch": 4.661111111111111, |
| "grad_norm": 0.06208932027220726, |
| "learning_rate": 1.3854748603351956e-05, |
| "loss": 0.8563, |
| "step": 839 |
| }, |
| { |
| "epoch": 4.666666666666667, |
| "grad_norm": 0.07527874410152435, |
| "learning_rate": 1.3631284916201118e-05, |
| "loss": 0.8443, |
| "step": 840 |
| }, |
| { |
| "epoch": 4.6722222222222225, |
| "grad_norm": 0.06952167302370071, |
| "learning_rate": 1.340782122905028e-05, |
| "loss": 0.8012, |
| "step": 841 |
| }, |
| { |
| "epoch": 4.677777777777778, |
| "grad_norm": 0.06032046675682068, |
| "learning_rate": 1.3184357541899443e-05, |
| "loss": 0.8254, |
| "step": 842 |
| }, |
| { |
| "epoch": 4.683333333333334, |
| "grad_norm": 0.057982437312603, |
| "learning_rate": 1.2960893854748605e-05, |
| "loss": 0.9092, |
| "step": 843 |
| }, |
| { |
| "epoch": 4.688888888888889, |
| "grad_norm": 0.07537980377674103, |
| "learning_rate": 1.2737430167597766e-05, |
| "loss": 0.8206, |
| "step": 844 |
| }, |
| { |
| "epoch": 4.694444444444445, |
| "grad_norm": 0.0689520314335823, |
| "learning_rate": 1.2513966480446928e-05, |
| "loss": 0.8488, |
| "step": 845 |
| }, |
| { |
| "epoch": 4.7, |
| "grad_norm": 0.0652664303779602, |
| "learning_rate": 1.229050279329609e-05, |
| "loss": 0.8944, |
| "step": 846 |
| }, |
| { |
| "epoch": 4.705555555555556, |
| "grad_norm": 0.05868719518184662, |
| "learning_rate": 1.2067039106145251e-05, |
| "loss": 0.8372, |
| "step": 847 |
| }, |
| { |
| "epoch": 4.711111111111111, |
| "grad_norm": 0.061663344502449036, |
| "learning_rate": 1.1843575418994413e-05, |
| "loss": 0.8319, |
| "step": 848 |
| }, |
| { |
| "epoch": 4.716666666666667, |
| "grad_norm": 0.06301644444465637, |
| "learning_rate": 1.1620111731843576e-05, |
| "loss": 0.8292, |
| "step": 849 |
| }, |
| { |
| "epoch": 4.722222222222222, |
| "grad_norm": 0.06448386609554291, |
| "learning_rate": 1.1396648044692738e-05, |
| "loss": 0.9751, |
| "step": 850 |
| }, |
| { |
| "epoch": 4.727777777777778, |
| "grad_norm": 0.06298605352640152, |
| "learning_rate": 1.1173184357541899e-05, |
| "loss": 0.9132, |
| "step": 851 |
| }, |
| { |
| "epoch": 4.733333333333333, |
| "grad_norm": 0.0622861348092556, |
| "learning_rate": 1.0949720670391061e-05, |
| "loss": 0.851, |
| "step": 852 |
| }, |
| { |
| "epoch": 4.738888888888889, |
| "grad_norm": 0.07923610508441925, |
| "learning_rate": 1.0726256983240223e-05, |
| "loss": 0.8115, |
| "step": 853 |
| }, |
| { |
| "epoch": 4.7444444444444445, |
| "grad_norm": 0.06684275716543198, |
| "learning_rate": 1.0502793296089386e-05, |
| "loss": 0.8796, |
| "step": 854 |
| }, |
| { |
| "epoch": 4.75, |
| "grad_norm": 0.058858904987573624, |
| "learning_rate": 1.0279329608938548e-05, |
| "loss": 0.786, |
| "step": 855 |
| }, |
| { |
| "epoch": 4.7555555555555555, |
| "grad_norm": 0.05823403596878052, |
| "learning_rate": 1.005586592178771e-05, |
| "loss": 0.9475, |
| "step": 856 |
| }, |
| { |
| "epoch": 4.761111111111111, |
| "grad_norm": 0.06352592259645462, |
| "learning_rate": 9.832402234636871e-06, |
| "loss": 0.8574, |
| "step": 857 |
| }, |
| { |
| "epoch": 4.766666666666667, |
| "grad_norm": 0.05929254740476608, |
| "learning_rate": 9.608938547486033e-06, |
| "loss": 0.7926, |
| "step": 858 |
| }, |
| { |
| "epoch": 4.772222222222222, |
| "grad_norm": 0.07241322845220566, |
| "learning_rate": 9.385474860335196e-06, |
| "loss": 0.8635, |
| "step": 859 |
| }, |
| { |
| "epoch": 4.777777777777778, |
| "grad_norm": 0.066829152405262, |
| "learning_rate": 9.162011173184358e-06, |
| "loss": 0.8275, |
| "step": 860 |
| }, |
| { |
| "epoch": 4.783333333333333, |
| "grad_norm": 0.05805254727602005, |
| "learning_rate": 8.93854748603352e-06, |
| "loss": 0.9695, |
| "step": 861 |
| }, |
| { |
| "epoch": 4.788888888888889, |
| "grad_norm": 0.06599583476781845, |
| "learning_rate": 8.715083798882683e-06, |
| "loss": 0.9317, |
| "step": 862 |
| }, |
| { |
| "epoch": 4.794444444444444, |
| "grad_norm": 0.0690189003944397, |
| "learning_rate": 8.491620111731843e-06, |
| "loss": 0.719, |
| "step": 863 |
| }, |
| { |
| "epoch": 4.8, |
| "grad_norm": 0.0718541145324707, |
| "learning_rate": 8.268156424581006e-06, |
| "loss": 0.9752, |
| "step": 864 |
| }, |
| { |
| "epoch": 4.805555555555555, |
| "grad_norm": 0.06498520821332932, |
| "learning_rate": 8.044692737430168e-06, |
| "loss": 0.8896, |
| "step": 865 |
| }, |
| { |
| "epoch": 4.811111111111111, |
| "grad_norm": 0.06741782277822495, |
| "learning_rate": 7.82122905027933e-06, |
| "loss": 0.7783, |
| "step": 866 |
| }, |
| { |
| "epoch": 4.816666666666666, |
| "grad_norm": 0.05603065341711044, |
| "learning_rate": 7.5977653631284925e-06, |
| "loss": 0.7643, |
| "step": 867 |
| }, |
| { |
| "epoch": 4.822222222222222, |
| "grad_norm": 0.07300734519958496, |
| "learning_rate": 7.374301675977655e-06, |
| "loss": 0.8683, |
| "step": 868 |
| }, |
| { |
| "epoch": 4.8277777777777775, |
| "grad_norm": 0.06486313790082932, |
| "learning_rate": 7.1508379888268155e-06, |
| "loss": 0.9376, |
| "step": 869 |
| }, |
| { |
| "epoch": 4.833333333333333, |
| "grad_norm": 0.06596938520669937, |
| "learning_rate": 6.927374301675978e-06, |
| "loss": 0.7932, |
| "step": 870 |
| }, |
| { |
| "epoch": 4.838888888888889, |
| "grad_norm": 0.06245023012161255, |
| "learning_rate": 6.70391061452514e-06, |
| "loss": 0.7901, |
| "step": 871 |
| }, |
| { |
| "epoch": 4.844444444444444, |
| "grad_norm": 0.058026690036058426, |
| "learning_rate": 6.4804469273743025e-06, |
| "loss": 0.7906, |
| "step": 872 |
| }, |
| { |
| "epoch": 4.85, |
| "grad_norm": 0.07333751022815704, |
| "learning_rate": 6.256983240223464e-06, |
| "loss": 0.7123, |
| "step": 873 |
| }, |
| { |
| "epoch": 4.855555555555555, |
| "grad_norm": 0.0697869285941124, |
| "learning_rate": 6.0335195530726255e-06, |
| "loss": 0.8697, |
| "step": 874 |
| }, |
| { |
| "epoch": 4.861111111111111, |
| "grad_norm": 0.06364396214485168, |
| "learning_rate": 5.810055865921788e-06, |
| "loss": 0.9212, |
| "step": 875 |
| }, |
| { |
| "epoch": 4.866666666666667, |
| "grad_norm": 0.07756305485963821, |
| "learning_rate": 5.586592178770949e-06, |
| "loss": 0.7329, |
| "step": 876 |
| }, |
| { |
| "epoch": 4.872222222222222, |
| "grad_norm": 0.07673313468694687, |
| "learning_rate": 5.363128491620112e-06, |
| "loss": 0.9544, |
| "step": 877 |
| }, |
| { |
| "epoch": 4.877777777777778, |
| "grad_norm": 0.06823701411485672, |
| "learning_rate": 5.139664804469274e-06, |
| "loss": 0.8417, |
| "step": 878 |
| }, |
| { |
| "epoch": 4.883333333333333, |
| "grad_norm": 0.06566136330366135, |
| "learning_rate": 4.9162011173184354e-06, |
| "loss": 0.9043, |
| "step": 879 |
| }, |
| { |
| "epoch": 4.888888888888889, |
| "grad_norm": 0.07561615109443665, |
| "learning_rate": 4.692737430167598e-06, |
| "loss": 0.9289, |
| "step": 880 |
| }, |
| { |
| "epoch": 4.894444444444445, |
| "grad_norm": 0.0714927464723587, |
| "learning_rate": 4.46927374301676e-06, |
| "loss": 0.9992, |
| "step": 881 |
| }, |
| { |
| "epoch": 4.9, |
| "grad_norm": 0.07412128895521164, |
| "learning_rate": 4.245810055865922e-06, |
| "loss": 0.8937, |
| "step": 882 |
| }, |
| { |
| "epoch": 4.905555555555556, |
| "grad_norm": 0.06256308406591415, |
| "learning_rate": 4.022346368715084e-06, |
| "loss": 0.9327, |
| "step": 883 |
| }, |
| { |
| "epoch": 4.911111111111111, |
| "grad_norm": 0.06401494145393372, |
| "learning_rate": 3.7988826815642463e-06, |
| "loss": 0.9113, |
| "step": 884 |
| }, |
| { |
| "epoch": 4.916666666666667, |
| "grad_norm": 0.06480543315410614, |
| "learning_rate": 3.5754189944134077e-06, |
| "loss": 0.8912, |
| "step": 885 |
| }, |
| { |
| "epoch": 4.9222222222222225, |
| "grad_norm": 0.06543062627315521, |
| "learning_rate": 3.35195530726257e-06, |
| "loss": 0.8768, |
| "step": 886 |
| }, |
| { |
| "epoch": 4.927777777777778, |
| "grad_norm": 0.057017501443624496, |
| "learning_rate": 3.128491620111732e-06, |
| "loss": 0.7779, |
| "step": 887 |
| }, |
| { |
| "epoch": 4.933333333333334, |
| "grad_norm": 0.0745752677321434, |
| "learning_rate": 2.905027932960894e-06, |
| "loss": 0.8376, |
| "step": 888 |
| }, |
| { |
| "epoch": 4.938888888888889, |
| "grad_norm": 0.06534884124994278, |
| "learning_rate": 2.681564245810056e-06, |
| "loss": 0.8647, |
| "step": 889 |
| }, |
| { |
| "epoch": 4.944444444444445, |
| "grad_norm": 0.07408112287521362, |
| "learning_rate": 2.4581005586592177e-06, |
| "loss": 0.8212, |
| "step": 890 |
| }, |
| { |
| "epoch": 4.95, |
| "grad_norm": 0.06152744218707085, |
| "learning_rate": 2.23463687150838e-06, |
| "loss": 0.9138, |
| "step": 891 |
| }, |
| { |
| "epoch": 4.955555555555556, |
| "grad_norm": 0.06905697286128998, |
| "learning_rate": 2.011173184357542e-06, |
| "loss": 0.9246, |
| "step": 892 |
| }, |
| { |
| "epoch": 4.961111111111111, |
| "grad_norm": 0.06602156907320023, |
| "learning_rate": 1.7877094972067039e-06, |
| "loss": 0.8739, |
| "step": 893 |
| }, |
| { |
| "epoch": 4.966666666666667, |
| "grad_norm": 0.08502914756536484, |
| "learning_rate": 1.564245810055866e-06, |
| "loss": 0.7435, |
| "step": 894 |
| }, |
| { |
| "epoch": 4.972222222222222, |
| "grad_norm": 0.06289546936750412, |
| "learning_rate": 1.340782122905028e-06, |
| "loss": 0.864, |
| "step": 895 |
| }, |
| { |
| "epoch": 4.977777777777778, |
| "grad_norm": 0.05942307412624359, |
| "learning_rate": 1.11731843575419e-06, |
| "loss": 0.8965, |
| "step": 896 |
| }, |
| { |
| "epoch": 4.983333333333333, |
| "grad_norm": 0.056906238198280334, |
| "learning_rate": 8.938547486033519e-07, |
| "loss": 0.8629, |
| "step": 897 |
| }, |
| { |
| "epoch": 4.988888888888889, |
| "grad_norm": 0.06804513931274414, |
| "learning_rate": 6.70391061452514e-07, |
| "loss": 0.7078, |
| "step": 898 |
| }, |
| { |
| "epoch": 4.9944444444444445, |
| "grad_norm": 0.05733992159366608, |
| "learning_rate": 4.4692737430167597e-07, |
| "loss": 0.882, |
| "step": 899 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 0.05951790511608124, |
| "learning_rate": 2.2346368715083798e-07, |
| "loss": 0.8055, |
| "step": 900 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 900, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 300, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 9.924638745821184e+18, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|