{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.988929889298893,
  "eval_steps": 500,
  "global_step": 540,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.005535055350553505, "grad_norm": 4.88795566772026, "learning_rate": 1.8518518518518518e-07, "loss": 0.8842, "step": 1 },
    { "epoch": 0.01107011070110701, "grad_norm": 5.294943049287627, "learning_rate": 3.7037037037037036e-07, "loss": 0.9582, "step": 2 },
    { "epoch": 0.016605166051660517, "grad_norm": 4.721450383705418, "learning_rate": 5.555555555555555e-07, "loss": 0.8326, "step": 3 },
    { "epoch": 0.02214022140221402, "grad_norm": 4.702865900802319, "learning_rate": 7.407407407407407e-07, "loss": 0.8705, "step": 4 },
    { "epoch": 0.027675276752767528, "grad_norm": 4.588661299548798, "learning_rate": 9.259259259259259e-07, "loss": 0.7979, "step": 5 },
    { "epoch": 0.033210332103321034, "grad_norm": 4.514875582905285, "learning_rate": 1.111111111111111e-06, "loss": 0.8133, "step": 6 },
    { "epoch": 0.03874538745387454, "grad_norm": 4.3889371427334725, "learning_rate": 1.2962962962962962e-06, "loss": 0.8289, "step": 7 },
    { "epoch": 0.04428044280442804, "grad_norm": 3.681708725528932, "learning_rate": 1.4814814814814815e-06, "loss": 0.7494, "step": 8 },
    { "epoch": 0.04981549815498155, "grad_norm": 3.7696517468375483, "learning_rate": 1.6666666666666667e-06, "loss": 0.8392, "step": 9 },
    { "epoch": 0.055350553505535055, "grad_norm": 3.5736119140088474, "learning_rate": 1.8518518518518519e-06, "loss": 0.7802, "step": 10 },
    { "epoch": 0.06088560885608856, "grad_norm": 2.352449321846944, "learning_rate": 2.037037037037037e-06, "loss": 0.7531, "step": 11 },
    { "epoch": 0.06642066420664207, "grad_norm": 2.1396669501274728, "learning_rate": 2.222222222222222e-06, "loss": 0.8033, "step": 12 },
    { "epoch": 0.07195571955719557, "grad_norm": 2.0686046715471744, "learning_rate": 2.4074074074074075e-06, "loss": 0.7739, "step": 13 },
    { "epoch": 0.07749077490774908, "grad_norm": 2.112065821246985, "learning_rate": 2.5925925925925925e-06, "loss": 0.7789, "step": 14 },
    { "epoch": 0.08302583025830258, "grad_norm": 1.6986741641118395, "learning_rate": 2.7777777777777783e-06, "loss": 0.7151, "step": 15 },
    { "epoch": 0.08856088560885608, "grad_norm": 1.9671299367169393, "learning_rate": 2.962962962962963e-06, "loss": 0.7321, "step": 16 },
    { "epoch": 0.0940959409594096, "grad_norm": 1.727327433279914, "learning_rate": 3.1481481481481483e-06, "loss": 0.7381, "step": 17 },
    { "epoch": 0.0996309963099631, "grad_norm": 1.65624237671333, "learning_rate": 3.3333333333333333e-06, "loss": 0.7159, "step": 18 },
    { "epoch": 0.10516605166051661, "grad_norm": 1.3535125694848185, "learning_rate": 3.5185185185185187e-06, "loss": 0.6755, "step": 19 },
    { "epoch": 0.11070110701107011, "grad_norm": 1.2125088936587491, "learning_rate": 3.7037037037037037e-06, "loss": 0.7032, "step": 20 },
    { "epoch": 0.11623616236162361, "grad_norm": 1.107066250339579, "learning_rate": 3.88888888888889e-06, "loss": 0.6217, "step": 21 },
    { "epoch": 0.12177121771217712, "grad_norm": 1.3653673904628112, "learning_rate": 4.074074074074074e-06, "loss": 0.6824, "step": 22 },
    { "epoch": 0.12730627306273062, "grad_norm": 1.289828643250104, "learning_rate": 4.2592592592592596e-06, "loss": 0.6796, "step": 23 },
    { "epoch": 0.13284132841328414, "grad_norm": 1.379866304591633, "learning_rate": 4.444444444444444e-06, "loss": 0.8102, "step": 24 },
    { "epoch": 0.13837638376383765, "grad_norm": 1.1200972001221432, "learning_rate": 4.62962962962963e-06, "loss": 0.6953, "step": 25 },
    { "epoch": 0.14391143911439114, "grad_norm": 1.071016299230562, "learning_rate": 4.814814814814815e-06, "loss": 0.6914, "step": 26 },
    { "epoch": 0.14944649446494465, "grad_norm": 1.007816252436763, "learning_rate": 5e-06, "loss": 0.7266, "step": 27 },
    { "epoch": 0.15498154981549817, "grad_norm": 0.8824750556078126, "learning_rate": 5.185185185185185e-06, "loss": 0.716, "step": 28 },
    { "epoch": 0.16051660516605165, "grad_norm": 1.0612669135866166, "learning_rate": 5.370370370370371e-06, "loss": 0.7289, "step": 29 },
    { "epoch": 0.16605166051660517, "grad_norm": 0.8626830347073103, "learning_rate": 5.555555555555557e-06, "loss": 0.6848, "step": 30 },
    { "epoch": 0.17158671586715868, "grad_norm": 0.8679382464145016, "learning_rate": 5.740740740740741e-06, "loss": 0.6615, "step": 31 },
    { "epoch": 0.17712177121771217, "grad_norm": 0.8023350481646084, "learning_rate": 5.925925925925926e-06, "loss": 0.708, "step": 32 },
    { "epoch": 0.18265682656826568, "grad_norm": 0.9095885057238613, "learning_rate": 6.111111111111112e-06, "loss": 0.7367, "step": 33 },
    { "epoch": 0.1881918819188192, "grad_norm": 0.8887858125212083, "learning_rate": 6.296296296296297e-06, "loss": 0.7039, "step": 34 },
    { "epoch": 0.1937269372693727, "grad_norm": 0.9119816001938974, "learning_rate": 6.481481481481482e-06, "loss": 0.7479, "step": 35 },
    { "epoch": 0.1992619926199262, "grad_norm": 0.9750213405228529, "learning_rate": 6.666666666666667e-06, "loss": 0.7354, "step": 36 },
    { "epoch": 0.2047970479704797, "grad_norm": 0.7893115852707117, "learning_rate": 6.851851851851853e-06, "loss": 0.5972, "step": 37 },
    { "epoch": 0.21033210332103322, "grad_norm": 0.8249712148416332, "learning_rate": 7.0370370370370375e-06, "loss": 0.6766, "step": 38 },
    { "epoch": 0.2158671586715867, "grad_norm": 0.8548009416827502, "learning_rate": 7.222222222222223e-06, "loss": 0.6979, "step": 39 },
    { "epoch": 0.22140221402214022, "grad_norm": 0.7430327142800517, "learning_rate": 7.4074074074074075e-06, "loss": 0.6364, "step": 40 },
    { "epoch": 0.22693726937269373, "grad_norm": 0.8804985241917266, "learning_rate": 7.592592592592594e-06, "loss": 0.7009, "step": 41 },
    { "epoch": 0.23247232472324722, "grad_norm": 0.8685723187944967, "learning_rate": 7.77777777777778e-06, "loss": 0.665, "step": 42 },
    { "epoch": 0.23800738007380073, "grad_norm": 0.7863830014618772, "learning_rate": 7.962962962962963e-06, "loss": 0.6506, "step": 43 },
    { "epoch": 0.24354243542435425, "grad_norm": 0.89002673850871, "learning_rate": 8.148148148148148e-06, "loss": 0.704, "step": 44 },
    { "epoch": 0.24907749077490776, "grad_norm": 0.9353559189317285, "learning_rate": 8.333333333333334e-06, "loss": 0.6407, "step": 45 },
    { "epoch": 0.25461254612546125, "grad_norm": 0.8860584109544853, "learning_rate": 8.518518518518519e-06, "loss": 0.6877, "step": 46 },
    { "epoch": 0.26014760147601473, "grad_norm": 1.0345385393930384, "learning_rate": 8.703703703703705e-06, "loss": 0.6195, "step": 47 },
    { "epoch": 0.2656826568265683, "grad_norm": 0.8390822845074132, "learning_rate": 8.888888888888888e-06, "loss": 0.7143, "step": 48 },
    { "epoch": 0.27121771217712176, "grad_norm": 0.7829416679007993, "learning_rate": 9.074074074074075e-06, "loss": 0.6848, "step": 49 },
    { "epoch": 0.2767527675276753, "grad_norm": 1.0206245762170603, "learning_rate": 9.25925925925926e-06, "loss": 0.7, "step": 50 },
    { "epoch": 0.2822878228782288, "grad_norm": 0.8735389450158357, "learning_rate": 9.444444444444445e-06, "loss": 0.7061, "step": 51 },
    { "epoch": 0.2878228782287823, "grad_norm": 0.813152245836957, "learning_rate": 9.62962962962963e-06, "loss": 0.6943, "step": 52 },
    { "epoch": 0.2933579335793358, "grad_norm": 0.9984159950425506, "learning_rate": 9.814814814814815e-06, "loss": 0.736, "step": 53 },
    { "epoch": 0.2988929889298893, "grad_norm": 0.8289001717630332, "learning_rate": 1e-05, "loss": 0.6843, "step": 54 },
    { "epoch": 0.3044280442804428, "grad_norm": 0.7804823508929742, "learning_rate": 9.999895536228031e-06, "loss": 0.6178, "step": 55 },
    { "epoch": 0.30996309963099633, "grad_norm": 0.8747433453571559, "learning_rate": 9.999582149277188e-06, "loss": 0.6482, "step": 56 },
    { "epoch": 0.3154981549815498, "grad_norm": 0.9957208410673493, "learning_rate": 9.999059852242508e-06, "loss": 0.7054, "step": 57 },
    { "epoch": 0.3210332103321033, "grad_norm": 0.7818825969466411, "learning_rate": 9.998328666948437e-06, "loss": 0.6968, "step": 58 },
    { "epoch": 0.32656826568265684, "grad_norm": 0.8999544343447169, "learning_rate": 9.997388623947927e-06, "loss": 0.6979, "step": 59 },
    { "epoch": 0.33210332103321033, "grad_norm": 0.877731722321392, "learning_rate": 9.996239762521152e-06, "loss": 0.5952, "step": 60 },
    { "epoch": 0.3376383763837638, "grad_norm": 0.7490764317219278, "learning_rate": 9.994882130673869e-06, "loss": 0.6225, "step": 61 },
    { "epoch": 0.34317343173431736, "grad_norm": 0.8743296067498224, "learning_rate": 9.993315785135417e-06, "loss": 0.6924, "step": 62 },
    { "epoch": 0.34870848708487084, "grad_norm": 0.8559368435923252, "learning_rate": 9.991540791356342e-06, "loss": 0.7266, "step": 63 },
    { "epoch": 0.35424354243542433, "grad_norm": 0.8920516831324726, "learning_rate": 9.989557223505661e-06, "loss": 0.705, "step": 64 },
    { "epoch": 0.35977859778597787, "grad_norm": 0.8193570293903212, "learning_rate": 9.987365164467767e-06, "loss": 0.6945, "step": 65 },
    { "epoch": 0.36531365313653136, "grad_norm": 0.8473504309187051, "learning_rate": 9.98496470583896e-06, "loss": 0.6412, "step": 66 },
    { "epoch": 0.37084870848708484, "grad_norm": 1.14566744243091, "learning_rate": 9.98235594792363e-06, "loss": 0.7258, "step": 67 },
    { "epoch": 0.3763837638376384, "grad_norm": 0.8763668658056738, "learning_rate": 9.979538999730047e-06, "loss": 0.7084, "step": 68 },
    { "epoch": 0.38191881918819187, "grad_norm": 0.8849767742762311, "learning_rate": 9.976513978965829e-06, "loss": 0.6984, "step": 69 },
    { "epoch": 0.3874538745387454, "grad_norm": 1.0687474160518997, "learning_rate": 9.973281012033009e-06, "loss": 0.7183, "step": 70 },
    { "epoch": 0.3929889298892989, "grad_norm": 0.781213177981436, "learning_rate": 9.96984023402275e-06, "loss": 0.6504, "step": 71 },
    { "epoch": 0.3985239852398524, "grad_norm": 0.7379835559329746, "learning_rate": 9.966191788709716e-06, "loss": 0.6661, "step": 72 },
    { "epoch": 0.4040590405904059, "grad_norm": 0.7984703551016957, "learning_rate": 9.962335828546049e-06, "loss": 0.6613, "step": 73 },
    { "epoch": 0.4095940959409594, "grad_norm": 1.2794348232710167, "learning_rate": 9.958272514655006e-06, "loss": 0.7307, "step": 74 },
    { "epoch": 0.4151291512915129, "grad_norm": 0.7630430424338107, "learning_rate": 9.954002016824226e-06, "loss": 0.665, "step": 75 },
    { "epoch": 0.42066420664206644, "grad_norm": 0.7527869433219629, "learning_rate": 9.949524513498636e-06, "loss": 0.6424, "step": 76 },
    { "epoch": 0.4261992619926199, "grad_norm": 1.0051926115967615, "learning_rate": 9.944840191772987e-06, "loss": 0.7625, "step": 77 },
    { "epoch": 0.4317343173431734, "grad_norm": 0.7754322119272198, "learning_rate": 9.939949247384046e-06, "loss": 0.648, "step": 78 },
    { "epoch": 0.43726937269372695, "grad_norm": 0.7146858924414144, "learning_rate": 9.934851884702415e-06, "loss": 0.6895, "step": 79 },
    { "epoch": 0.44280442804428044, "grad_norm": 0.8084465018612776, "learning_rate": 9.929548316723983e-06, "loss": 0.7189, "step": 80 },
    { "epoch": 0.4483394833948339, "grad_norm": 0.8066377981893613, "learning_rate": 9.924038765061042e-06, "loss": 0.6995, "step": 81 },
    { "epoch": 0.45387453874538747, "grad_norm": 0.738107027929258, "learning_rate": 9.918323459933006e-06, "loss": 0.6529, "step": 82 },
    { "epoch": 0.45940959409594095, "grad_norm": 0.7741808681072159, "learning_rate": 9.912402640156812e-06, "loss": 0.7351, "step": 83 },
    { "epoch": 0.46494464944649444, "grad_norm": 0.7180376057707722, "learning_rate": 9.906276553136924e-06, "loss": 0.6305, "step": 84 },
    { "epoch": 0.470479704797048, "grad_norm": 0.8145618675350315, "learning_rate": 9.899945454855007e-06, "loss": 0.7026, "step": 85 },
    { "epoch": 0.47601476014760147, "grad_norm": 0.8245175153559692, "learning_rate": 9.893409609859221e-06, "loss": 0.7033, "step": 86 },
    { "epoch": 0.48154981549815495, "grad_norm": 0.7026182893077407, "learning_rate": 9.886669291253178e-06, "loss": 0.6886, "step": 87 },
    { "epoch": 0.4870848708487085, "grad_norm": 0.7364819802578174, "learning_rate": 9.879724780684518e-06, "loss": 0.6942, "step": 88 },
    { "epoch": 0.492619926199262, "grad_norm": 0.7981620114825012, "learning_rate": 9.872576368333152e-06, "loss": 0.6716, "step": 89 },
    { "epoch": 0.4981549815498155, "grad_norm": 0.8836384710800025, "learning_rate": 9.86522435289912e-06, "loss": 0.7237, "step": 90 },
    { "epoch": 0.503690036900369, "grad_norm": 0.7914423436808887, "learning_rate": 9.857669041590135e-06, "loss": 0.7616, "step": 91 },
    { "epoch": 0.5092250922509225, "grad_norm": 0.7232583216067142, "learning_rate": 9.849910750108718e-06, "loss": 0.6715, "step": 92 },
    { "epoch": 0.514760147601476, "grad_norm": 0.8941702082274658, "learning_rate": 9.841949802639031e-06, "loss": 0.6854, "step": 93 },
    { "epoch": 0.5202952029520295, "grad_norm": 0.7537078088279118, "learning_rate": 9.833786531833311e-06, "loss": 0.6934, "step": 94 },
    { "epoch": 0.525830258302583, "grad_norm": 0.7496363805903822, "learning_rate": 9.825421278797984e-06, "loss": 0.6757, "step": 95 },
    { "epoch": 0.5313653136531366, "grad_norm": 0.7811094162416035, "learning_rate": 9.816854393079402e-06, "loss": 0.6575, "step": 96 },
    { "epoch": 0.5369003690036901, "grad_norm": 0.80064135608355, "learning_rate": 9.808086232649246e-06, "loss": 0.6753, "step": 97 },
    { "epoch": 0.5424354243542435, "grad_norm": 0.7536916723498005, "learning_rate": 9.79911716388956e-06, "loss": 0.6473, "step": 98 },
    { "epoch": 0.5479704797047971, "grad_norm": 0.8008444136665656, "learning_rate": 9.789947561577445e-06, "loss": 0.6942, "step": 99 },
    { "epoch": 0.5535055350553506, "grad_norm": 0.7175301808160046, "learning_rate": 9.7805778088694e-06, "loss": 0.7231, "step": 100 },
    { "epoch": 0.559040590405904, "grad_norm": 0.6865056427054099, "learning_rate": 9.771008297285307e-06, "loss": 0.6522, "step": 101 },
    { "epoch": 0.5645756457564576, "grad_norm": 0.6945540437205031, "learning_rate": 9.761239426692077e-06, "loss": 0.659, "step": 102 },
    { "epoch": 0.5701107011070111, "grad_norm": 0.7631536227470039, "learning_rate": 9.75127160528694e-06, "loss": 0.7251, "step": 103 },
    { "epoch": 0.5756457564575646, "grad_norm": 0.7528731350152382, "learning_rate": 9.741105249580383e-06, "loss": 0.6988, "step": 104 },
    { "epoch": 0.5811808118081181, "grad_norm": 0.7900410030089848, "learning_rate": 9.730740784378755e-06, "loss": 0.7346, "step": 105 },
    { "epoch": 0.5867158671586716, "grad_norm": 0.7179179917128015, "learning_rate": 9.7201786427665e-06, "loss": 0.6467, "step": 106 },
    { "epoch": 0.5922509225092251, "grad_norm": 0.754739804813151, "learning_rate": 9.709419266088086e-06, "loss": 0.7126, "step": 107 },
    { "epoch": 0.5977859778597786, "grad_norm": 0.7167700794769842, "learning_rate": 9.698463103929542e-06, "loss": 0.6259, "step": 108 },
    { "epoch": 0.6033210332103321, "grad_norm": 0.862393241288747, "learning_rate": 9.687310614099676e-06, "loss": 0.7462, "step": 109 },
    { "epoch": 0.6088560885608856, "grad_norm": 0.7999243808596918, "learning_rate": 9.67596226261095e-06, "loss": 0.6879, "step": 110 },
    { "epoch": 0.6143911439114391, "grad_norm": 0.7608890684314967, "learning_rate": 9.664418523660004e-06, "loss": 0.7107, "step": 111 },
    { "epoch": 0.6199261992619927, "grad_norm": 0.8642130795032539, "learning_rate": 9.652679879607843e-06, "loss": 0.6217, "step": 112 },
    { "epoch": 0.6254612546125461, "grad_norm": 0.8331393122517589, "learning_rate": 9.640746820959684e-06, "loss": 0.621, "step": 113 },
    { "epoch": 0.6309963099630996, "grad_norm": 0.6800899888090565, "learning_rate": 9.628619846344453e-06, "loss": 0.5866, "step": 114 },
    { "epoch": 0.6365313653136532, "grad_norm": 0.7683010600627519, "learning_rate": 9.616299462493952e-06, "loss": 0.6161, "step": 115 },
    { "epoch": 0.6420664206642066, "grad_norm": 0.8092930286332166, "learning_rate": 9.603786184221693e-06, "loss": 0.652, "step": 116 },
    { "epoch": 0.6476014760147601, "grad_norm": 0.75053691727633, "learning_rate": 9.591080534401371e-06, "loss": 0.6423, "step": 117 },
    { "epoch": 0.6531365313653137, "grad_norm": 0.713112544635858, "learning_rate": 9.578183043945031e-06, "loss": 0.6363, "step": 118 },
    { "epoch": 0.6586715867158671, "grad_norm": 0.8815831434837667, "learning_rate": 9.565094251780872e-06, "loss": 0.7031, "step": 119 },
    { "epoch": 0.6642066420664207, "grad_norm": 0.8001162241305896, "learning_rate": 9.551814704830734e-06, "loss": 0.6202, "step": 120 },
    { "epoch": 0.6697416974169742, "grad_norm": 0.7135742572247368, "learning_rate": 9.538344957987245e-06, "loss": 0.704, "step": 121 },
    { "epoch": 0.6752767527675276, "grad_norm": 0.7687650494748194, "learning_rate": 9.524685574090627e-06, "loss": 0.6892, "step": 122 },
    { "epoch": 0.6808118081180812, "grad_norm": 0.8827181678396219, "learning_rate": 9.51083712390519e-06, "loss": 0.685, "step": 123 },
    { "epoch": 0.6863468634686347, "grad_norm": 0.8882151233390665, "learning_rate": 9.496800186095466e-06, "loss": 0.7506, "step": 124 },
    { "epoch": 0.6918819188191881, "grad_norm": 0.7930642149741711, "learning_rate": 9.482575347202047e-06, "loss": 0.6835, "step": 125 },
    { "epoch": 0.6974169741697417, "grad_norm": 0.6933167090473802, "learning_rate": 9.468163201617063e-06, "loss": 0.6992, "step": 126 },
    { "epoch": 0.7029520295202952, "grad_norm": 0.715214953281436, "learning_rate": 9.453564351559348e-06, "loss": 0.6442, "step": 127 },
    { "epoch": 0.7084870848708487, "grad_norm": 0.7554434464632439, "learning_rate": 9.438779407049282e-06, "loss": 0.6666, "step": 128 },
    { "epoch": 0.7140221402214022, "grad_norm": 0.7346261351044595, "learning_rate": 9.423808985883289e-06, "loss": 0.6314, "step": 129 },
    { "epoch": 0.7195571955719557, "grad_norm": 0.7394564505170546, "learning_rate": 9.40865371360804e-06, "loss": 0.6532, "step": 130 },
    { "epoch": 0.7250922509225092, "grad_norm": 0.6920823973121119, "learning_rate": 9.393314223494297e-06, "loss": 0.6871, "step": 131 },
    { "epoch": 0.7306273062730627, "grad_norm": 0.8019568716365827, "learning_rate": 9.377791156510456e-06, "loss": 0.6313, "step": 132 },
    { "epoch": 0.7361623616236163, "grad_norm": 0.731865215320956, "learning_rate": 9.362085161295768e-06, "loss": 0.6757, "step": 133 },
    { "epoch": 0.7416974169741697, "grad_norm": 0.7331269825104397, "learning_rate": 9.346196894133239e-06, "loss": 0.7192, "step": 134 },
    { "epoch": 0.7472324723247232, "grad_norm": 0.6689559430615435, "learning_rate": 9.330127018922195e-06, "loss": 0.6578, "step": 135 },
    { "epoch": 0.7527675276752768, "grad_norm": 0.8011088236436561, "learning_rate": 9.313876207150544e-06, "loss": 0.7185, "step": 136 },
    { "epoch": 0.7583025830258303, "grad_norm": 0.7497062827280815, "learning_rate": 9.297445137866726e-06, "loss": 0.6652, "step": 137 },
    { "epoch": 0.7638376383763837, "grad_norm": 0.7999274641250289, "learning_rate": 9.280834497651334e-06, "loss": 0.6987, "step": 138 },
    { "epoch": 0.7693726937269373, "grad_norm": 0.6443825206939584, "learning_rate": 9.264044980588415e-06, "loss": 0.6537, "step": 139 },
    { "epoch": 0.7749077490774908, "grad_norm": 0.6830543309432857, "learning_rate": 9.247077288236488e-06, "loss": 0.6389, "step": 140 },
    { "epoch": 0.7804428044280443, "grad_norm": 0.7912659097565806, "learning_rate": 9.229932129599206e-06, "loss": 0.7265, "step": 141 },
    { "epoch": 0.7859778597785978, "grad_norm": 0.7608099518203012, "learning_rate": 9.212610221095748e-06, "loss": 0.6902, "step": 142 },
    { "epoch": 0.7915129151291513, "grad_norm": 0.7598999658417419, "learning_rate": 9.195112286530874e-06, "loss": 0.7237, "step": 143 },
    { "epoch": 0.7970479704797048, "grad_norm": 0.6790407377272779, "learning_rate": 9.177439057064684e-06, "loss": 0.689, "step": 144 },
    { "epoch": 0.8025830258302583, "grad_norm": 0.7024267843652401, "learning_rate": 9.159591271182058e-06, "loss": 0.6356, "step": 145 },
    { "epoch": 0.8081180811808119, "grad_norm": 0.704433084066074, "learning_rate": 9.141569674661816e-06, "loss": 0.6453, "step": 146 },
    { "epoch": 0.8136531365313653, "grad_norm": 0.7169550452222242, "learning_rate": 9.123375020545534e-06, "loss": 0.6373, "step": 147 },
    { "epoch": 0.8191881918819188, "grad_norm": 0.694690233125659, "learning_rate": 9.105008069106093e-06, "loss": 0.6091, "step": 148 },
    { "epoch": 0.8247232472324724, "grad_norm": 0.7335227368162949, "learning_rate": 9.086469587815904e-06, "loss": 0.6582, "step": 149 },
    { "epoch": 0.8302583025830258, "grad_norm": 0.6841111000042602, "learning_rate": 9.067760351314838e-06, "loss": 0.7072, "step": 150 },
    { "epoch": 0.8357933579335793, "grad_norm": 0.7083136755269921, "learning_rate": 9.048881141377863e-06, "loss": 0.6677, "step": 151 },
    { "epoch": 0.8413284132841329, "grad_norm": 0.6982890610414855, "learning_rate": 9.029832746882372e-06, "loss": 0.6526, "step": 152 },
    { "epoch": 0.8468634686346863, "grad_norm": 0.6860196076239153, "learning_rate": 9.01061596377522e-06, "loss": 0.6461, "step": 153 },
    { "epoch": 0.8523985239852399, "grad_norm": 0.7376158689862853, "learning_rate": 8.991231595039464e-06, "loss": 0.7101, "step": 154 },
    { "epoch": 0.8579335793357934, "grad_norm": 0.6975228556068289, "learning_rate": 8.97168045066082e-06, "loss": 0.7475, "step": 155 },
    { "epoch": 0.8634686346863468, "grad_norm": 0.7486491127857194, "learning_rate": 8.951963347593797e-06, "loss": 0.6996, "step": 156 },
    { "epoch": 0.8690036900369004, "grad_norm": 0.7912254693906845, "learning_rate": 8.932081109727582e-06, "loss": 0.6985, "step": 157 },
    { "epoch": 0.8745387453874539, "grad_norm": 0.6439570726845703, "learning_rate": 8.9120345678516e-06, "loss": 0.625, "step": 158 },
    { "epoch": 0.8800738007380073, "grad_norm": 0.6572694602060432, "learning_rate": 8.891824559620801e-06, "loss": 0.6784, "step": 159 },
    { "epoch": 0.8856088560885609, "grad_norm": 0.7971159900336966, "learning_rate": 8.871451929520662e-06, "loss": 0.6988, "step": 160 },
    { "epoch": 0.8911439114391144, "grad_norm": 0.7050522383893928, "learning_rate": 8.8509175288319e-06, "loss": 0.6445, "step": 161 },
    { "epoch": 0.8966789667896679, "grad_norm": 0.6897534640630667, "learning_rate": 8.83022221559489e-06, "loss": 0.6698, "step": 162 },
    { "epoch": 0.9022140221402214, "grad_norm": 0.7286182755253351, "learning_rate": 8.80936685457383e-06, "loss": 0.6527, "step": 163 },
    { "epoch": 0.9077490774907749, "grad_norm": 0.7250443386250167, "learning_rate": 8.78835231722059e-06, "loss": 0.6785, "step": 164 },
    { "epoch": 0.9132841328413284, "grad_norm": 0.6795202817087662, "learning_rate": 8.767179481638303e-06, "loss": 0.712, "step": 165 },
    { "epoch": 0.9188191881918819, "grad_norm": 0.7673545156991856, "learning_rate": 8.74584923254468e-06, "loss": 0.7073, "step": 166 },
    { "epoch": 0.9243542435424354, "grad_norm": 0.7100681832870371, "learning_rate": 8.72436246123503e-06, "loss": 0.7097, "step": 167 },
    { "epoch": 0.9298892988929889, "grad_norm": 0.6750568200393077, "learning_rate": 8.702720065545024e-06, "loss": 0.6976, "step": 168 },
    { "epoch": 0.9354243542435424, "grad_norm": 0.6760810557529758, "learning_rate": 8.680922949813177e-06, "loss": 0.6883, "step": 169 },
    { "epoch": 0.940959409594096, "grad_norm": 0.6506834973761045, "learning_rate": 8.658972024843063e-06, "loss": 0.6434, "step": 170 },
    { "epoch": 0.9464944649446494, "grad_norm": 0.668877599120538, "learning_rate": 8.636868207865244e-06, "loss": 0.6198, "step": 171 },
    { "epoch": 0.9520295202952029, "grad_norm": 0.6532330641471769, "learning_rate": 8.614612422498965e-06, "loss": 0.6806, "step": 172 },
    { "epoch": 0.9575645756457565, "grad_norm": 0.7178919179692898, "learning_rate": 8.592205598713539e-06, "loss": 0.6887, "step": 173 },
    { "epoch": 0.9630996309963099, "grad_norm": 0.6999250045861761, "learning_rate": 8.569648672789496e-06, "loss": 0.6442, "step": 174 },
    { "epoch": 0.9686346863468634, "grad_norm": 0.7075371495307748, "learning_rate": 8.546942587279465e-06, "loss": 0.7256, "step": 175 },
    { "epoch": 0.974169741697417, "grad_norm": 0.7139921209949371, "learning_rate": 8.524088290968781e-06, "loss": 0.669, "step": 176 },
    { "epoch": 0.9797047970479705, "grad_norm": 0.7843465216757487, "learning_rate": 8.501086738835843e-06, "loss": 0.679, "step": 177 },
    { "epoch": 0.985239852398524, "grad_norm": 0.7078117075067463, "learning_rate": 8.477938892012209e-06, "loss": 0.6443, "step": 178 },
    { "epoch": 0.9907749077490775, "grad_norm": 0.7191146480817366, "learning_rate": 8.45464571774244e-06, "loss": 0.6752, "step": 179 },
    { "epoch": 0.996309963099631, "grad_norm": 0.7793217733052689, "learning_rate": 8.43120818934367e-06, "loss": 0.6809, "step": 180 },
    { "epoch": 1.0018450184501846, "grad_norm": 0.9535816156617457, "learning_rate": 8.407627286164948e-06, "loss": 0.8581, "step": 181 },
    { "epoch": 1.007380073800738, "grad_norm": 0.7904687166212481, "learning_rate": 8.38390399354631e-06, "loss": 0.6589, "step": 182 },
    { "epoch": 1.0129151291512914, "grad_norm": 0.7871113082768205, "learning_rate": 8.360039302777614e-06, "loss": 0.5974, "step": 183 },
    { "epoch": 1.018450184501845, "grad_norm": 0.7159872827730034, "learning_rate": 8.336034211057098e-06, "loss": 0.6009, "step": 184 },
    { "epoch": 1.0239852398523985, "grad_norm": 0.6616682950001487, "learning_rate": 8.31188972144974e-06, "loss": 0.5881, "step": 185 },
    { "epoch": 1.029520295202952, "grad_norm": 0.6850474409430557, "learning_rate": 8.28760684284532e-06, "loss": 0.5939, "step": 186 },
    { "epoch": 1.0350553505535056, "grad_norm": 0.7461860778494099, "learning_rate": 8.263186589916273e-06, "loss": 0.6383, "step": 187 },
    { "epoch": 1.040590405904059, "grad_norm": 0.7089074245925379, "learning_rate": 8.238629983075296e-06, "loss": 0.618, "step": 188 },
    { "epoch": 1.0461254612546125, "grad_norm": 0.6738261402996001, "learning_rate": 8.213938048432697e-06, "loss": 0.5554, "step": 189 },
    { "epoch": 1.051660516605166, "grad_norm": 0.7206220611502636, "learning_rate": 8.18911181775353e-06, "loss": 0.6136, "step": 190 },
    { "epoch": 1.0571955719557196, "grad_norm": 0.7352230176592326, "learning_rate": 8.164152328414476e-06, "loss": 0.6188, "step": 191 },
    { "epoch": 1.062730627306273, "grad_norm": 0.6943757541874687, "learning_rate": 8.139060623360494e-06, "loss": 0.5167, "step": 192 },
    { "epoch": 1.0682656826568266, "grad_norm": 0.7778412839253838, "learning_rate": 8.113837751061246e-06, "loss": 0.6154, "step": 193 },
    { "epoch": 1.07380073800738, "grad_norm": 0.6551339832525483, "learning_rate": 8.088484765467286e-06, "loss": 0.5878, "step": 194 },
    { "epoch": 1.0793357933579335, "grad_norm": 0.8149186205476209, "learning_rate": 8.063002725966014e-06, "loss": 0.7048, "step": 195 },
    { "epoch": 1.084870848708487, "grad_norm": 0.6880593641520117, "learning_rate": 8.037392697337418e-06, "loss": 0.6041, "step": 196 },
    { "epoch": 1.0904059040590406, "grad_norm": 0.7040695710194661, "learning_rate": 8.011655749709575e-06, "loss": 0.5979, "step": 197 },
    { "epoch": 1.0959409594095941, "grad_norm": 0.7965545246789066, "learning_rate": 7.985792958513932e-06, "loss": 0.5892, "step": 198 },
    { "epoch": 1.1014760147601477, "grad_norm": 0.7621766315488933, "learning_rate": 7.95980540444038e-06, "loss": 0.594, "step": 199 },
    { "epoch": 1.1070110701107012, "grad_norm": 0.7643147205973972, "learning_rate": 7.93369417339209e-06, "loss": 0.6597, "step": 200 },
    { "epoch": 1.1125461254612545, "grad_norm": 0.8216988466319578, "learning_rate": 7.907460356440133e-06, "loss": 0.6398, "step": 201 },
    { "epoch": 1.118081180811808, "grad_norm": 0.7718237762248213, "learning_rate": 7.881105049777902e-06, "loss": 0.5818, "step": 202 },
    { "epoch": 1.1236162361623616, "grad_norm": 0.6351521048330241, "learning_rate": 7.854629354675292e-06, "loss": 0.5774, "step": 203 },
    { "epoch": 1.1291512915129152, "grad_norm": 0.6908992052540243, "learning_rate": 7.828034377432694e-06, "loss": 0.557, "step": 204 },
    { "epoch": 1.1346863468634687, "grad_norm": 0.7232308696829104, "learning_rate": 7.801321229334764e-06, "loss": 0.5865, "step": 205 },
    { "epoch": 1.140221402214022, "grad_norm": 0.6705064929908767, "learning_rate": 7.774491026603985e-06, "loss": 0.5934, "step": 206 },
    { "epoch": 1.1457564575645756, "grad_norm": 0.6880567256154578, "learning_rate": 7.747544890354031e-06, "loss": 0.6281, "step": 207 },
    { "epoch": 1.151291512915129, "grad_norm": 0.7043185315494912, "learning_rate": 7.720483946542913e-06, "loss": 0.5789, "step": 208 },
    { "epoch": 1.1568265682656826, "grad_norm": 0.749312030144366, "learning_rate": 7.69330932592594e-06, "loss": 0.6413, "step": 209 },
    { "epoch": 1.1623616236162362, "grad_norm": 0.8155713214886554, "learning_rate": 7.666022164008458e-06, "loss": 0.6249, "step": 210 },
    { "epoch": 1.1678966789667897, "grad_norm": 0.736980019422596, "learning_rate": 7.638623600998409e-06, "loss": 0.5835, "step": 211 },
    { "epoch": 1.1734317343173433, "grad_norm": 0.6987882968304525, "learning_rate": 7.6111147817586925e-06, "loss": 0.6169, "step": 212 },
    { "epoch": 1.1789667896678966, "grad_norm": 0.7052697317791319, "learning_rate": 7.5834968557593155e-06, "loss": 0.5993, "step": 213 },
    { "epoch": 1.1845018450184501, "grad_norm": 0.7070482187757918, "learning_rate": 7.5557709770293664e-06, "loss": 0.5973, "step": 214 },
    { "epoch": 1.1900369003690037, "grad_norm": 0.6930393067005186, "learning_rate": 7.527938304108795e-06, "loss": 0.6242, "step": 215 },
    { "epoch": 1.1955719557195572, "grad_norm": 0.6851172667466551, "learning_rate": 7.500000000000001e-06, "loss": 0.6435, "step": 216 },
    { "epoch": 1.2011070110701108, "grad_norm": 0.7259877682356197, "learning_rate": 7.471957232119235e-06, "loss": 0.6212, "step": 217 },
    { "epoch": 1.2066420664206643, "grad_norm": 0.640453551277484, "learning_rate": 7.443811172247822e-06, "loss": 0.5481, "step": 218 },
    { "epoch": 1.2121771217712176, "grad_norm": 0.6922785890990849, "learning_rate": 7.415562996483193e-06, "loss": 0.6075, "step": 219 },
    { "epoch": 1.2177121771217712, "grad_norm": 0.7252156376468682, "learning_rate": 7.387213885189746e-06, "loss": 0.6357, "step": 220 },
    { "epoch": 1.2232472324723247, "grad_norm": 0.6646431209628268, "learning_rate": 7.358765022949519e-06, "loss": 0.6337, "step": 221 },
    { "epoch": 1.2287822878228782, "grad_norm": 0.6874785057119748, "learning_rate": 7.330217598512696e-06, "loss": 0.6359, "step": 222 },
    { "epoch": 1.2343173431734318, "grad_norm": 0.7165999600358128, "learning_rate": 7.30157280474793e-06, "loss": 0.5538, "step": 223 },
    { "epoch": 1.2398523985239853, "grad_norm": 0.6714722516214744, "learning_rate": 7.2728318385925035e-06, "loss": 0.6, "step": 224 },
    { "epoch": 1.2453874538745389, "grad_norm": 0.7107004304709478, "learning_rate": 7.243995901002312e-06, "loss": 0.6217, "step": 225 },
    { "epoch": 1.2509225092250922, "grad_norm": 0.7303659741053162, "learning_rate": 7.215066196901676e-06, "loss": 0.6022, "step": 226 },
    { "epoch": 1.2564575645756457, "grad_norm": 0.6912084957079973, "learning_rate": 7.186043935133005e-06, "loss": 0.6017, "step": 227 },
    { "epoch": 1.2619926199261993, "grad_norm": 0.8016568540326536, "learning_rate": 7.156930328406268e-06, "loss": 0.6542, "step": 228 },
    { "epoch": 1.2675276752767528, "grad_norm": 0.743350369247089, "learning_rate": 7.127726593248337e-06, "loss": 0.5978, "step": 229 },
    { "epoch": 1.2730627306273063, "grad_norm": 0.7404143977218907, "learning_rate": 7.098433949952146e-06, "loss": 0.6437, "step": 230 },
    { "epoch": 1.2785977859778597, "grad_norm": 0.684639141766216, "learning_rate": 7.069053622525697e-06, "loss": 0.6318, "step": 231 },
    { "epoch": 1.2841328413284132, "grad_norm": 0.6506218001874067, "learning_rate": 7.039586838640918e-06, "loss": 0.5983, "step": 232 },
    { "epoch": 1.2896678966789668, "grad_norm": 0.6249292386050818, "learning_rate": 7.0100348295823706e-06, "loss": 0.5439, "step": 233 },
    { "epoch": 1.2952029520295203, "grad_norm": 0.7671687660019585, "learning_rate": 6.980398830195785e-06, "loss": 0.6191, "step": 234 },
    { "epoch": 1.3007380073800738, "grad_norm": 0.6567756554372846, "learning_rate": 6.950680078836475e-06, "loss": 0.6097, "step": 235 },
    { "epoch": 1.3062730627306274, "grad_norm": 0.7256041165241631, "learning_rate": 6.920879817317588e-06, "loss": 0.6237, "step": 236 },
    { "epoch": 1.311808118081181, "grad_norm": 0.7062531126646335, "learning_rate": 6.890999290858213e-06, "loss": 0.5881, "step": 237 },
    { "epoch": 1.3173431734317342, "grad_norm": 0.7074515896572966, "learning_rate": 6.861039748031351e-06, "loss": 0.6399, "step": 238 },
    { "epoch": 1.3228782287822878, "grad_norm": 0.7132720107486031, "learning_rate": 6.8310024407117405e-06, "loss": 0.6356, "step": 239 },
    { "epoch": 1.3284132841328413, "grad_norm": 0.6998988120144802, "learning_rate": 6.800888624023552e-06, "loss": 0.6086, "step": 240 },
    { "epoch": 1.3339483394833949, "grad_norm": 0.7003663999513423, "learning_rate": 6.770699556287939e-06, "loss": 0.6242, "step": 241 },
    { "epoch": 1.3394833948339484, "grad_norm": 0.698672108839009, "learning_rate": 6.740436498970453e-06, "loss": 0.6583, "step": 242 },
    { "epoch": 1.3450184501845017, "grad_norm": 0.6864478737367914, "learning_rate": 6.710100716628345e-06, "loss": 0.5885, "step": 243 },
    { "epoch": 1.3505535055350553, "grad_norm": 0.7196144827279044, "learning_rate": 6.679693476857712e-06, "loss": 0.6075, "step": 244 },
    { "epoch": 1.3560885608856088, "grad_norm": 0.6437948099966094, "learning_rate": 6.649216050240539e-06, "loss": 0.6097, "step": 245 },
    { "epoch": 1.3616236162361623, "grad_norm": 0.7620305140396721, "learning_rate": 6.618669710291607e-06, "loss": 0.5926, "step": 246 },
    { "epoch": 1.367158671586716, "grad_norm": 0.7830266879556336, "learning_rate": 6.588055733405266e-06, "loss": 0.6033, "step": 247 },
    { "epoch": 1.3726937269372694, "grad_norm": 0.7123926132432568, "learning_rate": 6.557375398802124e-06, "loss": 0.6074, "step": 248 },
    { "epoch": 1.378228782287823, "grad_norm": 0.6724440991233431, "learning_rate": 6.526629988475567e-06, "loss": 0.6154, "step": 249 },
    { "epoch": 1.3837638376383765, "grad_norm": 0.705678216248485, "learning_rate": 6.495820787138209e-06, "loss": 0.5891, "step": 250 },
    { "epoch": 1.3892988929889298, "grad_norm": 0.7606972010978245, "learning_rate": 6.4649490821682035e-06, "loss": 0.6586, "step": 251 },
    { "epoch": 1.3948339483394834, "grad_norm": 0.627422187069653, "learning_rate": 6.434016163555452e-06, "loss": 0.5917, "step": 252 },
    { "epoch": 1.400369003690037, "grad_norm": 0.619815027726477, "learning_rate": 6.403023323847695e-06, "loss": 0.5878, "step": 253 },
    { "epoch": 1.4059040590405905, "grad_norm": 0.689330526944227, "learning_rate": 6.371971858096509e-06, "loss": 0.5972, "step": 254 },
    { "epoch": 1.4114391143911438, "grad_norm": 0.7173239755647481, "learning_rate": 6.340863063803187e-06, "loss": 0.6063, "step": 255 },
    { "epoch": 1.4169741697416973, "grad_norm": 0.6961778167779811, "learning_rate": 6.30969824086453e-06, "loss": 0.5807, "step": 256 },
    { "epoch": 1.4225092250922509, "grad_norm": 0.6879266863201914, "learning_rate": 6.278478691518519e-06, "loss": 0.5948, "step": 257 },
    { "epoch": 1.4280442804428044, "grad_norm": 0.7552082447717653, "learning_rate": 6.247205720289907e-06, "loss": 0.6406, "step": 258 },
    { "epoch": 1.433579335793358, "grad_norm": 0.6434001937152157, "learning_rate": 6.215880633935709e-06, "loss": 0.5952, "step": 259 },
    { "epoch": 1.4391143911439115, "grad_norm": 0.662148188293842, "learning_rate": 6.184504741390596e-06, "loss": 0.6319, "step": 260 },
    { "epoch": 1.444649446494465, "grad_norm": 0.6153480539853726, "learning_rate": 6.153079353712201e-06, "loss": 0.5907, "step": 261 },
    { "epoch": 1.4501845018450186, "grad_norm": 0.6846732208662787, "learning_rate": 6.121605784026339e-06, "loss": 0.575, "step": 262 },
    { "epoch": 1.455719557195572, "grad_norm": 0.7039311923510397, "learning_rate": 6.09008534747213e-06, "loss": 0.6249, "step": 263 },
    { "epoch": 1.4612546125461254, "grad_norm": 0.7025974163318355, "learning_rate": 6.058519361147055e-06, "loss": 0.6097, "step": 264 },
    { "epoch": 1.466789667896679, "grad_norm": 0.6897172606752235, "learning_rate": 6.02690914405191e-06, "loss": 0.6459, "step": 265 },
    { "epoch": 1.4723247232472325, "grad_norm": 0.6775134380343946, "learning_rate": 5.995256017035703e-06, "loss": 0.5369, "step": 266 },
    { "epoch": 1.477859778597786, "grad_norm": 0.7387800906697658, "learning_rate": 5.9635613027404495e-06, "loss": 0.6659, "step": 267 },
    { "epoch": 1.4833948339483394, "grad_norm": 0.5992142954740902, "learning_rate": 5.931826325545912e-06, "loss": 0.6023, "step": 268 },
    { "epoch": 1.488929889298893, "grad_norm": 0.6869613178503449, "learning_rate": 5.900052411514257e-06, "loss": 0.5711, "step": 269 },
    { "epoch": 1.4944649446494465, "grad_norm": 0.6727011895310602, "learning_rate": 5.8682408883346535e-06, "loss": 0.5675, "step": 270 },
    { "epoch": 1.5, "grad_norm": 0.6728450572952694, "learning_rate": 5.836393085267777e-06, "loss": 0.6195, "step": 271 },
    { "epoch": 1.5055350553505535, "grad_norm": 0.6846152979341545, "learning_rate": 5.804510333090287e-06, "loss": 0.6213, "step": 272 },
    { "epoch": 1.511070110701107, "grad_norm": 0.6507562063592163, "learning_rate": 5.772593964039203e-06, "loss": 0.6403, "step": 273 },
    { "epoch": 1.5166051660516606, "grad_norm": 0.6432647166128416, "learning_rate": 5.740645311756246e-06, "loss": 0.6248, "step": 274 },
    { "epoch": 1.5221402214022142, "grad_norm": 0.6415893639295898, "learning_rate": 5.708665711232103e-06, "loss": 0.5917, "step": 275 },
    { "epoch": 1.5276752767527675, "grad_norm": 0.6033176569216087, "learning_rate": 5.6766564987506564e-06, "loss": 0.5589, "step": 276 },
    { "epoch": 1.533210332103321, "grad_norm": 0.7017492010981081, "learning_rate": 5.644619011833134e-06, "loss": 0.6301, "step": 277 },
    { "epoch": 1.5387453874538746, "grad_norm": 0.7655426961892206, "learning_rate": 5.612554589182228e-06, "loss": 0.6593, "step": 278 },
    { "epoch": 1.5442804428044279, "grad_norm": 0.7237077018760665, "learning_rate": 5.5804645706261515e-06, "loss": 0.6946, "step": 279 },
    { "epoch": 1.5498154981549814, "grad_norm": 0.6848993007754327, "learning_rate": 5.548350297062659e-06, "loss": 0.6106, "step": 280 },
    { "epoch": 1.555350553505535, "grad_norm": 0.6816964703469334, "learning_rate": 5.516213110403009e-06, "loss": 0.6959, "step": 281 },
    { "epoch": 1.5608856088560885, "grad_norm": 0.7121757712642759, "learning_rate": 5.484054353515896e-06, "loss": 0.6304, "step": 282 },
    { "epoch": 1.566420664206642, "grad_norm": 0.785585840187818, "learning_rate": 5.451875370171341e-06, "loss": 0.6969, "step": 283 },
    { "epoch": 1.5719557195571956, "grad_norm": 0.6929061668579762, "learning_rate": 5.419677504984534e-06, "loss": 0.6396, "step": 284 },
    { "epoch": 1.5774907749077491, "grad_norm": 0.6711279274727765, "learning_rate": 5.387462103359655e-06, "loss": 0.6007, "step": 285 },
    { "epoch": 1.5830258302583027, "grad_norm": 0.6361660730682954, "learning_rate": 5.3552305114336515e-06, "loss": 0.6057, "step": 286 },
    { "epoch": 1.5885608856088562, "grad_norm": 0.6937146439997242, "learning_rate": 5.32298407601999e-06, "loss": 0.5955, "step": 287 },
    { "epoch": 1.5940959409594095, "grad_norm": 0.6641404122154957, "learning_rate": 5.290724144552379e-06, "loss": 0.5694, "step": 288 },
    { "epoch": 1.599630996309963, "grad_norm": 0.6925213117648781, "learning_rate": 5.258452065028473e-06, "loss": 0.578, "step": 289 },
    { "epoch": 1.6051660516605166, "grad_norm": 0.6749065542362322, "learning_rate": 5.2261691859535325e-06, "loss": 0.6517, "step": 290 },
    { "epoch": 1.6107011070110702, "grad_norm": 0.6569945077124647, "learning_rate": 5.193876856284085e-06, "loss": 0.5928, "step": 291 },
    { "epoch": 1.6162361623616235, "grad_norm": 0.6611834974919497, "learning_rate": 5.161576425371554e-06, "loss": 0.5757, "step": 292 },
    { "epoch": 1.621771217712177, "grad_norm": 0.6443267893510465, "learning_rate": 5.1292692429058824e-06, "loss": 0.567, "step": 293 },
    { "epoch": 1.6273062730627306, "grad_norm": 0.7370247594136543, "learning_rate": 5.096956658859122e-06, "loss": 0.6723, "step": 294 },
    { "epoch": 1.632841328413284, "grad_norm": 0.6633312016915573, "learning_rate": 5.064640023429042e-06, "loss": 0.6907, "step": 295 },
    { "epoch": 1.6383763837638377, "grad_norm": 0.6796625855678173, "learning_rate": 5.032320686982697e-06, "loss": 0.6061, "step": 296 },
    { "epoch": 1.6439114391143912, "grad_norm": 0.6703488843908327, "learning_rate": 5e-06, "loss": 0.5949, "step": 297 },
    { "epoch": 1.6494464944649447, "grad_norm": 0.6879324468041449, "learning_rate": 4.967679313017304e-06, "loss": 0.6459, "step": 298 },
    { "epoch": 1.6549815498154983, "grad_norm": 0.6993913963211266, "learning_rate": 4.9353599765709585e-06, "loss": 0.5606, "step": 299 },
    { "epoch": 1.6605166051660518, "grad_norm": 0.6842342047928159, "learning_rate": 4.903043341140879e-06, "loss": 0.5859, "step": 300 },
    { "epoch": 1.6660516605166051, "grad_norm": 0.6414031000352342, "learning_rate": 4.870730757094121e-06, "loss": 0.6028, "step": 301 },
    { "epoch": 1.6715867158671587, "grad_norm": 0.6730213374583364, "learning_rate": 4.838423574628447e-06, "loss": 0.5758, "step": 302 },
    { "epoch": 1.6771217712177122, "grad_norm": 0.685094126165947, "learning_rate": 4.806123143715916e-06, "loss": 0.5783, "step": 303 },
    { "epoch": 1.6826568265682655, "grad_norm": 0.6832171407137465, "learning_rate": 4.773830814046469e-06, "loss": 0.5928, "step": 304 },
    { "epoch": 1.688191881918819, "grad_norm": 0.6333752235654893, "learning_rate": 4.741547934971528e-06, "loss": 0.5476, "step": 305 },
    { "epoch": 1.6937269372693726, "grad_norm": 0.7064803499084308, "learning_rate": 4.7092758554476215e-06, "loss": 0.6216, "step": 306 },
    { "epoch": 1.6992619926199262, "grad_norm": 0.6392420306364016, "learning_rate": 4.677015923980012e-06, "loss": 0.6679, "step": 307 },
    { "epoch": 1.7047970479704797, "grad_norm": 0.6494731361510496, "learning_rate": 4.644769488566351e-06, "loss": 0.5886, "step": 308 },
    { "epoch": 1.7103321033210332, "grad_norm": 0.6913018221886911, "learning_rate": 4.6125378966403465e-06, "loss": 0.6472, "step": 309 },
    { "epoch": 1.7158671586715868, "grad_norm": 0.640802770083508, "learning_rate": 4.580322495015466e-06, "loss": 0.5561, "step": 310 },
    { "epoch": 1.7214022140221403, "grad_norm": 0.6960948975174029, "learning_rate": 4.548124629828661e-06, "loss": 0.6236, "step": 311 },
    { "epoch": 1.7269372693726939, "grad_norm": 0.6389521774827648, "learning_rate": 4.515945646484105e-06, "loss": 0.5764, "step": 312 },
    { "epoch": 1.7324723247232472, "grad_norm": 0.6505284331128637, "learning_rate": 4.483786889596993e-06, "loss": 0.6177, "step": 313 },
    { "epoch": 1.7380073800738007, "grad_norm": 0.7322008377502368, "learning_rate": 4.451649702937343e-06, "loss": 0.7031, "step": 314 },
    { "epoch": 1.7435424354243543, "grad_norm": 0.667856136908068, "learning_rate": 4.4195354293738484e-06, "loss": 0.6017, "step": 315 },
    { "epoch": 1.7490774907749076, "grad_norm": 0.6167961116394701, "learning_rate": 4.387445410817774e-06, "loss": 0.5948, "step": 316 },
    { "epoch": 1.7546125461254611, "grad_norm": 0.6488335556890084, "learning_rate": 4.355380988166867e-06, "loss": 0.5784, "step": 317 },
    { "epoch": 1.7601476014760147, "grad_norm": 0.6433975581483212, "learning_rate": 4.323343501249346e-06, "loss": 0.5302, "step": 318 },
    { "epoch": 1.7656826568265682, "grad_norm": 0.6706524043913139, "learning_rate": 4.291334288767899e-06, "loss": 0.5922, "step": 319 },
    { "epoch": 1.7712177121771218, "grad_norm": 0.6591469335328922, "learning_rate": 4.259354688243758e-06, "loss": 0.5898, "step": 320 },
    { "epoch": 1.7767527675276753, "grad_norm": 0.6481007609954463, "learning_rate": 4.227406035960798e-06, "loss": 0.5922, "step": 321 },
    { "epoch": 1.7822878228782288, "grad_norm": 0.6650428282720707, "learning_rate": 4.195489666909714e-06, "loss": 0.6023, "step": 322 },
    { "epoch": 1.7878228782287824, "grad_norm": 0.636497135234757, "learning_rate": 4.163606914732224e-06, "loss": 0.5807, "step": 323 },
    { "epoch": 1.793357933579336, "grad_norm": 0.6574211931288638, "learning_rate": 4.131759111665349e-06, "loss": 0.5459, "step": 324 },
    { "epoch": 1.7988929889298892, "grad_norm": 0.6684303900701675, "learning_rate": 4.099947588485744e-06, "loss": 0.6271, "step": 325 },
    { "epoch": 1.8044280442804428, "grad_norm": 0.7115325195567614, "learning_rate": 4.06817367445409e-06, "loss": 0.6444, "step": 326 },
    { "epoch": 1.8099630996309963, "grad_norm": 0.6896195168622968, "learning_rate": 4.036438697259551e-06, "loss": 0.5971, "step": 327 },
    { "epoch": 1.8154981549815496, "grad_norm": 0.6687990794813583, "learning_rate": 4.004743982964298e-06, "loss": 0.6449, "step": 328 },
    { "epoch": 1.8210332103321032, "grad_norm": 0.6668553440261493, "learning_rate": 3.9730908559480904e-06, "loss": 0.5765, "step": 329 },
    { "epoch": 1.8265682656826567, "grad_norm": 0.658154260938912, "learning_rate": 3.941480638852948e-06, "loss": 0.6424, "step": 330 },
    { "epoch": 1.8321033210332103, "grad_norm": 0.7676795703743602, "learning_rate": 3.909914652527872e-06, "loss": 0.5954, "step": 331 },
    { "epoch": 1.8376383763837638, "grad_norm": 0.753359036105102, "learning_rate": 3.878394215973663e-06, "loss": 0.6623, "step": 332 },
    { "epoch": 1.8431734317343174, "grad_norm": 0.6328329983343084, "learning_rate": 3.8469206462878e-06, "loss": 0.5646, "step": 333 },
    { "epoch": 1.848708487084871, "grad_norm": 0.6522576574675761, "learning_rate": 3.815495258609404e-06, "loss": 0.5644, "step": 334 },
    { "epoch": 1.8542435424354244, "grad_norm": 0.6478750849380562, "learning_rate": 3.784119366064293e-06, "loss": 0.5752, "step": 335 },
    { "epoch": 1.859778597785978, "grad_norm": 0.6468377900933637, "learning_rate": 3.752794279710094e-06, "loss": 0.6167, "step": 336 },
    { "epoch": 1.8653136531365315, "grad_norm": 0.6738182100392553, "learning_rate": 3.721521308481483e-06, "loss": 0.6567, "step": 337 },
    { "epoch": 1.8708487084870848, "grad_norm": 0.6812077855923881, "learning_rate": 3.690301759135471e-06, "loss": 0.5609, "step": 338 },
    { "epoch": 1.8763837638376384, "grad_norm": 0.739726141586659, "learning_rate": 3.6591369361968127e-06, "loss": 0.6721, "step": 339 },
    { "epoch": 1.881918819188192, "grad_norm": 0.6632323020436683, "learning_rate": 3.6280281419034934e-06, "loss": 0.6158, "step": 340 },
    { "epoch": 1.8874538745387452, "grad_norm": 0.6554347656581475, "learning_rate": 3.596976676152306e-06, "loss": 0.6118, "step": 341 },
| { | |
| "epoch": 1.8929889298892988, | |
| "grad_norm": 0.6943502042805372, | |
| "learning_rate": 3.5659838364445505e-06, | |
| "loss": 0.6257, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 1.8985239852398523, | |
| "grad_norm": 0.7344400676037952, | |
| "learning_rate": 3.535050917831797e-06, | |
| "loss": 0.6154, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 1.9040590405904059, | |
| "grad_norm": 0.6960672172751073, | |
| "learning_rate": 3.504179212861793e-06, | |
| "loss": 0.6025, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 1.9095940959409594, | |
| "grad_norm": 0.6793261432569583, | |
| "learning_rate": 3.473370011524435e-06, | |
| "loss": 0.5547, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 1.915129151291513, | |
| "grad_norm": 0.6780588276365805, | |
| "learning_rate": 3.442624601197877e-06, | |
| "loss": 0.5933, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 1.9206642066420665, | |
| "grad_norm": 0.6333360607546321, | |
| "learning_rate": 3.4119442665947346e-06, | |
| "loss": 0.6066, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 1.92619926199262, | |
| "grad_norm": 0.6297918625305783, | |
| "learning_rate": 3.3813302897083955e-06, | |
| "loss": 0.6139, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 1.9317343173431736, | |
| "grad_norm": 0.65128547682773, | |
| "learning_rate": 3.350783949759462e-06, | |
| "loss": 0.6065, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 1.937269372693727, | |
| "grad_norm": 0.663702410189263, | |
| "learning_rate": 3.3203065231422904e-06, | |
| "loss": 0.5595, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.9428044280442804, | |
| "grad_norm": 0.7456337908851745, | |
| "learning_rate": 3.289899283371657e-06, | |
| "loss": 0.5635, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 1.948339483394834, | |
| "grad_norm": 0.6245107839051393, | |
| "learning_rate": 3.259563501029548e-06, | |
| "loss": 0.6149, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 1.9538745387453873, | |
| "grad_norm": 0.6389202208791118, | |
| "learning_rate": 3.2293004437120622e-06, | |
| "loss": 0.6318, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 1.9594095940959408, | |
| "grad_norm": 0.6408635995037254, | |
| "learning_rate": 3.1991113759764493e-06, | |
| "loss": 0.6557, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 1.9649446494464944, | |
| "grad_norm": 0.6610364476788523, | |
| "learning_rate": 3.1689975592882603e-06, | |
| "loss": 0.6132, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 1.970479704797048, | |
| "grad_norm": 0.6162945914008929, | |
| "learning_rate": 3.1389602519686515e-06, | |
| "loss": 0.5645, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 1.9760147601476015, | |
| "grad_norm": 0.6343892270612764, | |
| "learning_rate": 3.1090007091417884e-06, | |
| "loss": 0.5662, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 1.981549815498155, | |
| "grad_norm": 0.6456644070716252, | |
| "learning_rate": 3.0791201826824117e-06, | |
| "loss": 0.5729, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 1.9870848708487086, | |
| "grad_norm": 0.6692278565767988, | |
| "learning_rate": 3.049319921163526e-06, | |
| "loss": 0.5692, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 1.992619926199262, | |
| "grad_norm": 0.6513390055411725, | |
| "learning_rate": 3.019601169804216e-06, | |
| "loss": 0.5702, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.9981549815498156, | |
| "grad_norm": 0.8207020809923308, | |
| "learning_rate": 2.9899651704176324e-06, | |
| "loss": 0.712, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 2.003690036900369, | |
| "grad_norm": 0.8316443587754938, | |
| "learning_rate": 2.9604131613590825e-06, | |
| "loss": 0.6622, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 2.0092250922509227, | |
| "grad_norm": 0.6321179010589463, | |
| "learning_rate": 2.9309463774743047e-06, | |
| "loss": 0.5349, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 2.014760147601476, | |
| "grad_norm": 0.6623408052994709, | |
| "learning_rate": 2.901566050047855e-06, | |
| "loss": 0.558, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 2.0202952029520294, | |
| "grad_norm": 0.6148409552359754, | |
| "learning_rate": 2.8722734067516637e-06, | |
| "loss": 0.5131, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 2.025830258302583, | |
| "grad_norm": 0.628811924344138, | |
| "learning_rate": 2.843069671593734e-06, | |
| "loss": 0.5053, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 2.0313653136531364, | |
| "grad_norm": 0.7178060640719511, | |
| "learning_rate": 2.813956064866996e-06, | |
| "loss": 0.5215, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 2.03690036900369, | |
| "grad_norm": 0.64237638492648, | |
| "learning_rate": 2.784933803098326e-06, | |
| "loss": 0.5344, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 2.0424354243542435, | |
| "grad_norm": 0.660701484636481, | |
| "learning_rate": 2.7560040989976894e-06, | |
| "loss": 0.5522, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 2.047970479704797, | |
| "grad_norm": 0.6442004508033177, | |
| "learning_rate": 2.7271681614074973e-06, | |
| "loss": 0.5692, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 2.0535055350553506, | |
| "grad_norm": 0.701530505525817, | |
| "learning_rate": 2.6984271952520723e-06, | |
| "loss": 0.558, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 2.059040590405904, | |
| "grad_norm": 0.6677439032763148, | |
| "learning_rate": 2.6697824014873076e-06, | |
| "loss": 0.5507, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 2.0645756457564577, | |
| "grad_norm": 0.750156552596835, | |
| "learning_rate": 2.641234977050484e-06, | |
| "loss": 0.5605, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 2.0701107011070112, | |
| "grad_norm": 0.7047824557323877, | |
| "learning_rate": 2.6127861148102552e-06, | |
| "loss": 0.5489, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 2.0756457564575648, | |
| "grad_norm": 0.711760947816998, | |
| "learning_rate": 2.5844370035168077e-06, | |
| "loss": 0.5525, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 2.081180811808118, | |
| "grad_norm": 0.6970505989934466, | |
| "learning_rate": 2.5561888277521797e-06, | |
| "loss": 0.533, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 2.0867158671586714, | |
| "grad_norm": 0.6679637897186145, | |
| "learning_rate": 2.528042767880766e-06, | |
| "loss": 0.5421, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 2.092250922509225, | |
| "grad_norm": 0.6516580625237066, | |
| "learning_rate": 2.5000000000000015e-06, | |
| "loss": 0.5511, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 2.0977859778597785, | |
| "grad_norm": 0.6626636324231238, | |
| "learning_rate": 2.4720616958912054e-06, | |
| "loss": 0.5732, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 2.103321033210332, | |
| "grad_norm": 0.6723428723615049, | |
| "learning_rate": 2.4442290229706344e-06, | |
| "loss": 0.5727, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 2.1088560885608856, | |
| "grad_norm": 0.6599974503924196, | |
| "learning_rate": 2.4165031442406857e-06, | |
| "loss": 0.5238, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 2.114391143911439, | |
| "grad_norm": 0.7008041331642095, | |
| "learning_rate": 2.3888852182413087e-06, | |
| "loss": 0.4859, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 2.1199261992619927, | |
| "grad_norm": 0.7500710516966634, | |
| "learning_rate": 2.361376399001592e-06, | |
| "loss": 0.545, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 2.125461254612546, | |
| "grad_norm": 0.6693296360494665, | |
| "learning_rate": 2.333977835991545e-06, | |
| "loss": 0.5467, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 2.1309963099630997, | |
| "grad_norm": 0.6478267617206592, | |
| "learning_rate": 2.3066906740740626e-06, | |
| "loss": 0.5347, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 2.1365313653136533, | |
| "grad_norm": 0.7177671429812185, | |
| "learning_rate": 2.2795160534570866e-06, | |
| "loss": 0.623, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 2.142066420664207, | |
| "grad_norm": 0.6423839307947838, | |
| "learning_rate": 2.2524551096459703e-06, | |
| "loss": 0.5646, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 2.14760147601476, | |
| "grad_norm": 0.6793354954895883, | |
| "learning_rate": 2.2255089733960162e-06, | |
| "loss": 0.5316, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 2.1531365313653135, | |
| "grad_norm": 0.663196223424689, | |
| "learning_rate": 2.1986787706652377e-06, | |
| "loss": 0.5788, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 2.158671586715867, | |
| "grad_norm": 0.613244135525591, | |
| "learning_rate": 2.171965622567308e-06, | |
| "loss": 0.4808, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 2.1642066420664205, | |
| "grad_norm": 0.6636202632875322, | |
| "learning_rate": 2.1453706453247088e-06, | |
| "loss": 0.5427, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 2.169741697416974, | |
| "grad_norm": 0.7361578196639839, | |
| "learning_rate": 2.1188949502220987e-06, | |
| "loss": 0.555, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 2.1752767527675276, | |
| "grad_norm": 0.7185291233253194, | |
| "learning_rate": 2.0925396435598665e-06, | |
| "loss": 0.5743, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 2.180811808118081, | |
| "grad_norm": 0.6519229097148226, | |
| "learning_rate": 2.066305826607911e-06, | |
| "loss": 0.5818, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 2.1863468634686347, | |
| "grad_norm": 0.6790978922388056, | |
| "learning_rate": 2.0401945955596206e-06, | |
| "loss": 0.517, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 2.1918819188191883, | |
| "grad_norm": 0.6753695790228323, | |
| "learning_rate": 2.0142070414860704e-06, | |
| "loss": 0.5176, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 2.197416974169742, | |
| "grad_norm": 0.7265806499649696, | |
| "learning_rate": 1.9883442502904284e-06, | |
| "loss": 0.6055, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 2.2029520295202953, | |
| "grad_norm": 0.6457410100738195, | |
| "learning_rate": 1.962607302662582e-06, | |
| "loss": 0.5406, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 2.208487084870849, | |
| "grad_norm": 0.625524661837684, | |
| "learning_rate": 1.936997274033986e-06, | |
| "loss": 0.5076, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 2.2140221402214024, | |
| "grad_norm": 0.637857740685298, | |
| "learning_rate": 1.9115152345327154e-06, | |
| "loss": 0.5578, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 2.2195571955719555, | |
| "grad_norm": 0.6101593685630197, | |
| "learning_rate": 1.8861622489387555e-06, | |
| "loss": 0.4875, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 2.225092250922509, | |
| "grad_norm": 0.6518718553669549, | |
| "learning_rate": 1.8609393766395083e-06, | |
| "loss": 0.57, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 2.2306273062730626, | |
| "grad_norm": 0.6624958993518161, | |
| "learning_rate": 1.8358476715855262e-06, | |
| "loss": 0.5176, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 2.236162361623616, | |
| "grad_norm": 0.6719691100692955, | |
| "learning_rate": 1.8108881822464697e-06, | |
| "loss": 0.5313, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 2.2416974169741697, | |
| "grad_norm": 0.6736416728036624, | |
| "learning_rate": 1.7860619515673034e-06, | |
| "loss": 0.5625, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 2.2472324723247232, | |
| "grad_norm": 0.6422294559583493, | |
| "learning_rate": 1.7613700169247055e-06, | |
| "loss": 0.5705, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 2.2527675276752768, | |
| "grad_norm": 0.6603028481218244, | |
| "learning_rate": 1.7368134100837286e-06, | |
| "loss": 0.589, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 2.2583025830258303, | |
| "grad_norm": 0.5987866874874727, | |
| "learning_rate": 1.7123931571546826e-06, | |
| "loss": 0.4857, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 2.263837638376384, | |
| "grad_norm": 0.6594047301752323, | |
| "learning_rate": 1.6881102785502618e-06, | |
| "loss": 0.5477, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 2.2693726937269374, | |
| "grad_norm": 0.6196530000721581, | |
| "learning_rate": 1.6639657889429017e-06, | |
| "loss": 0.5506, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 2.274907749077491, | |
| "grad_norm": 0.6722537304232283, | |
| "learning_rate": 1.639960697222388e-06, | |
| "loss": 0.5797, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 2.280442804428044, | |
| "grad_norm": 0.615523426294855, | |
| "learning_rate": 1.6160960064536907e-06, | |
| "loss": 0.4981, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 2.2859778597785976, | |
| "grad_norm": 0.6568878327953758, | |
| "learning_rate": 1.5923727138350548e-06, | |
| "loss": 0.5263, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 2.291512915129151, | |
| "grad_norm": 0.6557176714698197, | |
| "learning_rate": 1.5687918106563326e-06, | |
| "loss": 0.5977, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 2.2970479704797047, | |
| "grad_norm": 0.6343521214286307, | |
| "learning_rate": 1.5453542822575624e-06, | |
| "loss": 0.4672, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 2.302583025830258, | |
| "grad_norm": 0.6160998844576888, | |
| "learning_rate": 1.52206110798779e-06, | |
| "loss": 0.5385, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 2.3081180811808117, | |
| "grad_norm": 0.6465234486304856, | |
| "learning_rate": 1.4989132611641576e-06, | |
| "loss": 0.5165, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 2.3136531365313653, | |
| "grad_norm": 0.6620294899666387, | |
| "learning_rate": 1.4759117090312197e-06, | |
| "loss": 0.5605, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 2.319188191881919, | |
| "grad_norm": 0.6822424027974909, | |
| "learning_rate": 1.453057412720536e-06, | |
| "loss": 0.5921, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 2.3247232472324724, | |
| "grad_norm": 0.6384847627809093, | |
| "learning_rate": 1.4303513272105057e-06, | |
| "loss": 0.5403, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 2.330258302583026, | |
| "grad_norm": 0.6719145192710272, | |
| "learning_rate": 1.4077944012864636e-06, | |
| "loss": 0.5854, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 2.3357933579335795, | |
| "grad_norm": 0.640451661360814, | |
| "learning_rate": 1.3853875775010355e-06, | |
| "loss": 0.517, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 2.341328413284133, | |
| "grad_norm": 0.6398126379640671, | |
| "learning_rate": 1.3631317921347564e-06, | |
| "loss": 0.5791, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 2.3468634686346865, | |
| "grad_norm": 0.6487742664028805, | |
| "learning_rate": 1.3410279751569399e-06, | |
| "loss": 0.5157, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 2.35239852398524, | |
| "grad_norm": 0.6671465550992883, | |
| "learning_rate": 1.3190770501868243e-06, | |
| "loss": 0.5588, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 2.357933579335793, | |
| "grad_norm": 0.6454481634549197, | |
| "learning_rate": 1.297279934454978e-06, | |
| "loss": 0.5026, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 2.3634686346863467, | |
| "grad_norm": 0.6127427807997947, | |
| "learning_rate": 1.2756375387649717e-06, | |
| "loss": 0.5454, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 2.3690036900369003, | |
| "grad_norm": 0.6438945248930303, | |
| "learning_rate": 1.25415076745532e-06, | |
| "loss": 0.545, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 2.374538745387454, | |
| "grad_norm": 0.6124407692016695, | |
| "learning_rate": 1.2328205183616964e-06, | |
| "loss": 0.5189, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 2.3800738007380073, | |
| "grad_norm": 0.6639137528285265, | |
| "learning_rate": 1.2116476827794104e-06, | |
| "loss": 0.549, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 2.385608856088561, | |
| "grad_norm": 0.6472668638709654, | |
| "learning_rate": 1.1906331454261704e-06, | |
| "loss": 0.5616, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 2.3911439114391144, | |
| "grad_norm": 0.6382738061476565, | |
| "learning_rate": 1.1697777844051105e-06, | |
| "loss": 0.5069, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 2.396678966789668, | |
| "grad_norm": 0.6095694583232508, | |
| "learning_rate": 1.1490824711681026e-06, | |
| "loss": 0.5096, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 2.4022140221402215, | |
| "grad_norm": 0.6389566278508614, | |
| "learning_rate": 1.1285480704793378e-06, | |
| "loss": 0.5554, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 2.407749077490775, | |
| "grad_norm": 0.6565575087635012, | |
| "learning_rate": 1.1081754403792e-06, | |
| "loss": 0.5289, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 2.4132841328413286, | |
| "grad_norm": 0.6298283487396668, | |
| "learning_rate": 1.0879654321484012e-06, | |
| "loss": 0.5732, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 2.4188191881918817, | |
| "grad_norm": 0.6546518202275479, | |
| "learning_rate": 1.067918890272419e-06, | |
| "loss": 0.5408, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 2.4243542435424352, | |
| "grad_norm": 0.6656591924546393, | |
| "learning_rate": 1.0480366524062041e-06, | |
| "loss": 0.551, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 2.4298892988929888, | |
| "grad_norm": 0.6298524026967492, | |
| "learning_rate": 1.0283195493391823e-06, | |
| "loss": 0.5011, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 2.4354243542435423, | |
| "grad_norm": 0.6525468703564422, | |
| "learning_rate": 1.008768404960535e-06, | |
| "loss": 0.5737, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 2.440959409594096, | |
| "grad_norm": 0.6544594096244687, | |
| "learning_rate": 9.893840362247809e-07, | |
| "loss": 0.5503, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 2.4464944649446494, | |
| "grad_norm": 0.6318395119148414, | |
| "learning_rate": 9.701672531176287e-07, | |
| "loss": 0.5224, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 2.452029520295203, | |
| "grad_norm": 0.6196921977020754, | |
| "learning_rate": 9.511188586221376e-07, | |
| "loss": 0.545, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 2.4575645756457565, | |
| "grad_norm": 0.6327900918490961, | |
| "learning_rate": 9.322396486851626e-07, | |
| "loss": 0.5424, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 2.46309963099631, | |
| "grad_norm": 0.6363476446254773, | |
| "learning_rate": 9.135304121840976e-07, | |
| "loss": 0.5538, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 2.4686346863468636, | |
| "grad_norm": 0.6313076689214548, | |
| "learning_rate": 8.949919308939081e-07, | |
| "loss": 0.5218, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 2.474169741697417, | |
| "grad_norm": 0.657993153154449, | |
| "learning_rate": 8.766249794544662e-07, | |
| "loss": 0.5321, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 2.4797047970479706, | |
| "grad_norm": 0.6418969902696497, | |
| "learning_rate": 8.584303253381848e-07, | |
| "loss": 0.5194, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 2.485239852398524, | |
| "grad_norm": 0.6043587008301916, | |
| "learning_rate": 8.404087288179425e-07, | |
| "loss": 0.5134, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 2.4907749077490777, | |
| "grad_norm": 0.6431540180087669, | |
| "learning_rate": 8.225609429353187e-07, | |
| "loss": 0.5348, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 2.496309963099631, | |
| "grad_norm": 0.6268189919104177, | |
| "learning_rate": 8.048877134691269e-07, | |
| "loss": 0.5043, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 2.5018450184501844, | |
| "grad_norm": 0.6178939835577518, | |
| "learning_rate": 7.873897789042523e-07, | |
| "loss": 0.5165, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 2.507380073800738, | |
| "grad_norm": 0.6747581351563309, | |
| "learning_rate": 7.700678704007947e-07, | |
| "loss": 0.6184, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 2.5129151291512914, | |
| "grad_norm": 0.6379894631440596, | |
| "learning_rate": 7.529227117635135e-07, | |
| "loss": 0.5297, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 2.518450184501845, | |
| "grad_norm": 0.6345560431082808, | |
| "learning_rate": 7.35955019411585e-07, | |
| "loss": 0.5723, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 2.5239852398523985, | |
| "grad_norm": 0.7041260690229593, | |
| "learning_rate": 7.191655023486682e-07, | |
| "loss": 0.5518, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 2.529520295202952, | |
| "grad_norm": 0.6596174352194049, | |
| "learning_rate": 7.02554862133275e-07, | |
| "loss": 0.5507, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 2.5350553505535056, | |
| "grad_norm": 0.6491422253781083, | |
| "learning_rate": 6.86123792849458e-07, | |
| "loss": 0.5146, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 2.540590405904059, | |
| "grad_norm": 0.6702481216161904, | |
| "learning_rate": 6.698729810778065e-07, | |
| "loss": 0.5837, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 2.5461254612546127, | |
| "grad_norm": 0.6189552978352973, | |
| "learning_rate": 6.53803105866761e-07, | |
| "loss": 0.5343, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 2.551660516605166, | |
| "grad_norm": 0.6835144543692675, | |
| "learning_rate": 6.379148387042317e-07, | |
| "loss": 0.5462, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 2.5571955719557193, | |
| "grad_norm": 0.6464350965970691, | |
| "learning_rate": 6.222088434895462e-07, | |
| "loss": 0.5381, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 2.562730627306273, | |
| "grad_norm": 0.6957103842105568, | |
| "learning_rate": 6.066857765057055e-07, | |
| "loss": 0.6151, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 2.5682656826568264, | |
| "grad_norm": 0.6030789961609527, | |
| "learning_rate": 5.9134628639196e-07, | |
| "loss": 0.5061, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 2.57380073800738, | |
| "grad_norm": 0.6317615119983371, | |
| "learning_rate": 5.76191014116711e-07, | |
| "loss": 0.507, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 2.5793357933579335, | |
| "grad_norm": 0.6435467983289933, | |
| "learning_rate": 5.612205929507209e-07, | |
| "loss": 0.5501, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 2.584870848708487, | |
| "grad_norm": 0.6358149902010037, | |
| "learning_rate": 5.464356484406535e-07, | |
| "loss": 0.5211, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 2.5904059040590406, | |
| "grad_norm": 0.6195286275874121, | |
| "learning_rate": 5.318367983829393e-07, | |
| "loss": 0.5032, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 2.595940959409594, | |
| "grad_norm": 0.6692400503094423, | |
| "learning_rate": 5.174246527979532e-07, | |
| "loss": 0.5574, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 2.6014760147601477, | |
| "grad_norm": 0.6342700435947363, | |
| "learning_rate": 5.031998139045352e-07, | |
| "loss": 0.5523, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 2.607011070110701, | |
| "grad_norm": 0.6412069996618961, | |
| "learning_rate": 4.891628760948114e-07, | |
| "loss": 0.5423, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 2.6125461254612548, | |
| "grad_norm": 0.628500100341704, | |
| "learning_rate": 4.753144259093734e-07, | |
| "loss": 0.5451, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 2.6180811808118083, | |
| "grad_norm": 0.5904702310704353, | |
| "learning_rate": 4.6165504201275635e-07, | |
| "loss": 0.4915, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 2.623616236162362, | |
| "grad_norm": 0.7363244318199392, | |
| "learning_rate": 4.481852951692672e-07, | |
| "loss": 0.6614, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 2.6291512915129154, | |
| "grad_norm": 0.6437098900248531, | |
| "learning_rate": 4.349057482191299e-07, | |
| "loss": 0.5644, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 2.6346863468634685, | |
| "grad_norm": 0.6735179848745367, | |
| "learning_rate": 4.2181695605497066e-07, | |
| "loss": 0.6371, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 2.640221402214022, | |
| "grad_norm": 0.6166395492738032, | |
| "learning_rate": 4.089194655986306e-07, | |
| "loss": 0.5439, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 2.6457564575645756, | |
| "grad_norm": 0.5978015771749204, | |
| "learning_rate": 3.9621381577830855e-07, | |
| "loss": 0.4831, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 2.651291512915129, | |
| "grad_norm": 0.6462673792890513, | |
| "learning_rate": 3.837005375060482e-07, | |
| "loss": 0.597, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 2.6568265682656826, | |
| "grad_norm": 0.6130295271007709, | |
| "learning_rate": 3.7138015365554834e-07, | |
| "loss": 0.5106, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 2.662361623616236, | |
| "grad_norm": 0.6671222024328364, | |
| "learning_rate": 3.592531790403159e-07, | |
| "loss": 0.5363, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 2.6678966789667897, | |
| "grad_norm": 0.6138396843380316, | |
| "learning_rate": 3.473201203921578e-07, | |
| "loss": 0.5469, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 2.6734317343173433, | |
| "grad_norm": 0.6660598208965025, | |
| "learning_rate": 3.355814763399973e-07, | |
| "loss": 0.581, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 2.678966789667897, | |
| "grad_norm": 0.6108073985826324, | |
| "learning_rate": 3.2403773738905185e-07, | |
| "loss": 0.5273, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 2.6845018450184504, | |
| "grad_norm": 0.7379112157721128, | |
| "learning_rate": 3.1268938590032495e-07, | |
| "loss": 0.5626, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 2.6900369003690034, | |
| "grad_norm": 0.6732052466349552, | |
| "learning_rate": 3.015368960704584e-07, | |
| "loss": 0.5891, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 2.695571955719557, | |
| "grad_norm": 0.6659946641749191, | |
| "learning_rate": 2.905807339119138e-07, | |
| "loss": 0.5258, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 2.7011070110701105, | |
| "grad_norm": 0.6952105961310837, | |
| "learning_rate": 2.798213572335001e-07, | |
| "loss": 0.5581, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 2.706642066420664, | |
| "grad_norm": 0.6365348737573899, | |
| "learning_rate": 2.6925921562124867e-07, | |
| "loss": 0.5311, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 2.7121771217712176, | |
| "grad_norm": 0.5766266659794174, | |
| "learning_rate": 2.5889475041961767e-07, | |
| "loss": 0.4352, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 2.717712177121771, | |
| "grad_norm": 0.7319731694850389, | |
| "learning_rate": 2.487283947130609e-07, | |
| "loss": 0.5061, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 2.7232472324723247, | |
| "grad_norm": 0.6859080435441426, | |
| "learning_rate": 2.3876057330792344e-07, | |
| "loss": 0.55, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 2.7287822878228782, | |
| "grad_norm": 0.6487814667155516, | |
| "learning_rate": 2.289917027146943e-07, | |
| "loss": 0.5418, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 2.734317343173432, | |
| "grad_norm": 0.6761313635926454, | |
| "learning_rate": 2.1942219113060215e-07, | |
| "loss": 0.5819, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 2.7398523985239853, | |
| "grad_norm": 0.6242690303129593, | |
| "learning_rate": 2.1005243842255552e-07, | |
| "loss": 0.5655, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 2.745387453874539, | |
| "grad_norm": 0.6222359002332329, | |
| "learning_rate": 2.0088283611044034e-07, | |
| "loss": 0.5279, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 2.7509225092250924, | |
| "grad_norm": 0.6148480903080046, | |
| "learning_rate": 1.919137673507543e-07, | |
| "loss": 0.5385, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 2.756457564575646, | |
| "grad_norm": 0.6004801018266677, | |
| "learning_rate": 1.8314560692059836e-07, | |
| "loss": 0.4995, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 2.7619926199261995, | |
| "grad_norm": 0.6362099075257641, | |
| "learning_rate": 1.745787212020178e-07, | |
| "loss": 0.5248, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 2.767527675276753, | |
| "grad_norm": 0.6405183086515418, | |
| "learning_rate": 1.6621346816668993e-07, | |
| "loss": 0.5456, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.773062730627306, | |
| "grad_norm": 0.6383996319685834, | |
| "learning_rate": 1.5805019736097105e-07, | |
| "loss": 0.5484, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 2.7785977859778597, | |
| "grad_norm": 0.6484780190882176, | |
| "learning_rate": 1.500892498912826e-07, | |
| "loss": 0.5614, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 2.784132841328413, | |
| "grad_norm": 0.6104614408300383, | |
| "learning_rate": 1.4233095840986756e-07, | |
| "loss": 0.53, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 2.7896678966789668, | |
| "grad_norm": 0.602450823572722, | |
| "learning_rate": 1.3477564710088097e-07, | |
| "loss": 0.4869, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 2.7952029520295203, | |
| "grad_norm": 0.6213278708400987, | |
| "learning_rate": 1.2742363166685035e-07, | |
| "loss": 0.5867, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 2.800738007380074, | |
| "grad_norm": 0.6572722468206588, | |
| "learning_rate": 1.2027521931548214e-07, | |
| "loss": 0.5681, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 2.8062730627306274, | |
| "grad_norm": 0.6458832457141599, | |
| "learning_rate": 1.1333070874682217e-07, | |
| "loss": 0.5781, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 2.811808118081181, | |
| "grad_norm": 0.6173074533807742, | |
| "learning_rate": 1.0659039014077943e-07, | |
| "loss": 0.5485, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 2.8173431734317345, | |
| "grad_norm": 0.576088238404349, | |
| "learning_rate": 1.0005454514499413e-07, | |
| "loss": 0.4937, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 2.8228782287822876, | |
| "grad_norm": 0.6463368067791495, | |
| "learning_rate": 9.372344686307655e-08, | |
| "loss": 0.5686, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 2.828413284132841, | |
| "grad_norm": 0.6380928953673762, | |
| "learning_rate": 8.759735984318896e-08, | |
| "loss": 0.4994, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 2.8339483394833946, | |
| "grad_norm": 0.6305602061857106, | |
| "learning_rate": 8.167654006699444e-08, | |
| "loss": 0.5506, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 2.839483394833948, | |
| "grad_norm": 0.6225751887694984, | |
| "learning_rate": 7.59612349389599e-08, | |
| "loss": 0.5042, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 2.8450184501845017, | |
| "grad_norm": 0.6644219206184678, | |
| "learning_rate": 7.04516832760177e-08, | |
| "loss": 0.5969, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 2.8505535055350553, | |
| "grad_norm": 0.6453523828993342, | |
| "learning_rate": 6.514811529758747e-08, | |
| "loss": 0.5817, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 2.856088560885609, | |
| "grad_norm": 0.6229940991961801, | |
| "learning_rate": 6.005075261595495e-08, | |
| "loss": 0.537, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 2.8616236162361623, | |
| "grad_norm": 0.6614904718651109, | |
| "learning_rate": 5.515980822701439e-08, | |
| "loss": 0.5757, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 2.867158671586716, | |
| "grad_norm": 0.6494054996388359, | |
| "learning_rate": 5.047548650136513e-08, | |
| "loss": 0.5035, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 2.8726937269372694, | |
| "grad_norm": 0.5994286550317681, | |
| "learning_rate": 4.599798317577342e-08, | |
| "loss": 0.4803, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 2.878228782287823, | |
| "grad_norm": 0.6745239619202922, | |
| "learning_rate": 4.172748534499449e-08, | |
| "loss": 0.5271, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 2.8837638376383765, | |
| "grad_norm": 0.626628102926196, | |
| "learning_rate": 3.766417145395218e-08, | |
| "loss": 0.5359, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 2.88929889298893, | |
| "grad_norm": 0.6508218671654497, | |
| "learning_rate": 3.3808211290284886e-08, | |
| "loss": 0.5114, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 2.8948339483394836, | |
| "grad_norm": 0.5951817188696978, | |
| "learning_rate": 3.015976597725068e-08, | |
| "loss": 0.5294, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 2.900369003690037, | |
| "grad_norm": 0.671303151438165, | |
| "learning_rate": 2.6718987966992683e-08, | |
| "loss": 0.5213, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 2.9059040590405907, | |
| "grad_norm": 0.676031871740552, | |
| "learning_rate": 2.3486021034170857e-08, | |
| "loss": 0.5459, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 2.911439114391144, | |
| "grad_norm": 0.6208018052133741, | |
| "learning_rate": 2.0461000269953457e-08, | |
| "loss": 0.5358, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 2.9169741697416973, | |
| "grad_norm": 0.6381166866678656, | |
| "learning_rate": 1.7644052076371544e-08, | |
| "loss": 0.5146, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 2.922509225092251, | |
| "grad_norm": 0.6198869071038576, | |
| "learning_rate": 1.5035294161039882e-08, | |
| "loss": 0.5176, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 2.9280442804428044, | |
| "grad_norm": 0.6701848507204047, | |
| "learning_rate": 1.2634835532233658e-08, | |
| "loss": 0.5598, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 2.933579335793358, | |
| "grad_norm": 0.6207812803170829, | |
| "learning_rate": 1.044277649433989e-08, | |
| "loss": 0.4992, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 2.9391143911439115, | |
| "grad_norm": 0.6255257686553812, | |
| "learning_rate": 8.459208643659122e-09, | |
| "loss": 0.474, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 2.944649446494465, | |
| "grad_norm": 0.6636870576719867, | |
| "learning_rate": 6.6842148645840374e-09, | |
| "loss": 0.5284, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 2.9501845018450186, | |
| "grad_norm": 0.6474860377915387, | |
| "learning_rate": 5.11786932613223e-09, | |
| "loss": 0.5327, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 2.955719557195572, | |
| "grad_norm": 0.6112767808361741, | |
| "learning_rate": 3.760237478849793e-09, | |
| "loss": 0.5189, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 2.961254612546125, | |
| "grad_norm": 0.680320025195111, | |
| "learning_rate": 2.611376052073511e-09, | |
| "loss": 0.55, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 2.9667896678966788, | |
| "grad_norm": 0.6580418332010902, | |
| "learning_rate": 1.6713330515627512e-09, | |
| "loss": 0.5855, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 2.9723247232472323, | |
| "grad_norm": 0.6263030989787438, | |
| "learning_rate": 9.401477574932927e-10, | |
| "loss": 0.5423, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 2.977859778597786, | |
| "grad_norm": 0.6000757044114218, | |
| "learning_rate": 4.178507228136397e-10, | |
| "loss": 0.4987, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 2.9833948339483394, | |
| "grad_norm": 0.6436284868599454, | |
| "learning_rate": 1.0446377197104174e-10, | |
| "loss": 0.5854, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 2.988929889298893, | |
| "grad_norm": 0.6261144170087621, | |
| "learning_rate": 0.0, | |
| "loss": 0.5354, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 2.988929889298893, | |
| "step": 540, | |
| "total_flos": 110732096700416.0, | |
| "train_loss": 0.6148645067104587, | |
| "train_runtime": 2111.1531, | |
| "train_samples_per_second": 24.628, | |
| "train_steps_per_second": 0.256 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 540, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 110732096700416.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
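
The JSON above appears to be the `trainer_state.json` that the Hugging Face `Trainer` writes next to a checkpoint: `log_history` holds one record per logged optimizer step (loss, learning rate, gradient norm), and the closing entry plus the top-level fields summarize the run. As a minimal illustration only (the file name and path `trainer_state.json` are assumptions, and this snippet is not part of the checkpoint itself), a few lines of standard-library Python are enough to load the state and report the headline figures:

```python
# Minimal sketch: read a Trainer state file like the one above and print
# a few summary figures. Assumes it is saved as "trainer_state.json".
import json

with open("trainer_state.json") as f:
    state = json.load(f)

# Per-step records carry "loss" and "learning_rate"; the final entry of
# log_history holds run totals (train_loss, train_runtime, ...) instead.
steps = [e for e in state["log_history"] if "loss" in e and "learning_rate" in e]
print(f"logged steps: {len(steps)} of max_steps={state['max_steps']}")
print(f"first loss: {steps[0]['loss']:.4f}, last loss: {steps[-1]['loss']:.4f}")

summary = state["log_history"][-1]
print(f"train_loss: {summary.get('train_loss')}, "
      f"train_runtime (s): {summary.get('train_runtime')}")
```

For this run the printout would reflect the figures in the closing entry above: 540 logged steps, an average train_loss of about 0.615, and a runtime of roughly 2,111 seconds.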